MergeSort - N/2 to 1 blocks - 1 thread per block - Int array with size = 2^n

CUDA C++ Code:

#include <iostream>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <fstream>

using namespace std;

// Merges two individually sorted int runs so that, read as first_array
// followed by second_array, the combined sequence is in ascending order.
//
// Parameters:
//   first_array  - start of the first sorted run
//   size1        - number of elements in the first run
//   second_array - start of the second sorted run
//   size2        - number of elements in the second run
//
// The runs do not have to be adjacent in memory, but they must not overlap.
// The function is callable from both host and device code; in device code
// the scratch buffer comes from the in-kernel malloc heap, which can run out,
// so an allocation failure falls back to a slower merge that needs no
// extra memory instead of dereferencing a null pointer.
__host__ __device__ void merge (int * first_array, int size1, int * second_array, int size2){

    // With an empty run the other run is already the merged result.
    if (size1 <= 0 || size2 <= 0) {
        return;
    }

    // Do the byte-count arithmetic in size_t so large runs cannot overflow int.
    const size_t total = (size_t) size1 + (size_t) size2;
    int * scratch = (int *) malloc(total * sizeof(int));

    if (scratch == NULL) {
        // Fallback without scratch memory: keep the smallest remaining value
        // of the second run at its front, swap it into the first run whenever
        // it is smaller, and re-insert the displaced value into the second run.
        for (int i = 0; i < size1; ++i) {
            if (first_array[i] > second_array[0]) {
                const int displaced = first_array[i];
                first_array[i] = second_array[0];

                int j = 1;
                while (j < size2 && second_array[j] < displaced) {
                    second_array[j - 1] = second_array[j];
                    ++j;
                }
                second_array[j - 1] = displaced;
            }
        }
        return;
    }

    int i = 0;
    int j = 0;
    size_t k = 0;

    // Take the smaller head element each time; <= keeps equal values from
    // the first run ahead of those from the second run.
    while (i < size1 && j < size2) {
        if (first_array[i] <= second_array[j]) {
            scratch[k++] = first_array[i++];
        } else {
            scratch[k++] = second_array[j++];
        }
    }
    // At most one of these two loops still has work to do.
    while (i < size1) {
        scratch[k++] = first_array[i++];
    }
    while (j < size2) {
        scratch[k++] = second_array[j++];
    }

    // Write the merged sequence back: the first size1 values go to the first
    // run, the remaining size2 values to the second run.
    for (i = 0; i < size1; ++i) {
        first_array[i] = scratch[i];
    }
    for (j = 0; j < size2; ++j) {
        second_array[j] = scratch[(size_t) size1 + j];
    }

    free (scratch);
}


// One pass of the bottom-up merge sort.
//
// Each block owns the slice of `factor` consecutive ints that starts at
// pointer + blockIdx.x * factor and merges the two halves of that slice
// (each factor/2 elements long) into one sorted run.
//
// Launch layout: one block per slice. The kernel never reads threadIdx, so
// every thread of a block would repeat the same merge; launch it with a
// single thread per block.
__global__ void mergesort(int * pointer, int factor){

    const int half_width = factor / 2;

    // Start of the slice handled by this block.
    int * slice_begin = pointer + ( blockIdx.x * factor);
    int * slice_middle = slice_begin + half_width;

    merge( slice_begin, half_width, slice_middle, half_width );
}




// Stops the program with a diagnostic when a CUDA runtime call has failed.
// `what` names the operation for the error message.
static void check_cuda (cudaError_t status, const char * what){
    if (status != cudaSuccess) {
        cerr << "CUDA error during " << what << ": " << cudaGetErrorString(status) << endl;
        exit(EXIT_FAILURE);
    }
}

// Fills an array with random ints, sorts it on the GPU with a bottom-up merge
// sort (one kernel launch per run width) and writes the array before and
// after sorting to data.txt, plus the sorted array in raw form to
// data_bin.txt. Returns 0 on success; exits with a failure status if a file
// cannot be opened or a CUDA call fails.
int main( void ) {

    fstream fileout ("data.txt",fstream::out); // file to write the non-ordered and ordered array
    fstream fileout_bin ("data_bin.txt",fstream::out | fstream::binary ); // ordered array in binary mode
    if (!fileout || !fileout_bin) {
        cerr << "unable to open the output files" << endl;
        return EXIT_FAILURE;
    }

    srand (time(NULL));

    // The launch loop below halves the block count each pass, so n has to be
    // a power of two.
    const int n = 32768;
    const size_t bytes = n * sizeof(int);
    int * numeri = new int[n];

    for (int i = 0; i < n; ++i){
        numeri[i] = rand() % 2000 +  rand() % 2000 + rand() % 2000;  //fill the array with random data
    }

    fileout << "pre" << endl; // print to file the non-ordered array
    for (int i = 0; i < n; ++i){
        fileout << numeri[i]<< endl;
    }

    int* d_numeri = NULL;
    check_cuda(cudaMalloc((void **)&d_numeri, bytes), "cudaMalloc"); // allocating n*sizeof(int) bytes on gpu ram

    check_cuda(cudaMemcpy(d_numeri, numeri, bytes, cudaMemcpyHostToDevice),
               "host to device copy"); // copy array from host to gpu

    // Pass with run width i merges pairs of sorted runs of i/2 elements;
    // launches on the same stream run in order, so each pass sees the
    // result of the previous one.
    for (int i = 2; i <= n; i = i*2) {
        mergesort<<<(n/i),1>>>(d_numeri,i); // launch instances of mergesort and run in parallel
        check_cuda(cudaGetLastError(), "mergesort launch"); // a launch itself reports no error
    }

    // Surface errors raised while the kernels were executing.
    check_cuda(cudaDeviceSynchronize(), "mergesort execution");

    check_cuda(cudaMemcpy(numeri, d_numeri, bytes, cudaMemcpyDeviceToHost),
               "device to host copy"); // copy the ordered array from device to host

    check_cuda(cudaFree(d_numeri), "cudaFree");

    fileout << "after" << endl; // write in file the ordered array
    for (int i = 0; i < n; ++i){
        fileout << numeri[i]<< endl;
    }

    fileout_bin.write((char *) numeri, bytes); // write to file the ordered array in binary mode

    delete[] numeri;

    return 0;
}
Announcement

MergeSort - N/2 to 1 blocks - 1 thread per block - Int array with size = 2^n

MergeSort - N/2 to 1 blocks - 1 thread per block - Int array with size = 2^n

Google Profile