// CUDA C++ source (the "PHP Code:" label originally on this line was a forum code-tag artifact).
#include <iostream>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <fstream>
using namespace std;
// Maps logical index k of the concatenation [first_array | second_array]
// to the storage slot that holds it.
static inline __host__ __device__ int & merge_slot (int * first_array, int size1, int * second_array, int k){
    return (k < size1) ? first_array[k] : second_array[k - size1];
}

// Standard two-way merge, done "in place" across two ranges.
//
// Preconditions: first_array[0..size1) and second_array[0..size2) are each
// sorted in ascending order and do not overlap.
// Postcondition: the size1 smallest values are stored (ascending) in
// first_array and the remaining size2 values (ascending) in second_array.
//
// Parameters:
//   first_array / size1   - first sorted range and its element count
//   second_array / size2  - second sorted range and its element count
//
// The function is callable from both host and device code so the same
// routine can serve as a CPU reference. Scratch space comes from malloc();
// if that allocation fails the merge still completes using a slower,
// allocation-free insertion strategy instead of writing through NULL.
__host__ __device__ void merge (int * first_array, int size1, int * second_array, int size2){
    // An empty (or nonsensical negative) range means there is nothing to interleave.
    if (size1 <= 0 || size2 <= 0) {
        return;
    }

    // Already in final order: every element of the first range is <= the
    // smallest element of the second range.
    if (first_array[size1 - 1] <= second_array[0]) {
        return;
    }

    // Only the first range has to be copied aside: output slot k = i + j never
    // overtakes the unread part of second_array while i < size1.
    int * scratch = (int *) malloc(size1 * sizeof(int));
    if (scratch != NULL) {
        for (int i = 0; i < size1; ++i) {
            scratch[i] = first_array[i];
        }

        int i = 0;  // next unread element of scratch (copy of the first range)
        int j = 0;  // next unread element of second_array
        int k = 0;  // next logical output slot
        while (i < size1 && j < size2) {
            int value;
            if (scratch[i] <= second_array[j]) {
                value = scratch[i];
                ++i;
            } else {
                value = second_array[j];
                ++j;
            }
            merge_slot(first_array, size1, second_array, k) = value;
            ++k;
        }
        // Drain what is left of the first range. Leftover elements of the
        // second range are already sitting in their final slots (k == size1 + j).
        while (i < size1) {
            merge_slot(first_array, size1, second_array, k) = scratch[i];
            ++i;
            ++k;
        }

        free(scratch);
        return;
    }

    // Allocation failed: insert each element of the second range into the
    // already-merged logical prefix. O(size1 * size2) but needs no extra memory.
    for (int j = 0; j < size2; ++j) {
        const int value = second_array[j];
        const int start = size1 + j;
        int k = start;
        while (k > 0 && merge_slot(first_array, size1, second_array, k - 1) > value) {
            merge_slot(first_array, size1, second_array, k) =
                merge_slot(first_array, size1, second_array, k - 1);
            --k;
        }
        if (k == start) {
            // This element did not move, so every later (larger) one is in place too.
            break;
        }
        merge_slot(first_array, size1, second_array, k) = value;
    }
}
// One merge pass of a bottom-up merge sort.
//
// Block b works on the window of `factor` ints that starts at
// pointer + b * factor and merges its lower and upper halves (each factor/2
// elements) via merge(). The halves are presumably already sorted by the
// previous pass - merge() itself does not verify this.
//
// threadIdx is not consulted, so every thread of a block would perform the
// same merge; the launch in main() uses exactly one thread per block.
__global__ void mergesort(int * pointer, int factor){
    const int half = factor / 2;                                // length of each run
    int * const lower_run = pointer + ( blockIdx.x * factor );  // start of this block's window
    int * const upper_run = lower_run + half;                   // second run follows directly
    merge( lower_run, half, upper_run, half );
}
// Reports a failed CUDA runtime call on stderr; returns true when status is cudaSuccess.
static bool cuda_ok (cudaError_t status, const char * what){
    if (status == cudaSuccess) {
        return true;
    }
    cerr << what << " failed: " << cudaGetErrorString(status) << endl;
    return false;
}

// Sorts host_data[0..n) on the GPU with log2(n) mergesort passes.
// Returns false (after reporting the error) if any CUDA call fails; the
// device buffer is released on every path.
static bool sort_on_device (int * host_data, int n){
    const size_t bytes = (size_t) n * sizeof(int);
    int * d_numeri = NULL;

    // allocate n ints on the GPU and upload the unsorted data
    bool ok = cuda_ok(cudaMalloc((void **)&d_numeri, bytes), "cudaMalloc");
    ok = ok && cuda_ok(cudaMemcpy(d_numeri, host_data, bytes, cudaMemcpyHostToDevice),
                       "cudaMemcpy (host to device)");

    // Each pass merges adjacent runs of width/2 into runs of `width`;
    // one block (with a single thread) per pair of runs.
    for (int width = 2; ok && width <= n; width = width * 2) {
        mergesort<<<(n / width), 1>>>(d_numeri, width);
        // A kernel launch returns nothing; bad launch configurations only
        // show up through cudaGetLastError().
        ok = cuda_ok(cudaGetLastError(), "mergesort launch");
    }

    // The blocking copy waits for the queued kernels, so errors raised while
    // they ran are reported here.
    ok = ok && cuda_ok(cudaMemcpy(host_data, d_numeri, bytes, cudaMemcpyDeviceToHost),
                       "cudaMemcpy (device to host)");

    cudaFree(d_numeri);  // no-op when the allocation never succeeded (pointer still NULL)
    return ok;
}

// Fills an array with random values, sorts it on the GPU and writes the data
// before and after sorting to data.txt (text) plus the sorted data to
// data_bin.txt (raw ints). Returns 0 on success, EXIT_FAILURE otherwise.
int main( void ) {
    fstream fileout ("data.txt",fstream::out); // file to write the non-ordered and ordered array
    fstream fileout_bin ("data_bin.txt",fstream::out | fstream::binary ); // ordered array in binary mode
    if (!fileout || !fileout_bin) {
        cerr << "cannot open data.txt / data_bin.txt for writing" << endl;
        return EXIT_FAILURE;
    }

    srand (time(NULL));

    // The pass structure (width = 2, 4, ..., n) only yields a fully sorted
    // array when n is a power of two.
    const int n = 32768;
    int * numeri = new int[n];
    for (int i = 0; i < n; ++i) {
        numeri[i] = rand() % 2000 + rand() % 2000 + rand() % 2000; // fill the array with random data
    }

    // '\n' instead of endl: same bytes in the file, without a flush per line
    fileout << "pre" << '\n'; // print to file the non-ordered array
    for (int i = 0; i < n; ++i) {
        fileout << numeri[i] << '\n';
    }

    const bool sorted = sort_on_device(numeri, n);
    if (sorted) {
        fileout << "after" << '\n'; // write in file the ordered array
        for (int i = 0; i < n; ++i) {
            fileout << numeri[i] << '\n';
        }
        fileout_bin.write((char *) numeri, n * sizeof(int)); // write to file the ordered array in binary mode
    }

    delete [] numeri;
    return sorted ? 0 : EXIT_FAILURE;
}