<?xml version="1.0" encoding="windows-1252"?>
<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:content="http://purl.org/rss/1.0/modules/content/">
	<channel>
		<!-- Channel identity: feed title, the forum URL the feed belongs to, and a short tagline. -->
		<title>Giovanni Di Grezia - VFX Artist Forum - CUDA C / C++</title>
		<link>https://www.xgiovio.com/forum/</link>
		<description>The parallel computing. The future</description>
		<language>en</language>
		<!-- Last time the channel content changed (RFC 822 date format, as RSS 2.0 requires). -->
		<lastBuildDate>Sat, 04 Apr 2026 21:44:36 GMT</lastBuildDate>
		<generator>vBulletin</generator>
		<!-- Time to live: number of minutes a reader may cache this feed before refreshing it. -->
		<ttl>60</ttl>
		<!-- Channel logo. Feed readers have no base URI to resolve a relative path against,
		     so <url> is absolute (resolved against the channel <link>). <title> and <link>
		     mirror the channel's own values, as RSS 2.0 recommends. -->
		<image>
			<url>https://www.xgiovio.com/forum/images/misc/rss.png</url>
			<title>Giovanni Di Grezia - VFX Artist Forum - CUDA C / C++</title>
			<link>https://www.xgiovio.com/forum/</link>
		</image>
		<item>
			<title>Dot Product - Shared cache, reduction and final partial sum on host</title>
			<link>https://www.xgiovio.com/forum/forum/programming/cuda-c-c/446-dot-product-shared-cache-reduction-and-final-partial-sum-on-host</link>
			<pubDate>Sat, 16 Nov 2013 13:02:35 GMT</pubDate>
			<description><![CDATA[#include &lt;iostream&gt; 
 
#define minimo(a,b) a&lt;b?a:b 
#define MAXBLOCKS 32 
#define NTHREADS 256 // must be a power of 2 
 
 
__global__ void dot (int...]]></description>
			<content:encoded><![CDATA[
<div class="bbcode_container">
	<div class="bbcode_description">PHP Code:</div>
	
	<div class="bbcode_code" style="max-height:calc(2147483647 *  + 12px + 20px);"><code><span style="color: #000000">
<span style="color: #0000BB"></span><span style="color: #FF8000">#include&nbsp;&lt;iostream&gt;<br /><br />#define&nbsp;minimo(a,b)&nbsp;a&lt;b?a:b<br />#define&nbsp;MAXBLOCKS&nbsp;32<br />#define&nbsp;NTHREADS&nbsp;256&nbsp;//&nbsp;must&nbsp;be&nbsp;a&nbsp;power&nbsp;of&nbsp;2<br /><br /><br /></span><span style="color: #0000BB">__global__&nbsp;void&nbsp;dot&nbsp;</span><span style="color: #007700">(</span><span style="color: #0000BB">int&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">a</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">int&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">b</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">int&nbsp;</span><span style="color: #007700">*</span><span style="color: #0000BB">c</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">int&nbsp;size</span><span style="color: #007700">){<br /><br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int&nbsp;id&nbsp;</span><span style="color: #007700">=&nbsp;(</span><span style="color: #0000BB">blockIdx</span><span style="color: #007700">.</span><span style="color: #0000BB">x&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">blockDim</span><span style="color: #007700">.</span><span style="color: #0000BB">x</span><span style="color: #007700">)&nbsp;+&nbsp;</span><span style="color: #0000BB">threadIdx</span><span style="color: #007700">.</span><span style="color: #0000BB">x</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int&nbsp;nextid&nbsp;</span><span style="color: #007700">=&nbsp;</span><span style="color: #0000BB">gridDim</span><span style="color: #007700">.</span><span style="color: #0000BB">x&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">blockDim</span><span style="color: 
#007700">.</span><span style="color: #0000BB">x</span><span style="color: #007700">;<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #FF8000">////&nbsp;reducted&nbsp;output<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">__shared__&nbsp;int&nbsp;shared_cache&nbsp;</span><span style="color: #007700">&#91;</span><span style="color: #0000BB">NTHREADS</span><span style="color: #007700">&#93;;<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int&nbsp;sum&nbsp;</span><span style="color: #007700">=&nbsp;</span><span style="color: #0000BB">0</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;for&nbsp;(;</span><span style="color: #0000BB">id&nbsp;</span><span style="color: #007700">&lt;&nbsp;</span><span style="color: #0000BB">size&nbsp;</span><span style="color: #007700">;){<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">sum&nbsp;</span><span style="color: #007700">+=&nbsp;(*(</span><span style="color: #0000BB">a</span><span style="color: #007700">+</span><span style="color: #0000BB">id</span><span style="color: #007700">))&nbsp;*&nbsp;(*(</span><span style="color: #0000BB">b</span><span style="color: #007700">+</span><span style="color: #0000BB">id</span><span style="color: #007700">));<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">id</span><span style="color: #007700">+=&nbsp;</span><span style="color: #0000BB">nextid</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;}<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;*(</span><span style="color: #0000BB">shared_cache&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">threadIdx</span><span style="color: #007700">.</span><span style="color: #0000BB">x</span><span style="color: #007700">)&nbsp;=&nbsp;</span><span style="color: #0000BB">sum</span><span style="color: #007700">;<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: 
#0000BB">__syncthreads</span><span style="color: #007700">();<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #FF8000">/////////&nbsp;sum&nbsp;of&nbsp;internal&nbsp;cache<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int&nbsp;i</span><span style="color: #007700">;&nbsp;&nbsp;&nbsp;&nbsp;<br />&nbsp;&nbsp;&nbsp;&nbsp;<br />&nbsp;&nbsp;&nbsp;&nbsp;for&nbsp;(</span><span style="color: #0000BB">i</span><span style="color: #007700">=(</span><span style="color: #0000BB">NTHREADS&nbsp;</span><span style="color: #007700">/</span><span style="color: #0000BB">2</span><span style="color: #007700">);&nbsp;</span><span style="color: #0000BB">i</span><span style="color: #007700">&gt;</span><span style="color: #0000BB">0&nbsp;</span><span style="color: #007700">;&nbsp;</span><span style="color: #0000BB">i</span><span style="color: #007700">=&nbsp;</span><span style="color: #0000BB">i</span><span style="color: #007700">/</span><span style="color: #0000BB">2</span><span style="color: #007700">){<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;(</span><span style="color: #0000BB">threadIdx</span><span style="color: #007700">.</span><span style="color: #0000BB">x&nbsp;</span><span style="color: #007700">&lt;&nbsp;</span><span style="color: #0000BB">i</span><span style="color: #007700">){<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;*(</span><span style="color: #0000BB">shared_cache&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">threadIdx</span><span style="color: #007700">.</span><span style="color: #0000BB">x</span><span style="color: #007700">)&nbsp;+=&nbsp;*(</span><span style="color: #0000BB">shared_cache&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">threadIdx</span><span style="color: #007700">.</span><span style="color: #0000BB">x&nbsp;</span><span style="color: 
#007700">+&nbsp;</span><span style="color: #0000BB">i</span><span style="color: #007700">);<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;}<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">__syncthreads</span><span style="color: #007700">();<br />&nbsp;&nbsp;&nbsp;&nbsp;}<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;(&nbsp;</span><span style="color: #0000BB">threadIdx</span><span style="color: #007700">.</span><span style="color: #0000BB">x&nbsp;</span><span style="color: #007700">==&nbsp;</span><span style="color: #0000BB">0</span><span style="color: #007700">){<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;*(</span><span style="color: #0000BB">c</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">blockIdx</span><span style="color: #007700">.</span><span style="color: #0000BB">x</span><span style="color: #007700">)&nbsp;=&nbsp;</span><span style="color: #0000BB">shared_cache</span><span style="color: #007700">&#91;</span><span style="color: #0000BB">0</span><span style="color: #007700">&#93;;<br />&nbsp;&nbsp;&nbsp;&nbsp;}<br />}<br /><br /><br /><br /><br /></span><span style="color: #0000BB">int&nbsp;main&nbsp;</span><span style="color: #007700">()&nbsp;{<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int&nbsp;n&nbsp;</span><span style="color: #007700">=&nbsp;</span><span style="color: #0000BB">100</span><span style="color: #007700">;&nbsp;</span><span style="color: #FF8000">///&nbsp;size&nbsp;of&nbsp;arrays&nbsp;to&nbsp;multiply<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int&nbsp;n_blocks&nbsp;</span><span style="color: #007700">=&nbsp;</span><span style="color: #0000BB">minimo</span><span style="color: #007700">(&nbsp;</span><span style="color: #0000BB">MAXBLOCKS</span><span style="color: #007700">,&nbsp;((</span><span style="color: #0000BB">n</span><span 
style="color: #007700">+</span><span style="color: #0000BB">NTHREADS</span><span style="color: #007700">-</span><span style="color: #0000BB">1</span><span style="color: #007700">)/</span><span style="color: #0000BB">NTHREADS</span><span style="color: #007700">));<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">a&nbsp;</span><span style="color: #007700">=&nbsp;new&nbsp;</span><span style="color: #0000BB">int</span><span style="color: #007700">&#91;</span><span style="color: #0000BB">n</span><span style="color: #007700">&#93;;<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">b&nbsp;</span><span style="color: #007700">=&nbsp;new&nbsp;</span><span style="color: #0000BB">int</span><span style="color: #007700">&#91;</span><span style="color: #0000BB">n</span><span style="color: #007700">&#93;;<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">c&nbsp;</span><span style="color: #007700">=&nbsp;new&nbsp;</span><span style="color: #0000BB">int</span><span style="color: #007700">&#91;</span><span style="color: #0000BB">n_blocks</span><span style="color: #007700">&#93;;<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #FF8000">////////////////////////sample&nbsp;data&nbsp;allocated&nbsp;on&nbsp;host<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #007700">for&nbsp;(</span><span style="color: #0000BB">int&nbsp;i</span><span style="color: #007700">=</span><span style="color: #0000BB">0&nbsp;</span><span style="color: #007700">;&nbsp;</span><span style="color: #0000BB">i</span><span style="color: #007700">&lt;&nbsp;</span><span style="color: #0000BB">n&nbsp;</span><span style="color: #007700">;&nbsp;++</span><span style="color: 
#0000BB">i</span><span style="color: #007700">){<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;*(</span><span style="color: #0000BB">a</span><span style="color: #007700">+</span><span style="color: #0000BB">i</span><span style="color: #007700">)=</span><span style="color: #0000BB">1</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;*(</span><span style="color: #0000BB">b</span><span style="color: #007700">+</span><span style="color: #0000BB">i</span><span style="color: #007700">)=</span><span style="color: #0000BB">2</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;}<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">d_a</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">d_b</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">d_c</span><span style="color: #007700">;<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">cudaMalloc</span><span style="color: #007700">((</span><span style="color: #0000BB">void</span><span style="color: #007700">**)&nbsp;&amp;</span><span style="color: #0000BB">d_a</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">n&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">sizeof</span><span style="color: #007700">(int));<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">cudaMalloc</span><span style="color: #007700">((</span><span style="color: #0000BB">void</span><span style="color: #007700">**)&nbsp;&amp;</span><span 
style="color: #0000BB">d_b</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">n&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">sizeof</span><span style="color: #007700">(int));<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">cudaMalloc</span><span style="color: #007700">((</span><span style="color: #0000BB">void</span><span style="color: #007700">**)&nbsp;&amp;</span><span style="color: #0000BB">d_c</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">n_blocks&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">sizeof</span><span style="color: #007700">(int));<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">cudaMemcpy</span><span style="color: #007700">(</span><span style="color: #0000BB">d_a&nbsp;</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">a</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">n&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">sizeof</span><span style="color: #007700">(int),</span><span style="color: #0000BB">cudaMemcpyHostToDevice</span><span style="color: #007700">);<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">cudaMemcpy</span><span style="color: #007700">(</span><span style="color: #0000BB">d_b&nbsp;</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">b</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">n&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">sizeof</span><span style="color: #007700">(int),</span><span style="color: #0000BB">cudaMemcpyHostToDevice</span><span style="color: #007700">);<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">dot</span><span 
style="color: #007700">&lt;&lt;&lt;&nbsp;</span><span style="color: #0000BB">n_blocks&nbsp;</span><span style="color: #007700">,</span><span style="color: #0000BB">NTHREADS</span><span style="color: #007700">&gt;&gt;&gt;(</span><span style="color: #0000BB">d_a</span><span style="color: #007700">,</span><span style="color: #0000BB">d_b</span><span style="color: #007700">,</span><span style="color: #0000BB">d_c</span><span style="color: #007700">,</span><span style="color: #0000BB">n</span><span style="color: #007700">);<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">cudaMemcpy</span><span style="color: #007700">(</span><span style="color: #0000BB">c</span><span style="color: #007700">,</span><span style="color: #0000BB">d_c</span><span style="color: #007700">,</span><span style="color: #0000BB">n_blocks</span><span style="color: #007700">*</span><span style="color: #0000BB">sizeof</span><span style="color: #007700">(int),</span><span style="color: #0000BB">cudaMemcpyDeviceToHost</span><span style="color: #007700">);<br /><br /><br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #FF8000">////&nbsp;final&nbsp;sum&nbsp;on&nbsp;host<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int&nbsp;final_result&nbsp;</span><span style="color: #007700">=&nbsp;</span><span style="color: #0000BB">0</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;for&nbsp;(</span><span style="color: #0000BB">int&nbsp;i</span><span style="color: #007700">=</span><span style="color: #0000BB">0&nbsp;</span><span style="color: #007700">;&nbsp;</span><span style="color: #0000BB">i</span><span style="color: #007700">&lt;&nbsp;</span><span style="color: #0000BB">n_blocks&nbsp;</span><span style="color: #007700">;&nbsp;++</span><span style="color: #0000BB">i</span><span style="color: #007700">){<br 
/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">final_result&nbsp;</span><span style="color: #007700">+=&nbsp;*(</span><span style="color: #0000BB">c</span><span style="color: #007700">+</span><span style="color: #0000BB">i</span><span style="color: #007700">);<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;}<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">std</span><span style="color: #007700">::</span><span style="color: #0000BB">cout&nbsp;</span><span style="color: #007700">&lt;&lt;&nbsp;</span><span style="color: #0000BB">final_result&nbsp;</span><span style="color: #007700">&lt;&lt;&nbsp;</span><span style="color: #0000BB">std</span><span style="color: #007700">::</span><span style="color: #0000BB">endl</span><span style="color: #007700">;<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">std</span><span style="color: #007700">::</span><span style="color: #0000BB">cin</span><span style="color: #007700">.</span><span style="color: #0000BB">get</span><span style="color: #007700">();<br /><br />return&nbsp;</span><span style="color: #0000BB">0</span><span style="color: #007700">;}&nbsp;<br /></span><span style="color: #0000BB"></span>
</span>
</code></div>
</div>]]></content:encoded>
			<category domain="https://www.xgiovio.com/forum/forum/programming/cuda-c-c">CUDA C / C++</category>
			<dc:creator>xgiovio</dc:creator>
			<guid isPermaLink="true">https://www.xgiovio.com/forum/forum/programming/cuda-c-c/446-dot-product-shared-cache-reduction-and-final-partial-sum-on-host</guid>
		</item>
		<item>
			<title>Mergesort - 2^n int array - blocks + threads</title>
			<link>https://www.xgiovio.com/forum/forum/programming/cuda-c-c/445-mergesort-2-n-int-array-blocks-threads</link>
			<pubDate>Fri, 15 Nov 2013 14:07:13 GMT</pubDate>
			<description><![CDATA[#include &lt;iostream&gt; 
#include &lt;stdlib.h&gt; 
#include &lt;time.h&gt; 
#include &lt;math.h&gt; 
#include &lt;fstream&gt; 
 
using namespace std; 
 
// standard merge...]]></description>
			<content:encoded><![CDATA[
<div class="bbcode_container">
	<div class="bbcode_description">PHP Code:</div>
	
	<div class="bbcode_code" style="max-height:calc(2147483647 *  + 12px + 20px);"><code><span style="color: #000000">
<span style="color: #0000BB"></span><span style="color: #FF8000">#include&nbsp;&lt;iostream&gt;<br />#include&nbsp;&lt;stdlib.h&gt;<br />#include&nbsp;&lt;time.h&gt;<br />#include&nbsp;&lt;math.h&gt;<br />#include&nbsp;&lt;fstream&gt;<br /><br /></span><span style="color: #0000BB">using&nbsp;</span><span style="color: #007700">namespace&nbsp;</span><span style="color: #0000BB">std</span><span style="color: #007700">;<br /><br /></span><span style="color: #FF8000">//&nbsp;standard&nbsp;merge&nbsp;function<br /></span><span style="color: #0000BB">__device__&nbsp;void&nbsp;merge&nbsp;</span><span style="color: #007700">(</span><span style="color: #0000BB">int&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">first_array</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">int&nbsp;size1</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">int&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">second_array</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">int&nbsp;size2</span><span style="color: #007700">){<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int&nbsp;i</span><span style="color: #007700">,</span><span style="color: #0000BB">j</span><span style="color: #007700">,</span><span style="color: #0000BB">k</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">third_array</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #FF8000">//cudaMalloc&nbsp;((void**)&amp;third_array,&nbsp;(size1&nbsp;+&nbsp;size2)&nbsp;*&nbsp;sizeof(int));<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">third_array&nbsp;</span><span style="color: #007700">=&nbsp;(</span><span style="color: #0000BB">int&nbsp;</span><span style="color: 
#007700">*)&nbsp;</span><span style="color: #0000BB">malloc</span><span style="color: #007700">((</span><span style="color: #0000BB">size1&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">size2</span><span style="color: #007700">)&nbsp;*&nbsp;</span><span style="color: #0000BB">sizeof</span><span style="color: #007700">(int));<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;for(</span><span style="color: #0000BB">i</span><span style="color: #007700">=</span><span style="color: #0000BB">0</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">j</span><span style="color: #007700">=</span><span style="color: #0000BB">0</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">k</span><span style="color: #007700">=-</span><span style="color: #0000BB">1&nbsp;</span><span style="color: #007700">;&nbsp;(</span><span style="color: #0000BB">i</span><span style="color: #007700">&lt;</span><span style="color: #0000BB">size1</span><span style="color: #007700">)&nbsp;&amp;&amp;&nbsp;(</span><span style="color: #0000BB">j</span><span style="color: #007700">&lt;</span><span style="color: #0000BB">size2</span><span style="color: #007700">)&nbsp;;){<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;(&nbsp;*(</span><span style="color: #0000BB">first_array&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">i</span><span style="color: #007700">)&nbsp;&lt;=&nbsp;*(</span><span style="color: #0000BB">second_array&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">j</span><span style="color: #007700">)&nbsp;&nbsp;){<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;++</span><span style="color: #0000BB">k</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;*(</span><span style="color: #0000BB">third_array&nbsp;</span><span style="color: 
#007700">+&nbsp;</span><span style="color: #0000BB">k</span><span style="color: #007700">)&nbsp;=&nbsp;&nbsp;*(</span><span style="color: #0000BB">first_array&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">i</span><span style="color: #007700">);<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;++</span><span style="color: #0000BB">i</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;}&nbsp;else&nbsp;{<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;++</span><span style="color: #0000BB">k</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;*(</span><span style="color: #0000BB">third_array&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">k</span><span style="color: #007700">)&nbsp;=&nbsp;&nbsp;*(</span><span style="color: #0000BB">second_array&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">j</span><span style="color: #007700">);<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;++</span><span style="color: #0000BB">j</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;}<br />&nbsp;&nbsp;&nbsp;&nbsp;}<br />&nbsp;&nbsp;&nbsp;&nbsp;if(&nbsp;</span><span style="color: #0000BB">i</span><span style="color: #007700">&gt;=&nbsp;</span><span style="color: #0000BB">size1&nbsp;</span><span style="color: #007700">){<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;for(;</span><span style="color: #0000BB">j</span><span style="color: #007700">&lt;</span><span style="color: #0000BB">size2</span><span style="color: #007700">;){<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;++</span><span style="color: #0000BB">k</span><span style="color: #007700">;<br 
/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;*(</span><span style="color: #0000BB">third_array&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">k</span><span style="color: #007700">)&nbsp;=&nbsp;&nbsp;*(</span><span style="color: #0000BB">second_array&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">j</span><span style="color: #007700">);<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;++</span><span style="color: #0000BB">j</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;}<br />&nbsp;&nbsp;&nbsp;&nbsp;}&nbsp;else&nbsp;{<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;for(;</span><span style="color: #0000BB">i</span><span style="color: #007700">&lt;</span><span style="color: #0000BB">size1</span><span style="color: #007700">;){<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;++</span><span style="color: #0000BB">k</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;*(</span><span style="color: #0000BB">third_array&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">k</span><span style="color: #007700">)&nbsp;=&nbsp;&nbsp;*(</span><span style="color: #0000BB">first_array&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">i</span><span style="color: #007700">);<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;++</span><span style="color: #0000BB">i</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;}<br />&nbsp;&nbsp;&nbsp;&nbsp;}<br /><br /><br />&nbsp;&nbsp;&nbsp;&nbsp;for(</span><span style="color: #0000BB">i</span><span style="color: #007700">=</span><span style="color: #0000BB">0</span><span style="color: #007700">;&nbsp;</span><span style="color: #0000BB">i</span><span 
style="color: #007700">&lt;</span><span style="color: #0000BB">size1&nbsp;</span><span style="color: #007700">;++</span><span style="color: #0000BB">i</span><span style="color: #007700">){<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;*(</span><span style="color: #0000BB">first_array&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">i</span><span style="color: #007700">)&nbsp;=&nbsp;*(</span><span style="color: #0000BB">third_array&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">i</span><span style="color: #007700">);<br />&nbsp;&nbsp;&nbsp;&nbsp;}<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">j</span><span style="color: #007700">=</span><span style="color: #0000BB">i</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;for(</span><span style="color: #0000BB">i</span><span style="color: #007700">=</span><span style="color: #0000BB">0</span><span style="color: #007700">;&nbsp;</span><span style="color: #0000BB">i</span><span style="color: #007700">&lt;</span><span style="color: #0000BB">size2&nbsp;</span><span style="color: #007700">;++</span><span style="color: #0000BB">i</span><span style="color: #007700">){<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;*(</span><span style="color: #0000BB">second_array&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">i</span><span style="color: #007700">)&nbsp;=&nbsp;*(</span><span style="color: #0000BB">third_array&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">i&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">j</span><span style="color: #007700">);<br />&nbsp;&nbsp;&nbsp;&nbsp;}<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #FF8000">//cudaFree&nbsp;(third_array)&nbsp;;<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">free&nbsp;</span><span style="color: #007700">(</span><span 
style="color: #0000BB">third_array</span><span style="color: #007700">)&nbsp;;<br />}<br /><br /><br /></span><span style="color: #0000BB">__global__&nbsp;void&nbsp;mergesort</span><span style="color: #007700">(</span><span style="color: #0000BB">int&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">pointer</span><span style="color: #007700">,</span><span style="color: #0000BB">int&nbsp;size</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">int&nbsp;factor</span><span style="color: #007700">){<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int&nbsp;id&nbsp;</span><span style="color: #007700">=&nbsp;(&nbsp;</span><span style="color: #0000BB">blockIdx</span><span style="color: #007700">.</span><span style="color: #0000BB">x&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">blockDim</span><span style="color: #007700">.</span><span style="color: #0000BB">x</span><span style="color: #007700">)&nbsp;+&nbsp;</span><span style="color: #0000BB">threadIdx</span><span style="color: #007700">.</span><span style="color: #0000BB">x</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">pointer_to_pass&nbsp;</span><span style="color: #007700">=&nbsp;</span><span style="color: #0000BB">NULL</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;for&nbsp;(;</span><span style="color: #0000BB">id</span><span style="color: #007700">&lt;&nbsp;(</span><span style="color: #0000BB">size</span><span style="color: #007700">/</span><span style="color: #0000BB">factor</span><span style="color: #007700">);&nbsp;</span><span style="color: #0000BB">id&nbsp;</span><span style="color: #007700">+=&nbsp;</span><span style="color: #0000BB">gridDim</span><span style="color: #007700">.</span><span style="color: #0000BB">x&nbsp;</span><span 
style="color: #007700">*&nbsp;</span><span style="color: #0000BB">blockDim</span><span style="color: #007700">.</span><span style="color: #0000BB">x</span><span style="color: #007700">){<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">pointer_to_pass&nbsp;</span><span style="color: #007700">=&nbsp;</span><span style="color: #0000BB">pointer&nbsp;</span><span style="color: #007700">+&nbsp;(&nbsp;</span><span style="color: #0000BB">id&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">factor</span><span style="color: #007700">);&nbsp;</span><span style="color: #FF8000">//&nbsp;pointer&nbsp;to&nbsp;pass&nbsp;to&nbsp;each&nbsp;merge<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">merge</span><span style="color: #007700">(&nbsp;</span><span style="color: #0000BB">pointer_to_pass</span><span style="color: #007700">,&nbsp;(</span><span style="color: #0000BB">factor</span><span style="color: #007700">/</span><span style="color: #0000BB">2</span><span style="color: #007700">),&nbsp;</span><span style="color: #0000BB">pointer_to_pass&nbsp;</span><span style="color: #007700">+&nbsp;(</span><span style="color: #0000BB">factor</span><span style="color: #007700">/</span><span style="color: #0000BB">2</span><span style="color: #007700">),&nbsp;(</span><span style="color: #0000BB">factor</span><span style="color: #007700">/</span><span style="color: #0000BB">2</span><span style="color: #007700">)&nbsp;);<br />&nbsp;&nbsp;&nbsp;&nbsp;}<br />}<br /><br /><br /><br /><br /></span><span style="color: #0000BB">int&nbsp;main</span><span style="color: #007700">(&nbsp;</span><span style="color: #0000BB">void&nbsp;</span><span style="color: #007700">)&nbsp;{<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #FF8000">/*<br 
/>&nbsp;&nbsp;&nbsp;&nbsp;fstream&nbsp;fileout&nbsp;("data.txt",fstream::out);&nbsp;//&nbsp;file&nbsp;to&nbsp;write&nbsp;the&nbsp;non-ordered&nbsp;and&nbsp;ordered&nbsp;array<br />&nbsp;&nbsp;&nbsp;&nbsp;fstream&nbsp;fileout_bin&nbsp;("data_bin.txt",fstream::out&nbsp;|&nbsp;fstream::binary&nbsp;);&nbsp;//&nbsp;ordered&nbsp;array&nbsp;in&nbsp;binary&nbsp;mode<br />&nbsp;&nbsp;&nbsp;&nbsp;*/<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">srand&nbsp;</span><span style="color: #007700">(</span><span style="color: #0000BB">time</span><span style="color: #007700">(</span><span style="color: #0000BB">NULL</span><span style="color: #007700">));<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int&nbsp;n&nbsp;</span><span style="color: #007700">=&nbsp;</span><span style="color: #0000BB">65536</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">numeri&nbsp;</span><span style="color: #007700">=&nbsp;new&nbsp;</span><span style="color: #0000BB">int</span><span style="color: #007700">&#91;</span><span style="color: #0000BB">n</span><span style="color: #007700">&#93;;<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;for&nbsp;(</span><span style="color: #0000BB">int&nbsp;i&nbsp;</span><span style="color: #007700">=</span><span style="color: #0000BB">0</span><span style="color: #007700">;&nbsp;</span><span style="color: #0000BB">i</span><span style="color: #007700">&lt;&nbsp;</span><span style="color: #0000BB">n</span><span style="color: #007700">;&nbsp;++</span><span style="color: #0000BB">i</span><span style="color: #007700">){<br />&nbsp;&nbsp;&nbsp;&nbsp;*(</span><span style="color: #0000BB">numeri&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">i</span><span style="color: #007700">)&nbsp;=&nbsp;</span><span style="color: #0000BB">rand</span><span style="color: 
#007700">()&nbsp;+&nbsp;&nbsp;</span><span style="color: #0000BB">rand</span><span style="color: #007700">()&nbsp;+&nbsp;</span><span style="color: #0000BB">rand</span><span style="color: #007700">()&nbsp;+&nbsp;</span><span style="color: #0000BB">rand</span><span style="color: #007700">()&nbsp;+&nbsp;&nbsp;</span><span style="color: #0000BB">rand</span><span style="color: #007700">()&nbsp;+&nbsp;</span><span style="color: #0000BB">rand</span><span style="color: #007700">()&nbsp;;&nbsp;&nbsp;</span><span style="color: #FF8000">//fill&nbsp;the&nbsp;array&nbsp;with&nbsp;random&nbsp;data<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #007700">}<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #FF8000">/*<br />&nbsp;&nbsp;&nbsp;&nbsp;fileout&nbsp;&lt;&lt;&nbsp;"pre"&nbsp;&lt;&lt;&nbsp;endl;&nbsp;//&nbsp;print&nbsp;to&nbsp;file&nbsp;the&nbsp;non-ordered&nbsp;array<br />&nbsp;&nbsp;&nbsp;&nbsp;for(int&nbsp;i&nbsp;=&nbsp;0;i&lt;n;++i){<br />&nbsp;&nbsp;&nbsp;&nbsp;fileout&nbsp;&lt;&lt;&nbsp;numeri&#91;i&#93;&lt;&lt;&nbsp;endl;<br />&nbsp;&nbsp;&nbsp;&nbsp;}<br />&nbsp;&nbsp;&nbsp;&nbsp;*/<br />&nbsp;&nbsp;&nbsp;&nbsp;<br /><br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">d_numeri</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">cudaMalloc</span><span style="color: #007700">((</span><span style="color: #0000BB">void&nbsp;</span><span style="color: #007700">**)&amp;</span><span style="color: #0000BB">d_numeri</span><span style="color: #007700">,</span><span style="color: #0000BB">n</span><span style="color: #007700">*</span><span style="color: #0000BB">sizeof</span><span style="color: #007700">(int));&nbsp;</span><span style="color: #FF8000">//allocating&nbsp;n*sizeof(int)&nbsp;bytes&nbsp;on&nbsp;gpu&nbsp;ram<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: 
#0000BB">cudaMemcpy</span><span style="color: #007700">(</span><span style="color: #0000BB">d_numeri</span><span style="color: #007700">,</span><span style="color: #0000BB">numeri</span><span style="color: #007700">,</span><span style="color: #0000BB">n</span><span style="color: #007700">*</span><span style="color: #0000BB">sizeof</span><span style="color: #007700">(int),</span><span style="color: #0000BB">cudaMemcpyHostToDevice</span><span style="color: #007700">);&nbsp;</span><span style="color: #FF8000">//&nbsp;copy&nbsp;array&nbsp;from&nbsp;host&nbsp;to&nbsp;gpu<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #007700">for(</span><span style="color: #0000BB">int&nbsp;i&nbsp;</span><span style="color: #007700">=&nbsp;</span><span style="color: #0000BB">2&nbsp;</span><span style="color: #007700">;&nbsp;</span><span style="color: #0000BB">i</span><span style="color: #007700">&lt;=</span><span style="color: #0000BB">n&nbsp;</span><span style="color: #007700">;</span><span style="color: #0000BB">i</span><span style="color: #007700">=</span><span style="color: #0000BB">i</span><span style="color: #007700">*</span><span style="color: #0000BB">2</span><span style="color: #007700">)&nbsp;{<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">mergesort</span><span style="color: #007700">&lt;&lt;&lt;</span><span style="color: #0000BB">128</span><span style="color: #007700">,</span><span style="color: #0000BB">128</span><span style="color: #007700">&gt;&gt;&gt;(</span><span style="color: #0000BB">d_numeri</span><span style="color: #007700">,</span><span style="color: #0000BB">n</span><span style="color: #007700">,</span><span style="color: #0000BB">i</span><span style="color: #007700">);&nbsp;</span><span style="color: #FF8000">//&nbsp;launch&nbsp;instances&nbsp;of&nbsp;mergesort&nbsp;and&nbsp;run&nbsp;in&nbsp;parallel<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #007700">}<br /><br 
/>&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">cudaMemcpy</span><span style="color: #007700">(</span><span style="color: #0000BB">numeri</span><span style="color: #007700">,</span><span style="color: #0000BB">d_numeri</span><span style="color: #007700">,</span><span style="color: #0000BB">n</span><span style="color: #007700">*</span><span style="color: #0000BB">sizeof</span><span style="color: #007700">(int),</span><span style="color: #0000BB">cudaMemcpyDeviceToHost</span><span style="color: #007700">);&nbsp;</span><span style="color: #FF8000">//&nbsp;copy&nbsp;the&nbsp;ordered&nbsp;array&nbsp;from&nbsp;device&nbsp;to&nbsp;host<br />&nbsp;&nbsp;&nbsp;&nbsp;<br />&nbsp;&nbsp;&nbsp;&nbsp;/*<br />&nbsp;&nbsp;&nbsp;&nbsp;fileout&nbsp;&lt;&lt;&nbsp;"after"&nbsp;&lt;&lt;&nbsp;endl;&nbsp;//&nbsp;write&nbsp;in&nbsp;file&nbsp;the&nbsp;ordered&nbsp;array<br />&nbsp;&nbsp;&nbsp;&nbsp;for(int&nbsp;i&nbsp;=&nbsp;0;i&lt;n;++i){<br />&nbsp;&nbsp;&nbsp;&nbsp;fileout&nbsp;&lt;&lt;&nbsp;numeri&#91;i&#93;&lt;&lt;&nbsp;endl;<br />&nbsp;&nbsp;&nbsp;&nbsp;}<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;fileout_bin.write((char&nbsp;*)&nbsp;numeri,&nbsp;n&nbsp;*&nbsp;sizeof(int));&nbsp;//&nbsp;write&nbsp;to&nbsp;file&nbsp;the&nbsp;ordered&nbsp;array&nbsp;in&nbsp;binary&nbsp;mode<br />&nbsp;&nbsp;&nbsp;&nbsp;*/<br /><br /><br />&nbsp;&nbsp;&nbsp;&nbsp;<br /></span><span style="color: #007700">return&nbsp;</span><span style="color: #0000BB">0</span><span style="color: #007700">;<br />}&nbsp;<br /></span><span style="color: #0000BB"></span>
</span>
</code></div>
</div>]]></content:encoded>
			<category domain="https://www.xgiovio.com/forum/forum/programming/cuda-c-c">CUDA C / C++</category>
			<dc:creator>xgiovio</dc:creator>
			<guid isPermaLink="true">https://www.xgiovio.com/forum/forum/programming/cuda-c-c/445-mergesort-2-n-int-array-blocks-threads</guid>
		</item>
		<item>
			<title>MergeSort - N/2 to 1 blocks - 1 thread per block - Int array with size = 2^n</title>
			<link>https://www.xgiovio.com/forum/forum/programming/cuda-c-c/444-mergesort-n-2-to-1-blocks-1-thread-per-block-int-array-with-size-2-n</link>
			<pubDate>Wed, 13 Nov 2013 20:32:32 GMT</pubDate>
			<description><![CDATA[MergeSort - N/2 to 1 blocks - 1 thread per block - Int array with size = 2^n 
 
 
#include &lt;iostream&gt; 
#include &lt;stdlib.h&gt; 
#include &lt;time.h&gt;...]]></description>
			<content:encoded><![CDATA[MergeSort - N/2 to 1 blocks - 1 thread per block - Int array with size = 2^n<br />

<div class="bbcode_container">
	<div class="bbcode_description">PHP Code:</div>
	
	<div class="bbcode_code" style="max-height:none;"><code><span style="color: #000000">
<span style="color: #0000BB"></span><span style="color: #FF8000">#include&nbsp;&lt;iostream&gt;<br />#include&nbsp;&lt;stdlib.h&gt;<br />#include&nbsp;&lt;time.h&gt;<br />#include&nbsp;&lt;math.h&gt;<br />#include&nbsp;&lt;fstream&gt;<br /><br /></span><span style="color: #0000BB">using&nbsp;</span><span style="color: #007700">namespace&nbsp;</span><span style="color: #0000BB">std</span><span style="color: #007700">;<br /><br /></span><span style="color: #FF8000">//&nbsp;standard&nbsp;merge&nbsp;function<br /></span><span style="color: #0000BB">__device__&nbsp;void&nbsp;merge&nbsp;</span><span style="color: #007700">(</span><span style="color: #0000BB">int&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">first_array</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">int&nbsp;size1</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">int&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">second_array</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">int&nbsp;size2</span><span style="color: #007700">){<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int&nbsp;i</span><span style="color: #007700">,</span><span style="color: #0000BB">j</span><span style="color: #007700">,</span><span style="color: #0000BB">k</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">third_array</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #FF8000">//cudaMalloc&nbsp;((void**)&amp;third_array,&nbsp;(size1&nbsp;+&nbsp;size2)&nbsp;*&nbsp;sizeof(int));<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">third_array&nbsp;</span><span style="color: #007700">=&nbsp;(</span><span style="color: #0000BB">int&nbsp;</span><span style="color: 
#007700">*)&nbsp;</span><span style="color: #0000BB">malloc</span><span style="color: #007700">((</span><span style="color: #0000BB">size1&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">size2</span><span style="color: #007700">)&nbsp;*&nbsp;</span><span style="color: #0000BB">sizeof</span><span style="color: #007700">(int));<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;for(</span><span style="color: #0000BB">i</span><span style="color: #007700">=</span><span style="color: #0000BB">0</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">j</span><span style="color: #007700">=</span><span style="color: #0000BB">0</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">k</span><span style="color: #007700">=-</span><span style="color: #0000BB">1&nbsp;</span><span style="color: #007700">;&nbsp;(</span><span style="color: #0000BB">i</span><span style="color: #007700">&lt;</span><span style="color: #0000BB">size1</span><span style="color: #007700">)&nbsp;&amp;&amp;&nbsp;(</span><span style="color: #0000BB">j</span><span style="color: #007700">&lt;</span><span style="color: #0000BB">size2</span><span style="color: #007700">)&nbsp;;){<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;(&nbsp;*(</span><span style="color: #0000BB">first_array&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">i</span><span style="color: #007700">)&nbsp;&lt;=&nbsp;*(</span><span style="color: #0000BB">second_array&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">j</span><span style="color: #007700">)&nbsp;&nbsp;){<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;++</span><span style="color: #0000BB">k</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;*(</span><span style="color: #0000BB">third_array&nbsp;</span><span style="color: 
#007700">+&nbsp;</span><span style="color: #0000BB">k</span><span style="color: #007700">)&nbsp;=&nbsp;&nbsp;*(</span><span style="color: #0000BB">first_array&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">i</span><span style="color: #007700">);<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;++</span><span style="color: #0000BB">i</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;}&nbsp;else&nbsp;{<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;++</span><span style="color: #0000BB">k</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;*(</span><span style="color: #0000BB">third_array&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">k</span><span style="color: #007700">)&nbsp;=&nbsp;&nbsp;*(</span><span style="color: #0000BB">second_array&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">j</span><span style="color: #007700">);<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;++</span><span style="color: #0000BB">j</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;}<br />&nbsp;&nbsp;&nbsp;&nbsp;}<br />&nbsp;&nbsp;&nbsp;&nbsp;if(&nbsp;</span><span style="color: #0000BB">i</span><span style="color: #007700">&gt;=&nbsp;</span><span style="color: #0000BB">size1&nbsp;</span><span style="color: #007700">){<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;for(;</span><span style="color: #0000BB">j</span><span style="color: #007700">&lt;</span><span style="color: #0000BB">size2</span><span style="color: #007700">;){<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;++</span><span style="color: #0000BB">k</span><span style="color: #007700">;<br 
/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;*(</span><span style="color: #0000BB">third_array&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">k</span><span style="color: #007700">)&nbsp;=&nbsp;&nbsp;*(</span><span style="color: #0000BB">second_array&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">j</span><span style="color: #007700">);<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;++</span><span style="color: #0000BB">j</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;}<br />&nbsp;&nbsp;&nbsp;&nbsp;}&nbsp;else&nbsp;{<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;for(;</span><span style="color: #0000BB">i</span><span style="color: #007700">&lt;</span><span style="color: #0000BB">size1</span><span style="color: #007700">;){<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;++</span><span style="color: #0000BB">k</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;*(</span><span style="color: #0000BB">third_array&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">k</span><span style="color: #007700">)&nbsp;=&nbsp;&nbsp;*(</span><span style="color: #0000BB">first_array&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">i</span><span style="color: #007700">);<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;++</span><span style="color: #0000BB">i</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;}<br />&nbsp;&nbsp;&nbsp;&nbsp;}<br /><br /><br />&nbsp;&nbsp;&nbsp;&nbsp;for(</span><span style="color: #0000BB">i</span><span style="color: #007700">=</span><span style="color: #0000BB">0</span><span style="color: #007700">;&nbsp;</span><span style="color: #0000BB">i</span><span 
style="color: #007700">&lt;</span><span style="color: #0000BB">size1&nbsp;</span><span style="color: #007700">;++</span><span style="color: #0000BB">i</span><span style="color: #007700">){<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;*(</span><span style="color: #0000BB">first_array&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">i</span><span style="color: #007700">)&nbsp;=&nbsp;*(</span><span style="color: #0000BB">third_array&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">i</span><span style="color: #007700">);<br />&nbsp;&nbsp;&nbsp;&nbsp;}<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">j</span><span style="color: #007700">=</span><span style="color: #0000BB">i</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;for(</span><span style="color: #0000BB">i</span><span style="color: #007700">=</span><span style="color: #0000BB">0</span><span style="color: #007700">;&nbsp;</span><span style="color: #0000BB">i</span><span style="color: #007700">&lt;</span><span style="color: #0000BB">size2&nbsp;</span><span style="color: #007700">;++</span><span style="color: #0000BB">i</span><span style="color: #007700">){<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;*(</span><span style="color: #0000BB">second_array&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">i</span><span style="color: #007700">)&nbsp;=&nbsp;*(</span><span style="color: #0000BB">third_array&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">i&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">j</span><span style="color: #007700">);<br />&nbsp;&nbsp;&nbsp;&nbsp;}<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #FF8000">//cudaFree&nbsp;(third_array)&nbsp;;<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">free&nbsp;</span><span style="color: #007700">(</span><span 
style="color: #0000BB">third_array</span><span style="color: #007700">)&nbsp;;<br />}<br /><br /><br /></span><span style="color: #0000BB">__global__&nbsp;void&nbsp;mergesort</span><span style="color: #007700">(</span><span style="color: #0000BB">int&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">pointer</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">int&nbsp;factor</span><span style="color: #007700">){<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">pointer_to_pass&nbsp;</span><span style="color: #007700">=&nbsp;</span><span style="color: #0000BB">pointer&nbsp;</span><span style="color: #007700">+&nbsp;(&nbsp;</span><span style="color: #0000BB">blockIdx</span><span style="color: #007700">.</span><span style="color: #0000BB">x&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">factor</span><span style="color: #007700">);&nbsp;</span><span style="color: #FF8000">//&nbsp;pointer&nbsp;to&nbsp;pass&nbsp;to&nbsp;each&nbsp;merge<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">merge</span><span style="color: #007700">(&nbsp;</span><span style="color: #0000BB">pointer_to_pass</span><span style="color: #007700">,&nbsp;(</span><span style="color: #0000BB">factor</span><span style="color: #007700">/</span><span style="color: #0000BB">2</span><span style="color: #007700">),&nbsp;</span><span style="color: #0000BB">pointer_to_pass&nbsp;</span><span style="color: #007700">+&nbsp;(</span><span style="color: #0000BB">factor</span><span style="color: #007700">/</span><span style="color: #0000BB">2</span><span style="color: #007700">),&nbsp;(</span><span style="color: #0000BB">factor</span><span style="color: #007700">/</span><span style="color: #0000BB">2</span><span style="color: #007700">)&nbsp;);<br />}<br /><br /><br /><br /><br /></span><span 
style="color: #0000BB">int&nbsp;main</span><span style="color: #007700">(&nbsp;</span><span style="color: #0000BB">void&nbsp;</span><span style="color: #007700">)&nbsp;{<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">fstream&nbsp;fileout&nbsp;</span><span style="color: #007700">(</span><span style="color: #DD0000">"data.txt"</span><span style="color: #007700">,</span><span style="color: #0000BB">fstream</span><span style="color: #007700">::</span><span style="color: #0000BB">out</span><span style="color: #007700">);&nbsp;</span><span style="color: #FF8000">//&nbsp;file&nbsp;to&nbsp;write&nbsp;the&nbsp;non-ordered&nbsp;and&nbsp;ordered&nbsp;array<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">fstream&nbsp;fileout_bin&nbsp;</span><span style="color: #007700">(</span><span style="color: #DD0000">"data_bin.txt"</span><span style="color: #007700">,</span><span style="color: #0000BB">fstream</span><span style="color: #007700">::</span><span style="color: #0000BB">out&nbsp;</span><span style="color: #007700">|&nbsp;</span><span style="color: #0000BB">fstream</span><span style="color: #007700">::</span><span style="color: #0000BB">binary&nbsp;</span><span style="color: #007700">);&nbsp;</span><span style="color: #FF8000">//&nbsp;ordered&nbsp;array&nbsp;in&nbsp;binary&nbsp;mode<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">srand&nbsp;</span><span style="color: #007700">(</span><span style="color: #0000BB">time</span><span style="color: #007700">(</span><span style="color: #0000BB">NULL</span><span style="color: #007700">));<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int&nbsp;n&nbsp;</span><span style="color: #007700">=&nbsp;</span><span style="color: #0000BB">32768</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">numeri&nbsp;</span><span 
style="color: #007700">=&nbsp;new&nbsp;</span><span style="color: #0000BB">int</span><span style="color: #007700">&#91;</span><span style="color: #0000BB">n</span><span style="color: #007700">&#93;;<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;for&nbsp;(</span><span style="color: #0000BB">int&nbsp;i&nbsp;</span><span style="color: #007700">=</span><span style="color: #0000BB">0</span><span style="color: #007700">;&nbsp;</span><span style="color: #0000BB">i</span><span style="color: #007700">&lt;&nbsp;</span><span style="color: #0000BB">n</span><span style="color: #007700">;&nbsp;++</span><span style="color: #0000BB">i</span><span style="color: #007700">){<br />&nbsp;&nbsp;&nbsp;&nbsp;*(</span><span style="color: #0000BB">numeri&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">i</span><span style="color: #007700">)&nbsp;=&nbsp;</span><span style="color: #0000BB">rand</span><span style="color: #007700">()&nbsp;%&nbsp;</span><span style="color: #0000BB">2000&nbsp;</span><span style="color: #007700">+&nbsp;&nbsp;</span><span style="color: #0000BB">rand</span><span style="color: #007700">()&nbsp;%&nbsp;</span><span style="color: #0000BB">2000&nbsp;</span><span style="color: #007700">+&nbsp;</span><span style="color: #0000BB">rand</span><span style="color: #007700">()&nbsp;%&nbsp;</span><span style="color: #0000BB">2000</span><span style="color: #007700">;&nbsp;&nbsp;</span><span style="color: #FF8000">//fill&nbsp;the&nbsp;array&nbsp;with&nbsp;random&nbsp;data<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #007700">}<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">fileout&nbsp;</span><span style="color: #007700">&lt;&lt;&nbsp;</span><span style="color: #DD0000">"pre"&nbsp;</span><span style="color: #007700">&lt;&lt;&nbsp;</span><span style="color: #0000BB">endl</span><span style="color: #007700">;&nbsp;</span><span style="color: #FF8000">//&nbsp;print&nbsp;to&nbsp;file&nbsp;the&nbsp;non-ordered&nbsp;array<br 
/>&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #007700">for(</span><span style="color: #0000BB">int&nbsp;i&nbsp;</span><span style="color: #007700">=&nbsp;</span><span style="color: #0000BB">0</span><span style="color: #007700">;</span><span style="color: #0000BB">i</span><span style="color: #007700">&lt;</span><span style="color: #0000BB">n</span><span style="color: #007700">;++</span><span style="color: #0000BB">i</span><span style="color: #007700">){<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">fileout&nbsp;</span><span style="color: #007700">&lt;&lt;&nbsp;</span><span style="color: #0000BB">numeri</span><span style="color: #007700">&#91;</span><span style="color: #0000BB">i</span><span style="color: #007700">&#93;&lt;&lt;&nbsp;</span><span style="color: #0000BB">endl</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;}<br /><br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">d_numeri</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">cudaMalloc</span><span style="color: #007700">((</span><span style="color: #0000BB">void&nbsp;</span><span style="color: #007700">**)&amp;</span><span style="color: #0000BB">d_numeri</span><span style="color: #007700">,</span><span style="color: #0000BB">n</span><span style="color: #007700">*</span><span style="color: #0000BB">sizeof</span><span style="color: #007700">(int));&nbsp;</span><span style="color: #FF8000">//allocating&nbsp;n*sizeof(int)&nbsp;bytes&nbsp;on&nbsp;gpu&nbsp;ram<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">cudaMemcpy</span><span style="color: #007700">(</span><span style="color: #0000BB">d_numeri</span><span style="color: #007700">,</span><span style="color: #0000BB">numeri</span><span style="color: #007700">,</span><span style="color: #0000BB">n</span><span style="color: #007700">*</span><span 
style="color: #0000BB">sizeof</span><span style="color: #007700">(int),</span><span style="color: #0000BB">cudaMemcpyHostToDevice</span><span style="color: #007700">);&nbsp;</span><span style="color: #FF8000">//&nbsp;copy&nbsp;array&nbsp;from&nbsp;host&nbsp;to&nbsp;gpu<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #007700">for(</span><span style="color: #0000BB">int&nbsp;i&nbsp;</span><span style="color: #007700">=&nbsp;</span><span style="color: #0000BB">2&nbsp;</span><span style="color: #007700">;&nbsp;</span><span style="color: #0000BB">i</span><span style="color: #007700">&lt;=</span><span style="color: #0000BB">n&nbsp;</span><span style="color: #007700">;</span><span style="color: #0000BB">i</span><span style="color: #007700">=</span><span style="color: #0000BB">i</span><span style="color: #007700">*</span><span style="color: #0000BB">2</span><span style="color: #007700">)&nbsp;{<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">mergesort</span><span style="color: #007700">&lt;&lt;&lt;(</span><span style="color: #0000BB">n</span><span style="color: #007700">/</span><span style="color: #0000BB">i</span><span style="color: #007700">),</span><span style="color: #0000BB">1</span><span style="color: #007700">&gt;&gt;&gt;(</span><span style="color: #0000BB">d_numeri</span><span style="color: #007700">,</span><span style="color: #0000BB">i</span><span style="color: #007700">);&nbsp;</span><span style="color: #FF8000">//&nbsp;launch&nbsp;instances&nbsp;of&nbsp;mergesort&nbsp;and&nbsp;run&nbsp;in&nbsp;parallel<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #007700">}<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">cudaMemcpy</span><span style="color: #007700">(</span><span style="color: #0000BB">numeri</span><span style="color: #007700">,</span><span style="color: #0000BB">d_numeri</span><span style="color: #007700">,</span><span style="color: #0000BB">n</span><span style="color: 
#007700">*</span><span style="color: #0000BB">sizeof</span><span style="color: #007700">(int),</span><span style="color: #0000BB">cudaMemcpyDeviceToHost</span><span style="color: #007700">);&nbsp;</span><span style="color: #FF8000">//&nbsp;copy&nbsp;the&nbsp;ordered&nbsp;array&nbsp;from&nbsp;device&nbsp;to&nbsp;host<br />&nbsp;&nbsp;&nbsp;&nbsp;<br />&nbsp;&nbsp;&nbsp;&nbsp;<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">fileout&nbsp;</span><span style="color: #007700">&lt;&lt;&nbsp;</span><span style="color: #DD0000">"after"&nbsp;</span><span style="color: #007700">&lt;&lt;&nbsp;</span><span style="color: #0000BB">endl</span><span style="color: #007700">;&nbsp;</span><span style="color: #FF8000">//&nbsp;write&nbsp;in&nbsp;file&nbsp;the&nbsp;ordered&nbsp;array<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #007700">for(</span><span style="color: #0000BB">int&nbsp;i&nbsp;</span><span style="color: #007700">=&nbsp;</span><span style="color: #0000BB">0</span><span style="color: #007700">;</span><span style="color: #0000BB">i</span><span style="color: #007700">&lt;</span><span style="color: #0000BB">n</span><span style="color: #007700">;++</span><span style="color: #0000BB">i</span><span style="color: #007700">){<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">fileout&nbsp;</span><span style="color: #007700">&lt;&lt;&nbsp;</span><span style="color: #0000BB">numeri</span><span style="color: #007700">&#91;</span><span style="color: #0000BB">i</span><span style="color: #007700">&#93;&lt;&lt;&nbsp;</span><span style="color: #0000BB">endl</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;}<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">fileout_bin</span><span style="color: #007700">.</span><span style="color: #0000BB">write</span><span style="color: #007700">((</span><span style="color: #0000BB">char&nbsp;</span><span style="color: #007700">*)&nbsp;</span><span style="color: 
#0000BB">numeri</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">n&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">sizeof</span><span style="color: #007700">(int));&nbsp;</span><span style="color: #FF8000">//&nbsp;write&nbsp;to&nbsp;file&nbsp;the&nbsp;ordered&nbsp;array&nbsp;in&nbsp;binary&nbsp;mode<br /><br /><br />&nbsp;&nbsp;&nbsp;&nbsp;<br /></span><span style="color: #007700">return&nbsp;</span><span style="color: #0000BB">0</span><span style="color: #007700">;<br />}&nbsp;<br /></span><span style="color: #0000BB"></span>
</span>
</code></div>
</div>]]></content:encoded>
			<category domain="https://www.xgiovio.com/forum/forum/programming/cuda-c-c">CUDA C / C++</category>
			<dc:creator>xgiovio</dc:creator>
			<guid isPermaLink="true">https://www.xgiovio.com/forum/forum/programming/cuda-c-c/444-mergesort-n-2-to-1-blocks-1-thread-per-block-int-array-with-size-2-n</guid>
		</item>
		<item>
			<title>A simple Sum</title>
			<link>https://www.xgiovio.com/forum/forum/programming/cuda-c-c/443-a-simple-sum</link>
			<pubDate>Mon, 11 Nov 2013 18:03:19 GMT</pubDate>
			<description><![CDATA[#include &lt;iostream&gt; 
 
__global__ void sum (int * a, int *b, int* c){ 
 
    *c = (*a + *b); 
} 
 
 
int main( void ) {]]></description>
			<content:encoded><![CDATA[
<div class="bbcode_container">
	<div class="bbcode_description">PHP Code:</div>
	
	<div class="bbcode_code" style="max-height:none;"><code><span style="color: #000000">
<span style="color: #0000BB"></span><span style="color: #FF8000">#include&nbsp;&lt;iostream&gt;<br /><br /></span><span style="color: #0000BB">__global__&nbsp;void&nbsp;sum&nbsp;</span><span style="color: #007700">(</span><span style="color: #0000BB">int&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">a</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">int&nbsp;</span><span style="color: #007700">*</span><span style="color: #0000BB">b</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">int</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">c</span><span style="color: #007700">){<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;*</span><span style="color: #0000BB">c&nbsp;</span><span style="color: #007700">=&nbsp;(*</span><span style="color: #0000BB">a&nbsp;</span><span style="color: #007700">+&nbsp;*</span><span style="color: #0000BB">b</span><span style="color: #007700">);<br />}<br /><br /><br /></span><span style="color: #0000BB">int&nbsp;main</span><span style="color: #007700">(&nbsp;</span><span style="color: #0000BB">void&nbsp;</span><span style="color: #007700">)&nbsp;{<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int&nbsp;a</span><span style="color: #007700">,</span><span style="color: #0000BB">b</span><span style="color: #007700">,</span><span style="color: #0000BB">c</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">a</span><span style="color: #007700">=</span><span style="color: #0000BB">10</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">b</span><span style="color: #007700">=</span><span style="color: #0000BB">20</span><span style="color: #007700">;<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: 
#0000BB">d_a_pointer</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">d_b_pointer</span><span style="color: #007700">;<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">int&nbsp;</span><span style="color: #007700">*&nbsp;</span><span style="color: #0000BB">d_c_pointer</span><span style="color: #007700">;<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">cudaMalloc&nbsp;</span><span style="color: #007700">((</span><span style="color: #0000BB">void&nbsp;</span><span style="color: #007700">**)&amp;</span><span style="color: #0000BB">d_a_pointer</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">sizeof</span><span style="color: #007700">(int));<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">cudaMalloc&nbsp;</span><span style="color: #007700">((</span><span style="color: #0000BB">void&nbsp;</span><span style="color: #007700">**)&amp;</span><span style="color: #0000BB">d_b_pointer</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">sizeof</span><span style="color: #007700">(int));<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">cudaMalloc&nbsp;</span><span style="color: #007700">((</span><span style="color: #0000BB">void&nbsp;</span><span style="color: #007700">**)&amp;</span><span style="color: #0000BB">d_c_pointer</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">sizeof</span><span style="color: #007700">(int));<br /><br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">cudaMemcpy</span><span style="color: #007700">(&nbsp;</span><span style="color: #0000BB">d_a_pointer</span><span style="color: #007700">,&nbsp;&amp;</span><span style="color: #0000BB">a</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">sizeof</span><span style="color: 
#007700">(int),&nbsp;</span><span style="color: #0000BB">cudaMemcpyHostToDevice</span><span style="color: #007700">);<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">cudaMemcpy</span><span style="color: #007700">(&nbsp;</span><span style="color: #0000BB">d_b_pointer</span><span style="color: #007700">,&nbsp;&amp;</span><span style="color: #0000BB">b</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">sizeof</span><span style="color: #007700">(int),&nbsp;</span><span style="color: #0000BB">cudaMemcpyHostToDevice</span><span style="color: #007700">);<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">sum</span><span style="color: #007700">&lt;&lt;&lt;</span><span style="color: #0000BB">1</span><span style="color: #007700">,</span><span style="color: #0000BB">1</span><span style="color: #007700">&gt;&gt;&gt;(</span><span style="color: #0000BB">d_a_pointer</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">d_b_pointer</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">d_c_pointer</span><span style="color: #007700">);<br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">cudaMemcpy</span><span style="color: #007700">(&nbsp;&amp;</span><span style="color: #0000BB">c</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">d_c_pointer&nbsp;</span><span style="color: #007700">,&nbsp;</span><span style="color: #0000BB">sizeof</span><span style="color: #007700">(int),&nbsp;</span><span style="color: #0000BB">cudaMemcpyDeviceToHost</span><span style="color: #007700">);<br /><br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: #0000BB">std</span><span style="color: #007700">::</span><span style="color: #0000BB">cout&nbsp;</span><span style="color: #007700">&lt;&lt;&nbsp;</span><span style="color: #0000BB">c</span><span style="color: #007700">;<br /><br />&nbsp;&nbsp;&nbsp;&nbsp;</span><span style="color: 
#0000BB">std</span><span style="color: #007700">::</span><span style="color: #0000BB">cin</span><span style="color: #007700">.</span><span style="color: #0000BB">get</span><span style="color: #007700">();<br /><br /><br />return&nbsp;</span><span style="color: #0000BB">0</span><span style="color: #007700">;<br />}&nbsp;<br /></span><span style="color: #0000BB"></span>
</span>
</code></div>
</div>]]></content:encoded>
			<category domain="https://www.xgiovio.com/forum/forum/programming/cuda-c-c">CUDA C / C++</category>
			<dc:creator>xgiovio</dc:creator>
			<guid isPermaLink="true">https://www.xgiovio.com/forum/forum/programming/cuda-c-c/443-a-simple-sum</guid>
		</item>
	</channel>
</rss>
