/**
 * @file sample64_kernel.cu
 *
 * @brief Sample Program for CUDA 4.0
 *
 * Sample kernel program that calls tinymt64_double().
 */
#include "tinymt64_kernel.cuh"

/**
 * Kernel function.
 * Each thread generates double precision floating point numbers with its
 * own TinyMT64 state and stores the sum of those numbers.
 *
 * -# set parameters
 * -# initialize the structure
 * -# generate
 *
 * Launch layout: the thread index is derived from blockDim.x, blockIdx.x
 * and threadIdx.x only, i.e. a one-dimensional grid of one-dimensional
 * blocks is expected.
 *
 * Preconditions: the kernel performs no bounds check, so for T launched
 * threads param_array must hold at least 3 * T elements and sum_array at
 * least T elements.
 *
 * @param param_array parameters array of TinyMT; thread t reads the three
 *                    consecutive elements starting at index 3 * t
 *                    (mat1, mat2, tmat, in that order)
 * @param sum_array   the array of the result; thread t writes sum_array[t]
 * @param size        number of output data summed up for each thread.
 */
__global__ void sample_double_sum_kernel(uint64_t * param_array,
                                         double* sum_array,
                                         int size)
{
    const int tid = blockDim.x * blockIdx.x + threadIdx.x;

    // Use the same element type as the parameter and widen the index
    // before scaling so that the offset cannot overflow int.
    const uint64_t *p = &param_array[static_cast<size_t>(tid) * 3];

    tinymt64_status_t tinymt64;
    double sum = 0.0;

    // set parameters
    tinymt64.mat1 = p[0];
    tinymt64.mat2 = p[1];
    tinymt64.tmat = p[2];

    // initialize; the thread index is passed as the second argument
    tinymt64_init(&tinymt64, tid);

    for (int i = 0; i < size; i++) {
        // generate
        sum += tinymt64_double(&tinymt64);
        // alternatives left disabled by the original author:
        //sum += tinymt64_double12(&tinymt64);
        //sum += tinymt64_uint64(&tinymt64);
    }

    sum_array[tid] = sum;
}