mtgp32-cuda.cu File Reference
Sample Program for CUDA 2.2.
More...
#include <stdio.h>
#include <cutil.h>
#include <stdint.h>
#include <inttypes.h>
#include <errno.h>
#include <stdlib.h>
#include "mtgp32-fast.h"
Data Structures |
| struct | mtgp32_kernel_status_t |
| | Kernel I/O. This structure must be initialized before first use. More...
|
Defines |
| #define | __STDC_FORMAT_MACROS 1 |
| #define | __STDC_CONSTANT_MACROS 1 |
| #define | MEXP 23209 |
| #define | N 726 |
| #define | THREAD_NUM 512 |
| #define | LARGE_SIZE (THREAD_NUM * 3) |
| #define | BLOCK_NUM 32 |
| #define | TBL_SIZE 16 |
Functions |
| __device__ uint32_t | para_rec (uint32_t X1, uint32_t X2, uint32_t Y, int bid) |
| | The function of the recursion formula calculation.
|
| __device__ uint32_t | temper (uint32_t V, uint32_t T, int bid) |
| | The tempering function.
|
| __device__ uint32_t | temper_single (uint32_t V, uint32_t T, int bid) |
| | The tempering and converting function.
|
| __device__ void | status_read (uint32_t status[LARGE_SIZE], const mtgp32_kernel_status_t *d_status, int bid, int tid) |
| | Read the internal state vector from kernel I/O data, and put it into shared memory.
|
| __device__ void | status_write (mtgp32_kernel_status_t *d_status, const uint32_t status[LARGE_SIZE], int bid, int tid) |
| | Read the internal state vector from shared memory, and write it into kernel I/O data.
|
| __global__ void | mtgp32_uint32_kernel (mtgp32_kernel_status_t *d_status, uint32_t *d_data, int size) |
| | kernel function.
|
| __global__ void | mtgp32_single_kernel (mtgp32_kernel_status_t *d_status, uint32_t *d_data, int size) |
| | kernel function.
|
| void | make_constant (const mtgp32_params_fast_t params[]) |
| | This function sets constants in device memory.
|
| void | make_kernel_data (mtgp32_kernel_status_t *d_status, mtgp32_params_fast_t params[]) |
| | This function initializes kernel I/O data.
|
| void | print_float_array (const float array[], int size, int block) |
| | This function is used to compare the outputs with those of the C program.
|
| void | print_uint32_array (uint32_t array[], int size, int block) |
| | This function is used to compare the outputs with those of the C program.
|
| void | make_uint32_random (mtgp32_kernel_status_t *d_status, int num_data) |
| | host function.
|
| void | make_single_random (mtgp32_kernel_status_t *d_status, int num_data) |
| | host function.
|
| int | main (int argc, char **argv) |
Variables |
| __constant__ uint32_t | param_tbl [BLOCK_NUM][TBL_SIZE] |
| __constant__ uint32_t | temper_tbl [BLOCK_NUM][TBL_SIZE] |
| __constant__ uint32_t | single_temper_tbl [BLOCK_NUM][TBL_SIZE] |
| __constant__ uint32_t | pos_tbl [BLOCK_NUM] |
| __constant__ uint32_t | sh1_tbl [BLOCK_NUM] |
| __constant__ uint32_t | sh2_tbl [BLOCK_NUM] |
| __constant__ uint32_t | mask = 0xff800000 |
| __shared__ uint32_t | status [LARGE_SIZE] |
| | Shared memory. The generator's internal status vector.
|
Detailed Description
Sample Program for CUDA 2.2.
MTGP32-23209. This program generates 32-bit unsigned integers. The period of the generated integers is $2^{23209}-1$.
This also generates single precision floating point numbers uniformly distributed in the range [1, 2). (float r; 1.0 <= r < 2.0)
- Author:
- Mutsuo Saito (Hiroshima University)
-
Makoto Matsumoto (Hiroshima University)
Copyright (C) 2009 Mutsuo Saito, Makoto Matsumoto and Hiroshima University. All rights reserved.
The new BSD License is applied to this software, see LICENSE.txt
Define Documentation
| #define __STDC_CONSTANT_MACROS 1 |
| #define __STDC_FORMAT_MACROS 1 |
| #define LARGE_SIZE (THREAD_NUM * 3) |
Function Documentation
| int main |
( |
int |
argc, |
|
|
char ** |
argv | |
|
) |
| | |
This function sets constants in device memory.
- Parameters:
-
| [in] | params | input, MTGP32 parameters. |
References BLOCK_NUM, MTGP32_PARAMS_FAST_T::flt_tmp_tbl, mask, MTGP32_PARAMS_FAST_T::mask, param_tbl, MTGP32_PARAMS_FAST_T::pos, pos_tbl, MTGP32_PARAMS_FAST_T::sh1, sh1_tbl, MTGP32_PARAMS_FAST_T::sh2, sh2_tbl, single_temper_tbl, MTGP32_PARAMS_FAST_T::tbl, TBL_SIZE, temper_tbl, and MTGP32_PARAMS_FAST_T::tmp_tbl.
This function initializes kernel I/O data.
- Parameters:
-
| [out] | d_status | output kernel I/O data. |
| [in] | params | MTGP32 parameters, needed for the initialization. |
References BLOCK_NUM, mtgp32_init_state(), and status.
host function.
This function calls the corresponding kernel function.
- Parameters:
-
| [in] | d_status | kernel I/O data. |
| [in] | num_data | number of data to be generated. |
References BLOCK_NUM, and print_float_array().
Referenced by main().
host function.
This function calls the corresponding kernel function.
- Parameters:
-
| [in] | d_status | kernel I/O data. |
| [in] | num_data | number of data to be generated. |
References BLOCK_NUM, and print_uint32_array().
Referenced by main().
kernel function.
This function generates 32-bit unsigned integers in d_data.
- Parameters:
-
| [in,out] | d_status | kernel I/O data |
| [out] | d_data | output |
| [in] | size | number of output data requested. |
References LARGE_SIZE, mask, N, para_rec(), param_tbl, pos_tbl, sh1_tbl, sh2_tbl, status, status_read(), status_write(), temper(), and THREAD_NUM.
| __device__ uint32_t para_rec |
( |
uint32_t |
X1, |
|
|
uint32_t |
X2, |
|
|
uint32_t |
Y, |
|
|
int |
bid | |
|
) |
| | |
The function of the recursion formula calculation.
- Parameters:
-
| [in] | X1 | the farthest part of state array. |
| [in] | X2 | the second farthest part of state array. |
| [in] | Y | a part of state array. |
| [in] | bid | block id. |
- Returns:
- output
References mask, param_tbl, sh1_tbl, and sh2_tbl.
| void print_float_array |
( |
const float |
array[], |
|
|
int |
size, |
|
|
int |
block | |
|
) |
| | |
This function is used to compare the outputs with those of the C program.
- Parameters:
-
| [in] | array | data to be printed. |
| [in] | size | size of array. |
| [in] | block | number of blocks. |
Referenced by make_single_random().
| void print_uint32_array |
( |
uint32_t |
array[], |
|
|
int |
size, |
|
|
int |
block | |
|
) |
| | |
This function is used to compare the outputs with those of the C program.
- Parameters:
-
| [in] | array | data to be printed. |
| [in] | size | size of array. |
| [in] | block | number of blocks. |
Referenced by make_uint32_random().
| __device__ void status_read |
( |
uint32_t |
status[LARGE_SIZE], |
|
|
const mtgp32_kernel_status_t * |
d_status, |
|
|
int |
bid, |
|
|
int |
tid | |
|
) |
| | |
Read the internal state vector from kernel I/O data, and put it into shared memory.
- Parameters:
-
| [out] | status | shared memory. |
| [in] | d_status | kernel I/O data |
| [in] | bid | block id |
| [in] | tid | thread id |
References N, mtgp32_kernel_status_t::status, status, and THREAD_NUM.
| __device__ void status_write |
( |
mtgp32_kernel_status_t * |
d_status, |
|
|
const uint32_t |
status[LARGE_SIZE], |
|
|
int |
bid, |
|
|
int |
tid | |
|
) |
| | |
Read the internal state vector from shared memory, and write it into kernel I/O data.
- Parameters:
-
| [out] | d_status | kernel I/O data |
| [in] | status | shared memory. |
| [in] | bid | block id |
| [in] | tid | thread id |
References N, status, mtgp32_kernel_status_t::status, and THREAD_NUM.
| __device__ uint32_t temper |
( |
uint32_t |
V, |
|
|
uint32_t |
T, |
|
|
int |
bid | |
|
) |
| | |
The tempering function.
- Parameters:
-
| [in] | V | the output value to be tempered. |
| [in] | T | the tempering helper value. |
| [in] | bid | block id. |
- Returns:
- the tempered value.
References temper_tbl.
| __device__ uint32_t temper_single |
( |
uint32_t |
V, |
|
|
uint32_t |
T, |
|
|
int |
bid | |
|
) |
| | |
The tempering and converting function.
By using the preset table, conversion to IEEE format and tempering are done simultaneously.
- Parameters:
-
| [in] | V | the output value to be tempered. |
| [in] | T | the tempering helper value. |
| [in] | bid | block id. |
- Returns:
- the tempered and converted value.
References single_temper_tbl.
Referenced by mtgp32_single_kernel().
Variable Documentation
| __constant__ uint32_t mask = 0xff800000 |
| __constant__ uint32_t param_tbl[BLOCK_NUM][TBL_SIZE] |
| __constant__ uint32_t pos_tbl[BLOCK_NUM] |
| __constant__ uint32_t sh1_tbl[BLOCK_NUM] |
| __constant__ uint32_t sh2_tbl[BLOCK_NUM] |
| __shared__ uint32_t status[LARGE_SIZE] |
Shared memory. The generator's internal status vector.
| __constant__ uint32_t temper_tbl[BLOCK_NUM][TBL_SIZE] |