Mercurial > hg > Game > Cerium
view example/Cuda/main.cc @ 1927:4eefec26e3e2 draft
add file
author | Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 31 Jan 2014 07:15:07 +0900 |
parents | cd5bbd8ec5d6 |
children | 67e50779feb4 |
line wrap: on
line source
#include <stdio.h> #include <cuda.h> #define LENGTH 1000 void check_data(float* A,float* B,float* C) { for (int i=0; i<LENGTH; i++) { if (A[i]*B[i]!=C[i]) { puts("failure."); return; } } puts("success."); return; } void print_result(float* C) { for (int i=0; i<LENGTH; i++) { printf("%f\n",C[i]); } } int main() { CUdevice device; CUcontext context; CUmodule module; CUfunction function; CUstream stream; cuInit(0); cuDeviceGet(&device, 0); cuCtxCreate(&context, 0, device); cuModuleLoad(&module, "multiply.ptx"); cuModuleGetFunction(&function, module, "multiply"); cuStreamCreate(&stream,0); float* A = new float[LENGTH]; float* B = new float[LENGTH]; float* C = new float[LENGTH]; for (int i=0; i<LENGTH; i++) { A[i] = (float)(i+1000); B[i] = (float)(i+1)/10.f; } CUdeviceptr devA,devB,devC; cuMemAlloc(&devA, LENGTH*sizeof(float)); cuMemAlloc(&devB, LENGTH*sizeof(float)); cuMemAlloc(&devC, LENGTH*sizeof(float)); cuMemcpyHtoDAsync(devA, A, LENGTH*sizeof(float), stream); cuMemcpyHtoDAsync(devB, B, LENGTH*sizeof(float), stream); cuMemcpyHtoDAsync(devC, C, LENGTH*sizeof(float), stream); // void* args[] = {&devA, &devB, &devC}; void** args=(void**)malloc(sizeof(void*)*3); args[0] = &devA; args[1] = &devB; args[2] = &devC; cuLaunchKernel(function, LENGTH, 1, 1, 1, 1, 1, 0, stream, args, NULL); cuMemcpyDtoHAsync(C, devC, LENGTH*sizeof(float), stream); // print_result(C); check_data(A, B, C); delete[] A; delete[] B; delete[] C; cuMemFree(devA); cuMemFree(devB); cuMemFree(devC); cuModuleUnload(module); cuStreamDestroy(stream); cuCtxDestroy(context); return 0; }