Mercurial > hg > Game > Cerium
view example/Cuda/main.cc @ 1963:6988e5478a8c draft
fix CudaScheduler
author | Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp> |
---|---|
date | Wed, 12 Feb 2014 17:56:40 +0900 |
parents | 67e50779feb4 |
children | a68dbdf9b429 |
line wrap: on
line source
#include <stdio.h> #include <cuda.h> #define LENGTH 1000 void check_data(float* A,float* B,float* C) { for (int i=0; i<LENGTH; i++) { if (A[i]*B[i]!=C[i]) { puts("failure."); return; } } puts("success."); return; } void print_result(float* C) { for (int i=0; i<LENGTH; i++) { printf("%f\n",C[i]); } } int main() { CUdevice device; CUcontext context; CUmodule module; CUfunction function; cuInit(0); cuDeviceGet(&device, 0); cuCtxCreate(&context, 0, device); cuModuleLoad(&module, "multiply.ptx"); cuModuleGetFunction(&function, module, "multiply"); CUresult ret; int size = 8; CUstream stream1[size]; for (int i=0;i<size;i++) { ret=cuStreamCreate(&stream1[i],0); } printf("%d\n",ret); float* A = new float[LENGTH]; float* B = new float[LENGTH]; float* C = new float[LENGTH]; for (int i=0; i<LENGTH; i++) { A[i] = (float)(i+1000); B[i] = (float)(i+1)/10.f; } CUdeviceptr devA,devB,devC; cuMemAlloc(&devA, LENGTH*sizeof(float)); cuMemAlloc(&devB, LENGTH*sizeof(float)); cuMemAlloc(&devC, LENGTH*sizeof(float)); cuMemcpyHtoDAsync(devA, A, LENGTH*sizeof(float), stream1[0]); cuMemcpyHtoDAsync(devB, B, LENGTH*sizeof(float), stream1[0]); // void* args[] = {&devA, &devB, &devC}; void** args=NULL; // args=(void**)malloc(sizeof(void*)*8); // args[0] = &devA; // args[1] = &devB; // args[2] = &devC; ret=cuLaunchKernel(function, LENGTH, 1, 1, 1, 1, 1, 0, stream1[0], args, NULL); printf("%d\n",ret); cuMemcpyDtoHAsync(C, devC, LENGTH*sizeof(float), stream1[0]); // print_result(C); check_data(A, B, C); delete[] A; delete[] B; delete[] C; cuMemFree(devA); cuMemFree(devB); cuMemFree(devC); cuModuleUnload(module); cuStreamDestroy(stream1[0]); cuCtxDestroy(context); return 0; }