/*
 * view example/Cuda/main.cc @ 1918:15e8c50ed570 draft
 *
 * add cuda sample, not running
 * author Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
 * date Fri, 24 Jan 2014 07:16:26 +0900
 * parents
 * children d6e033734c12
 * line wrap: on
 * line source
 */

#include <math.h>
#include <stdio.h>

#include <cuda.h>

#define LENGTH 1000

/*
 * Verify that C holds the element-wise product of A and B.
 *
 * A, B, C: arrays of at least LENGTH floats each.
 *
 * Prints "success." when every C[i] matches A[i]*B[i], otherwise prints
 * "failure." at the first mismatch and stops.
 *
 * The comparison uses a small relative tolerance instead of exact equality:
 * the reference product is computed here while C is filled elsewhere, and
 * the two sides are not guaranteed to round identically (for example when
 * the host evaluates the product in extended precision).
 */
void check_data(float* A,float* B,float* C) {
    // A few ulps of single precision, relative to the expected value.
    const float rel_tol = 1e-6f;

    for (int i=0; i<LENGTH; i++) {
        const float expected = A[i]*B[i];

        // Exact match (also covers equal infinities, where the
        // difference below would be NaN).
        if (expected == C[i]) {
            continue;
        }

        const float diff = fabsf(expected - C[i]);
        // Written as !(diff <= tol) so that a NaN on either side is
        // reported as a failure rather than slipping through.
        if (!(diff <= rel_tol * fabsf(expected))) {
            puts("failure.");
            return;
        }
    }
    puts("success.");
    return;
}



int main() {
    CUdevice device;
    CUcontext context;
    CUmodule module;
    CUfunction function;
    //    CUresult result;

    cuInit(0);
    cuDeviceGet(&device, 0);
    cuCtxCreate(&context, 0, device);
    cuModuleLoad(&module, "multiply.cu");
    cuModuleGetFunction(&function, module, "multiply");

    float* A = new float[LENGTH];
    float* B = new float[LENGTH];
    float* C = new float[LENGTH];

    for (int i=0; i<LENGTH; i++) {
        A[i] = (float)(i+1000);
        B[i] = (float)(i+1)/10.f;
    }

    CUdeviceptr devA,devB,devC;

    cuMemAlloc(&devA, LENGTH*sizeof(float));
    cuMemAlloc(&devB, LENGTH*sizeof(float));
    cuMemAlloc(&devC, LENGTH*sizeof(float));

    cuMemcpyHtoD(devA, A, LENGTH*sizeof(float));
    cuMemcpyHtoD(devB, B, LENGTH*sizeof(float));
    cuMemcpyHtoD(devC, C, LENGTH*sizeof(float));

    cuParamSetv(function, 0, A, LENGTH*sizeof(float));
    cuParamSetv(function, 0, B, LENGTH*sizeof(float));
    cuParamSetv(function, 0, C, LENGTH*sizeof(float));

    cuLaunchKernel(function,
                   LENGTH, 1, 1,
                   1, 1, 1,
                   0, NULL, NULL, NULL);

    cuMemcpyDtoH(C, devC, LENGTH*sizeof(float));

    check_data(A, B, C);

    delete[] A;
    delete[] B;
    delete[] C;
    cuModuleUnload(module);
    cuMemFree(devA);
    cuMemFree(devB);
    cuMemFree(devC);
    

    return 0;
}