view example/Cuda/main.cc @ 1927:4eefec26e3e2 draft

add file
author Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
date Fri, 31 Jan 2014 07:15:07 +0900
parents cd5bbd8ec5d6
children 67e50779feb4
line wrap: on
line source

#include <math.h>
#include <stdio.h>

#include <cuda.h>

// Number of float elements in each of the host/device vectors A, B and C;
// also used as the grid's x-dimension (block count) for the kernel launch in main().
#define LENGTH 1000

// Verifies that C holds the element-wise product of A and B.
//
// A, B, C: host arrays of at least LENGTH floats each.
//
// Prints "success." to stdout when every element matches, or "failure." at
// the first mismatch. C is expected to have been produced on the GPU, so the
// comparison uses a combined absolute/relative tolerance rather than exact
// equality: the host may evaluate A[i]*B[i] in a wider intermediate precision
// than the device, and bit-exact agreement is then not guaranteed.
void check_data(float* A,float* B,float* C) {
    const float abs_tol = 1e-6f;
    const float rel_tol = 1e-5f;

    for (int i=0; i<LENGTH; i++) {
        // Storing into a float narrows any wider intermediate result.
        const float expected = A[i]*B[i];

        // Exact match: fast path, and keeps equal infinities passing
        // (inf - inf below would otherwise yield NaN).
        if (expected == C[i]) {
            continue;
        }

        const float allowed = abs_tol + rel_tol*fabsf(expected);
        // Written as !(diff <= allowed) so that a NaN anywhere in the
        // comparison is treated as a failure instead of slipping through.
        if (!(fabsf(expected - C[i]) <= allowed)) {
            puts("failure.");
            return;
        }
    }
    puts("success.");
}

// Dumps the first LENGTH elements of C to stdout, one "%f"-formatted value
// per line, in index order.
void print_result(float* C) {
    const float* cursor = C;
    for (int remaining = LENGTH; remaining > 0; --remaining) {
        printf("%f\n", *cursor++);
    }
}

int main() {
    CUdevice device;
    CUcontext context;
    CUmodule module;
    CUfunction function;
    CUstream stream;

    cuInit(0);
    cuDeviceGet(&device, 0);
    cuCtxCreate(&context, 0, device);
    cuModuleLoad(&module, "multiply.ptx");
    cuModuleGetFunction(&function, module, "multiply");
    
    cuStreamCreate(&stream,0);


    float* A = new float[LENGTH];
    float* B = new float[LENGTH];
    float* C = new float[LENGTH];
    
    for (int i=0; i<LENGTH; i++) {
        A[i] = (float)(i+1000);
        B[i] = (float)(i+1)/10.f;
    }

    CUdeviceptr devA,devB,devC;

    cuMemAlloc(&devA, LENGTH*sizeof(float));
    cuMemAlloc(&devB, LENGTH*sizeof(float));
    cuMemAlloc(&devC, LENGTH*sizeof(float));

    cuMemcpyHtoDAsync(devA, A, LENGTH*sizeof(float), stream);
    cuMemcpyHtoDAsync(devB, B, LENGTH*sizeof(float), stream);
    cuMemcpyHtoDAsync(devC, C, LENGTH*sizeof(float), stream);

    //    void* args[] = {&devA, &devB, &devC};
    void** args=(void**)malloc(sizeof(void*)*3);
    args[0] = &devA;
    args[1] = &devB;
    args[2] = &devC;
    
    cuLaunchKernel(function,
                   LENGTH, 1, 1,
                   1, 1, 1,
                   0, stream, args, NULL);
    
    cuMemcpyDtoHAsync(C, devC, LENGTH*sizeof(float), stream);

    //    print_result(C);
    check_data(A, B, C);

    delete[] A;
    delete[] B;
    delete[] C;
    cuMemFree(devA);
    cuMemFree(devB);
    cuMemFree(devC);
    cuModuleUnload(module);
    cuStreamDestroy(stream);
    cuCtxDestroy(context);

    return 0;
}