view example/Cuda/main.cc @ 1963:6988e5478a8c draft

fix CudaScheduler
author Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
date Wed, 12 Feb 2014 17:56:40 +0900
parents 67e50779feb4
children a68dbdf9b429
line wrap: on
line source

#include <stdio.h>

#include <cuda.h>

#define LENGTH 1000

/*
 * Compare the host-side product A[i]*B[i] against C[i] for the first
 * LENGTH elements. Prints "failure." as soon as one element differs,
 * otherwise prints "success." once every element has matched.
 */
void check_data(float* A,float* B,float* C) {
    bool all_match = true;

    // Stop scanning at the first element whose product does not equal C.
    for (int k = 0; all_match && k < LENGTH; ++k) {
        all_match = (A[k]*B[k] == C[k]);
    }

    puts(all_match ? "success." : "failure.");
}

/*
 * Dump the first LENGTH values of C to stdout, one value per line,
 * formatted with "%f".
 */
void print_result(float* C) {
    const float* const last = C + LENGTH;
    for (const float* cur = C; cur != last; ++cur) {
        printf("%f\n", *cur);
    }
}

int main() {
    CUdevice device;
    CUcontext context;
    CUmodule module;
    CUfunction function;

    cuInit(0);
    cuDeviceGet(&device, 0);
    cuCtxCreate(&context, 0, device);
    cuModuleLoad(&module, "multiply.ptx");
    cuModuleGetFunction(&function, module, "multiply");
    
    CUresult ret;
    int size = 8;
    CUstream stream1[size];

    for (int i=0;i<size;i++) {
        ret=cuStreamCreate(&stream1[i],0);
    }
    
    printf("%d\n",ret);


    float* A = new float[LENGTH];
    float* B = new float[LENGTH];
    float* C = new float[LENGTH];
    
    for (int i=0; i<LENGTH; i++) {
        A[i] = (float)(i+1000);
        B[i] = (float)(i+1)/10.f;
    }

    CUdeviceptr devA,devB,devC;

    cuMemAlloc(&devA, LENGTH*sizeof(float));
    cuMemAlloc(&devB, LENGTH*sizeof(float));
    cuMemAlloc(&devC, LENGTH*sizeof(float));

    cuMemcpyHtoDAsync(devA, A, LENGTH*sizeof(float), stream1[0]);
    cuMemcpyHtoDAsync(devB, B, LENGTH*sizeof(float), stream1[0]);
    
    //    void* args[] = {&devA, &devB, &devC};
    void** args=NULL;
    // args=(void**)malloc(sizeof(void*)*8);
    // args[0] = &devA;
    // args[1] = &devB;
    // args[2] = &devC;
    
    ret=cuLaunchKernel(function,
                       LENGTH, 1, 1,
                       1, 1, 1,
                       0, stream1[0], args, NULL);
    printf("%d\n",ret);
    
    cuMemcpyDtoHAsync(C, devC, LENGTH*sizeof(float), stream1[0]);

    //    print_result(C);
    check_data(A, B, C);

    delete[] A;
    delete[] B;
    delete[] C;
    cuMemFree(devA);
    cuMemFree(devB);
    cuMemFree(devC);
    cuModuleUnload(module);
    cuStreamDestroy(stream1[0]);
    cuCtxDestroy(context);

    return 0;
}