view example/many_task/gpu/sort_test.cc @ 1520:031f26b15ae6 draft

add many_task/gpu
author Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
date Sat, 10 Nov 2012 19:42:22 +0900
parents
children fac06524090b
line wrap: on
line source

#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <unistd.h>
#include <OpenCL/opencl.h>
#include "sort.h"
#include "sort_test.h"
// Initial value of `length`, i.e. the element count used when no
// --length / -l option overrides it.
#define DEFAULT 432

// Declared here, defined elsewhere; not referenced in the visible part of
// this file.
extern int data_length;
extern DataPtr data;

// Timing measurement: timestamps in seconds, written by sort_start() before
// the buffer writes and after the result read-back.
static double st_time;
static double ed_time;
// Number of elements to sort; overwritten by init() when --length / -l is given.
static int length = DEFAULT;

/*
 * Parse the command line.
 *
 * Recognised options:
 *   --length N, -l N   number of elements to sort (stored in `length`)
 *
 * A missing, non-numeric, non-positive or out-of-range N is reported on
 * stderr and ignored, so `length` keeps its previous value (DEFAULT unless an
 * earlier option already changed it).  As with the previous atoi()-based
 * parsing, trailing characters after the number are tolerated.
 *
 * argv must be NULL-terminated (as the argv passed to main() is); argc is
 * not consulted.
 *
 * Returns 0 in all cases.
 */
int
init(int argc, char **argv)
{
    for (int i = 1; argv[i]; ++i) {
        if (strcmp(argv[i], "--length") == 0 || strcmp(argv[i], "-l") == 0) {
            const char *text = argv[i + 1];

            // Option given as the last argument: there is no value to read,
            // and stepping over the terminating NULL would run off argv.
            if (text == NULL) {
                fprintf(stderr, "%s requires a value; using length %d\n",
                        argv[i], length);
                break;
            }
            ++i;

            char *end = NULL;
            errno = 0;
            long value = strtol(text, &end, 10);

            if (end == text || errno == ERANGE || value <= 0 || value > INT_MAX) {
                fprintf(stderr, "invalid length '%s'; using length %d\n",
                        text, length);
                continue;
            }
            length = (int)value;
        }
    }

    return 0;
}

/*
 * Decide into how many pieces `len` elements are split.
 *
 * When an even split over `num` pieces keeps each piece below
 * MAX_BLOCK_SIZE, `num` itself is returned.  Otherwise the result is the
 * number of MAX_BLOCK_SIZE-sized blocks needed to cover `len`, rounded up.
 */
int
get_split_num(int len, int num)
{
    const int per_piece = len / num;

    if (per_piece >= MAX_BLOCK_SIZE) {
        // Round up so a partial trailing block still gets its own piece.
        return (len + MAX_BLOCK_SIZE - 1) / MAX_BLOCK_SIZE;
    }

    return num;
}


/*
 * Current time of day from gettimeofday(), expressed in seconds as a double
 * (whole seconds plus the microsecond part scaled by 1e-6).
 */
static double
getTime()
{
    struct timeval now;
    gettimeofday(&now, NULL);

    const double whole_seconds = now.tv_sec;
    const double fraction = (double)now.tv_usec * 1e-6;

    return whole_seconds + fraction;
}

/*
 * Print the index field of data[0] .. data[size] between two separator
 * lines.  Note that `size` is an inclusive upper bound: size + 1 entries are
 * printed.
 */
void
show( Data *data, int size )
{
    puts("-----------------------------------------------");

    int pos = 0;
    while (pos <= size) {
        printf("data[%02d].index = %d\n", pos, data[pos].index);
        pos++;
    }

    puts("-----------------------------------------------");
}

// File-wide sort job: main() fills in data, data_length and split_num, and
// check_data() reads data and data_length from it.
Sort sorter;

/*
 * Verify that the global sorter's data is in non-decreasing order of index.
 * Reports the first out-of-order adjacent pair, or prints a success message
 * when none is found.
 */
static void
check_data()
{
    // Position of the first element that is greater than its successor,
    // or -1 while no such element has been seen.
    int bad = -1;

    for (int pos = 0; pos < sorter.data_length-1 && bad < 0; pos++) {
        if (sorter.data[pos].index > sorter.data[pos+1].index) {
            bad = pos;
        }
    }

    if (bad >= 0) {
        printf("Data are not sorted at %d. %d > %d \n",bad, sorter.data[bad].index,sorter.data[bad+1].index);
        return;
    }

    printf("Data are sorted\n");
}

void
gpu_init()
{
    clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
    clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id,
                   &ret_num_devices);

    context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);
    command_queue = clCreateCommandQueue(context, device_id, 0, &ret);

    //ファイルオープン

    const char* filename = "QuickSort.cl";
    const char* functionname = "quick_sort";

    int fp = open(filename, O_RDONLY);

    if (!fp) {
        fprintf(stderr, "Failed to load kernel.\n");
        exit(1);
    }

    struct stat stats;
    fstat(fp,&stats);
    off_t size = stats.st_size;

    if (!size) {
        fprintf(stderr, "Failed to load kernel.\n");
        exit(1);
    }

    char *kernel_src_str = new char[size];
    size_t kernel_code_size = read(fp, kernel_src_str, size);
    close(fp);

    program = clCreateProgramWithSource(context, 1, (const char **)&kernel_src_str,
                                                   (const size_t *)&kernel_code_size, &ret);
    clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
    kernel = clCreateKernel(program,functionname, &ret);
}

/*
 * Run the GPU sort on s.data (s.data_length elements) and report the result.
 *
 * Steps: create device buffers for the element count and the data, upload
 * both, launch the kernel as a single task, read the data back into s.data,
 * then print the data, the sortedness check and the elapsed time.
 *
 * The elapsed time covers the two uploads, the kernel run and the read-back
 * (buffer creation is excluded).
 *
 * On return -- successful or not -- the kernel, program, command queue and
 * context created by gpu_init() have been released, so this can only be
 * called once per gpu_init().  An OpenCL failure is reported on stderr and
 * skips the remaining steps and the result output.
 *
 * NOTE(review): check_data() inspects the file-level `sorter`, not the local
 * copy made here; the two agree only if both refer to the same data array,
 * as appears to be the case for the call in main() -- confirm against the
 * definition of Sort.
 */
void
sort_start(Sort s){

    // All locals are declared before the first `goto` so no jump crosses an
    // initialisation.
    Sort sorter = s;
    int length = sorter.data_length;
    size_t data_bytes = sizeof(Data)*length;
    cl_mem mem_count = NULL;
    cl_mem mem_data = NULL;
    const char *failed = NULL;

    ev = NULL;

    // Create the device memory buffers.
    mem_count = clCreateBuffer(context, CL_MEM_READ_ONLY,sizeof(int),NULL, &ret);
    if (ret != CL_SUCCESS) { failed = "clCreateBuffer(count)"; goto cleanup; }
    mem_data = clCreateBuffer(context, CL_MEM_READ_WRITE, data_bytes, NULL, &ret);
    if (ret != CL_SUCCESS) { failed = "clCreateBuffer(data)"; goto cleanup; }

    st_time = getTime();

    // Copy the input to the device (blocking writes).
    ret = clEnqueueWriteBuffer(command_queue, mem_count, CL_TRUE, 0,
                               sizeof(int), &length, 0, NULL, NULL);
    if (ret != CL_SUCCESS) { failed = "clEnqueueWriteBuffer(count)"; goto cleanup; }
    ret = clEnqueueWriteBuffer(command_queue, mem_data, CL_TRUE, 0,
                               data_bytes, sorter.data, 0, NULL, NULL);
    if (ret != CL_SUCCESS) { failed = "clEnqueueWriteBuffer(data)"; goto cleanup; }

    ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&mem_count);
    if (ret != CL_SUCCESS) { failed = "clSetKernelArg(0)"; goto cleanup; }
    ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&mem_data);
    if (ret != CL_SUCCESS) { failed = "clSetKernelArg(1)"; goto cleanup; }

    ret = clEnqueueTask(command_queue, kernel, 0, NULL, &ev);
    if (ret != CL_SUCCESS) { failed = "clEnqueueTask"; goto cleanup; }

    // Fetch the result.  The read waits on the kernel's event and is
    // blocking, so the data is complete when it returns and no separate
    // flush of the queue is needed.
    ret = clEnqueueReadBuffer(command_queue, mem_data, CL_TRUE, 0, data_bytes,
                              sorter.data, 1, &ev, NULL);
    if (ret != CL_SUCCESS) { failed = "clEnqueueReadBuffer"; goto cleanup; }

    ed_time = getTime();
    // show() takes the last index to print, not a count.
    show(sorter.data, length-1);
    check_data();
    printf("Time: %0.6f\n",ed_time-st_time);

cleanup:
    if (failed) {
        fprintf(stderr, "%s failed (OpenCL error %d).\n", failed, (int)ret);
    }

    clReleaseKernel(kernel);
    clReleaseProgram(program);
    if (mem_data) {
        clReleaseMemObject(mem_data);
    }
    if (mem_count) {
        clReleaseMemObject(mem_count);
    }
    if (ev) {
        clReleaseEvent(ev);
    }
    clReleaseCommandQueue(command_queue);
    clReleaseContext(context);

}

int main(int argc, char *argv[]) {

    // 無効な引数ならデフォルトの値として432を設定


    if (argc>1) {
        if (init(argc,argv) < 0) {
            return -1;
        }
    }

    gpu_init();

    sorter.data = new Data[length];
    sorter.data_length = length;
    sorter.split_num = get_split_num(sorter.data_length, 1); // (length, cpu_num)

    for (int i = 0; i < length; i++) {
        sorter.data[i].index = rand()%10000;
        sorter.data[i].ptr   = i;
    }

    sort_start(sorter);
    return 0;
}