view example/word_count/cuda/Exec_Data_Parallel.cu @ 1941:f19885ea776d draft

add wordcount for cuda. fix CudaScheduler. add makefile
author Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
date Tue, 04 Feb 2014 02:18:07 +0900
parents
children
line wrap: on
line source

extern "C" {
    /*
     * wordcount_parallel: each thread scans one chunk of rbuf and writes a
     * four-element summary record for that chunk into wbuf.
     *
     * param (all elements are read by every thread):
     *   param[0]  index of the first chunk handled by this launch; the
     *             thread's global id is added to it
     *   param[1]  distance in bytes between the starts of consecutive chunks
     *   param[2]  number of bytes each thread scans from its chunk start
     *   param[3]  distance in unsigned long elements between consecutive
     *             output records
     * rbuf        input bytes; chunk k starts at rbuf + k * param[1]
     * wbuf        output records; record k starts at wbuf + k * param[3]
     *
     * Output record layout:
     *   [0]  number of separator runs (0x20 space / 0x0A newline) found in
     *        the chunk, not counting a run that begins at the first byte
     *   [1]  number of 0x0A bytes in the chunk
     *   [2]  1 if the first byte is not a separator, else 0
     *   [3]  1 if the last scanned byte is not a separator, else 0
     * Presumably the caller combines [2]/[3] of neighbouring records to
     * account for words that span a chunk boundary -- confirm against the
     * host-side reducer.
     *
     * Preconditions (not checked here):
     *   - The launch is 1-D; only the .x components of the built-in index
     *     variables are used.
     *   - There is no guard on the thread index, so every launched thread
     *     must map to a valid chunk and a valid output record.
     *   - param[2] must be at least 1: the head check reads byte 0 and the
     *     tail check reads byte (param[2] - 1) of the chunk.
     *   - param[3] must be at least 4, otherwise records overlap.
     */
    __global__ void
    wordcount_parallel(long *param,
                       char *rbuf,
                       unsigned long *wbuf)
    {
        const long task_spawned  = param[0];
        const long division_size = param[1];
        const long length        = param[2];
        const long out_size      = param[3];

        // Keep the chunk index in long: casting param[0] to int and adding an
        // unsigned 32-bit thread id would wrap for indices beyond 2^31 - 1.
        const long allocation =
            task_spawned + (long)blockIdx.x * blockDim.x + threadIdx.x;

        const char *i_data = rbuf + allocation * division_size;
        unsigned long *o_data = wbuf + allocation * out_size;
        unsigned long *head_tail_flag = o_data + 2;

        // Head flag: does the chunk begin inside a word?
        const unsigned long head_in_word =
            (i_data[0] != 0x20) && (i_data[0] != 0x0A);
        head_tail_flag[0] = head_in_word;

        // A separator at byte 0 would be counted by the scan below; starting
        // at -1 cancels it so that only separator runs that follow a word
        // byte inside this chunk are counted.
        long word_num = head_in_word ? 0 : -1;
        long line_num = 0;
        long word_flag = 0;   // 1 while the most recent byte was a separator
        long i = 0;

        for (; i < length; i++) {
            const char c = i_data[i];
            if (c == 0x20) {
                word_flag = 1;
            } else if (c == 0x0A) {
                line_num += 1;
                word_flag = 1;
            } else {
                // First word byte after a separator run closes that run.
                word_num += word_flag;
                word_flag = 0;
            }
        }

        // A separator run that reaches the end of the chunk is still pending.
        word_num += word_flag;

        // Tail flag: does the chunk end inside a word? (i == length here
        // whenever length >= 1.)
        head_tail_flag[1] = (i_data[i - 1] != 0x20) && (i_data[i - 1] != 0x0A);

        o_data[0] = (unsigned long)word_num;
        o_data[1] = (unsigned long)line_num;
    }
}