Mercurial > hg > Game > Cerium
changeset 1969:a68dbdf9b429 draft
fix GpuScheduler
author | Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 25 Feb 2014 13:43:25 +0900 |
parents | d45b7223515b |
children | e211424ac950 |
files | TaskManager/Gpu/GpuScheduler.cc TaskManager/kernel/ppe/CpuThreads.h example/Cuda/Makefile example/Cuda/main.cc example/Cuda/multiply.cu example/OpenCL/twice.cc example/word_count/main.cc |
diffstat | 7 files changed, 282 insertions(+), 157 deletions(-) [+] |
line wrap: on
line diff
--- a/TaskManager/Gpu/GpuScheduler.cc Thu Feb 13 14:57:04 2014 +0900 +++ b/TaskManager/Gpu/GpuScheduler.cc Tue Feb 25 13:43:25 2014 +0900 @@ -37,7 +37,9 @@ } context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret); for (int i=0;i<STAGE;i++) { - command_queue[i] = clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &ret); + command_queue[i] = clCreateCommandQueue(context, device_id, + CL_QUEUE_PROFILING_ENABLE, + &ret); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); @@ -105,10 +107,10 @@ */ void GpuScheduler::wait_for_event(cl_event* kernel_event, GpuBufferPtr memout, TaskListPtr taskList, int cur) { - if (kernel_event[cur-1] == NOP_REPLY) { + if (kernel_event[cur] == NOP_REPLY) { - } else if (kernel_event[cur-1] != NULL) { - int ret=clWaitForEvents(1,&kernel_event[cur-1]); + } else if (kernel_event[cur] != NULL) { + int ret=clWaitForEvents(1,&kernel_event[cur]); if (ret<0) { error(convert_error_status(ret)); @@ -116,25 +118,24 @@ if (taskList!=NULL){ cl_ulong start = 0; cl_ulong end = 0; - clGetEventProfilingInfo(kernel_event[cur-1],CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, NULL); - clGetEventProfilingInfo(kernel_event[cur-1],CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL); + clGetEventProfilingInfo(kernel_event[cur],CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, NULL); + clGetEventProfilingInfo(kernel_event[cur],CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL); if (taskList->task_start_time == 0) taskList->task_start_time = start; taskList->task_end_time = end; } - clReleaseEvent(kernel_event[cur-1]); - kernel_event[cur-1] = 0; - + clReleaseEvent(kernel_event[cur]); + kernel_event[cur] = 0; } - if (memout[cur-1].size > 0) { - int ret=clWaitForEvents(memout[cur-1].size, memout[cur-1].event); + if (memout[cur].size > 0) { + int ret=clWaitForEvents(memout[cur].size, memout[cur].event); if (ret<0) error(convert_error_status(ret)); - release_buf_event(cur-1,memout); + release_buf_event(cur,memout); } - if (memin[cur-1].size > 0) { - release_buf_event(cur-1,memin); + if (memin[cur].size > 0) { + release_buf_event(cur,memin); } if(reply) { connector->mail_write(reply); @@ -189,8 +190,6 @@ if ((memaddr)params_addr == (memaddr)MY_SPE_COMMAND_EXIT) { // wait_for_envet was called, so all kernel,buf,event have been released. - for (int i=0;i<STAGE;i++) - clFinish(command_queue[i]); for (int i=0;i<STAGE;i++) { destroyGpuBuffer(&memin[i]); destroyGpuBuffer(&memout[i]); @@ -238,8 +237,8 @@ if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; } // parameter is passed as first kernel arg - ret = clEnqueueWriteBuffer(command_queue[cur], memparam, CL_TRUE, 0,sizeof(memaddr)*nextTask->param_count, - nextTask->param(0), 0, NULL, &memin[cur].event[0]); + ret = clEnqueueWriteBuffer(command_queue[cur], memparam, CL_FALSE, 0,sizeof(memaddr)*nextTask->param_count, + nextTask->param(0), 0, NULL, NULL); if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; } ret = clSetKernelArg(kernel[cur], 0, sizeof(memaddr),(void *)&memin[cur].buf[0]); @@ -256,7 +255,7 @@ if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; } ret = clEnqueueWriteBuffer(command_queue[cur], memin[cur].buf[param], CL_TRUE, 0, input_buf->size, input_buf->addr, 0, - NULL, &memin[cur].event[param]); + NULL, NULL); if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; } ret = clSetKernelArg(kernel[cur], param, sizeof(memaddr), (void *)&memin[cur].buf[param]); if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; } @@ -281,10 +280,10 @@ if (tasklist->dim > 0) { ret = clEnqueueNDRangeKernel(command_queue[cur], kernel[cur], tasklist->dim, - NULL, &tasklist->x, 0, memin[cur].size, memin[cur].event, &kernel_event[cur]); + NULL, &tasklist->x, 0, 0, NULL, NULL); } else { - ret = clEnqueueTask(command_queue[cur], kernel[cur], memin[cur].size, - memin[cur].event, &kernel_event[cur]); + ret = clEnqueueTask(command_queue[cur], kernel[cur], 0, + NULL, NULL); } if (ret<0) { gpuTaskError(cur, tasklist, ret); continue; } @@ -295,29 +294,22 @@ int i0 = flag[cur].flip ? i+1 : i ; // flip use memin buffer and memout event ret = clEnqueueReadBuffer(command_queue[cur], mem[cur].buf[i0], CL_FALSE, 0, - output_buf->size, output_buf->addr, 1, &kernel_event[cur], &memout[cur].event[i]); + output_buf->size, output_buf->addr, 0, NULL, &memout[cur].event[i]); if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; } } - // wait kernel[1-cur] and write[1-cur] - // pipeline : cur - // to stop pipeline set 1-cur - if (cur == 0) { - wait_for_event(kernel_event, memout, tasklist, STAGE); - } else { - wait_for_event(kernel_event, memout, tasklist, cur); - } - cur++; - if (STAGE <= cur) - cur = 0; + cur++; // wait write[cur+1] + if (STAGE <= cur) // to stop pipeline move to after wait_for_event + cur = 0; // + wait_for_event(kernel_event, memout, tasklist, cur); } reply = (memaddr)tasklist->waiter; params_addr = (memaddr)tasklist->next; } - if (cur == 0) { - wait_for_event(kernel_event, memout, tasklist, STAGE); - } else { - wait_for_event(kernel_event, memout, tasklist, cur); - } + for (int i=0;i<STAGE;i++) + clFinish(command_queue[i]); + + wait_for_event(kernel_event, memout, tasklist, cur); + unsigned long long wait = 0; (*connector->end_dmawait_profile)(&wait, &(connector->start_time), &(connector->stop_time)); connector->mail_write((memaddr)MY_SPE_STATUS_READY);
--- a/TaskManager/kernel/ppe/CpuThreads.h Thu Feb 13 14:57:04 2014 +0900 +++ b/TaskManager/kernel/ppe/CpuThreads.h Tue Feb 25 13:43:25 2014 +0900 @@ -4,7 +4,9 @@ #include <pthread.h> #include "Threads.h" #include "GpuThreads.h" +#ifdef __CERIUM_CUDA__ #include "CudaThreads.h" +#endif #include "TaskManagerImpl.h" #include "MainScheduler.h" #include "Sem.h"
--- a/example/Cuda/Makefile Thu Feb 13 14:57:04 2014 +0900 +++ b/example/Cuda/Makefile Tue Feb 25 13:43:25 2014 +0900 @@ -15,14 +15,15 @@ LIBS = -F/Library/Frameworks -framework CUDA INCLUDE = -I/Developer/NVIDIA/CUDA-5.5/include -.SUFFIXES: .cc .o +.SUFFIXES: .cc .o .cu .ptx .cc.o: $(CC) $(CFLAGS) $(INCLUDE) -c $< -o $@ +.cu.ptx: + $(NVCC) $(NVCCFLAGS) $< all: $(TARGET) -$(TARGET): $(OBJS) $(TASK_OBJS) $(CUDA_SRCS_TMP) - $(NVCC) $(NVCCFLAGS) $(CUDA_SRCS_TMP) +$(TARGET): $(OBJS) $(TASK_OBJS) $(CUDA_OBJS) $(CC) -o $@ $(OBJS) $(TASK_OBJS) $(LIBS) link:
--- a/example/Cuda/main.cc Thu Feb 13 14:57:04 2014 +0900 +++ b/example/Cuda/main.cc Tue Feb 25 13:43:25 2014 +0900 @@ -1,94 +1,145 @@ #include <stdio.h> - +#include <sys/time.h> +#include <string.h> #include <cuda.h> -#define LENGTH 1000 +#define LENGTH 10000 +#define THREAD 1000 -void check_data(float* A,float* B,float* C) { - for (int i=0; i<LENGTH; i++) { - if (A[i]*B[i]!=C[i]) { - puts("failure."); +static double +getTime() { + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_sec + (double)tv.tv_usec*1e-6; +} + +void +check_data(float* A, float B, float* C) { + for (int i=0; i<LENGTH*THREAD; i++) { + if (A[i]*B!=C[i]) { + puts("multiply failure."); return; } } puts("success."); - return; } void print_result(float* C) { - for (int i=0; i<LENGTH; i++) { + for (int i=0; i<LENGTH*THREAD; i++) { printf("%f\n",C[i]); } } -int main() { +int main(int args, char* argv[]) { + int num_stream = 1; // number of stream + int num_exec = 16; // number of executed kernel + + for (int i=1;argv[i];i++) { + if (strcmp(argv[i], "--stream") == 0 || strcmp(argv[i], "-s") == 0) { + num_stream = atoi(argv[++i]); + } + } + + // initialize and load kernel CUdevice device; CUcontext context; CUmodule module; CUfunction function; + CUstream stream[num_stream]; cuInit(0); cuDeviceGet(&device, 0); - cuCtxCreate(&context, 0, device); + cuCtxCreate(&context, CU_CTX_SCHED_SPIN, device); cuModuleLoad(&module, "multiply.ptx"); cuModuleGetFunction(&function, module, "multiply"); - - CUresult ret; - int size = 8; - CUstream stream1[size]; + for (int i=0;i<num_stream;i++) + cuStreamCreate(&stream[i],0); - for (int i=0;i<size;i++) { - ret=cuStreamCreate(&stream1[i],0); + // memory allocate + CUdeviceptr devA; + CUdeviceptr devB[num_exec]; + CUdeviceptr devOut[num_exec]; + + cuMemAlloc(&devA, LENGTH*THREAD*sizeof(float)); + for (int i=0;i<num_exec;i++) { + cuMemAlloc(&devB[i], sizeof(float)); + cuMemAlloc(&devOut[i], LENGTH*THREAD*sizeof(float)); } - - printf("%d\n",ret); + + // input buffer + float* A = new float[LENGTH*THREAD]; + float* B = new float[num_exec]; + for (int i=0; i<LENGTH*THREAD; i++) + A[i] = (float)(i+1000); + + // output buffer + float** result = new float* [num_exec]; - float* A = new float[LENGTH]; - float* B = new float[LENGTH]; - float* C = new float[LENGTH]; - - for (int i=0; i<LENGTH; i++) { - A[i] = (float)(i+1000); - B[i] = (float)(i+1)/10.f; + for (int i=0;i<num_exec;i++) + result[i] = new float[LENGTH*THREAD]; + + // Synchronous data transfer(host to device) + cuMemcpyHtoD(devA, A, LENGTH*THREAD*sizeof(float)); + + // Asynchronous data transfer(host to device) + int cur = 0; + + for (int i=0;i<num_exec;i++,cur++) { + if (num_stream <= cur) + cur = 0; + B[i] = (float)(i+1); + cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float), stream[cur]); } - CUdeviceptr devA,devB,devC; + cur = 0; - cuMemAlloc(&devA, LENGTH*sizeof(float)); - cuMemAlloc(&devB, LENGTH*sizeof(float)); - cuMemAlloc(&devC, LENGTH*sizeof(float)); + // Asynchronous launch kernel + for (int i=0;i<num_exec;i++,cur++) { + if (num_stream <= cur) + cur=0; + void* args[] = {&devA, &devB[i], &devOut[i]}; + cuLaunchKernel(function, + LENGTH, 1, 1, + THREAD, 1, 1, + 0, 0, args, NULL); + } - cuMemcpyHtoDAsync(devA, A, LENGTH*sizeof(float), stream1[0]); - cuMemcpyHtoDAsync(devB, B, LENGTH*sizeof(float), stream1[0]); + cur = 0; + - // void* args[] = {&devA, &devB, &devC}; - void** args=NULL; - // args=(void**)malloc(sizeof(void*)*8); - // args[0] = &devA; - // args[1] = &devB; - // args[2] = &devC; + // Asynchronous data transfer(device to host) + for (int i=0;i<num_exec;i++,cur++) { + if (num_stream <= cur) + cur = 0; + cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur]); + } + + // wait for stream + for (int i=0;i<num_stream;i++) + cuStreamSynchronize(stream[i]); - ret=cuLaunchKernel(function, - LENGTH, 1, 1, - 1, 1, 1, - 0, stream1[0], args, NULL); - printf("%d\n",ret); - - cuMemcpyDtoHAsync(C, devC, LENGTH*sizeof(float), stream1[0]); + //printf("%0.6f\n",getTime()-start); + + for (int i=0;i<num_exec;i++) + check_data(A,(float)(i+1),result[i]); - // print_result(C); - check_data(A, B, C); + // memory release + cuMemFree(devA); + for (int i=0;i<num_exec;i++) { + cuMemFree(devB[i]); + cuMemFree(devOut[i]); + } + for (int i=0;i<num_stream;i++) + cuStreamDestroy(stream[i]); + cuModuleUnload(module); + cuCtxDestroy(context); delete[] A; delete[] B; - delete[] C; - cuMemFree(devA); - cuMemFree(devB); - cuMemFree(devC); - cuModuleUnload(module); - cuStreamDestroy(stream1[0]); - cuCtxDestroy(context); + for (int i=0;i<num_exec;i++) + delete[] result[i]; + delete[] result; return 0; }
--- a/example/Cuda/multiply.cu Thu Feb 13 14:57:04 2014 +0900 +++ b/example/Cuda/multiply.cu Tue Feb 25 13:43:25 2014 +0900 @@ -1,7 +1,6 @@ extern "C" { - __global__ void multiply(/*float* A, float* B, float* C*/) { + __global__ void multiply(float* A, float* B, float* C) { int index = blockIdx.x * blockDim.x + threadIdx.x; - //C[index] = A[index] * B[index]; - printf("%d\n",index); + C[index] = A[index] * B[0]; } }
--- a/example/OpenCL/twice.cc Thu Feb 13 14:57:04 2014 +0900 +++ b/example/OpenCL/twice.cc Tue Feb 25 13:43:25 2014 +0900 @@ -3,46 +3,73 @@ #include <stdio.h> #include <fcntl.h> #include <sys/stat.h> -#define DEFAULT 432 +#include <sys/time.h> +#include <string.h> + +#define WORKS 10000000 + +static double +getTime() { + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_sec + (double)tv.tv_usec*1e-6; +} void -print_data(int *data, int size, const char *title) +check_data(float* A, float B,float* C) { + for (int i=0;i<WORKS;i++) { + if (A[i]*B!=C[i]) { + puts("multiply failure."); + return; + } + } + puts("success."); +} + +void +print_data(float *data, int size, const char *title) { printf("%s ---\n", title); for ( int i = 0; i < size; i++) { - printf("%2d ", data[i]); + printf("%2f\n", data[i]); } printf("\n"); } int main(int argc, char *argv[]) { - // 無効な引数ならデフォルトの値として432を設定 - int task_array_num = DEFAULT; - - if (argc>1) { - if (atoi(argv[1])) { - task_array_num = atoi(argv[1]); + int num_command_queue = 1; // number of command_queue + int num_exec = 16; // number of kernel + size_t num_work = WORKS; + + for (int i=1;argv[i];i++) { + if (strcmp(argv[i], "--command_queue") == 0 || strcmp(argv[i], "-c") == 0) { + num_command_queue = atoi(argv[++i]); } } + printf("%d\n",num_command_queue); + // initialize cl_platform_id platform_id = NULL; cl_uint ret_num_platforms = 0; cl_device_id device_id = NULL; cl_uint ret_num_devices = 0L; cl_int ret; + cl_event write_event[num_exec]; + cl_event exec_event[num_exec]; - clGetPlatformIDs(1, &platform_id, &ret_num_platforms); - clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, - &ret_num_devices); - + ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms); + ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, + &ret_num_devices); + cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret); - cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret); - - //ファイルオープン - - const char* filename = "twice.cl"; - const char* functionname = "twice"; + cl_command_queue command_queue[num_command_queue]; + for (int i=0;i<num_command_queue;i++) + command_queue[i] = clCreateCommandQueue(context, device_id, 0, &ret); + + // load kernel + const char* filename = "multiply.cl"; + const char* functionname = "multiply"; int fp = open(filename, O_RDONLY); @@ -59,63 +86,116 @@ fprintf(stderr, "Failed to load kernel.\n"); } - char *kernel_src_str = new char[size]; + char *kernel_src_str = (char*)alloca(size+1); size_t kernel_code_size = read(fp, kernel_src_str, size); close(fp); + kernel_src_str[size] = 0; cl_program program = clCreateProgramWithSource(context, 1, (const char **)&kernel_src_str, - (const size_t *)&kernel_code_size, &ret); - clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); - cl_kernel kernel = clCreateKernel(program,functionname, &ret); + 0/*(const size_t *)&kernel_code_size*/, &ret); + ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); + if(ret<0) { + size_t size; + clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL ,&size); + + char* log = new char[size]; + clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, size, log, NULL); + printf("%s\n",log); + delete[] log; + exit(0); + } + cl_kernel multiply = clCreateKernel(program,functionname, &ret); - int *data = new int[task_array_num]; - int *output_data = new int[task_array_num]; - - int count = 0; - for (int c = 0; c < task_array_num ; count++,c++){ - data[c] = c; + // memory allcate + cl_mem memA = clCreateBuffer(context, CL_MEM_READ_WRITE, WORKS*sizeof(float), NULL, &ret); + cl_mem memB[num_exec]; + cl_mem memOut[num_exec]; + for (int i=0;i<num_exec;i++) { + memB[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float), NULL, &ret); + memOut[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, WORKS*sizeof(float), NULL, &ret); } - //メモリバッファの作成 - cl_mem data_count = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int)*count, NULL, &ret); - cl_mem memobj_in = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int)*count, NULL, &ret); - cl_mem memobj_out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(int)*count, NULL, &ret); + // input buffer + float* A = new float[WORKS]; + float* B = new float[num_exec]; + + for (int i=0;i<WORKS;i++) + A[i] = (float)(i+1000); + // output buffer + float** result = new float* [num_exec]; + + for (int i=0;i<num_exec;i++) + result[i] = new float[WORKS]; + + double start = getTime(); - //メモリバッファに入力データを書き込み - ret = clEnqueueWriteBuffer(command_queue, data_count, CL_TRUE, 0, - sizeof(count), &count, 0, NULL, NULL); + // Synchronous data transfer(host to device) + clEnqueueWriteBuffer(command_queue[0], memA, CL_TRUE, 0, + WORKS*sizeof(float), A, 0, NULL, NULL); + + // Asynchronous data transfer(host to device) + int cur = 0; - ret = clEnqueueWriteBuffer(command_queue, memobj_in, CL_TRUE, 0, - sizeof(int)*count, data, 0, NULL, NULL); + for (int i = 0;i<num_exec;i++,cur++){ + if (num_command_queue <= cur) + cur = 0; + B[i] = (float)(i+1); + clEnqueueWriteBuffer(command_queue[cur], memB[i], CL_FALSE, 0, + sizeof(float), &B[i], 0, NULL, NULL); + } - print_data(data, count, "before"); + cur = 0; - clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&data_count); - clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&memobj_in); - clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&memobj_out); + // Asynchronous launch kernel + for (int i = 0;i<num_exec;i++,cur++){ + if (num_command_queue <= cur) + cur = 0; + ret = clSetKernelArg(multiply, 0, sizeof(cl_mem), &memA); + ret = clSetKernelArg(multiply, 1, sizeof(cl_mem), &memB[i]); + ret = clSetKernelArg(multiply, 2, sizeof(cl_mem), &memOut[i]); + ret = clEnqueueNDRangeKernel(command_queue[cur], multiply, 1, NULL, + &num_work, NULL, 0, NULL, NULL); + } + cur = 0; - cl_event ev = NULL; - ret = clEnqueueTask(command_queue, kernel, 0, NULL, &ev); + // Asynchronous data transfer(device to host) + for (int i=0;i<num_exec;i++,cur++) { + if (num_command_queue <= cur) + cur = 0; + clEnqueueReadBuffer(command_queue[cur], memOut[i], CL_FALSE, 0, + WORKS*sizeof(float), result[i], 0, NULL, NULL); + } - //メモリバッファから結果を取得 - ret = clEnqueueReadBuffer(command_queue, memobj_out, CL_TRUE, 0, - sizeof(int)*count, output_data, 1, &ev, NULL); + // wait for command_queue + for (int i=0;i<num_command_queue;i++) + clFlush(command_queue[i]); + + printf("%06f\n",getTime()-start); - print_data(output_data, count, "after"); + // for (int i=0;i<num_exec;i++) { + // // //print_data(result[i],WORKS,"hoge"); + // check_data(A,(float)(i+1),result[i]); + // } - clReleaseKernel(kernel); + // memory release + clReleaseMemObject(memA); + for (int i=0;i<num_exec;i++) { + clReleaseMemObject(memB[i]); + clReleaseMemObject(memOut[i]); + } + for (int i=0;i<num_command_queue;i++) + clReleaseCommandQueue(command_queue[i]); + clReleaseKernel(multiply); clReleaseProgram(program); - clReleaseMemObject(data_count); - clReleaseMemObject(memobj_in); - clReleaseMemObject(memobj_out); - clReleaseCommandQueue(command_queue); clReleaseContext(context); - delete [] kernel_src_str; - delete [] data; - delete [] output_data; + delete[] A; + delete[] B; + for (int i=0;i<num_exec;i++) + delete[] result[i]; + delete[] result; return 0; }
--- a/example/word_count/main.cc Thu Feb 13 14:57:04 2014 +0900 +++ b/example/word_count/main.cc Tue Feb 25 13:43:25 2014 +0900 @@ -63,7 +63,7 @@ st_mmap_t st_mmap; struct stat sb; - if ((fd=open(filename,O_RDONLY,0666))==0) { + if ((fd=open(filename,O_RDWR,0666))==0) { fprintf(stderr,"can't open %s\n",filename); } @@ -78,7 +78,7 @@ printf("fix 4096byte file size %d\n",(int)st_mmap.size); - st_mmap.file_mmap = (char*)mmap(NULL,st_mmap.size,PROT_READ,map,fd,(off_t)0); + st_mmap.file_mmap = (char*)mmap(NULL,st_mmap.size,PROT_READ|PROT_WRITE,map,fd,(off_t)0); if (st_mmap.file_mmap == (caddr_t)-1) { fprintf(stderr,"Can't mmap file\n"); perror(NULL); @@ -294,7 +294,7 @@ t_print = manager->create_task(TASK_PRINT, (memaddr)&w->self,sizeof(memaddr),0,0); w->t_print = t_print; - for(int i=0;i<1;i++) { + for(int i=0;i<4;i++) { /* Task を task_blocks ずつ起動する Task */ /* serialize されていると仮定する... */ HTaskPtr t_exec = manager->create_task(RUN_TASK_BLOCKS,