changeset 381:b81492c74d2b

Create examples directory
author Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
date Mon, 24 Jul 2017 16:52:09 +0900
parents 783017f6dfbe
children f1d111e293c4
files src/parallel_execution/CMakeLists.txt src/parallel_execution/context.h src/parallel_execution/examples/CUDAtwice.cbc src/parallel_execution/examples/CUDAtwice.cu src/parallel_execution/examples/InitIntegerDataGears.cbc src/parallel_execution/examples/SortArray.cbc src/parallel_execution/examples/add.cbc src/parallel_execution/examples/bitonicSort.cbc src/parallel_execution/examples/bitonicSort/SortArray.cbc src/parallel_execution/examples/bitonicSort/bitonicSort.cbc src/parallel_execution/examples/bitonicSort/sort.cbc src/parallel_execution/examples/bitonicSort/swap.cbc src/parallel_execution/examples/calc.cbc src/parallel_execution/examples/calc/add.cbc src/parallel_execution/examples/calc/calc.cbc src/parallel_execution/examples/calc/initIntegerDataGears.cbc src/parallel_execution/examples/calc/mult.cbc src/parallel_execution/examples/mult.cbc src/parallel_execution/examples/sort.cbc src/parallel_execution/examples/swap.cbc src/parallel_execution/examples/twice.cbc src/parallel_execution/examples/twice/CUDAtwice.cbc src/parallel_execution/examples/twice/CUDAtwice.cu src/parallel_execution/examples/twice/twice.cbc
diffstat 24 files changed, 855 insertions(+), 855 deletions(-) [+]
line wrap: on
line diff
--- a/src/parallel_execution/CMakeLists.txt	Sun Jul 23 07:28:32 2017 +0900
+++ b/src/parallel_execution/CMakeLists.txt	Mon Jul 24 16:52:09 2017 +0900
@@ -59,21 +59,21 @@
   TARGET
       twice
   SOURCES
-      examples/twice.cbc CPUWorker.cbc twice.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc TimeImpl.cbc
+      examples/twice/twice.cbc CPUWorker.cbc twice.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc TimeImpl.cbc
 )
 
 GearsCommand(
   TARGET
       calc
   SOURCES
-      examples/calc.cbc examples/add.cbc examples/mult.cbc examples/InitIntegerDataGears.cbc CPUWorker.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc
+      examples/calc/calc.cbc examples/calc/add.cbc examples/calc/mult.cbc examples/calc/initIntegerDataGears.cbc CPUWorker.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc
 )
 
 GearsCommand(
   TARGET
       bitonicSort
   SOURCES
-      examples/bitonicSort.cbc examples/swap.cbc CPUWorker.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc OneDimIterator.cbc TimeImpl.cbc
+      examples/bitonicSort/bitonicSort.cbc examples/bitonicSort/swap.cbc CPUWorker.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc OneDimIterator.cbc TimeImpl.cbc
 )
 
 if (${USE_CUDA})
@@ -81,7 +81,7 @@
       TARGET
           CUDAtwice
       SOURCES 
-          main.cbc RedBlackTree.cbc compare.c SingleLinkedStack.cbc CPUWorker.cbc time.cbc twice.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc SemaphoreImpl.cbc  CUDAWorker.cbc examples/CUDAtwice.cbc examples/CUDAtwice.cu cuda.c
+          main.cbc RedBlackTree.cbc compare.c SingleLinkedStack.cbc CPUWorker.cbc time.cbc twice.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc SemaphoreImpl.cbc  CUDAWorker.cbc examples/twice/CUDAtwice.cbc examples/twice/CUDAtwice.cu cuda.c
     )
     set_target_properties(CUDAtwice PROPERTIES COMPILE_FLAGS "-Wall -g -DUSE_CUDAWorker=1 -DUSE_CUDA_MAIN_THREAD")
 endif()
@@ -104,12 +104,12 @@
   TARGET
   oneDimIterator_test
   SOURCES
-      test/oneDimIterator_test.cbc examples/InitIntegerDataGears.cbc CPUWorker.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc OneDimIterator.cbc TimeImpl.cbc
+      test/oneDimIterator_test.cbc CPUWorker.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc OneDimIterator.cbc TimeImpl.cbc
 )
 
 GearsCommand(
   TARGET
       sort
   SOURCES
-      examples/sort.cbc
+      examples/bitonicSort/sort.cbc
 )
--- a/src/parallel_execution/context.h	Sun Jul 23 07:28:32 2017 +0900
+++ b/src/parallel_execution/context.h	Mon Jul 24 16:52:09 2017 +0900
@@ -234,10 +234,10 @@
         struct Element* next;
     } Element;
     struct Array {
-        union Data* Array;
-        union Data* data;
-        enum Code get;
-        enum Code set;
+        int size; 
+        int index; 
+        int prefix; 
+        int* array;
     } Array;
     struct Tree {
         union Data* tree;
--- a/src/parallel_execution/examples/CUDAtwice.cbc	Sun Jul 23 07:28:32 2017 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,28 +0,0 @@
-#include <stdio.h>
-#include "../../context.h"
-
-
-extern void CUDAExec(struct Context* context, Array* array, LoopCounter *loopCounter);
-
-__code CUDAtwice(struct Context* context, struct LoopCounter* loopCounter, int index, int prefix, int* array, struct Context* workerContext) {
-    int i = loopCounter->i;
-    if (i < prefix) {
-        array[i+index*prefix] = array[i+index*prefix]*2;
-        loopCounter->i++;
-
-        goto meta(context, C_twice);
-    }
-
-    loopCounter->i = 0;
-    goto meta(workerContext, workerContext->next);
-}
-
-__code CUDAtwice_stub(struct Context* context) {
-printf("CUdAtwice stub\n");
-    struct LoopCounter* loopCounter = &context->data[context->dataNum]->LoopCounter;
-    struct Array* array = &context->data[context->dataNum+1]->Array;
-    CUDAExec(context,array,loopCounter);
-
-    //continuationにそってGPUworkerに戻る
-    goto meta(context, context->next);
-}
--- a/src/parallel_execution/examples/CUDAtwice.cu	Sun Jul 23 07:28:32 2017 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-extern "C" {
-
-#include <stdio.h>
-
-//    __global__ void twice(struct LoopCounter* loopCounter, int prefix ,int* array) {
-//         int index = blockIdx.x * blockDim.x + threadIdx.x;
-//         printf("array %p, blockIdx.x = %d, blockDim.x = %d, threadIdx.x = %d\n");
-//         int i = 0;
-//         while (i < prefix) {
-//              array[i+index*prefix] = array[i+index*prefix]*2;
-//         }
-//    }
-
-    struct LoopCounter {
-        int i;
-    } LoopCounter;
-
-    __global__ void twice(struct LoopCounter* loopCounter, int index, int prefix, int* array) {
-         printf("array %p, index = %d, prefix = %d loopCounter->i %d\n",array,index,prefix,loopCounter->i);
-C_twice:
-        int i = loopCounter->i;
-        if (i < prefix) {
-            array[i+index*prefix] = array[i+index*prefix]*2;
-            loopCounter->i++;
-
-            goto C_twice;
-        }
-
-        loopCounter->i = 0;
-    }
-
-
-}
--- a/src/parallel_execution/examples/InitIntegerDataGears.cbc	Sun Jul 23 07:28:32 2017 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,25 +0,0 @@
-#include "../../context.h"
-#include <stdio.h>
-__code initIntegerDataGears(__code next(struct Integer* output1, struct Integer* output2, struct Integer* output3, ...)) {
-    struct Integer* output1 = *O_output1;
-    struct Integer* output2 = *O_output2;
-    struct Integer* output3 = *O_output3;
-    output1->value = 1;
-    output2->value = 2;
-    output3->value = 3;
-    *O_output1 = output1;
-    *O_output2 = output2;
-    *O_output3 = output3;
-    goto meta(context, next);
-}
-
-__code initIntegerDataGears_stub(struct Context* context) {
-    Integer** O_output1 = (struct Integer **)&context->data[context->odg];
-    Integer** O_output2 = (struct Integer **)&context->data[context->odg+1];
-    Integer** O_output3 = (struct Integer **)&context->data[context->odg+2];
-    goto initIntegerDataGears(context,
-            context->next,
-            O_output1,
-            O_output2,
-            O_output3);
-}
--- a/src/parallel_execution/examples/SortArray.cbc	Sun Jul 23 07:28:32 2017 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-typedef struct SortArray<Impl>{
-    SortArray *sortArray;
-    Integer **array;
-    int loop_counter;
-    int loop_counter2;
-    int loop_counter3;
-    int sort_finish;
-    __code print(struct SortArray* sortArray, __code next(...));
-    __code make_array(struct SortArray* sortArray, __code next(...));
-    __code bitonic_sort(struct SortArray* sortArray, __code next(...));
-    __code kernel(struct SortArray* sortArray, __code next(...));
-    __code kernel2(struct SortArray* sortArray, __code next(...));
-    __code swap(struct SortArray* sortArray, __code next(...));
-} SortArray;
--- a/src/parallel_execution/examples/add.cbc	Sun Jul 23 07:28:32 2017 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,18 +0,0 @@
-#include "../../context.h"
-#include <stdio.h>
-__code add(struct Integer* input1, struct Integer* input2, __code next(struct Integer* output, ...)) {
-    struct Integer* output = *O_output;
-    output->value = input1->value + input2->value;
-    printf("%d + %d = %d\n", input1->value, input2->value, output->value);
-    *O_output = output;
-    goto meta(context, next);
-}
-
-__code add_stub(struct Context* context) {
-    Integer** O_output = (struct Integer **)&context->data[context->odg];
-    goto add(context,
-            &context->data[context->idg]->Integer,
-            &context->data[context->idg + 1]->Integer,
-            context->next,
-            O_output);
-}
--- a/src/parallel_execution/examples/bitonicSort.cbc	Sun Jul 23 07:28:32 2017 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,211 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <math.h>
-
-#include "../../context.h"
-
-int cpu_num = 1;
-int length = 100;
-int gpu_num = 0;
-int CPU_ANY = -1;
-int CPU_CUDA = -1;
-
-void *start_taskManager(struct Context *context) {
-    goto initDataGears(context, Gearef(context, LoopCounter), Gearef(context, TaskManager));
-    return 0;
-}
-
-#ifdef USE_CUDAWorker
-#ifdef USE_CUDA_MAIN_THREAD
-extern volatile int cuda_initialized;
-#endif
-#endif
-
-__code initDataGears(struct LoopCounter* loopCounter, struct TaskManager* taskManager) {
-    // loopCounter->tree = createRedBlackTree(context);
-    loopCounter->i = 0;
-    taskManager->taskManager = (union Data*)createTaskManagerImpl(context, cpu_num, gpu_num, 0);
-#ifdef USE_CUDAWorker
-#ifdef USE_CUDA_MAIN_THREAD
-    while(! cuda_initialized) {};
-#endif
-#endif
-    goto meta(context, C_code1);
-}
-
-__code initDataGears_stub(struct Context* context) {
-    struct TaskManager* taskManager =  Gearef(context, TaskManager);
-    taskManager->taskManager = 0;
-#if (! defined(USE_CUDAWorker) || ! defined(USE_CUDA_MAIN_THREAD))
-    struct LoopCounter* loopCounter = Gearef(context, LoopCounter);
-    goto initDataGears(context, loopCounter, taskManager);
-#else
-    cuda_initialized = 0;
-    pthread_t thread;
-    pthread_create(&thread, NULL, (void*)&start_taskManager, context);
-    while (taskManager->taskManager == 0);
-    TaskManager *t = (TaskManager*)taskManager->taskManager;
-    TaskManagerImpl *im = (TaskManagerImpl*)t->taskManager;
-    struct Queue *q = (Queue *)im->workers[0];
-    createCUDAWorker(context,0,q, im);
-    pthread_join(thread,0);
-    exit(0);
-#endif
-}
-
-__code code1(struct Time* time) {
-    printf("cpus:\t\t%d\n", cpu_num);
-    printf("gpus:\t\t%d\n", gpu_num);
-    printf("length:\t\t%d\n", length);
-    /* puts("queue"); */
-    /* print_queue(context->data[ActiveQueue]->queue.first); */
-    /* puts("tree"); */
-    /* print_tree(context->data[Tree]->tree.root); */
-    /* puts("result"); */
-
-    time->time = (union Data*)createTimeImpl(context);
-    time->next = C_createTask1;
-    goto meta(context, time->time->Time.start);
-}
-
-__code createTask1(struct LoopCounter* loopCounter, struct TaskManager* taskManager) {
-    int logN = (int)log2((float)length);
-    struct Context** tasks = (struct Context**)ALLOC_ARRAY(context, Context, logN*(1+logN)/2 + 2);
-    int taskCount = 0;
-
-    struct SortArray* outputSortArray = &ALLOCATE_DATA_GEAR(context, SortArray)->SortArray;
-    struct SortArray* inputSortArray = outputSortArray;
-
-    // par goto makeArray(outputSortArray, _exit);
-    struct Context* task = NEW(struct Context);
-    initContext(task);
-    task->next = C_makeArray;
-    task->idgCount = 0;
-    task->idg = task->dataNum;
-    task->maxIdg = task->idg;
-    task->odg = task->maxIdg;
-    task->data[task->odg] = (union Data*)outputSortArray;
-    task->maxOdg = task->odg + 1;
-    tasks[taskCount] = task;
-
-    taskCount++;
-    for (int i=2; i <= length; i=2*i) {
-        int first = 1;
-        for (int j=i>>1; j > 0; j=j>>1) {
-            outputSortArray = &ALLOCATE_DATA_GEAR(context, SortArray)->SortArray;
-            struct Context* task = NEW(struct Context);
-            initContext(task);
-            struct Integer* integer1 = &ALLOCATE_DATA_GEAR(context, Integer)->Integer;
-            struct Integer* integer2 = &ALLOCATE_DATA_GEAR(context, Integer)->Integer;
-            integer1->value = j;
-            integer2->value = first;
-
-            task->next = C_bitonicSwap;
-            task->iterator = createOneDimIterator(context, length/2);
-            task->idgCount = 1;
-            task->idg = task->dataNum;
-            task->data[task->idg] = (union Data*)inputSortArray;
-            task->data[task->idg+1] = (union Data*)integer1;
-            task->data[task->idg+2] = (union Data*)integer2;
-            task->maxIdg = task->idg + 3;
-            task->odg = task->maxIdg;
-            task->data[task->odg] = (union Data*)outputSortArray;
-            task->maxOdg = task->odg + 1;
-            tasks[taskCount] = task;
-            taskCount++;
-            first = 0;
-            inputSortArray = outputSortArray;
-        }
-    }
-
-    // par goto printArray(inputSortArray, __exit)
-    task = NEW(struct Context);
-    initContext(task);
-    task->next = C_printArray;
-    task->idgCount = 1;
-    task->idg = task->dataNum;
-    task->data[task->idg] = (union Data*)inputSortArray;
-    task->maxIdg = task->idg + 1;
-    task->odg = task->maxIdg;
-    task->maxOdg = task->odg;
-    tasks[taskCount] = task;
-
-    taskManager->contexts = tasks;
-    // goto code2();
-    taskManager->next1 = C_code2;
-    goto meta(context, taskManager->taskManager->TaskManager.spawnTasks);
-    //goto meta(context, taskManager->taskManager->TaskManager.shutdown);
-}
-
-__code code2(struct LoopCounter* loopCounter, struct TaskManager* taskManager, struct Time* time) {
-    sleep(2);
-    taskManager->next = C_exit_code;
-    goto meta(context, taskManager->taskManager->TaskManager.shutdown);
-}
-
-void init(int argc, char** argv) {
-    for (int i = 1; argv[i]; ++i) {
-        if (strcmp(argv[i], "-cpu") == 0)
-            cpu_num = (int)atoi(argv[i+1]);
-        else if (strcmp(argv[i], "-l") == 0)
-            length = (int)atoi(argv[i+1]);
-        else if (strcmp(argv[i], "-cuda") == 0) {
-            gpu_num = 1;
-            CPU_CUDA = 0;
-        }
-    }
-}
-
-int main(int argc, char** argv) {
-    init(argc, argv);
-    struct Context* main_context = NEW(struct Context);
-    initContext(main_context);
-    main_context->next = C_initDataGears;
-
-    goto start_code(main_context);
-}
-
-__code makeArray(__code next(struct SortArray* output, ...)){
-    struct SortArray* output = *O_output;
-    if (output->loopCounter == 0){
-        output->array = (Integer**)ALLOC_ARRAY(context, Integer, length);
-        srand((unsigned) time(NULL));
-    }
-    if (output->loopCounter == GET_SIZE(output->array)){
-        printf("created Array\n");
-        output->loopCounter = 0;
-        goto meta(context, next);
-    }
-    struct Integer* integer = new Integer();
-    integer->value = rand() % 1000;
-    output->array[output->loopCounter] = integer;
-    printf("%d\n", output->array[output->loopCounter]->value);
-    output->loopCounter++;
-    *O_output = output;
-    goto meta(context, C_makeArray);
-}
-
-__code makeArray_stub(struct Context* context) {
-    SortArray** O_output = (struct SortArray **)&context->data[context->odg];
-    goto makeArray(context,
-                   context->next,
-                   O_output);
-}
-
-__code printArray(struct SortArray* inputArray, __code next(...)){
-    if (inputArray->loopCounter == GET_SIZE(inputArray->array)){
-        inputArray->loopCounter = 0;
-        goto meta(context, next);
-    }
-    printf("%d\n", inputArray->array[inputArray->loopCounter]->value);
-    inputArray->loopCounter++;
-    goto meta(context, C_printArray);
-}
-
-__code printArray_stub(struct Context* context) {
-    goto printArray(context,
-                   &context->data[context->idg]->SortArray,
-                   context->next);
-}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/parallel_execution/examples/bitonicSort/SortArray.cbc	Mon Jul 24 16:52:09 2017 +0900
@@ -0,0 +1,14 @@
+typedef struct SortArray<Impl>{ // State and code-segment slots for the sort example; NOTE(review): `<Impl>` is not standard C, presumably consumed by the project's code generator.
+    SortArray *sortArray; // Instance that actually holds the fields below (sort.cbc accesses everything through this pointer).
+    Integer **array; // Elements being sorted, stored as pointers to Integer.
+    int loop_counter; // sort.cbc: fill/print position, and the outer stage index during sorting.
+    int loop_counter2; // sort.cbc: inner stage index (advanced by kernel2, reset by kernel).
+    int loop_counter3; // sort.cbc: element index within one pass (advanced by swap).
+    int sort_finish; // sort.cbc: set to 1 by bitonic_sort when all stages are done; checked by print.
+    __code print(struct SortArray* sortArray, __code next(...));
+    __code make_array(struct SortArray* sortArray, __code next(...));
+    __code bitonic_sort(struct SortArray* sortArray, __code next(...));
+    __code kernel(struct SortArray* sortArray, __code next(...));
+    __code kernel2(struct SortArray* sortArray, __code next(...));
+    __code swap(struct SortArray* sortArray, __code next(...));
+} SortArray;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/parallel_execution/examples/bitonicSort/bitonicSort.cbc	Mon Jul 24 16:52:09 2017 +0900
@@ -0,0 +1,211 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <math.h>
+
+#include "../../../context.h"
+
+int cpu_num = 1; // CPU worker count handed to createTaskManagerImpl; overridden by "-cpu N" in init().
+int length = 100; // Number of elements to sort; overridden by "-l N" in init().
+int gpu_num = 0; // GPU worker count handed to createTaskManagerImpl; set to 1 by "-cuda" in init().
+int CPU_ANY = -1; // Not referenced in this file; NOTE(review): presumably a worker-selection constant read elsewhere.
+int CPU_CUDA = -1; // Set to 0 by "-cuda" in init(); not otherwise referenced in this file.
+
+void *start_taskManager(struct Context *context) { // Thread entry point handed to pthread_create by initDataGears_stub in the CUDA main-thread build.
+    goto initDataGears(context, Gearef(context, LoopCounter), Gearef(context, TaskManager)); // Continue into initDataGears with this context's LoopCounter and TaskManager gears.
+    return 0; // NOTE(review): presumably only reached if the goto above comes back; confirm against CbC semantics.
+}
+
+#ifdef USE_CUDAWorker
+#ifdef USE_CUDA_MAIN_THREAD
+extern volatile int cuda_initialized;
+#endif
+#endif
+
+__code initDataGears(struct LoopCounter* loopCounter, struct TaskManager* taskManager) { // Resets the loop counter, creates the task manager, then continues to code1.
+    // loopCounter->tree = createRedBlackTree(context);
+    loopCounter->i = 0;
+    taskManager->taskManager = (union Data*)createTaskManagerImpl(context, cpu_num, gpu_num, 0); // `context` is not a declared parameter; NOTE(review): presumably injected by the CbC code generator.
+#ifdef USE_CUDAWorker
+#ifdef USE_CUDA_MAIN_THREAD
+    while(! cuda_initialized) {}; // Busy-wait until the volatile cuda_initialized flag becomes non-zero.
+#endif
+#endif
+    goto meta(context, C_code1);
+}
+
+__code initDataGears_stub(struct Context* context) { // Stub for initDataGears: calls it directly, or runs it on a helper thread when both CUDA macros are defined.
+    struct TaskManager* taskManager =  Gearef(context, TaskManager);
+    taskManager->taskManager = 0; // Cleared so the CUDA branch below can detect when initDataGears has stored the manager.
+#if (! defined(USE_CUDAWorker) || ! defined(USE_CUDA_MAIN_THREAD))
+    struct LoopCounter* loopCounter = Gearef(context, LoopCounter);
+    goto initDataGears(context, loopCounter, taskManager);
+#else
+    cuda_initialized = 0;
+    pthread_t thread;
+    pthread_create(&thread, NULL, (void*)&start_taskManager, context); // Task-manager setup runs on the new thread; the return value is not checked.
+    while (taskManager->taskManager == 0); // Spin until the other thread stores the manager; NOTE(review): relies on the field being re-read each iteration, confirm it is volatile/atomic.
+    TaskManager *t = (TaskManager*)taskManager->taskManager;
+    TaskManagerImpl *im = (TaskManagerImpl*)t->taskManager;
+    struct Queue *q = (Queue *)im->workers[0]; // Slot 0 of the worker table is reinterpreted as the queue handed to the CUDA worker.
+    createCUDAWorker(context,0,q, im); // The CUDA worker is created on the calling thread.
+    pthread_join(thread,0);
+    exit(0); // The process ends here once the task-manager thread has finished.
+#endif
+}
+
+__code code1(struct Time* time) { // Prints the run configuration, creates the timer, and starts it with createTask1 as the continuation.
+    printf("cpus:\t\t%d\n", cpu_num);
+    printf("gpus:\t\t%d\n", gpu_num);
+    printf("length:\t\t%d\n", length);
+    /* puts("queue"); */
+    /* print_queue(context->data[ActiveQueue]->queue.first); */
+    /* puts("tree"); */
+    /* print_tree(context->data[Tree]->tree.root); */
+    /* puts("result"); */
+
+    time->time = (union Data*)createTimeImpl(context);
+    time->next = C_createTask1; // Continuation stored for the timer's start segment.
+    goto meta(context, time->time->Time.start);
+}
+
+__code createTask1(struct LoopCounter* loopCounter, struct TaskManager* taskManager) { // Builds the task list (makeArray, one bitonicSwap per pass, printArray) and hands it to spawnTasks.
+    int logN = (int)log2((float)length); // Number of outer stages: floor(log2(length)).
+    struct Context** tasks = (struct Context**)ALLOC_ARRAY(context, Context, logN*(1+logN)/2 + 2); // 1+2+...+logN swap tasks plus makeArray and printArray.
+    int taskCount = 0;
+
+    struct SortArray* outputSortArray = &ALLOCATE_DATA_GEAR(context, SortArray)->SortArray;
+    struct SortArray* inputSortArray = outputSortArray; // makeArray's output becomes the first swap task's input.
+
+    // par goto makeArray(outputSortArray, _exit);
+    struct Context* task = NEW(struct Context);
+    initContext(task);
+    task->next = C_makeArray;
+    task->idgCount = 0; // No input data gears.
+    task->idg = task->dataNum;
+    task->maxIdg = task->idg;
+    task->odg = task->maxIdg; // Output slots start right after the (empty) input range.
+    task->data[task->odg] = (union Data*)outputSortArray;
+    task->maxOdg = task->odg + 1;
+    tasks[taskCount] = task;
+
+    taskCount++;
+    for (int i=2; i <= length; i=2*i) { // i doubles each stage: 2, 4, 8, ... up to length.
+        int first = 1; // Marks the first pass of each stage; bitonicSwap picks a different partner index for it.
+        for (int j=i>>1; j > 0; j=j>>1) { // j is the block size for this pass, halved until it reaches 1.
+            outputSortArray = &ALLOCATE_DATA_GEAR(context, SortArray)->SortArray;
+            struct Context* task = NEW(struct Context); // Shadows the outer `task` for the duration of this iteration.
+            initContext(task);
+            struct Integer* integer1 = &ALLOCATE_DATA_GEAR(context, Integer)->Integer;
+            struct Integer* integer2 = &ALLOCATE_DATA_GEAR(context, Integer)->Integer;
+            integer1->value = j;
+            integer2->value = first;
+
+            task->next = C_bitonicSwap;
+            task->iterator = createOneDimIterator(context, length/2); // NOTE(review): presumably runs the task once per index in [0, length/2); confirm in OneDimIterator.
+            task->idgCount = 1; // Only one counted input although three slots are filled below; NOTE(review): presumably only the SortArray is awaited, confirm.
+            task->idg = task->dataNum;
+            task->data[task->idg] = (union Data*)inputSortArray;
+            task->data[task->idg+1] = (union Data*)integer1;
+            task->data[task->idg+2] = (union Data*)integer2;
+            task->maxIdg = task->idg + 3;
+            task->odg = task->maxIdg;
+            task->data[task->odg] = (union Data*)outputSortArray;
+            task->maxOdg = task->odg + 1;
+            tasks[taskCount] = task;
+            taskCount++;
+            first = 0;
+            inputSortArray = outputSortArray; // Chain passes: this pass's output is the next pass's input.
+        }
+    }
+
+    // par goto printArray(inputSortArray, __exit)
+    task = NEW(struct Context);
+    initContext(task);
+    task->next = C_printArray;
+    task->idgCount = 1;
+    task->idg = task->dataNum;
+    task->data[task->idg] = (union Data*)inputSortArray; // Output of the last swap pass (or of makeArray if no pass was created).
+    task->maxIdg = task->idg + 1;
+    task->odg = task->maxIdg;
+    task->maxOdg = task->odg; // No output data gears.
+    tasks[taskCount] = task; // Last entry; taskCount is not advanced afterwards.
+
+    taskManager->contexts = tasks;
+    // goto code2();
+    taskManager->next1 = C_code2; // Continuation stored for spawnTasks.
+    goto meta(context, taskManager->taskManager->TaskManager.spawnTasks);
+    //goto meta(context, taskManager->taskManager->TaskManager.shutdown);
+}
+
+__code code2(struct LoopCounter* loopCounter, struct TaskManager* taskManager, struct Time* time) { // Requests task-manager shutdown with exit_code as the continuation.
+    sleep(2); // Fixed two-second delay before shutdown; NOTE(review): presumably gives spawned tasks time to finish, confirm.
+    taskManager->next = C_exit_code;
+    goto meta(context, taskManager->taskManager->TaskManager.shutdown);
+}
+
+void init(int argc, char** argv) { // Parses "-cpu N", "-l N" and "-cuda" into cpu_num, length, gpu_num and CPU_CUDA; unknown arguments are ignored.
+    for (int i = 1; i < argc; ++i) { // Bounded by argc so an empty argument vector is never indexed past its end.
+        if (strcmp(argv[i], "-cpu") == 0 && i + 1 < argc) // A value is required: a trailing "-cpu" is ignored instead of handing NULL to atoi.
+            cpu_num = (int)atoi(argv[i+1]);
+        else if (strcmp(argv[i], "-l") == 0 && i + 1 < argc) // Same guard for "-l".
+            length = (int)atoi(argv[i+1]);
+        else if (strcmp(argv[i], "-cuda") == 0) {
+            gpu_num = 1;
+            CPU_CUDA = 0;
+        }
+    }
+}
+
+int main(int argc, char** argv) { // Entry point: parses options, builds the initial context and starts at initDataGears.
+    init(argc, argv);
+    struct Context* main_context = NEW(struct Context);
+    initContext(main_context);
+    main_context->next = C_initDataGears; // First code segment to run.
+
+    goto start_code(main_context);
+}
+
+__code makeArray(__code next(struct SortArray* output, ...)){ // Fills output->array with random values, one element per invocation, re-entering itself until full.
+    struct SortArray* output = *O_output; // O_output is supplied by makeArray_stub.
+    if (output->loopCounter == 0){ // First invocation: allocate the array and seed the generator.
+        output->array = (Integer**)ALLOC_ARRAY(context, Integer, length);
+        srand((unsigned) time(NULL));
+    }
+    if (output->loopCounter == GET_SIZE(output->array)){ // NOTE(review): GET_SIZE presumably yields the element count recorded by ALLOC_ARRAY; confirm.
+        printf("created Array\n");
+        output->loopCounter = 0; // Reset so later segments can reuse the counter.
+        goto meta(context, next);
+    }
+    struct Integer* integer = new Integer(); // `new` is not standard C; NOTE(review): presumably expanded by the project's code generator.
+    integer->value = rand() % 1000; // Values fall in [0, 999].
+    output->array[output->loopCounter] = integer;
+    printf("%d\n", output->array[output->loopCounter]->value);
+    output->loopCounter++;
+    *O_output = output;
+    goto meta(context, C_makeArray); // Re-enter this segment for the next element.
+}
+
+__code makeArray_stub(struct Context* context) { // Stub for makeArray: passes the context's first output slot and the stored continuation.
+    SortArray** O_output = (struct SortArray **)&context->data[context->odg]; // Address of the slot, so makeArray can write the pointer back.
+    goto makeArray(context,
+                   context->next,
+                   O_output);
+}
+
+__code printArray(struct SortArray* inputArray, __code next(...)){ // Prints one element per invocation, re-entering itself until the whole array has been printed.
+    if (inputArray->loopCounter == GET_SIZE(inputArray->array)){ // All elements printed; NOTE(review): GET_SIZE presumably yields the element count, confirm.
+        inputArray->loopCounter = 0;
+        goto meta(context, next);
+    }
+    printf("%d\n", inputArray->array[inputArray->loopCounter]->value);
+    inputArray->loopCounter++;
+    goto meta(context, C_printArray); // Re-enter this segment for the next element.
+}
+
+__code printArray_stub(struct Context* context) { // Stub for printArray: passes the context's first input slot and the stored continuation.
+    goto printArray(context,
+                   &context->data[context->idg]->SortArray,
+                   context->next);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/parallel_execution/examples/bitonicSort/sort.cbc	Mon Jul 24 16:52:09 2017 +0900
@@ -0,0 +1,95 @@
+#include<stdio.h>
+#include <stdlib.h>
+#include "../../../context.h"
+#define LOGN 5 // log2 of the element count; also the number of outer stages run by bitonic_sort.
+#define MAX (1 << LOGN) // Element count, 2^LOGN; parenthesized so the shift survives expansion next to higher-precedence operators.
+
+int main(int argc, char const* argv[]) { // Entry point: builds the initial context and starts at sort_start; the arguments are not used.
+    struct Context* main_context = NEW(struct Context);
+    initContext(main_context);
+    main_context->next = C_sort_start; // First code segment to run.
+    goto start_code(main_context);
+}
+
+__code sort_start(struct SortArray* sortArray){ // Allocates the sort state, zeroes the three loop counters, seeds rand, then continues to make_array.
+    sortArray->sortArray = new SortArray(); // sort_finish is not set here; NOTE(review): presumably `new` returns zeroed storage, confirm.
+    sortArray->sortArray->array = (Integer**)ALLOC_ARRAY(context, Integer, MAX);// ALLOC_ARRAY builds an array of data segments (DS), so a plain int array cannot be used here
+    sortArray->sortArray->loop_counter = 0;
+    sortArray->sortArray->loop_counter2 = 0;
+    sortArray->sortArray->loop_counter3 = 0;
+    srand((unsigned) time(NULL));
+    goto meta(context, C_make_array);
+}
+
+__code make_array(struct SortArray* sortArray){// Random number generation: stores one new Integer per invocation
+    if (sortArray->sortArray->loop_counter == MAX){// End of loop -> go on to print the array
+        sortArray->sortArray->loop_counter = 0;
+        goto meta(context, C_print);
+    }
+    struct Integer* integer = new Integer();
+    integer->value = rand() % 1000; // Values fall in [0, 999].
+    sortArray->sortArray->array[sortArray->sortArray->loop_counter] = integer;
+    sortArray->sortArray->loop_counter++;
+    goto meta(context, C_make_array); // Re-enter this segment for the next element.
+}
+
+__code print(struct SortArray* sortArray){// Array display: prints one element per invocation
+    if (sortArray->sortArray->loop_counter == MAX){// End of loop -> go on to the sort
+        printf("\n");
+        if(sortArray->sortArray->sort_finish == 1){// Exit if the sort has already finished
+            goto meta(context, C_exit_code);
+        }
+        sortArray->sortArray->loop_counter = 0;
+        sortArray->sortArray->loop_counter2 = 0;
+        goto meta(context, C_bitonic_sort);
+    }
+
+    printf("%d, ", sortArray->sortArray->array[sortArray->sortArray->loop_counter]->value);
+    sortArray->sortArray->loop_counter++;
+    goto meta(context, C_print); // Re-enter this segment for the next element.
+}
+
+__code bitonic_sort(struct SortArray* sortArray){// Outer loop of the sort: one iteration per stage, LOGN stages in total
+    if (sortArray->sortArray->loop_counter >= LOGN){// End of loop -> go on to print the array
+        sortArray->sortArray->loop_counter = 0;
+        sortArray->sortArray->sort_finish = 1; // Tells print to exit after this final display.
+        goto meta(context, C_print);
+    }
+    goto meta(context, C_kernel);
+}
+
+__code kernel(struct SortArray* sortArray){// Second loop level: loop_counter2 runs from 0 through loop_counter
+    if (sortArray->sortArray->loop_counter2 > sortArray->sortArray->loop_counter){// End of loop -> back to the outer loop
+        sortArray->sortArray->loop_counter++;
+        sortArray->sortArray->loop_counter2 = 0;
+        goto meta(context, C_bitonic_sort);
+    }
+
+    goto meta(context, C_kernel2);
+}
+
+__code kernel2(struct SortArray* sortArray){// Body of the sort: walks loop_counter3 over the array, one swap step per element
+    int i = sortArray->sortArray->loop_counter3;
+
+    if (i >= GET_SIZE(sortArray->sortArray->array)){// End of loop -> back to the outer loop; NOTE(review): GET_SIZE presumably yields the element count, confirm
+        sortArray->sortArray->loop_counter2++;
+        sortArray->sortArray->loop_counter3 = 0;
+        goto meta(context, C_kernel);
+    }
+
+     goto meta(context, C_swap);
+}
+
+__code swap(struct SortArray* sortArray){// Swaps a pair of array elements when they are out of order
+    int i = sortArray->sortArray->loop_counter3;
+    int d = 1 << (sortArray->sortArray->loop_counter - sortArray->sortArray->loop_counter2); // Distance to the partner element for this pass.
+    int up = ((i >> sortArray->sortArray->loop_counter) & 2) == 0; // 1 when this position is compared in ascending direction, 0 for descending.
+
+    if ((i & d) == 0 && (sortArray->sortArray->array[i]->value > sortArray->sortArray->array[i | d]->value) == up) { // Only the lower index of each pair (bit d clear) does the comparison.
+        struct Integer *tmp = sortArray->sortArray->array[i];
+        sortArray->sortArray->array[i] = sortArray->sortArray->array[i | d];
+        sortArray->sortArray->array[i | d] = tmp;
+    }
+    sortArray->sortArray->loop_counter3++;
+    goto meta(context, C_kernel2);// Back to the enclosing loop
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/parallel_execution/examples/bitonicSort/swap.cbc	Mon Jul 24 16:52:09 2017 +0900
@@ -0,0 +1,29 @@
+#include "../../../context.h"
+#include <stdio.h>
+
+__code bitonicSwap(struct SortArray* inputArray, struct Integer* block, struct Integer* first, struct Integer* i, __code next(struct SortArray* output, ...)) { // One compare-and-swap of a pass: the smaller value ends up at the lower index.
+    struct SortArray* output = *O_output; // O_output is supplied by bitonicSwap_stub.
+    int position = i->value/block->value; // Which block of size block->value the index i falls in.
+    int index1 = i->value+block->value*position; // Lower element of the pair.
+    int index2 = (first->value == 1)? ((block->value<<1)*(position+1))-(index1%block->value)-1 : index1+block->value; // First pass of a stage: mirrored partner inside the 2*block window; otherwise the partner is block elements away.
+    struct Integer** array = inputArray->array; // Neither index is range-checked; NOTE(review): presumably relies on the array length being a power of two, but bitonicSort.cbc defaults length to 100.
+    if (array[index2]->value < array[index1]->value) {
+        struct Integer *tmp = array[index1];
+        array[index1] = array[index2];
+        array[index2] = tmp;
+    }
+    output->array = array; // The output gear shares the input's element array; the swap was done in place.
+    *O_output = output;
+    goto meta(context, next);
+}
+
+__code bitonicSwap_stub(struct Context* context) { // Stub for bitonicSwap: unpacks four input slots and the first output slot from the context.
+    SortArray** O_output = (struct SortArray **)&context->data[context->odg]; // Address of the slot, so bitonicSwap can write the pointer back.
+    goto bitonicSwap(context,
+                     &context->data[context->idg]->SortArray,
+                     &context->data[context->idg+1]->Integer,
+                     &context->data[context->idg+2]->Integer,
+                     &context->data[context->idg+3]->Integer, // createTask1 fills only idg..idg+2; NOTE(review): presumably the iterator supplies this index slot, confirm.
+                     context->next,
+                     O_output);
+}
--- a/src/parallel_execution/examples/calc.cbc	Sun Jul 23 07:28:32 2017 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,183 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <assert.h>
-
-#include "../../context.h"
-
-int cpu_num = 1;
-int length = 100;
-int gpu_num = 0;
-int CPU_ANY = -1;
-int CPU_CUDA = -1;
-
-void print_queue(struct Element* element) {
-    while (element) {
-        printf("%p\n", ((struct Task *)(element->data)));
-        element = element->next;
-    }
-}
-
-void print_tree(struct Node* node) {
-    if (node != 0) {
-        printf("%d\n", node->value->Array.index);
-        print_tree(node->left);
-        print_tree(node->right);
-    }
-}
-
-void *start_taskManager(struct Context *context) {
-    goto initDataGears(context, Gearef(context, LoopCounter), Gearef(context, TaskManager));
-    return 0;
-}
-
-#ifdef USE_CUDAWorker
-#ifdef USE_CUDA_MAIN_THREAD
-extern volatile int cuda_initialized;
-#endif
-#endif
-
-__code initDataGears(struct LoopCounter* loopCounter, struct TaskManager* taskManager) {
-    // loopCounter->tree = createRedBlackTree(context);
-    loopCounter->i = 0;
-    taskManager->taskManager = (union Data*)createTaskManagerImpl(context, cpu_num, gpu_num, 0);
-#ifdef USE_CUDAWorker
-#ifdef USE_CUDA_MAIN_THREAD
-    while(! cuda_initialized) {};
-#endif
-#endif
-    goto meta(context, C_createTask1);
-}
-
-__code initDataGears_stub(struct Context* context) {
-    struct TaskManager* taskManager =  Gearef(context, TaskManager);
-    taskManager->taskManager = 0;
-#if (! defined(USE_CUDAWorker) || ! defined(USE_CUDA_MAIN_THREAD))
-    struct LoopCounter* loopCounter = Gearef(context, LoopCounter);
-    goto initDataGears(context, loopCounter, taskManager);
-#else
-    cuda_initialized = 0;
-    pthread_t thread;
-    pthread_create(&thread, NULL, (void*)&start_taskManager, context);
-    while (taskManager->taskManager == 0);
-    TaskManager *t = (TaskManager*)taskManager->taskManager;
-    TaskManagerImpl *im = (TaskManagerImpl*)t->taskManager;
-    struct Queue *q = (Queue *)im->workers[0];
-    createCUDAWorker(context,0,q, im);
-    pthread_join(thread,0);
-    exit(0);
-#endif
-}
-
-__code code1(struct Time* time) {
-    printf("cpus:\t\t%d\n", cpu_num);
-    printf("gpus:\t\t%d\n", gpu_num);
-    printf("length:\t\t%d\n", length);
-    /* puts("queue"); */
-    /* print_queue(context->data[ActiveQueue]->queue.first); */
-    /* puts("tree"); */
-    /* print_tree(context->data[Tree]->tree.root); */
-    /* puts("result"); */
-
-    //time->next = C_code2;
-    goto meta(context, C_exit_code);
-    //goto meta(context, C_start_time);
-}
-
-__code code1_stub(struct Context* context) {
-    goto code1(context, Gearef(context, Time));
-}
-
-
-__code createTask1(struct LoopCounter* loopCounter, struct TaskManager* taskManager) {
-    int i = loopCounter->i;
-
-    if (i < length) {
-        loopCounter->i++;
-        goto meta(context, C_createTask2);
-    }
-
-    loopCounter->i = 0;
-    taskManager->next = C_code1;
-    sleep(5);
-    goto meta(context, taskManager->taskManager->TaskManager.shutdown);
-}
-
-__code createTask2(struct LoopCounter* loopCounter, struct TaskManager* taskManager) {
-    struct Context** tasks = (struct Context**)ALLOC_ARRAY(context, Context, 3);
-
-    Integer* integer1 = &ALLOCATE_DATA_GEAR(context, Integer)->Integer;
-    Integer* integer2 = &ALLOCATE_DATA_GEAR(context, Integer)->Integer;
-    Integer* integer3 = &ALLOCATE_DATA_GEAR(context, Integer)->Integer;
-    // par goto mult(integer1, integer2, integer3, __exit);
-    struct Context* task = NEW(struct Context);
-    initContext(task);
-    task->next = C_mult;
-    task->idgCount = 2;
-    task->idg = task->dataNum;
-    task->data[task->idg] = (union Data*)integer1;
-    task->data[task->idg+1] = (union Data*)integer2;
-    task->maxIdg = task->idg + 2;
-    task->odg = task->maxIdg;
-    task->data[task->odg] = (union Data*)integer3;
-    task->maxOdg = task->odg + 1;
-    tasks[0] = task;
-
-    Integer* integer4 = &ALLOCATE_DATA_GEAR(context, Integer)->Integer;
-    Integer* integer5 = &ALLOCATE_DATA_GEAR(context, Integer)->Integer;
-    // par goto add(integer4, integer5, integer1, __exit);
-    task = NEW(struct Context);
-    initContext(task);
-    task->next = C_add;
-    task->idgCount = 2;
-    task->idg = task->dataNum;
-    task->data[task->idg] = (union Data*)integer4;
-    task->data[task->idg+1] = (union Data*)integer5;
-    task->maxIdg = task->idg + 2;
-    task->odg = task->maxIdg;
-    task->data[task->odg] = (union Data*)integer1;
-    task->maxOdg = task->odg + 1;
-    tasks[1] = task;
-
-    // par goto initIntegerDataGears(integer2, integer4, integer5, __exit);
-    task = NEW(struct Context);
-    initContext(task);
-    task->next = C_initIntegerDataGears;
-    task->idgCount = 0;
-    task->idg = task->dataNum;
-    task->maxIdg = task->idg;
-    task->odg = task->maxIdg;
-    task->data[task->odg] = (union Data*)integer2;
-    task->data[task->odg+1] = (union Data*)integer4;
-    task->data[task->odg+2] = (union Data*)integer5;
-    task->maxOdg = task->odg + 3;
-    tasks[2] = task;
-
-    taskManager->contexts = tasks;
-    // goto crateTask1();
-    taskManager->next1 = C_createTask1;
-    goto meta(context, taskManager->taskManager->TaskManager.spawnTasks);
-}
-
-void init(int argc, char** argv) {
-    for (int i = 1; argv[i]; ++i) {
-        if (strcmp(argv[i], "-cpu") == 0)
-            cpu_num = (int)atoi(argv[i+1]);
-        else if (strcmp(argv[i], "-l") == 0)
-            length = (int)atoi(argv[i+1]);
-        else if (strcmp(argv[i], "-cuda") == 0) {
-            gpu_num = 1;
-            CPU_CUDA = 0;
-        }
-    }
-}
-
-int main(int argc, char** argv) {
-    init(argc, argv);
-    struct Context* main_context = NEW(struct Context);
-    initContext(main_context);
-    main_context->next = C_initDataGears;
-
-    goto start_code(main_context);
-}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/parallel_execution/examples/calc/add.cbc	Mon Jul 24 16:52:09 2017 +0900
@@ -0,0 +1,18 @@
+#include "../../../context.h"
+#include <stdio.h>
+__code add(struct Integer* input1, struct Integer* input2, __code next(struct Integer* output, ...)) { // code gear: stores input1 + input2 in the output Integer, prints the equation, continues to next
+    struct Integer* output = *O_output;
+    output->value = input1->value + input2->value;
+    printf("%d + %d = %d\n", input1->value, input2->value, output->value);
+    *O_output = output; // writes the same pointer back into the output slot
+    goto meta(context, next);
+}
+
+__code add_stub(struct Context* context) { // stub: unpacks the data gears of this context and continues to add
+    Integer** O_output = (struct Integer **)&context->data[context->odg]; // address of the output slot
+    goto add(context,
+            &context->data[context->idg]->Integer, // input1
+            &context->data[context->idg + 1]->Integer, // input2
+            context->next,
+            O_output);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/parallel_execution/examples/calc/calc.cbc	Mon Jul 24 16:52:09 2017 +0900
@@ -0,0 +1,183 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <assert.h>
+
+#include "../../../context.h"
+
+int cpu_num = 1;    // passed to createTaskManagerImpl; overridden by "-cpu N" in init()
+int length = 100;   // number of createTask1 iterations; overridden by "-l N" in init()
+int gpu_num = 0;    // passed to createTaskManagerImpl; set to 1 by "-cuda" in init()
+int CPU_ANY = -1;   // NOTE(review): not referenced in the visible code of this file; presumably a "any worker" id — confirm
+int CPU_CUDA = -1;  // set to 0 by "-cuda" in init(), stays -1 otherwise
+
+void print_queue(struct Element* element) { // debug helper: prints the data pointer of every element, following next until NULL
+    while (element) {
+        printf("%p\n", (void *)element->data); // %p is only defined for a void* argument
+        element = element->next;
+    }
+}
+
+void print_tree(struct Node* node) { // debug helper: recursive dump of the Array.index stored at each node
+    if (node != 0) {
+        printf("%d\n", node->value->Array.index); // the node itself is printed before its subtrees (pre-order)
+        print_tree(node->left);
+        print_tree(node->right);
+    }
+}
+
+void *start_taskManager(struct Context *context) { // thread entry point handed to pthread_create by initDataGears_stub
+    goto initDataGears(context, Gearef(context, LoopCounter), Gearef(context, TaskManager)); // continues into the initDataGears code gear with this context's own data gears
+    return 0; // NOTE(review): placed after an unconditional goto; presumably only present to satisfy the void* return type — confirm
+}
+
+#ifdef USE_CUDAWorker
+#ifdef USE_CUDA_MAIN_THREAD
+extern volatile int cuda_initialized;
+#endif
+#endif
+
+__code initDataGears(struct LoopCounter* loopCounter, struct TaskManager* taskManager) { // resets the loop counter, creates the task manager, then continues to createTask1
+    // loopCounter->tree = createRedBlackTree(context);
+    loopCounter->i = 0;
+    taskManager->taskManager = (union Data*)createTaskManagerImpl(context, cpu_num, gpu_num, 0); // initDataGears_stub spins until this field becomes non-zero
+#ifdef USE_CUDAWorker
+#ifdef USE_CUDA_MAIN_THREAD
+    while(! cuda_initialized) {}; // busy-wait on the volatile flag; it is cleared in initDataGears_stub and presumably set by the CUDA worker — confirm
+#endif
+#endif
+    goto meta(context, C_createTask1);
+}
+
+__code initDataGears_stub(struct Context* context) { // stub for initDataGears: continues to it directly, or (CUDA main-thread build) runs it on a new thread and creates the CUDA worker from this thread
+    struct TaskManager* taskManager =  Gearef(context, TaskManager);
+    taskManager->taskManager = 0; // cleared so the spin loop below can detect the store made by initDataGears
+#if (! defined(USE_CUDAWorker) || ! defined(USE_CUDA_MAIN_THREAD))
+    struct LoopCounter* loopCounter = Gearef(context, LoopCounter);
+    goto initDataGears(context, loopCounter, taskManager);
+#else
+    cuda_initialized = 0;
+    pthread_t thread;
+    pthread_create(&thread, NULL, (void*)&start_taskManager, context); // NOTE(review): the return value is not checked; a failed create would leave the loop below spinning forever
+    while (taskManager->taskManager == 0); // NOTE(review): spins on a non-volatile field written by the other thread; an optimizing compiler may hoist the load — confirm
+    TaskManager *t = (TaskManager*)taskManager->taskManager;
+    TaskManagerImpl *im = (TaskManagerImpl*)t->taskManager;
+    struct Queue *q = (Queue *)im->workers[0]; // the first worker slot is reinterpreted as a queue and handed to the CUDA worker
+    createCUDAWorker(context,0,q, im);
+    pthread_join(thread,0);
+    exit(0); // the process ends here; this branch never continues to another code gear
+#endif
+}
+
+__code code1(struct Time* time) { // prints the run configuration and continues to exit_code; the time argument is not used by the active code
+    printf("cpus:\t\t%d\n", cpu_num);
+    printf("gpus:\t\t%d\n", gpu_num);
+    printf("length:\t\t%d\n", length);
+    /* puts("queue"); */
+    /* print_queue(context->data[ActiveQueue]->queue.first); */
+    /* puts("tree"); */
+    /* print_tree(context->data[Tree]->tree.root); */
+    /* puts("result"); */
+
+    //time->next = C_code2;
+    goto meta(context, C_exit_code);
+    //goto meta(context, C_start_time);
+}
+
+__code code1_stub(struct Context* context) { // stub: fetches the Time data gear of this context and continues to code1
+    goto code1(context, Gearef(context, Time));
+}
+
+
+__code createTask1(struct LoopCounter* loopCounter, struct TaskManager* taskManager) { // loop head: continues to createTask2 `length` times, then requests task-manager shutdown
+    int i = loopCounter->i;
+
+    if (i < length) {
+        loopCounter->i++;
+        goto meta(context, C_createTask2); // createTask2 sets taskManager->next1 = C_createTask1 to come back here
+    }
+
+    loopCounter->i = 0;
+    taskManager->next = C_code1; // presumably the code gear the shutdown gear continues to — confirm
+    sleep(5); // NOTE(review): fixed 5 s delay before shutdown, presumably to let spawned tasks finish — confirm; a completion signal would be more robust
+    goto meta(context, taskManager->taskManager->TaskManager.shutdown);
+}
+
+__code createTask2(struct LoopCounter* loopCounter, struct TaskManager* taskManager) { // builds three task contexts (mult, add, initIntegerDataGears) chained through shared Integer gears and hands them to spawnTasks
+    struct Context** tasks = (struct Context**)ALLOC_ARRAY(context, Context, 3);
+
+    Integer* integer1 = &ALLOCATE_DATA_GEAR(context, Integer)->Integer; // output of add, input of mult
+    Integer* integer2 = &ALLOCATE_DATA_GEAR(context, Integer)->Integer; // output of initIntegerDataGears, input of mult
+    Integer* integer3 = &ALLOCATE_DATA_GEAR(context, Integer)->Integer; // output of mult
+    // par goto mult(integer1, integer2, integer3, __exit);
+    struct Context* task = NEW(struct Context);
+    initContext(task);
+    task->next = C_mult;
+    task->idgCount = 2; // presumably the number of input gears the task still waits for — confirm
+    task->idg = task->dataNum; // inputs are placed starting at data[dataNum]
+    task->data[task->idg] = (union Data*)integer1;
+    task->data[task->idg+1] = (union Data*)integer2;
+    task->maxIdg = task->idg + 2;
+    task->odg = task->maxIdg; // outputs follow directly after the inputs
+    task->data[task->odg] = (union Data*)integer3;
+    task->maxOdg = task->odg + 1;
+    tasks[0] = task;
+
+    Integer* integer4 = &ALLOCATE_DATA_GEAR(context, Integer)->Integer; // output of initIntegerDataGears, input of add
+    Integer* integer5 = &ALLOCATE_DATA_GEAR(context, Integer)->Integer; // output of initIntegerDataGears, input of add
+    // par goto add(integer4, integer5, integer1, __exit);
+    task = NEW(struct Context);
+    initContext(task);
+    task->next = C_add;
+    task->idgCount = 2;
+    task->idg = task->dataNum;
+    task->data[task->idg] = (union Data*)integer4;
+    task->data[task->idg+1] = (union Data*)integer5;
+    task->maxIdg = task->idg + 2;
+    task->odg = task->maxIdg;
+    task->data[task->odg] = (union Data*)integer1;
+    task->maxOdg = task->odg + 1;
+    tasks[1] = task;
+
+    // par goto initIntegerDataGears(integer2, integer4, integer5, __exit);
+    task = NEW(struct Context);
+    initContext(task);
+    task->next = C_initIntegerDataGears;
+    task->idgCount = 0; // this task has no input gears
+    task->idg = task->dataNum;
+    task->maxIdg = task->idg;
+    task->odg = task->maxIdg;
+    task->data[task->odg] = (union Data*)integer2;
+    task->data[task->odg+1] = (union Data*)integer4;
+    task->data[task->odg+2] = (union Data*)integer5;
+    task->maxOdg = task->odg + 3;
+    tasks[2] = task;
+
+    taskManager->contexts = tasks;
+    // goto createTask1();
+    taskManager->next1 = C_createTask1; // code gear to continue with after the tasks were handed to spawnTasks
+    goto meta(context, taskManager->taskManager->TaskManager.spawnTasks);
+}
+
+void init(int argc, char** argv) { // parses "-cpu N", "-l N" and "-cuda" into the file-level globals; relies on argv being NULL-terminated
+    for (int i = 1; argv[i]; ++i) {
+        if (strcmp(argv[i], "-cpu") == 0 && argv[i+1])    // a trailing "-cpu" with no value is ignored instead of calling atoi(NULL)
+            cpu_num = (int)atoi(argv[i+1]);
+        else if (strcmp(argv[i], "-l") == 0 && argv[i+1]) // same guard for "-l"
+            length = (int)atoi(argv[i+1]);
+        else if (strcmp(argv[i], "-cuda") == 0) {
+            gpu_num = 1;
+            CPU_CUDA = 0;
+        }
+    }
+}
+
+int main(int argc, char** argv) { // parses the command line, creates the main context and starts the code-gear chain at initDataGears
+    init(argc, argv);
+    struct Context* main_context = NEW(struct Context);
+    initContext(main_context);
+    main_context->next = C_initDataGears; // first code gear to run once start_code hands over control
+
+    goto start_code(main_context); // CbC continuation: control leaves main here, so there is no return statement
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/parallel_execution/examples/calc/initIntegerDataGears.cbc	Mon Jul 24 16:52:09 2017 +0900
@@ -0,0 +1,25 @@
+#include "../../../context.h"
+#include <stdio.h>
+__code initIntegerDataGears(__code next(struct Integer* output1, struct Integer* output2, struct Integer* output3, ...)) { // code gear without inputs: sets its three output Integers to 1, 2 and 3, then continues to next
+    struct Integer* output1 = *O_output1;
+    struct Integer* output2 = *O_output2;
+    struct Integer* output3 = *O_output3;
+    output1->value = 1;
+    output2->value = 2;
+    output3->value = 3;
+    *O_output1 = output1; // the same pointers are written back into the output slots
+    *O_output2 = output2;
+    *O_output3 = output3;
+    goto meta(context, next);
+}
+
+__code initIntegerDataGears_stub(struct Context* context) { // stub: takes the addresses of the three output slots and continues to initIntegerDataGears
+    Integer** O_output1 = (struct Integer **)&context->data[context->odg];
+    Integer** O_output2 = (struct Integer **)&context->data[context->odg+1];
+    Integer** O_output3 = (struct Integer **)&context->data[context->odg+2];
+    goto initIntegerDataGears(context,
+            context->next,
+            O_output1,
+            O_output2,
+            O_output3);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/parallel_execution/examples/calc/mult.cbc	Mon Jul 24 16:52:09 2017 +0900
@@ -0,0 +1,18 @@
+#include "../../../context.h"
+#include <stdio.h>
+__code mult(struct Integer* input1, struct Integer* input2, __code next(struct Integer* output, ...)) { // code gear: stores input1 * input2 in the output Integer, prints the equation, continues to next
+    struct Integer* output = *O_output;
+    output->value = input1->value * input2->value;
+    printf("%d * %d = %d\n", input1->value, input2->value, output->value);
+    *O_output = output; // writes the same pointer back into the output slot
+    goto meta(context, next);
+}
+
+__code mult_stub(struct Context* context) { // stub: unpacks the data gears of this context and continues to mult
+    Integer** O_output = (struct Integer **)&context->data[context->odg]; // address of the output slot
+    goto mult(context,
+            &context->data[context->idg]->Integer, // input1
+            &context->data[context->idg + 1]->Integer, // input2
+            context->next,
+            O_output);
+}
--- a/src/parallel_execution/examples/mult.cbc	Sun Jul 23 07:28:32 2017 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,18 +0,0 @@
-#include "../../context.h"
-#include <stdio.h>
-__code mult(struct Integer* input1, struct Integer* input2, __code next(struct Integer* output, ...)) {
-    struct Integer* output = *O_output;
-    output->value = input1->value * input2->value;
-    printf("%d * %d = %d\n", input1->value, input2->value, output->value);
-    *O_output = output;
-    goto meta(context, next);
-}
-
-__code mult_stub(struct Context* context) {
-    Integer** O_output = (struct Integer **)&context->data[context->odg];
-    goto mult(context,
-            &context->data[context->idg]->Integer,
-            &context->data[context->idg + 1]->Integer,
-            context->next,
-            O_output);
-}
--- a/src/parallel_execution/examples/sort.cbc	Sun Jul 23 07:28:32 2017 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,95 +0,0 @@
-#include<stdio.h>
-#include <stdlib.h>
-#include "../../context.h"
-#define LOGN 5
-#define MAX 1 << LOGN
-
-int main(int argc, char const* argv[]) {
-    struct Context* main_context = NEW(struct Context);
-    initContext(main_context);
-    main_context->next = C_sort_start;
-    goto start_code(main_context);
-}
-
-__code sort_start(struct SortArray* sortArray){
-    sortArray->sortArray = new SortArray();
-    sortArray->sortArray->array = (Integer**)ALLOC_ARRAY(context, Integer, MAX);//ALLOC_ARRAYはDSの配列なのでintではできない
-    sortArray->sortArray->loop_counter = 0;
-    sortArray->sortArray->loop_counter2 = 0;
-    sortArray->sortArray->loop_counter3 = 0;
-    srand((unsigned) time(NULL));
-    goto meta(context, C_make_array);
-}
-
-__code make_array(struct SortArray* sortArray){//乱数生成
-    if (sortArray->sortArray->loop_counter == MAX){//ループの終了→配列表示へ
-        sortArray->sortArray->loop_counter = 0;
-        goto meta(context, C_print);
-    }
-    struct Integer* integer = new Integer();
-    integer->value = rand() % 1000;
-    sortArray->sortArray->array[sortArray->sortArray->loop_counter] = integer;
-    sortArray->sortArray->loop_counter++;
-    goto meta(context, C_make_array);
-}
-
-__code print(struct SortArray* sortArray){//配列表示
-    if (sortArray->sortArray->loop_counter == MAX){//ループの終了→ソートへ
-        printf("\n");
-        if(sortArray->sortArray->sort_finish == 1){//ソート終わってたら終了
-            goto meta(context, C_exit_code);
-        }
-        sortArray->sortArray->loop_counter = 0;
-        sortArray->sortArray->loop_counter2 = 0;
-        goto meta(context, C_bitonic_sort);
-    }
-
-    printf("%d, ", sortArray->sortArray->array[sortArray->sortArray->loop_counter]->value);
-    sortArray->sortArray->loop_counter++;
-    goto meta(context, C_print);
-}
-
-__code bitonic_sort(struct SortArray* sortArray){//ソートの繰り返し
-    if (sortArray->sortArray->loop_counter >= LOGN){//ループの終了→配列表示へ
-        sortArray->sortArray->loop_counter = 0;
-        sortArray->sortArray->sort_finish = 1;
-        goto meta(context, C_print);
-    }
-    goto meta(context, C_kernel);
-}
-
-__code kernel(struct SortArray* sortArray){//繰り返し2
-    if (sortArray->sortArray->loop_counter2 > sortArray->sortArray->loop_counter){//ループの終了→上のループへ
-        sortArray->sortArray->loop_counter++;
-        sortArray->sortArray->loop_counter2 = 0;
-        goto meta(context, C_bitonic_sort);
-    }
-
-    goto meta(context, C_kernel2);
-}
-
-__code kernel2(struct SortArray* sortArray){//ソートの中身
-    int i = sortArray->sortArray->loop_counter3;
-
-    if (i >= GET_SIZE(sortArray->sortArray->array)){//ループの終了→上のループへ
-        sortArray->sortArray->loop_counter2++;
-        sortArray->sortArray->loop_counter3 = 0;
-        goto meta(context, C_kernel);
-    }
-
-     goto meta(context, C_swap);
-}
-
-__code swap(struct SortArray* sortArray){//配列の要素を入れ替える
-    int i = sortArray->sortArray->loop_counter3;
-    int d = 1 << (sortArray->sortArray->loop_counter - sortArray->sortArray->loop_counter2);
-    int up = ((i >> sortArray->sortArray->loop_counter) & 2) == 0;
-
-    if ((i & d) == 0 && (sortArray->sortArray->array[i]->value > sortArray->sortArray->array[i | d]->value) == up) {
-        struct Integer *tmp = sortArray->sortArray->array[i];
-        sortArray->sortArray->array[i] = sortArray->sortArray->array[i | d];
-        sortArray->sortArray->array[i | d] = tmp;
-    }
-    sortArray->sortArray->loop_counter3++;
-    goto meta(context, C_kernel2);//上位のループへ
-}
--- a/src/parallel_execution/examples/swap.cbc	Sun Jul 23 07:28:32 2017 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,29 +0,0 @@
-#include "../../context.h"
-#include <stdio.h>
-
-__code bitonicSwap(struct SortArray* inputArray, struct Integer* block, struct Integer* first, struct Integer* i, __code next(struct SortArray* output, ...)) {
-    struct SortArray* output = *O_output;
-    int position = i->value/block->value;
-    int index1 = i->value+block->value*position;
-    int index2 = (first->value == 1)? ((block->value<<1)*(position+1))-(index1%block->value)-1 : index1+block->value;
-    struct Integer** array = inputArray->array;
-    if (array[index2]->value < array[index1]->value) {
-        struct Integer *tmp = array[index1];
-        array[index1] = array[index2];
-        array[index2] = tmp;
-    }
-    output->array = array;
-    *O_output = output;
-    goto meta(context, next);
-}
-
-__code bitonicSwap_stub(struct Context* context) {
-    SortArray** O_output = (struct SortArray **)&context->data[context->odg];
-    goto bitonicSwap(context,
-                     &context->data[context->idg]->SortArray,
-                     &context->data[context->idg+1]->Integer,
-                     &context->data[context->idg+2]->Integer,
-                     &context->data[context->idg+3]->Integer,
-                     context->next,
-                     O_output);
-}
--- a/src/parallel_execution/examples/twice.cbc	Sun Jul 23 07:28:32 2017 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,191 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include "../../context.h"
-
-int cpu_num = 1;
-int length = 102400;
-int split = 8;
-int* array_ptr;
-int gpu_num = 0;
-int CPU_ANY = -1;
-int CPU_CUDA = -1;
-
-void print_queue(struct Element* element) {
-    while (element) {
-        printf("%p\n", ((struct Task *)(element->data)));
-        element = element->next;
-    }
-}
-
-void print_tree(struct Node* node) {
-    if (node != 0) {
-        printf("%d\n", node->value->Array.index);
-        print_tree(node->left);
-        print_tree(node->right);
-    }
-}
-
-void *start_taskManager(struct Context *context) {
-    goto initDataGears(context, Gearef(context, LoopCounter), Gearef(context, TaskManager));
-    return 0;
-}
-
-#ifdef USE_CUDAWorker
-#ifdef USE_CUDA_MAIN_THREAD
-extern volatile int cuda_initialized;
-#endif
-#endif
-
-__code initDataGears(struct LoopCounter* loopCounter, struct TaskManager* taskManager) {
-    // loopCounter->tree = createRedBlackTree(context);
-    loopCounter->i = 0;
-    taskManager->taskManager = (union Data*)createTaskManagerImpl(context, cpu_num, gpu_num, 0);
-#ifdef USE_CUDAWorker
-#ifdef USE_CUDA_MAIN_THREAD
-    while(! cuda_initialized) {};
-#endif
-#endif
-    goto meta(context, C_code1);
-}
-
-__code initDataGears_stub(struct Context* context) {
-    struct TaskManager* taskManager =  Gearef(context, TaskManager);
-    taskManager->taskManager = 0;
-#if (! defined(USE_CUDAWorker) || ! defined(USE_CUDA_MAIN_THREAD))
-    struct LoopCounter* loopCounter = Gearef(context, LoopCounter);
-    goto initDataGears(context, loopCounter, taskManager);
-#else
-    cuda_initialized = 0;
-    pthread_t thread;
-    pthread_create(&thread, NULL, (void*)&start_taskManager, context);
-    while (taskManager->taskManager == 0);
-    TaskManager *t = (TaskManager*)taskManager->taskManager;
-    TaskManagerImpl *im = (TaskManagerImpl*)t->taskManager;
-    struct Queue *q = (Queue *)im->workers[0];
-    createCUDAWorker(context,0,q, im);
-    pthread_join(thread,0);
-    exit(0);
-#endif
-}
-
-__code code1(struct Time* time) {
-    printf("cpus:\t\t%d\n", cpu_num);
-    printf("gpus:\t\t%d\n", gpu_num);
-    printf("length:\t\t%d\n", length);
-    printf("length/task:\t%d\n", length/split);
-    /* puts("queue"); */
-    /* print_queue(context->data[ActiveQueue]->queue.first); */
-    /* puts("tree"); */
-    /* print_tree(context->data[Tree]->tree.root); */
-    /* puts("result"); */
-    time->time = (union Data*)createTimeImpl(context);
-    time->next = C_createTask1;
-    goto meta(context, time->time->Time.start);
-}
-
-__code code2(struct LoopCounter* loopCounter) {
-    int i = loopCounter->i;
-
-    if (i < length) {
-        //printf("%d\n", array_ptr[i]);
-        if (array_ptr[i] == (i*2)) {
-            loopCounter->i++;
-            goto meta(context, C_code2);
-        } else
-            puts("wrong result");
-
-    }
-
-    goto meta(context, C_exit_code);
-}
-
-__code createTask1(struct LoopCounter* loopCounter, struct TaskManager* taskManager, struct Time* time) {
-    int i = loopCounter->i;
-
-    if ((length/split*i) < length) {
-        goto meta(context, C_createTask2);
-    }
-
-    loopCounter->i = 0;
-    taskManager->next = time->time->Time.end;
-    time->next = C_code2;
-#if ( defined(USE_CUDAWorker) && defined(USE_CUDA_MAIN_THREAD))
-sleep(5);
-#endif
-    goto meta(context, taskManager->taskManager->TaskManager.shutdown);
-}
-
-__code createTask2(struct LoopCounter* loopCounter, struct TaskManager* taskManager) {
-    struct Context** tasks = (struct Context**)ALLOC_ARRAY(context, Context, 1);
-
-    int i = loopCounter->i;
-    LoopCounter* loopCounter2 = &ALLOCATE_DATA_GEAR(context, LoopCounter)->LoopCounter;
-    Array* array = &ALLOCATE_DATA_GEAR(context, Array)->Array;
-    array->index = i;
-    array->prefix = length/split;
-    array->array = array_ptr;
-    array->size = length;
-    loopCounter2->i = 0;
-    loopCounter->i++;
-
-    // par goto twice(loopCounter2, array, __exit);
-    struct Context* task = NEW(struct Context);
-    initContext(task);
-    task->idgCount = 0;
-    if (gpu_num) {
-#ifdef USE_CUDAWorker
-        task->next = C_CUDAtwice;
-        task->workerId = CPU_CUDA;
-#else
-        task->next = C_twice;
-#endif
-    } else {
-        task->next = C_twice;
-    }
-    task->idg = task->dataNum;
-    task->data[task->idg] = (union Data*)loopCounter2;
-    task->data[task->idg+1] = (union Data*)array;
-    task->maxIdg = task->idg + 2;
-    task->odg = task->maxIdg;
-    task->maxOdg = task->odg;
-    tasks[0] = task;
-
-    taskManager->contexts = tasks;
-    // goto crateTask1();
-    taskManager->next1 = C_createTask1;
-    goto meta(context, taskManager->taskManager->TaskManager.spawnTasks);
-}
-
-void init(int argc, char** argv) {
-    for (int i = 1; argv[i]; ++i) {
-        if (strcmp(argv[i], "-cpu") == 0)
-            cpu_num = (int)atoi(argv[i+1]);
-        else if (strcmp(argv[i], "-l") == 0)
-            length = (int)atoi(argv[i+1]);
-        else if (strcmp(argv[i], "-s") == 0)
-            split = (int)atoi(argv[i+1]);
-        else if (strcmp(argv[i], "-cuda") == 0) {
-            gpu_num = 1;
-            CPU_CUDA = 0;
-        }
-    }
-}
-
-
-int main(int argc, char** argv) {
-    init(argc, argv);
-
-    array_ptr = NEWN(length, int);
-
-    for(int i=0; i<length; i++)
-        array_ptr[i]=i;
-
-    struct Context* main_context = NEW(struct Context);
-    initContext(main_context);
-    main_context->next = C_initDataGears;
-
-    goto start_code(main_context);
-}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/parallel_execution/examples/twice/CUDAtwice.cbc	Mon Jul 24 16:52:09 2017 +0900
@@ -0,0 +1,28 @@
+#include <stdio.h>
+#include "../../../context.h"
+
+
+extern void CUDAExec(struct Context* context, Array* array, LoopCounter *loopCounter);
+
+__code CUDAtwice(struct Context* context, struct LoopCounter* loopCounter, int index, int prefix, int* array, struct Context* workerContext) { // doubles one element of the slice array[index*prefix ...] per invocation; NOTE(review): CUDAtwice_stub below calls CUDAExec and never continues here — confirm whether this gear is still used
+    int i = loopCounter->i;
+    if (i < prefix) {
+        array[i+index*prefix] = array[i+index*prefix]*2;
+        loopCounter->i++;
+
+        goto meta(context, C_twice); // NOTE(review): continues to C_twice, not to this gear — confirm that is intended
+    }
+
+    loopCounter->i = 0; // slice finished: reset the counter and return control to the worker
+    goto meta(workerContext, workerContext->next);
+}
+
+__code CUDAtwice_stub(struct Context* context) { // stub: passes the task's LoopCounter and Array gears to CUDAExec, then follows the context's continuation
+printf("CUdAtwice stub\n");
+    struct LoopCounter* loopCounter = &context->data[context->dataNum]->LoopCounter; // first data slot after dataNum
+    struct Array* array = &context->data[context->dataNum+1]->Array; // second data slot after dataNum
+    CUDAExec(context,array,loopCounter);
+
+    // follow the continuation back to the GPU worker
+    goto meta(context, context->next);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/parallel_execution/examples/twice/CUDAtwice.cu	Mon Jul 24 16:52:09 2017 +0900
@@ -0,0 +1,33 @@
+extern "C" {
+
+#include <stdio.h>
+
+//    __global__ void twice(struct LoopCounter* loopCounter, int prefix ,int* array) {
+//         int index = blockIdx.x * blockDim.x + threadIdx.x;
+//         printf("array %p, blockIdx.x = %d, blockDim.x = %d, threadIdx.x = %d\n");
+//         int i = 0;
+//         while (i < prefix) {
+//              array[i+index*prefix] = array[i+index*prefix]*2;
+//         }
+//    }
+
+    struct LoopCounter { // loop-counter layout as seen by the kernel below; presumably must match the host-side LoopCounter data gear — confirm
+        int i;
+    } LoopCounter; // NOTE(review): without a typedef keyword this also defines a variable named LoopCounter — confirm that is intended
+
+    __global__ void twice(struct LoopCounter* loopCounter, int index, int prefix, int* array) { // doubles array[index*prefix + i] for i from loopCounter->i up to prefix-1, then resets the counter; NOTE(review): no threadIdx/blockIdx is used and loopCounter is shared, so this presumably expects a single-thread launch — confirm
+         printf("array %p, index = %d, prefix = %d loopCounter->i %d\n",array,index,prefix,loopCounter->i); // debug trace, executed by every thread that runs the kernel
+C_twice: // loop head, re-entered through the goto below
+        int i = loopCounter->i;
+        if (i < prefix) {
+            array[i+index*prefix] = array[i+index*prefix]*2;
+            loopCounter->i++;
+
+            goto C_twice;
+        }
+
+        loopCounter->i = 0; // leave the counter reset once the slice is done
+    }
+
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/parallel_execution/examples/twice/twice.cbc	Mon Jul 24 16:52:09 2017 +0900
@@ -0,0 +1,191 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "../../../context.h"
+
+int cpu_num = 1;        // CPU count passed to createTaskManagerImpl; overridden by "-cpu N" in init()
+int length = 102400;    // element count of array_ptr; overridden by "-l N"
+int split = 8;          // divisor that sizes each task (length/split); overridden by "-s N"
+int* array_ptr;         // buffer allocated and filled in main(), checked in code2
+int gpu_num = 0;        // GPU count passed to createTaskManagerImpl; set to 1 by "-cuda"
+int CPU_ANY = -1;       // not referenced elsewhere in this file
+int CPU_CUDA = -1;      // workerId assigned to CUDA tasks in createTask2; set to 0 by "-cuda"
+
+void print_queue(struct Element* element) {
+    // Debug helper: follow the next links and print each element's data pointer.
+    for (struct Element* cur = element; cur; cur = cur->next) {
+        printf("%p\n", ((struct Task *)(cur->data)));
+    }
+}
+
+void print_tree(struct Node* node) {
+    // Debug helper: print this node's Array.index, then recurse left and right.
+    if (node == 0) return;
+    printf("%d\n", node->value->Array.index);
+    print_tree(node->left);
+    print_tree(node->right);
+}
+
+void *start_taskManager(struct Context *context) { // thread entry point handed to pthread_create in initDataGears_stub
+    goto initDataGears(context, Gearef(context, LoopCounter), Gearef(context, TaskManager)); // continue into the initDataGears code gear with this context's data gears
+    return 0; // placed after the goto; presumably never reached because a CbC goto does not come back — TODO confirm
+}
+
+#ifdef USE_CUDAWorker
+#ifdef USE_CUDA_MAIN_THREAD
+extern volatile int cuda_initialized;
+#endif
+#endif
+
+__code initDataGears(struct LoopCounter* loopCounter, struct TaskManager* taskManager) { // code gear: resets the counter, creates the task manager, continues to code1
+    // loopCounter->tree = createRedBlackTree(context);
+    loopCounter->i = 0;
+    taskManager->taskManager = (union Data*)createTaskManagerImpl(context, cpu_num, gpu_num, 0); // `context` is not a declared parameter; presumably supplied by the CbC stub generator — TODO confirm
+#ifdef USE_CUDAWorker
+#ifdef USE_CUDA_MAIN_THREAD
+    while(! cuda_initialized) {}; // busy-wait until cuda_initialized becomes non-zero (it is written outside this file)
+#endif
+#endif
+    goto meta(context, C_code1);
+}
+
+__code initDataGears_stub(struct Context* context) { // stub: fetches the data gears from the context and enters initDataGears
+    struct TaskManager* taskManager =  Gearef(context, TaskManager);
+    taskManager->taskManager = 0; // cleared so the CUDA-main-thread path below can detect when it has been created
+#if (! defined(USE_CUDAWorker) || ! defined(USE_CUDA_MAIN_THREAD))
+    struct LoopCounter* loopCounter = Gearef(context, LoopCounter);
+    goto initDataGears(context, loopCounter, taskManager); // unless both macros are defined: enter initDataGears directly
+#else
+    cuda_initialized = 0;
+    pthread_t thread;
+    pthread_create(&thread, NULL, (void*)&start_taskManager, context); // start_taskManager enters initDataGears on a new thread; the return value is not checked
+    while (taskManager->taskManager == 0); // spin until the task manager pointer is stored; NOTE(review): the field is not volatile/atomic — confirm this load cannot be hoisted
+    TaskManager *t = (TaskManager*)taskManager->taskManager;
+    TaskManagerImpl *im = (TaskManagerImpl*)t->taskManager;
+    struct Queue *q = (Queue *)im->workers[0];
+    createCUDAWorker(context,0,q, im); // called on this thread, with workers[0] viewed as a Queue
+    pthread_join(thread,0);
+    exit(0); // the process ends here once the spawned thread has been joined
+#endif
+}
+
+__code code1(struct Time* time) { // code gear: prints the run parameters, creates the timer and continues to Time.start
+    printf("cpus:\t\t%d\n", cpu_num);
+    printf("gpus:\t\t%d\n", gpu_num);
+    printf("length:\t\t%d\n", length);
+    printf("length/task:\t%d\n", length/split); // integer division; split == 0 would fault here
+    /* puts("queue"); */
+    /* print_queue(context->data[ActiveQueue]->queue.first); */
+    /* puts("tree"); */
+    /* print_tree(context->data[Tree]->tree.root); */
+    /* puts("result"); */
+    time->time = (union Data*)createTimeImpl(context);
+    time->next = C_createTask1; // stored as the continuation; presumably taken by the Time implementation after start — TODO confirm
+    goto meta(context, time->time->Time.start);
+}
+
+__code code2(struct LoopCounter* loopCounter) {
+    // Verification pass: one element of array_ptr is checked per invocation.
+    int pos = loopCounter->i;
+    int inRange = (pos < length);
+    if (inRange && array_ptr[pos] == (pos*2)) {
+        // Element was doubled: advance and re-enter this gear for the next one.
+        loopCounter->i++;
+        goto meta(context, C_code2);
+    }
+    if (inRange) {
+        puts("wrong result");
+    }
+    // Either every element passed or a mismatch was reported: finish.
+    goto meta(context, C_exit_code);
+}
+
+__code createTask1(struct LoopCounter* loopCounter, struct TaskManager* taskManager, struct Time* time) {
+    int i = loopCounter->i;
+    int prefix = length/split; // elements per task; 0 when split > length
+    // A zero-sized chunk never reaches `length`, so it must not keep spawning tasks forever.
+    if (prefix > 0 && (prefix*i) < length) {
+        goto meta(context, C_createTask2);
+    }
+    loopCounter->i = 0; // all tasks created: reset the counter, which code2 reuses as its index
+    taskManager->next = time->time->Time.end;
+    time->next = C_code2;
+#if ( defined(USE_CUDAWorker) && defined(USE_CUDA_MAIN_THREAD))
+    sleep(5);
+#endif
+    goto meta(context, taskManager->taskManager->TaskManager.shutdown);
+}
+
+__code createTask2(struct LoopCounter* loopCounter, struct TaskManager* taskManager) { // code gear: builds one task for chunk loopCounter->i and passes it to spawnTasks
+    struct Context** tasks = (struct Context**)ALLOC_ARRAY(context, Context, 1); // one-element task list
+
+    int i = loopCounter->i;
+    LoopCounter* loopCounter2 = &ALLOCATE_DATA_GEAR(context, LoopCounter)->LoopCounter; // per-task counter, separate from the task-creation counter
+    Array* array = &ALLOCATE_DATA_GEAR(context, Array)->Array;
+    array->index = i; // chunk number
+    array->prefix = length/split; // chunk length
+    array->array = array_ptr; // every task points at the same buffer
+    array->size = length;
+    loopCounter2->i = 0;
+    loopCounter->i++; // the next createTask1 pass looks at the following chunk
+
+    // par goto twice(loopCounter2, array, __exit); (written out by hand below)
+    struct Context* task = NEW(struct Context);
+    initContext(task);
+    task->idgCount = 0;
+    if (gpu_num) {
+#ifdef USE_CUDAWorker
+        task->next = C_CUDAtwice;
+        task->workerId = CPU_CUDA;
+#else
+        task->next = C_twice; // built without USE_CUDAWorker: use the twice gear even when gpu_num is set
+#endif
+    } else {
+        task->next = C_twice;
+    }
+    task->idg = task->dataNum; // inputs are stored from dataNum on: [idg] = counter, [idg+1] = array
+    task->data[task->idg] = (union Data*)loopCounter2;
+    task->data[task->idg+1] = (union Data*)array;
+    task->maxIdg = task->idg + 2;
+    task->odg = task->maxIdg; // odg == maxOdg, i.e. an empty output range
+    task->maxOdg = task->odg;
+    tasks[0] = task;
+
+    taskManager->contexts = tasks;
+    // goto createTask1();
+    taskManager->next1 = C_createTask1; // stored as the continuation; presumably taken after spawnTasks — TODO confirm
+    goto meta(context, taskManager->taskManager->TaskManager.spawnTasks);
+}
+
+void init(int argc, char** argv) { // parses "-cpu N", "-l N", "-s N" and "-cuda" into the globals
+    for (int i = 1; argv[i]; ++i) {
+        const char* value = argv[i+1]; // argv is NULL-terminated: NULL when the option is the last word
+        if (strcmp(argv[i], "-cuda") == 0) {
+            gpu_num = 1;
+            CPU_CUDA = 0;
+        } else if (value && strcmp(argv[i], "-cpu") == 0) // value options with no value are ignored
+            cpu_num = atoi(value);
+        else if (value && strcmp(argv[i], "-l") == 0)
+            length = atoi(value);
+        else if (value && strcmp(argv[i], "-s") == 0)
+            split = atoi(value);
+    }
+}
+
+
+int main(int argc, char** argv) {
+    init(argc, argv);
+    // Seed the shared buffer so that slot k holds k; code2 checks for 2*k afterwards.
+    array_ptr = NEWN(length, int);
+    int k = 0;
+    while (k < length) {
+        array_ptr[k] = k;
+        k++;
+    }
+    struct Context* main_context = NEW(struct Context);
+    initContext(main_context);
+    main_context->next = C_initDataGears;
+    goto start_code(main_context);
+}