changeset 1891:e0d465efc57e draft

directory reorganization for Cell/Fifo/ManyCore/Gpu
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Sat, 04 Jan 2014 19:10:32 +0900
parents 9085a4692cfd
children a32179f5b743
files TaskManager/Cell/CellDmaManager.cc TaskManager/Cell/CellDmaManager.h TaskManager/Cell/CellTaskManagerFactory.cc TaskManager/Cell/CellTaskManagerImpl.cc TaskManager/Cell/MainScheduler.cc TaskManager/Cell/MainScheduler.h TaskManager/Cell/ManyCoreDmaManager.cc TaskManager/Cell/ManyCoreDmaManager.h TaskManager/Cell/PreRefDmaManager.cc TaskManager/Cell/PreRefDmaManager.h TaskManager/Cell/ReferencedDmaManager.cc TaskManager/Cell/ReferencedDmaManager.h TaskManager/Cell/spe/CellDmaManager.cc TaskManager/Cell/spe/CellDmaManager.h TaskManager/ChangeLog TaskManager/Fifo/FifoTaskManagerImpl.cc TaskManager/Fifo/FifoTaskManagerImplFactory.cc TaskManager/Gpu/GpuTaskManagerFactory.cc TaskManager/Makefile.def TaskManager/Makefile.gpu TaskManager/Makefile.parallel TaskManager/ManyCore/MainScheduler.cc TaskManager/ManyCore/MainScheduler.h TaskManager/ManyCore/ManyCoreDmaManager.cc TaskManager/ManyCore/ManyCoreDmaManager.h TaskManager/ManyCore/ManyCoreTaskManagerFactory.cc TaskManager/ManyCore/PreRefDmaManager.cc TaskManager/ManyCore/PreRefDmaManager.h TaskManager/ManyCore/ReferencedDmaManager.cc TaskManager/ManyCore/ReferencedDmaManager.h TaskManager/include/TaskManagerFactory.h TaskManager/kernel/ppe/CpuThreads.cc TaskManager/kernel/ppe/TaskManager.cc
diffstat 33 files changed, 796 insertions(+), 796 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TaskManager/Cell/CellDmaManager.cc	Sat Jan 04 19:10:32 2014 +0900
@@ -0,0 +1,357 @@
+//#include <stdio.h>
+#include <stdlib.h>
+#include "CellDmaManager.h"
+#include "Scheduler.h"
+
+
+unsigned long long alloc_flag = 0;
+
+/**
+ * DMA Load: allocate a buffer from s->manager and issue mfc_get into it.
+ * Returns that buffer, or 0 (nothing allocated) when size is 0.
+ * @param[in] s    Scheduler whose manager allocates the receiving buffer
+ * @param[in] addr Address of Load Data at Main Memory
+ * @param[in] size Size of Load Data
+ * @param[in] mask DMA tag
+*/
+void *CellDmaManager::dma_load(Scheduler *s, memaddr addr, uint32 size, uint32 mask)
+{
+    void *buf = 0;
+    if (size == 0) return buf; // empty request: no allocation, no DMA
+    buf = s->manager->allocate(size);
+    mfc_get((volatile void *)buf, addr, size, mask, 0, 0); // NOTE(review): presumably asynchronous; see dma_wait()
+    return buf;
+}
+// Same load as dma_load(), but into the caller-supplied buf (no allocation).
+void *CellDmaManager::dma_load1(void *buf, memaddr addr, uint32 size, uint32 mask)
+{
+    if (size == 0) return buf; // empty request: no DMA issued
+    mfc_get((volatile void *)buf, addr, size, mask, 0, 0);
+    return buf;
+}
+
+// Allocate a size-byte buffer from s->manager; addr is not used here.
+void *
+CellDmaManager::get_writebuf(Scheduler *s,memaddr addr, uint32 size)
+{
+    void *b = s->manager->allocate(size);
+    return b;
+}
+
+/**
+ * DMA Store: issue an mfc_put of buf to main memory and return buf.
+ * Nothing is issued when size is 0. The Scheduler argument s is unused.
+ * @param[in] buf  Buffer of Store Data at SPE
+ * @param[in] addr Address of Store Data at Main Memory
+ * @param[in] size Size of Store Data
+ * @param[in] mask DMA tag
+*/
+void* CellDmaManager::dma_store(Scheduler *s,void *buf, memaddr addr, uint32 size, uint32 mask)
+{
+    if (size == 0) return buf;
+    mfc_put((volatile void *)buf, addr, size, mask, 0, 0);
+    return buf; 
+}
+// Wait on DMA tag `mask`; the elapsed ticks go to global_wait_time when profiling is on.
+void CellDmaManager::dma_wait(uint32 mask)
+{
+    (this->*start_dmawait_profile)();
+    mfc_write_tag_mask(1U << mask); // mask is a tag number (get_tag() gives 16..31); unsigned so bit 31 fits
+    mfc_write_tag_update_all();
+    mfc_read_tag_status();
+    __asm__ __volatile__("   sync");
+    (this->*end_dmawait_profile)(&global_wait_time);
+}
+
+#ifndef MAIL_QUEUE
+// Unqueued variant (MAIL_QUEUE undefined): write data straight to the outbound mailbox.
+void CellDmaManager::mail_write(memaddr data)
+{
+    (this->*start_dmawait_profile)();
+    spu_write_out_mbox((uint32)data); // data is cast to uint32; the disabled block below would send an upper half
+#if 0
+    if (ABIBIT>32) {
+	unsigned long data0 = (unsigned long)data;
+	spu_write_out_mbox((uint32)(data0>>32));
+    }
+#endif
+    (this->*end_dmawait_profile)(&global_mail_time); // with profiling on, the elapsed ticks are added to global_mail_time
+}
+#else
+
+// Queued variant (MAIL_QUEUE defined). It is unclear whether mailbox writes ever
+// have to wait, but TASK_LIST_MAIL leaves the replies piling up until the end,
+// whereas a mail queue can write mail out whenever that is possible, so this
+// will hopefully help a little.
+//
+// A mail is written only while the SPU_WrOutMbox channel count is nonzero;
+// otherwise data is queued for a later call or mail_write_finish_list().
+void CellDmaManager::mail_write(memaddr data)
+{
+    unsigned long long wait = 0;
+
+    if (0 != spu_readchcnt(SPU_WrOutMbox)) {
+        if (mail_queue->count() == 0) {
+            // nothing is queued: data can be written directly
+            (this->*start_dmawait_profile)();
+            spu_write_out_mbox((uint32)data);
+            (this->*end_dmawait_profile)(&wait);
+        } else {
+            // take one mail from mail_queue and write it, then append data
+            (this->*start_dmawait_profile)();
+            spu_write_out_mbox((uint32)mail_queue->recv());
+            (this->*end_dmawait_profile)(&wait);
+            mail_queue->send(data);
+        }
+
+        // charged to mail time, as in the other mail functions
+        mail_write_time += wait;
+        global_mail_time += wait;
+    } else {
+        // no write attempted now: add data to mail_queue
+        mail_queue->send(data);
+    }
+}
+#endif
+
+// Called when there are no more task lists: write out every mail still held in
+// mail_queue, then write data itself.
+//
+// end_dmawait_profile adds to its counter, so `wait` ends up holding the sum of
+// all writes below. It is therefore folded into the totals once, at the end;
+// adding it after every write would count the earlier writes again.
+void CellDmaManager::mail_write_finish_list(memaddr data)
+{
+    unsigned long long wait = 0;
+
+    while (mail_queue->count()) {
+        (this->*start_dmawait_profile)();
+        spu_write_out_mbox((uint32)mail_queue->recv());
+        (this->*end_dmawait_profile)(&wait);
+    }
+
+    // the final mail: data itself
+    (this->*start_dmawait_profile)();
+    spu_write_out_mbox((uint32)data);
+    (this->*end_dmawait_profile)(&wait);
+
+    global_mail_time += wait;
+    finish_mail_write_time += wait;
+}
+
+memaddr CellDmaManager::mail_read()
+{
+    // Read one word from the inbound mailbox and return it as a memaddr; with
+    // profiling on, the time spent is added to global_mail_time and mail_read_time.
+  unsigned long long wait = 0;
+
+    (this->*start_dmawait_profile)();
+    memaddr data = (memaddr)spu_read_in_mbox();
+    
+#if 0 
+    if (ABIBIT>32) {
+	data += (spu_read_in_mbox()<<32);
+    }
+#endif
+    (this->*end_dmawait_profile)(&wait);
+
+    global_mail_time += wait;
+    mail_read_time += wait;
+
+    return data;
+}
+
+memaddr CellDmaManager::task_list_mail_read()
+{
+    // Same mailbox read as mail_read(), but accounted to the task-list counters.
+    unsigned long long wait = 0;
+
+    (this->*start_dmawait_profile)();
+    memaddr data = (memaddr)spu_read_in_mbox();
+    (this->*end_dmawait_profile)(&wait);
+
+
+    task_list_mail_read_time += wait;
+    global_mail_time += wait;
+    task_list_read_count += 1; // counted even while profiling is stopped
+
+    return data;
+}
+
+
+// List load: allocate list->size bytes from s->manager and issue mfc_getl over list->element.
+void *CellDmaManager::dma_loadList(Scheduler *s, ListDataPtr list, uint32 mask)
+{
+    void *buff = s->manager->allocate(list->size);
+    mfc_getl(buff, 0, list->element, sizeof(mfc_list_element_t)*list->length, // list size in bytes
+	     mask, 0, 0);
+    return buff;
+}
+// List store: issue mfc_putl from buff over list->element; nothing is returned.
+void CellDmaManager::dma_storeList(ListDataPtr list, void *buff, uint32 mask)
+{
+    mfc_putl(buff, 0, list->element, sizeof(mfc_list_element_t)*list->length,
+	     mask, 0, 0);
+}
+
+CellDmaManager::CellDmaManager() 
+{
+    // NOTE(review): the profiling counters are not assigned here; only start_profile()/show_dma_wait() zero them
+    mail_queue = new MailManager();
+    stop_profile(); // the profile hooks start out as the no-op versions
+}
+void
+CellDmaManager::start_profile() // zero the counters and enable measurement
+{
+    global_busy_time = 0;
+    global_mail_time = 0;
+    global_wait_time = 0;
+    task_list_mail_read_time = 0;
+    finish_mail_write_time = 0;
+    task_list_read_count = 0;
+    // NOTE(review): mainMemalloc_time is not reset here, only in show_dma_wait()
+    mail_read_time = 0;
+    mail_write_time = 0;
+    // from here on the profile hooks point at the measuring implementations
+    start_dmawait_profile =  &CellDmaManager::do_start_dmawait_profile;
+    end_dmawait_profile =  &CellDmaManager::do_end_dmawait_profile;  
+
+}
+// Swap the profile hooks for the no-op versions; the counters keep their values.
+void
+CellDmaManager::stop_profile()
+{
+    start_dmawait_profile =  &CellDmaManager::null_start_dmawait_profile;
+    end_dmawait_profile =  &CellDmaManager::null_end_dmawait_profile;
+}
+
+/**
+ * Profiling hook run before a DMA/mailbox wait: samples the decrementer, adds
+ * busy_time - sample (busy_time is the sample stored by the end hook) to
+ * global_busy_time, then writes 0xffffffff to the decrementer.
+ */
+void
+CellDmaManager::do_start_dmawait_profile()
+{
+
+    wait_time = spu_readch(SPU_RdDec); 
+    global_busy_time += busy_time - wait_time;
+    //printf("busy_time = %d, wait_time = %d\n", busy_time, wait_time);
+    spu_writech(SPU_WrDec, 0xffffffff);
+
+    // Measurement of mainMem_alloc
+    mainMemalloc_time += (alloc_busy_time - wait_time)*alloc_flag; // alloc_flag is a global set elsewhere; 0 disables this sum
+}
+
+// Profiling hook run after a wait: adds 0xffffffff minus the decrementer sample to *counter.
+void
+CellDmaManager::do_end_dmawait_profile(unsigned long long *counter)
+{
+    wait_time = spu_readch(SPU_RdDec);
+    //printf("wait_time = %d", wait_time);
+    *counter += 0xffffffff - wait_time; // the start hook wrote 0xffffffff to the decrementer
+    busy_time = wait_time; // baseline for the next busy-time measurement
+
+    // Measurement of mainMem_alloc
+    alloc_busy_time = wait_time;
+}
+// No-op hooks installed by stop_profile(); they leave every counter untouched.
+void CellDmaManager::null_start_dmawait_profile() {}
+void CellDmaManager::null_end_dmawait_profile(unsigned long long *counter) {}
+
+// Share of `total` taken by `part`, in percent; 0 when nothing was measured.
+static double dma_profile_percent(unsigned long long part, unsigned long long total)
+{
+    return total ? ((double)part)/((double)total)*100.0 : 0.0;
+}
+
+/**
+ * Print this SPU's profiling counters through s->printf(), then reset them
+ * (and the global alloc_flag) to zero. Percentages are relative to busy +
+ * DMA wait + mail time; when that total or task_list_read_count is zero the
+ * ratios and the average are reported as 0 instead of dividing by zero.
+ *
+ * @param[in] s   Scheduler whose printf() receives the report
+ * @param[in] cpu Number printed after "spu" in the report
+ */
+void
+CellDmaManager::show_dma_wait(Scheduler *s, int cpu)
+{
+  const unsigned long long total =
+	global_busy_time+global_wait_time+global_mail_time;
+
+  // every ratio uses the same denominator
+  double r = dma_profile_percent(global_busy_time, total);
+  double d = dma_profile_percent(global_wait_time, total);
+  double m = dma_profile_percent(global_mail_time, total);
+  double tr = dma_profile_percent(task_list_mail_read_time, total);
+  double tw = dma_profile_percent(finish_mail_write_time, total);
+  double mr = dma_profile_percent(mail_read_time, total);
+  double mw = dma_profile_percent(mail_write_time, total);
+
+  // integer average; task_list_read_count is reset to 0 below, so guard the divide
+  unsigned long long average = task_list_read_count ? task_list_mail_read_time / task_list_read_count : 0;
+
+  s->printf("spu%d:\n busy_time = %lld"
+  " wait_time = %lld(%.3g%%), "
+  " mail_time = %lld(%.3g%%), " 
+  " mail_read_time = %lld(%.3g%%), "
+  " mail_write_time = %lld(%.3g%%),\n"
+  " task_list_mail_read_time = %lld(%.3g%%), " 
+  " finish_mail_write_time = %lld(%.3g%%) "
+  " busy_ratio = %.3g%%, \n"
+  " task_list_read_count = %d"
+  " average_tasklist_mail_time = %lld"
+  " mainMem_alloc_time = %lld\n"
+    ,cpu, global_busy_time,
+    global_wait_time, d, global_mail_time, m, 
+    mail_read_time, mr,
+    mail_write_time, mw, 
+    task_list_mail_read_time, tr,
+    finish_mail_write_time, tw,
+    r,
+    task_list_read_count,
+    average,
+    mainMemalloc_time);
+
+    global_busy_time = 0;
+    global_mail_time = 0;
+    global_wait_time = 0;
+
+    mail_read_time = 0;
+    mail_write_time = 0;
+
+    task_list_mail_read_time = 0;
+    finish_mail_write_time = 0;
+    task_list_read_count = 0;
+
+    mainMemalloc_time = 0;
+    alloc_flag = 0;
+}
+// Next DMA tag: cycles through 16..31 (the first call returns 17); the counter is a function-local static.
+uint32
+CellDmaManager::get_tag()
+{
+    static int tag = 16;
+    tag ++;
+    tag &= 0x0f; // keep the low four bits, i.e. 0..15
+    return tag+16;
+}
+
+/**
+ *  DMA buffer offset in rbuf: bound[i] is the sum of the sizes of elements 0..i-1
+ */
+void
+CellDmaManager::bound(ListData *list)
+{
+    ListElement *elm = list->element;
+    int *bound = list->bound;
+    int offset=0;
+    for(int i=0;i<list->length;i++) {
+        bound[i] = offset; // offset at which element i starts
+        offset += elm[i].size;
+    }
+}
+
+
+/* end */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TaskManager/Cell/CellDmaManager.h	Sat Jan 04 19:10:32 2014 +0900
@@ -0,0 +1,78 @@
+#ifndef INCLUDED_CELL_DMA_MANAGER
+#define INCLUDED_CELL_DMA_MANAGER
+
+#include "base.h"
+#include "types.h"
+#include "DmaManager.h"
+#include "MailManager.h"
+#include "TaskManagerImpl.h"
+
+#include <spu_mfcio.h>
+#include <spu_intrinsics.h>
+
+#define SPU_PROFILE 1
+
+class Scheduler;
+// SPU-side DmaManager: MFC DMA transfers, mailbox I/O and optional wait-time profiling.
+class CellDmaManager : public DmaManager {
+public:
+    BASE_NEW_DELETE(CellDmaManager);
+
+    typedef struct dma_list {
+	uint32 addr;  // should be memaddr, but in Cell's specification...
+	uint32 size;
+    }  __attribute__ ((aligned (DEFAULT_ALIGNMENT))) DmaList, *DmaListPtr;
+
+
+    CellDmaManager() ;
+
+    /* variables */
+    unsigned int wait_time, busy_time, alloc_busy_time; // raw decrementer samples kept between the profile hooks
+    unsigned long long global_busy_time, global_wait_time, global_mail_time, mainMemalloc_time; // accumulated ticks
+    unsigned long long task_list_mail_time; // NOTE(review): not referenced by CellDmaManager.cc
+    unsigned long long mail_read_time, mail_write_time;
+    unsigned long long task_list_mail_read_time, finish_mail_write_time;
+    int task_list_read_count; // task_list_mail_read() calls since the last reset
+
+    /* functions */
+    void *dma_load(Scheduler *s, memaddr addr, uint32 size, uint32 mask); // allocates the receiving buffer via s
+    void *dma_load1(void *buf, memaddr addr, uint32 size, uint32 mask); // loads into the caller's buf
+    void *dma_store(Scheduler *s,void *buf, memaddr addr, uint32 size, uint32 mask);
+
+    void *get_writebuf(Scheduler *s, memaddr addr, uint32 size);
+
+    void dma_wait(uint32 mask) ;
+    void dma_wait(uint32 mask, int cmd) ; // NOTE(review): no definition in CellDmaManager.cc — confirm it is defined or unused
+    void (CellDmaManager::*start_dmawait_profile)(); // hook called before a timed wait
+    void (CellDmaManager::*end_dmawait_profile)(unsigned long long *counter); // hook called after a timed wait
+    void start_profile(); // zero the counters and install the measuring hooks
+    void stop_profile(); // install the no-op hooks
+
+
+    void show_dma_wait(Scheduler *s, int cpu); // print the counters, then reset them
+
+    void mail_write(memaddr data);
+    void mail_write_finish_list(memaddr data); // flush mail_queue, then write data
+    memaddr mail_read();
+    memaddr task_list_mail_read();
+    void *dma_loadList(Scheduler *s,ListDataPtr list, uint32 mask);
+    void dma_storeList(ListDataPtr, void *buff, uint32 mask);
+    uint32 get_tag();
+    void bound(ListData *);
+    void *get_writebuf(Scheduler *s, ListDataPtr, uint32 size) ; // NOTE(review): no definition in CellDmaManager.cc — confirm
+
+
+    private:
+
+    void do_start_dmawait_profile();
+    void do_end_dmawait_profile(unsigned long long *counter);
+    void null_start_dmawait_profile();
+    void null_end_dmawait_profile(unsigned long long *counter);
+
+    MailManagerPtr mail_queue; // allocated in the constructor; used by the mail_write* functions
+
+
+/* end */
+}  ;
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TaskManager/Cell/CellTaskManagerFactory.cc	Sat Jan 04 19:10:32 2014 +0900
@@ -0,0 +1,10 @@
+#include "CellTaskManagerImpl.h"
+#include "SpeThreads.h"
+
+TaskManagerImpl *create_impl(int num, int num_gpu, int useRefDma)
+{
+    // Cell build: one SpeThreads set of num threads; useRefDma is not consulted here.
+    Threads *cpus = new SpeThreads(num);
+
+    return new CellTaskManagerImpl(num, num_gpu, cpus);
+}
--- a/TaskManager/Cell/CellTaskManagerImpl.cc	Fri Jan 03 19:40:14 2014 +0900
+++ b/TaskManager/Cell/CellTaskManagerImpl.cc	Sat Jan 04 19:10:32 2014 +0900
@@ -11,9 +11,6 @@
 #include "MainScheduler.h"
 #include "types.h"
 #include "SysFunc.h"
-#ifdef __CERIUM_GPU__
-#include "GpuThreads.h"
-#endif
 static void send_alloc_reply(CellTaskManagerImpl *tm, int id,
                              Threads *speThreads);
 
@@ -102,7 +99,6 @@
                     speid = (cur_anySpeid++ % cpu_num) + id_offset;
                 else
                     speid = (cur_anySpeid++ % gpuNum) ; // gpu があれば gpu に割り振る
-#ifdef __CERIUM_GPU__
             } else if (gpuNum == 0 && htask->cpu_type < (int)SPE_0) {
                 // gpu = 0 で gpu を指定されたときには cpu で実行する
                 speid = cur_anySpeid++ % machineNum + id_offset ;
@@ -111,7 +107,6 @@
                 else speid = cur_anyGPUid++ % gpuNum;
             } else if (htask->cpu_type < GPU_0+gpuNum) {
                 speid = htask->cpu_type - (int)(GPU_0);
-#endif
             } else if (htask->cpu_type == ANY_ANY) { 
                 speid = cur_anySpeid++ % machineNum;
             } else if (htask->cpu_type == IO_0) { 
@@ -360,26 +355,22 @@
 }
 
 void CellTaskManagerImpl::show_profile() {
-#ifdef __CERIUM_GPU__
     for (int id = 0; id < gpuNum; id++) {
         HTaskPtr t = schedTaskManager->create_task(ShowTime, 0, 0, 0, 0);
         t->set_cpu((CPU_TYPE) (id + GPU_0));
         t->spawn();
     }
-#endif
     HTaskPtr t = schedTaskManager->create_task(ShowTime, 0, 0, 0, 0);
     t->set_cpu(SPE_ANY);
     t->iterate(machineNum);
 }
 
 void CellTaskManagerImpl::start_profile() {
-#ifdef __CERIUM_GPU__
     for (int id = 0; id < gpuNum; id++) {
         HTaskPtr t = schedTaskManager->create_task(StartProfile, 0, 0, 0, 0);
         t->set_cpu((CPU_TYPE) (id + GPU_0));
         t->spawn();
     }
-#endif
     HTaskPtr t = schedTaskManager->create_task(StartProfile, 0, 0, 0, 0);
     t->set_cpu(SPE_ANY);
     t->iterate(machineNum);
@@ -401,20 +392,3 @@
     return tl;
 }
 
-#if defined (__CERIUM_CELL__)||defined (__CERIUM_GPU__)
-TaskManagerImpl *create_impl(int num, int num_gpu, int useRefDma)
-{
-    int io_num = 2; // two threads for I/O
-#ifdef __CERIUM_CELL__
-    Threads *cpus = new SpeThreads(num);
-
-#elif __CERIUM_GPU__    
-    init_task_list(gpu_task_list);
-    Threads *cpus = new CpuThreads(num, io_num, useRefDma,num_gpu);
-    num += num_gpu; // for GPU
-#else    
-    Threads *cpus = new CpuThreads(num, io_num, useRefDma);
-#endif
-    return new CellTaskManagerImpl(num, num_gpu, cpus);
-}
-#endif // __CERIUM_CELL
--- a/TaskManager/Cell/MainScheduler.cc	Fri Jan 03 19:40:14 2014 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,35 +0,0 @@
-#include <stdlib.h>
-#include <string.h>
-#include "MainScheduler.h"
-#include "TaskManagerImpl.h"
-#include "FifoDmaManager.h"
-#include "ReferencedDmaManager.h"
-#include "PreRefDmaManager.h"
-#include "ManyCoreDmaManager.h"
-#include "error.h"
-
-void
-MainScheduler::init_impl(int useRefDma)
-{
-    if (useRefDma & 0x10) {
-        fifoDmaManager = new PreRefDmaManager();
-    } else if (useRefDma & 0x01) {
-        fifoDmaManager = new ManyCoreDmaManager();
-    } else {
-        fifoDmaManager = new ReferencedDmaManager();
-    }
-    connector = fifoDmaManager;
-    connector->init();
-}
-
-/**
- * メインメモリ領域を allocate する。
- * Fifo バージョンだから、まあ普通に malloc でおk
- */
-void
-MainScheduler::mainMem_alloc(int id, int size)
-{
-    mainMemList[id] = (memaddr)manager->allocate(size);
-}
-
-/* end */
--- a/TaskManager/Cell/MainScheduler.h	Fri Jan 03 19:40:14 2014 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,30 +0,0 @@
-#ifndef INCLUDED_MAIN_SCHEDULER
-#define INCLUDED_MAIN_SCHEDULER
-
-#include "Scheduler.h"
-#include "FifoDmaManager.h"
-
-class MainScheduler : public Scheduler {
- protected:
-    FifoDmaManager *fifoDmaManager;
- public:
-    ~MainScheduler(void) {}
-    void init_impl(int useRefDma);
-    void mainMem_alloc(int id, int size);
-    void mail_write_from_host(memaddr data) {
-        fifoDmaManager->mail_write_from_host(data);
-    }
-    int min_cpu() { return mincpu; }
-    int max_cpu() {return maxcpu; }
-
-    memaddr mail_read_from_host() {
-        return fifoDmaManager->mail_read_from_host();
-    }
-
-    int has_mail_from_host() {
-        return fifoDmaManager->has_mail_from_host();
-    }
-
-};
-
-#endif
--- a/TaskManager/Cell/ManyCoreDmaManager.cc	Fri Jan 03 19:40:14 2014 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-#include "ReferencedDmaManager.h"
-#include "Scheduler.h"
-#include "TaskManagerImpl.h"
-#include <string.h>
-
-void *
-ReferencedDmaManager::dma_load(Scheduler *s, memaddr addr, uint32 size, uint32 mask)
-{
-    return (void*)addr;
-}
-
-void *
-ReferencedDmaManager::dma_load1(void *buf, memaddr addr, uint32 size, uint32 mask)
-{
-//    void *from = (void*)addr;
-//    if (buf != from) 
-//        memcpy(buf, from, size);
-    return (void*)addr;
-}
-
-
-void *
-ReferencedDmaManager::dma_loadList(Scheduler *s, ListDataPtr list, uint32 mask)
-{
-    ListElementPtr elm = &list->element[0];
-    return (void*)elm->addr; // ==readbuf
-}
-
-void
-ReferencedDmaManager::free_(void *buf) {
-}
-
-/**
- *  DMA buffer offset in rbuf
- *
- *  faked offset for real address must be set to get correct address by
- *    return (void*)((char*)readbuf + inListData.bound[index]);
- *
- */
-void
-ReferencedDmaManager::bound(ListData *list)
-{
-    ListElement *elm = list->element;
-    long *bound = list->bound;
-#ifdef __CERIUM_CELL__
-    uint32  readbuf = elm[0].addr;
-#else
-    memaddr readbuf = elm[0].addr;
-#endif
-    for(int i=0;i<list->length;i++) {
-        bound[i] = elm[i].addr-readbuf;
-    }
-}
-
-void *
-ReferencedDmaManager::get_writebuf(Scheduler *s,memaddr addr, uint32 size)
-{
-    return (void*)addr;
-}
-
-void *
-ReferencedDmaManager::dma_store(void *buf, memaddr addr, uint32 size, uint32 mask)
-{
-//    void *to = (void*)addr;
-//    if (buf != to) 
-//        memcpy(to, buf, size);
-    return (void*)addr;
-}
-
-void
-ReferencedDmaManager::dma_storeList(ListDataPtr list, void *buff, uint32 mask)
-{
-}
-
-
--- a/TaskManager/Cell/ManyCoreDmaManager.h	Fri Jan 03 19:40:14 2014 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,20 +0,0 @@
-#ifndef INCLUDED_MANYCORE_DMA_MANAGER
-#define INCLUDED_MANYCORE_DMA_MANAGER
-
-#include "FifoDmaManager.h"
-#include "SynchronizedMailManager.h"
-
-class ManyCoreDmaManager : public FifoDmaManager {
-
-public:
-
-    virtual void init()
-    {
-        mail_queue1 = new SynchronizedMailManager();
-        mail_queue2 = new SynchronizedMailManager();
-    }
-
-
-} ;
-
-#endif/* REFERENCED_DMA_MANAGER */
--- a/TaskManager/Cell/PreRefDmaManager.cc	Fri Jan 03 19:40:14 2014 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,19 +0,0 @@
-#include "PreRefDmaManager.h"
-#include "Scheduler.h"
-#include "TaskManagerImpl.h"
-
-void *
-PreRefDmaManager::dma_load(Scheduler *s, memaddr addr, uint32 size, uint32 mask)
-{
-
-#ifdef __CERIUM_FIFO__
-	asm("prefetcht0 %0"::"m"(addr):"memory");
-#endif // __CERIUM_FIFO__
-
-
-	return (void*)addr;
-}
-
-void
-PreRefDmaManager::free_(void *buf) {
-}
--- a/TaskManager/Cell/PreRefDmaManager.h	Fri Jan 03 19:40:14 2014 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-#ifndef INCLUDED_PRE_REFERENCED_DMA_MANAGER
-#define INCLUDED_PRE_REFERENCED_DMA_MANAGER
-
-#include "ReferencedDmaManager.h"
-
-class PreRefDmaManager : public ReferencedDmaManager {
-
-public:
-	/* functions */
-    virtual void *dma_load(Scheduler *s, memaddr addr, uint32 size, uint32 mask);
-    virtual void free_(void *buf);
-} ;
-
-#endif/* PRE_REFERENCED_DMA_MANAGER */
--- a/TaskManager/Cell/ReferencedDmaManager.cc	Fri Jan 03 19:40:14 2014 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-#include "ReferencedDmaManager.h"
-#include "Scheduler.h"
-#include "TaskManagerImpl.h"
-#include <string.h>
-
-void *
-ReferencedDmaManager::dma_load(Scheduler *s, memaddr addr, uint32 size, uint32 mask)
-{
-    return (void*)addr;
-}
-
-void *
-ReferencedDmaManager::dma_load1(void *buf, memaddr addr, uint32 size, uint32 mask)
-{
-//    void *from = (void*)addr;
-//    if (buf != from) 
-//        memcpy(buf, from, size);
-    return (void*)addr;
-}
-
-
-void *
-ReferencedDmaManager::dma_loadList(Scheduler *s, ListDataPtr list, uint32 mask)
-{
-    ListElementPtr elm = &list->element[0];
-    return (void*)elm->addr; // ==readbuf
-}
-
-void
-ReferencedDmaManager::free_(void *buf) {
-}
-
-/**
- *  DMA buffer offset in rbuf
- *
- *  faked offset for real address must be set to get correct address by
- *    return (void*)((char*)readbuf + inListData.bound[index]);
- *
- */
-void
-ReferencedDmaManager::bound(ListData *list)
-{
-    ListElement *elm = list->element;
-    long *bound = list->bound;
-#ifdef __CERIUM_CELL__
-    uint32  readbuf = elm[0].addr;
-#else
-    memaddr readbuf = elm[0].addr;
-#endif
-    for(int i=0;i<list->length;i++) {
-        bound[i] = elm[i].addr-readbuf;
-    }
-}
-
-void *
-ReferencedDmaManager::get_writebuf(Scheduler *s,memaddr addr, uint32 size)
-{
-    return (void*)addr;
-}
-
-void *
-ReferencedDmaManager::dma_store(void *buf, memaddr addr, uint32 size, uint32 mask)
-{
-//    void *to = (void*)addr;
-//    if (buf != to) 
-//        memcpy(to, buf, size);
-    return (void*)addr;
-}
-
-void
-ReferencedDmaManager::dma_storeList(ListDataPtr list, void *buff, uint32 mask)
-{
-}
-
-
--- a/TaskManager/Cell/ReferencedDmaManager.h	Fri Jan 03 19:40:14 2014 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,25 +0,0 @@
-#ifndef INCLUDED_REFERENCED_DMA_MANAGER
-#define INCLUDED_REFERENCED_DMA_MANAGER
-
-#include "ManyCoreDmaManager.h"
-
-class ReferencedDmaManager : public ManyCoreDmaManager {
-
-public:
-	/* functions */
-    virtual void *dma_load(Scheduler *s, memaddr addr, uint32 size, uint32 mask);
-    virtual void *dma_load1(void *buf, memaddr addr, uint32 size, uint32 mask);
-    virtual void *dma_loadList(Scheduler *s, ListDataPtr list, uint32 mask);
-    virtual void *dma_store(void *buf, memaddr addr, uint32 size, uint32 mask);
-    virtual void dma_storeList(ListDataPtr list, void *buff, uint32 mask);
-    virtual void *get_writebuf(Scheduler *s,memaddr addr, uint32 size);
-
-    virtual void free_(void *buff);
-    virtual void bound(ListData *list);
-    virtual void set_mail_waiter(SemPtr w) {
-        mail_queue1->set_waiter(w);
-    }
-
-} ;
-
-#endif/* REFERENCED_DMA_MANAGER */
--- a/TaskManager/Cell/spe/CellDmaManager.cc	Fri Jan 03 19:40:14 2014 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,357 +0,0 @@
-//#include <stdio.h>
-#include <stdlib.h>
-#include "CellDmaManager.h"
-#include "Scheduler.h"
-
-
-unsigned long long alloc_flag = 0;
-
-/**
- * DMA Load
- *
- * @param[in] buf  Buffer of Load Data
- * @param[in] addr Address of Load Data at Main Memory
- * @param[in] size Size of Load Data
- * @param[in] mask DMA tag
-*/
-void *CellDmaManager::dma_load(Scheduler *s, memaddr addr, uint32 size, uint32 mask)
-{
-    void *buf = 0;
-    if (size == 0) return buf;
-    buf = s->manager->allocate(size);
-    mfc_get((volatile void *)buf, addr, size, mask, 0, 0);
-    return buf;
-}
-
-void *CellDmaManager::dma_load1(void *buf, memaddr addr, uint32 size, uint32 mask)
-{
-    if (size == 0) return buf;
-    mfc_get((volatile void *)buf, addr, size, mask, 0, 0);
-    return buf;
-}
-
-
-void *
-CellDmaManager::get_writebuf(Scheduler *s,memaddr addr, uint32 size)
-{
-    void *b = s->manager->allocate(size);
-    return b;
-}
-
-/**
- * DMA Store
- *
- * @param[in] buf  Buffer of Store Data at SPE
- * @param[in] addr Address of Store Data at Main Memory
- * @param[in] size Size of Store Data
- * @param[in] mask DMA tag
-*/
-void* CellDmaManager::dma_store(Scheduler *s,void *buf, memaddr addr, uint32 size, uint32 mask)
-{
-    if (size == 0) return buf;
-    mfc_put((volatile void *)buf, addr, size, mask, 0, 0);
-    return buf; 
-}
-
-void CellDmaManager::dma_wait(uint32 mask)
-{
-    (this->*start_dmawait_profile)();
-    mfc_write_tag_mask(1 << mask);
-    mfc_write_tag_update_all();
-    mfc_read_tag_status();
-    __asm__ __volatile__("   sync");
-    (this->*end_dmawait_profile)(&global_wait_time);
-}
-
-#ifndef MAIL_QUEUE
-
-void CellDmaManager::mail_write(memaddr data)
-{
-    (this->*start_dmawait_profile)();
-    spu_write_out_mbox((uint32)data);
-#if 0
-    if (ABIBIT>32) {
-	unsigned long data0 = (unsigned long)data;
-	spu_write_out_mbox((uint32)(data0>>32));
-    }
-#endif
-    (this->*end_dmawait_profile)(&global_mail_time);
-}
-#else
-
-// mail を queue にしたみた。mailの書き出しの待ちあるのかわからないけど
-//TASK_LIST_MAIL は仕事が最後に溜まってしまうけど、MailQueueなら出来る時にmailを書き出す
-//ので多少は効果あるといいな。
-void CellDmaManager::mail_write(memaddr data)
-{
-  unsigned long long wait = 0;
-
-    if (0 != spu_readchcnt(SPU_WrOutMbox)) {
-      if (mail_queue->count()) {
-	  (this->*start_dmawait_profile)();
-          spu_write_out_mbox((uint32)data);
-	  (this->*end_dmawait_profile)(&wait);
-	  mail_write_time += wait;
-	  global_wait_time += wait;
-      } else {
-
-	//mail_queue から poll する
-	(this->*start_dmawait_profile)();
-	spu_write_out_mbox((uint32)mail_queue->recv());
-	(this->*end_dmawait_profile)(&wait);
-	  mail_write_time += wait;
-	  global_wait_time += wait;
-	//mail_queue に加える
-	mail_queue->send(data);
-
-      }
-    } else {
-
-      mail_queue->send(data);
-
-    }
-
-}
-#endif
-
-// tasklist がもうない場合に、MailQueue 全部を書き出す
-void CellDmaManager::mail_write_finish_list(memaddr data)
-{
-
-    unsigned long long wait = 0;
-
-    while (mail_queue->count()) {
-      (this->*start_dmawait_profile)();
-      spu_write_out_mbox((uint32)mail_queue->recv());      
-      (this->*end_dmawait_profile)(&wait);
-
-      global_mail_time += wait;
-      finish_mail_write_time += wait;
-
-    }
-
-    (this->*start_dmawait_profile)();
-    spu_write_out_mbox((uint32)data);
-    (this->*end_dmawait_profile)(&wait);
-
-    global_mail_time += wait;
-    finish_mail_write_time += wait;
-
-}
-
-memaddr CellDmaManager::mail_read()
-{
-
-
-  unsigned long long wait = 0;
-
-    (this->*start_dmawait_profile)();
-    memaddr data = (memaddr)spu_read_in_mbox();
-    
-#if 0 
-    if (ABIBIT>32) {
-	data += (spu_read_in_mbox()<<32);
-    }
-#endif
-    (this->*end_dmawait_profile)(&wait);
-
-    global_mail_time += wait;
-    mail_read_time += wait;
-
-    return data;
-}
-
-memaddr CellDmaManager::task_list_mail_read()
-{
-
-    unsigned long long wait = 0;
-
-    (this->*start_dmawait_profile)();
-    memaddr data = (memaddr)spu_read_in_mbox();
-    (this->*end_dmawait_profile)(&wait);
-
-
-    task_list_mail_read_time += wait;
-    global_mail_time += wait;
-    task_list_read_count += 1;
-
-    return data;
-}
-
-
-
-void *CellDmaManager::dma_loadList(Scheduler *s, ListDataPtr list, uint32 mask)
-{
-    void *buff = s->manager->allocate(list->size);
-    mfc_getl(buff, 0, list->element, sizeof(mfc_list_element_t)*list->length,
-	     mask, 0, 0);
-    return buff;
-}
-
-void CellDmaManager::dma_storeList(ListDataPtr list, void *buff, uint32 mask)
-{
-    mfc_putl(buff, 0, list->element, sizeof(mfc_list_element_t)*list->length,
-	     mask, 0, 0);
-}
-
-CellDmaManager::CellDmaManager() 
-{
-
-    mail_queue = new MailManager();
-    stop_profile();
-}
-void
-CellDmaManager::start_profile()
-{
-    global_busy_time = 0;
-    global_mail_time = 0;
-    global_wait_time = 0;
-    task_list_mail_read_time = 0;
-    finish_mail_write_time = 0;
-    task_list_read_count = 0;
-
-    mail_read_time = 0;
-    mail_write_time = 0;
-
-    start_dmawait_profile =  &CellDmaManager::do_start_dmawait_profile;
-    end_dmawait_profile =  &CellDmaManager::do_end_dmawait_profile;  
-
-}
-
-void
-CellDmaManager::stop_profile()
-{
-    start_dmawait_profile =  &CellDmaManager::null_start_dmawait_profile;
-    end_dmawait_profile =  &CellDmaManager::null_end_dmawait_profile;
-}
-
-/**
- * DMA Wait
- *
- * @param[in] mask Tag for Wait DMA process
- */
-void
-CellDmaManager::do_start_dmawait_profile()
-{
-
-    wait_time = spu_readch(SPU_RdDec); 
-    global_busy_time += busy_time - wait_time;
-    //printf("busy_time = %d, wait_time = %d\n", busy_time, wait_time);
-    spu_writech(SPU_WrDec, 0xffffffff);
-
-    // Measurement of mainMem_alloc
-    mainMemalloc_time += (alloc_busy_time - wait_time)*alloc_flag;
-}
-
-
-void
-CellDmaManager::do_end_dmawait_profile(unsigned long long *counter)
-{
-    wait_time = spu_readch(SPU_RdDec);
-    //printf("wait_time = %d", wait_time);
-    *counter += 0xffffffff - wait_time;
-    busy_time = wait_time;
-
-    // Measurement of mainMem_alloc
-    alloc_busy_time = wait_time;
-}
-
-void CellDmaManager::null_start_dmawait_profile() {}
-void CellDmaManager::null_end_dmawait_profile(unsigned long long *counter) {}
-
-void
-CellDmaManager::show_dma_wait(Scheduler *s, int cpu)
-{
-
-  double r = ((double)global_busy_time)/((double)(
-	global_busy_time+global_wait_time+global_mail_time
-	))*100.0;
-
-  double d = ((double)global_wait_time)/((double)(
-	global_busy_time+global_wait_time+global_mail_time
-	))*100.0;
-
-  double m = ((double)global_mail_time)/((double)(
-	global_busy_time+global_wait_time+global_mail_time
-	))*100.0;
-
-  double tr = ((double)task_list_mail_read_time)/((double)(
-	global_busy_time+global_wait_time+global_mail_time
-	))*100.0;
-
-  double tw = ((double)finish_mail_write_time)/((double)(
-	global_busy_time+global_wait_time+global_mail_time
-	))*100.0;
-
-  double mr = ((double)mail_read_time)/((double)(
-	global_busy_time+global_wait_time+global_mail_time
-	))*100.0;
-
-  double mw = ((double)mail_write_time)/((double)(
-	global_busy_time+global_wait_time+global_mail_time
-	))*100.0;
-
-
-  s->printf("spu%d:\n busy_time = %lld"
-  " wait_time = %lld(%.3g%%), "
-  " mail_time = %lld(%.3g%%), " 
-  " mail_read_time = %lld(%.3g%%), "
-  " mail_write_time = %lld(%.3g%%),\n"
-  " task_list_mail_read_time = %lld(%.3g%%), " 
-  " finish_mail_write_time = %lld(%.3g%%) "
-  " busy_ratio = %.3g%%, \n"
-  " task_list_read_count = %d"
-  " average_tasklist_mail_time = %lld"
-  " mainMem_alloc_time = %lld\n"
-    ,cpu, global_busy_time,
-    global_wait_time, d, global_mail_time, m, 
-    mail_read_time, mr,
-    mail_write_time, mw, 
-    task_list_mail_read_time, tr,
-    finish_mail_write_time, tw,
-    r,
-    task_list_read_count,
-    task_list_mail_read_time / task_list_read_count,	    
-    mainMemalloc_time);
-
-    global_busy_time = 0;
-    global_mail_time = 0;
-    global_wait_time = 0;
-
-    mail_read_time = 0;
-    mail_write_time = 0;
-
-    task_list_mail_read_time = 0;
-    finish_mail_write_time = 0;
-    task_list_read_count = 0;
-
-    mainMemalloc_time = 0;
-    alloc_flag = 0;
-}
-
-uint32
-CellDmaManager::get_tag()
-{
-    static int tag = 16;
-    tag ++;
-    tag &= 0x0f;
-    return tag+16;
-}
-
-/**
- *  DMA buffer offset in rbuf
- */
-void
-CellDmaManager::bound(ListData *list)
-{
-    ListElement *elm = list->element;
-    int *bound = list->bound;
-    int offset=0;
-    for(int i=0;i<list->length;i++) {
-        bound[i] = offset;
-        offset += elm[i].size;
-    }
-}
-
-
-/* end */
--- a/TaskManager/Cell/spe/CellDmaManager.h	Fri Jan 03 19:40:14 2014 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,78 +0,0 @@
-#ifndef INCLUDED_CELL_DMA_MANAGER
-#define INCLUDED_CELL_DMA_MANAGER
-
-#include "base.h"
-#include "types.h"
-#include "DmaManager.h"
-#include "MailManager.h"
-#include "TaskManagerImpl.h"
-
-#include <spu_mfcio.h>
-#include <spu_intrinsics.h>
-
-#define SPU_PROFILE 1
-
-class Scheduler;
-
-class CellDmaManager : public DmaManager {
-public:
-    BASE_NEW_DELETE(CellDmaManager);
-
-    typedef struct dma_list {
-	uint32 addr;  // should be memaddr, but in Cell's specification...
-	uint32 size;
-    }  __attribute__ ((aligned (DEFAULT_ALIGNMENT))) DmaList, *DmaListPtr;
-
-
-    CellDmaManager() ;
-
-    /* variables */
-    unsigned int wait_time, busy_time, alloc_busy_time;
-    unsigned long long global_busy_time, global_wait_time, global_mail_time, mainMemalloc_time;
-    unsigned long long task_list_mail_time;
-    unsigned long long mail_read_time, mail_write_time;
-    unsigned long long task_list_mail_read_time, finish_mail_write_time;
-    int task_list_read_count;
-
-    /* functions */
-    void *dma_load(Scheduler *s, memaddr addr, uint32 size, uint32 mask);
-    void *dma_load1(void *buf, memaddr addr, uint32 size, uint32 mask);
-    void *dma_store(Scheduler *s,void *buf, memaddr addr, uint32 size, uint32 mask);
-
-    void *get_writebuf(Scheduler *s, memaddr addr, uint32 size);
-
-    void dma_wait(uint32 mask) ;
-    void dma_wait(uint32 mask, int cmd) ;
-    void (CellDmaManager::*start_dmawait_profile)();
-    void (CellDmaManager::*end_dmawait_profile)(unsigned long long *counter);
-    void start_profile();
-    void stop_profile();
-
-
-    void show_dma_wait(Scheduler *s, int cpu);
-
-    void mail_write(memaddr data);
-    void mail_write_finish_list(memaddr data);
-    memaddr mail_read();
-    memaddr task_list_mail_read();
-    void *dma_loadList(Scheduler *s,ListDataPtr list, uint32 mask);
-    void dma_storeList(ListDataPtr, void *buff, uint32 mask);
-    uint32 get_tag();
-    void bound(ListData *);
-    void *get_writebuf(Scheduler *s, ListDataPtr, uint32 size) ;
-
-
-    private:
-
-    void do_start_dmawait_profile();
-    void do_end_dmawait_profile(unsigned long long *counter);
-    void null_start_dmawait_profile();
-    void null_end_dmawait_profile(unsigned long long *counter);
-
-    MailManagerPtr mail_queue;
-
-
-/* end */
-}  ;
-
-#endif
--- a/TaskManager/ChangeLog	Fri Jan 03 19:40:14 2014 +0900
+++ b/TaskManager/ChangeLog	Sat Jan 04 19:10:32 2014 +0900
@@ -1,3 +1,19 @@
+2014-1-4 Shinji kONO <kono@ie.u-ryukyu.ac.jp>
+
+	MY_SPE_STATUS_READY は task 終了を待ってから出しているが、あまり、望ましくない。
+	spe_running の意味とは異なってしまうが、もう TaskFinishMail は来ないという意味でも良い。
+	その代わり、本当の終了を待つというプロトコルが必要になる。
+
+	Many Core 側でも必要以上に待っている? もっとも、DMA pipeline が動いてないので
+	ほとんど関係ないが。SchedTaskList は SchedTask を継承しているのでT1を兼ねてる。
+
+	write   T3   T2  T1@  N2  N  M    * MY_SPE_STATUS_READY
+	exec    T2   T1  N2   N   M  T1   @ TaskFinishMail
+	read    T1   N2* N    M   T1 T2   ! TL dma load
+	next    N2   N   M%   TL! T2 T3   % TL mail read
+
+	MY_SPE_STATUS_READY は TaskFinishMail よりも早めに出すほうが良いのか。
+
 2014-1-3 Shinji kONO <kono@ie.u-ryukyu.ac.jp>
 
 	speTaskList を cyclic queue に直しそこねてた。
--- a/TaskManager/Fifo/FifoTaskManagerImpl.cc	Fri Jan 03 19:40:14 2014 +0900
+++ b/TaskManager/Fifo/FifoTaskManagerImpl.cc	Sat Jan 04 19:10:32 2014 +0900
@@ -301,30 +301,3 @@
     return tl;
 }
 
-/**
- * # # # # # # # # 
- *   Abstract Factory Pattern
- * # # # # # # # 
- */
-#ifdef __CERIUM_FIFO__
-TaskManagerImpl*
-create_impl(int num, int gpu, int useRefDma)
-{
-#ifdef __CERIUM_PARALLEL__
-	//マルチコアverでコンパイルしたのにかかわらず、
-	//CPU数が0だと、FifoTaskManagerが呼ばれてしまうので
-	//0の場合は1を入れて、CellTaskManagerが呼ばれるようにする。
-	if (num == 0) num = 1;
-#else
-	num = 0;
-#endif
-
-	if (num == 0) {
-        return new FifoTaskManagerImpl(num);
-	} else {
-		Threads *cpus = new CpuThreads(num,useRefDma);
-		return new CellTaskManagerImpl(num,gpu,cpus);
-	}
-}
-#endif // __CERIUM_FIFO__
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TaskManager/Fifo/FifoTaskManagerImplFactory.cc	Sat Jan 04 19:10:32 2014 +0900
@@ -0,0 +1,9 @@
+#include "FifoTaskManagerImpl.h"
+
+TaskManagerImpl*
+create_impl(int num, int gpu, int useRefDma)	// FIFO factory: gpu and useRefDma are not used here
+{
+	num = 0;	// the requested count is discarded; the FIFO manager is always built with 0
+        return new FifoTaskManagerImpl(num);	// caller receives the newly allocated manager
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TaskManager/Gpu/GpuTaskManagerFactory.cc	Sat Jan 04 19:10:32 2014 +0900
@@ -0,0 +1,12 @@
+#define DEBUG
+#include "GpuTaskManagerImpl.h"
+#include "GpuThreads.h"
+
+TaskManagerImpl *create_impl(int num, int num_gpu, int useRefDma)	// GPU factory
+{
+    int io_num = 2; // two threads for I/O
+    init_task_list(gpu_task_list);	// both names are defined outside this file; runs before the threads object is created
+    Threads *cpus = new CpuThreads(num, io_num, useRefDma,num_gpu);	// receives num before the GPU count is added to it
+    num += num_gpu; // the manager below is given the combined CPU + GPU count
+    return new GpuTaskManagerImpl(num, num_gpu, cpus);
+}
--- a/TaskManager/Makefile.def	Fri Jan 03 19:40:14 2014 +0900
+++ b/TaskManager/Makefile.def	Sat Jan 04 19:10:32 2014 +0900
@@ -31,6 +31,10 @@
 IMPL_GPU_SRCS = $(wildcard $(IMPL_GPU_DIR)/*.cc)
 IMPL_GPU_OBJS = $(IMPL_GPU_SRCS:.cc=.o)
 
+IMPL_MANYCORE_DIR  = ManyCore
+IMPL_MANYCORE_SRCS = $(wildcard $(IMPL_MANYCORE_DIR)/*.cc)
+IMPL_MANYCORE_OBJS = $(IMPL_MANYCORE_SRCS:.cc=.o)
+
 ABIBIT = 64
 
 OPT = -g -O0 -DMAIL_QUEUE   #-DEARLY_TOUCH 
--- a/TaskManager/Makefile.gpu	Fri Jan 03 19:40:14 2014 +0900
+++ b/TaskManager/Makefile.gpu	Sat Jan 04 19:10:32 2014 +0900
@@ -19,8 +19,7 @@
 
 ALL_OBJS = $(KERN_MAIN_OBJS) $(KERN_PPE_OBJS) $(KERN_SCHED_OBJS) \
 	$(KERN_SYSTASK_OBJS) $(IMPL_FIFO_OBJS) $(KERN_MEM_OBJS) \
-	$(IMPL_GPU_OBJS) Cell/spe/SpeTaskManagerImpl.o Cell/CellTaskManagerImpl.o  \
-	    Cell/MainScheduler.o Cell/ManyCoreDmaManager.o Cell/PreRefDmaManager.o Cell/ReferencedDmaManager.o
+	$(IMPL_GPU_OBJS) Cell/CellTaskManagerImpl.o  
 
 Makefile.dep: 
 	make -f Makefile.gpu depend
--- a/TaskManager/Makefile.parallel	Fri Jan 03 19:40:14 2014 +0900
+++ b/TaskManager/Makefile.parallel	Sat Jan 04 19:10:32 2014 +0900
@@ -19,9 +19,8 @@
 default: $(TARGET)
 
 ALL_OBJS = $(KERN_MAIN_OBJS) $(KERN_PPE_OBJS) $(KERN_SCHED_OBJS) \
-	$(KERN_SYSTASK_OBJS) $(IMPL_FIFO_OBJS) $(KERN_MEM_OBJS) \
-	Cell/spe/SpeTaskManagerImpl.o Cell/CellTaskManagerImpl.o
-            Cell/MainScheduler.o Cell/ManyCoreDmaManager.o Cell/PreRefDmaManager.o Cell/ReferencedDmaManager.o
+	$(KERN_SYSTASK_OBJS) $(IMPL_MANYCORE_OBJS) $(KERN_MEM_OBJS) \
+	Cell/CellTaskManagerImpl.o
 
 Makefile.dep: 
 	make -f Makefile.parallel depend
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TaskManager/ManyCore/MainScheduler.cc	Sat Jan 04 19:10:32 2014 +0900
@@ -0,0 +1,35 @@
+#include <stdlib.h>
+#include <string.h>
+#include "MainScheduler.h"
+#include "TaskManagerImpl.h"
+#include "FifoDmaManager.h"
+#include "ReferencedDmaManager.h"
+#include "PreRefDmaManager.h"
+#include "ManyCoreDmaManager.h"
+#include "error.h"
+
+void
+MainScheduler::init_impl(int useRefDma)	// pick the DMA manager from the useRefDma flag bits
+{
+    if (useRefDma & 0x10) {	// bit 4 is tested first, so it wins over bit 0
+        fifoDmaManager = new PreRefDmaManager();
+    } else if (useRefDma & 0x01) {	// bit 0 set, bit 4 clear
+        fifoDmaManager = new ManyCoreDmaManager();
+    } else {	// neither bit set
+        fifoDmaManager = new ReferencedDmaManager();
+    }
+    connector = fifoDmaManager;	// the same object is also installed as the connector
+    connector->init();
+}
+
+/**
+ * Allocate a main-memory region and record it in mainMemList[id].
+ * This is the FIFO version, so an ordinary malloc-style allocation is fine.
+ */
+void
+MainScheduler::mainMem_alloc(int id, int size)
+{
+    mainMemList[id] = (memaddr)manager->allocate(size);
+}
+
+/* end */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TaskManager/ManyCore/MainScheduler.h	Sat Jan 04 19:10:32 2014 +0900
@@ -0,0 +1,30 @@
+#ifndef INCLUDED_MAIN_SCHEDULER
+#define INCLUDED_MAIN_SCHEDULER
+
+#include "Scheduler.h"
+#include "FifoDmaManager.h"
+
+class MainScheduler : public Scheduler {	// Scheduler that forwards host mail calls to a FifoDmaManager
+ protected:
+    FifoDmaManager *fifoDmaManager;	// assigned in init_impl()
+ public:
+    ~MainScheduler(void) {}	// empty body: fifoDmaManager is not released here
+    void init_impl(int useRefDma);
+    void mainMem_alloc(int id, int size);
+    void mail_write_from_host(memaddr data) {	// forwarded to fifoDmaManager
+        fifoDmaManager->mail_write_from_host(data);
+    }
+    int min_cpu() { return mincpu; }	// mincpu and maxcpu are not declared in this class
+    int max_cpu() {return maxcpu; }
+
+    memaddr mail_read_from_host() {	// forwarded to fifoDmaManager
+        return fifoDmaManager->mail_read_from_host();
+    }
+
+    int has_mail_from_host() {	// forwarded to fifoDmaManager
+        return fifoDmaManager->has_mail_from_host();
+    }
+
+};
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TaskManager/ManyCore/ManyCoreDmaManager.cc	Sat Jan 04 19:10:32 2014 +0900
@@ -0,0 +1,75 @@
+#include "ReferencedDmaManager.h"
+#include "Scheduler.h"
+#include "TaskManagerImpl.h"
+#include <string.h>
+
+void *
+ReferencedDmaManager::dma_load(Scheduler *s, memaddr addr, uint32 size, uint32 mask)
+{	// NOTE(review): ReferencedDmaManager.cc in this changeset has the same text as this file; confirm only one copy is linked
+    return (void*)addr;	// no copy: the address itself is returned; s, size and mask are unused
+}
+
+void *
+ReferencedDmaManager::dma_load1(void *buf, memaddr addr, uint32 size, uint32 mask)
+{
+//    disabled copy into buf, kept for reference:
+//    void *from = (void*)addr;
+//    if (buf != from) memcpy(buf, from, size);
+    return (void*)addr;	// buf is left untouched; the source address is returned
+}
+
+
+void *
+ReferencedDmaManager::dma_loadList(Scheduler *s, ListDataPtr list, uint32 mask)
+{
+    ListElementPtr elm = &list->element[0];	// only the first list element is consulted
+    return (void*)elm->addr; // this address serves as the read buffer
+}
+
+void
+ReferencedDmaManager::free_(void *buf) {	// empty body: buf is not released
+}
+
+/**
+ *  Fill list->bound with each element's offset from the first element.
+ *
+ *  These are faked offsets onto real addresses; they make
+ *    (void*)((char*)readbuf + inListData.bound[index])
+ *  yield element[index].addr when readbuf is element[0].addr.
+ */
+void
+ReferencedDmaManager::bound(ListData *list)
+{
+    ListElement *elm = list->element;
+    long *bound = list->bound;
+#ifdef __CERIUM_CELL__
+    uint32  readbuf = elm[0].addr;	// the Cell build keeps the base as uint32
+#else
+    memaddr readbuf = elm[0].addr;	// element 0 is read even when list->length is 0
+#endif
+    for(int i=0;i<list->length;i++) {
+        bound[i] = elm[i].addr-readbuf;	// element 0 therefore gets offset 0
+    }
+}
+
+void *
+ReferencedDmaManager::get_writebuf(Scheduler *s,memaddr addr, uint32 size)
+{
+    return (void*)addr;	// the destination address itself is handed out as the write buffer
+}
+
+void *
+ReferencedDmaManager::dma_store(void *buf, memaddr addr, uint32 size, uint32 mask)
+{
+//    disabled copy out of buf, kept for reference:
+//    void *to = (void*)addr;
+//    if (buf != to) memcpy(to, buf, size);
+    return (void*)addr;	// nothing is written; the destination address is returned
+}
+
+void
+ReferencedDmaManager::dma_storeList(ListDataPtr list, void *buff, uint32 mask)
+{	// empty body: no data is moved
+}
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TaskManager/ManyCore/ManyCoreDmaManager.h	Sat Jan 04 19:10:32 2014 +0900
@@ -0,0 +1,20 @@
+#ifndef INCLUDED_MANYCORE_DMA_MANAGER
+#define INCLUDED_MANYCORE_DMA_MANAGER
+
+#include "FifoDmaManager.h"
+#include "SynchronizedMailManager.h"
+
+class ManyCoreDmaManager : public FifoDmaManager {
+
+public:
+
+    virtual void init()	// installs a fresh SynchronizedMailManager in each of the two mail queues
+    {
+        mail_queue1 = new SynchronizedMailManager();	// mail_queue1 and mail_queue2 are not declared in this class
+        mail_queue2 = new SynchronizedMailManager();
+    }
+
+
+} ;
+
+#endif /* INCLUDED_MANYCORE_DMA_MANAGER */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TaskManager/ManyCore/ManyCoreTaskManagerFactory.cc	Sat Jan 04 19:10:32 2014 +0900
@@ -0,0 +1,8 @@
+#include "ManyCoreTaskManagerImpl.h"
+
+TaskManagerImpl *create_impl(int num, int num_gpu, int useRefDma)	// many-core factory
+{
+    int io_num = 2; // two threads for I/O
+    Threads *cpus = new CpuThreads(num, io_num, useRefDma);	// no GPU count is passed to the threads object
+    return new ManyCoreTaskManagerImpl(num, num_gpu, cpus);	// num_gpu is forwarded to the manager only
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TaskManager/ManyCore/PreRefDmaManager.cc	Sat Jan 04 19:10:32 2014 +0900
@@ -0,0 +1,19 @@
+#include "PreRefDmaManager.h"
+#include "Scheduler.h"
+#include "TaskManagerImpl.h"
+
+// Hint the cache about the data at addr, then return the address unchanged (no copy is made).
+void *
+PreRefDmaManager::dma_load(Scheduler *s, memaddr addr, uint32 size, uint32 mask)
+{
+#ifdef __CERIUM_FIFO__
+	// The "m" operand must be the pointee: "m"(addr) would prefetch the
+	// storage of the parameter itself instead of the data it refers to.
+	asm("prefetcht0 %0"::"m"(*(const char *)addr):"memory");
+#endif // __CERIUM_FIFO__
+	return (void*)addr;
+}
+
+void
+PreRefDmaManager::free_(void *buf) {	// empty body: buf is not released
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TaskManager/ManyCore/PreRefDmaManager.h	Sat Jan 04 19:10:32 2014 +0900
@@ -0,0 +1,14 @@
+#ifndef INCLUDED_PRE_REFERENCED_DMA_MANAGER
+#define INCLUDED_PRE_REFERENCED_DMA_MANAGER
+
+#include "ReferencedDmaManager.h"
+
+class PreRefDmaManager : public ReferencedDmaManager {
+
+public:
+	/* both members below are also declared virtual in ReferencedDmaManager */
+    virtual void *dma_load(Scheduler *s, memaddr addr, uint32 size, uint32 mask);
+    virtual void free_(void *buf);
+} ;
+
+#endif/* PRE_REFERENCED_DMA_MANAGER */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TaskManager/ManyCore/ReferencedDmaManager.cc	Sat Jan 04 19:10:32 2014 +0900
@@ -0,0 +1,75 @@
+#include "ReferencedDmaManager.h"
+#include "Scheduler.h"
+#include "TaskManagerImpl.h"
+#include <string.h>
+
+void *
+ReferencedDmaManager::dma_load(Scheduler *s, memaddr addr, uint32 size, uint32 mask)
+{
+    return (void*)addr;	// no copy: the address itself is returned; s, size and mask are unused
+}
+
+void *
+ReferencedDmaManager::dma_load1(void *buf, memaddr addr, uint32 size, uint32 mask)
+{
+//    disabled copy into buf, kept for reference:
+//    void *from = (void*)addr;
+//    if (buf != from) memcpy(buf, from, size);
+    return (void*)addr;	// buf is left untouched; the source address is returned
+}
+
+
+void *
+ReferencedDmaManager::dma_loadList(Scheduler *s, ListDataPtr list, uint32 mask)
+{
+    ListElementPtr elm = &list->element[0];	// only the first list element is consulted
+    return (void*)elm->addr; // this address serves as the read buffer
+}
+
+void
+ReferencedDmaManager::free_(void *buf) {	// empty body: buf is not released
+}
+
+/**
+ *  Fill list->bound with each element's offset from the first element.
+ *
+ *  These are faked offsets onto real addresses; they make
+ *    (void*)((char*)readbuf + inListData.bound[index])
+ *  yield element[index].addr when readbuf is element[0].addr.
+ */
+void
+ReferencedDmaManager::bound(ListData *list)
+{
+    ListElement *elm = list->element;
+    long *bound = list->bound;
+#ifdef __CERIUM_CELL__
+    uint32  readbuf = elm[0].addr;	// the Cell build keeps the base as uint32
+#else
+    memaddr readbuf = elm[0].addr;	// element 0 is read even when list->length is 0
+#endif
+    for(int i=0;i<list->length;i++) {
+        bound[i] = elm[i].addr-readbuf;	// element 0 therefore gets offset 0
+    }
+}
+
+void *
+ReferencedDmaManager::get_writebuf(Scheduler *s,memaddr addr, uint32 size)
+{
+    return (void*)addr;	// the destination address itself is handed out as the write buffer
+}
+
+void *
+ReferencedDmaManager::dma_store(void *buf, memaddr addr, uint32 size, uint32 mask)
+{
+//    disabled copy out of buf, kept for reference:
+//    void *to = (void*)addr;
+//    if (buf != to) memcpy(to, buf, size);
+    return (void*)addr;	// nothing is written; the destination address is returned
+}
+
+void
+ReferencedDmaManager::dma_storeList(ListDataPtr list, void *buff, uint32 mask)
+{	// empty body: no data is moved
+}
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TaskManager/ManyCore/ReferencedDmaManager.h	Sat Jan 04 19:10:32 2014 +0900
@@ -0,0 +1,25 @@
+#ifndef INCLUDED_REFERENCED_DMA_MANAGER
+#define INCLUDED_REFERENCED_DMA_MANAGER
+
+#include "ManyCoreDmaManager.h"
+
+class ReferencedDmaManager : public ManyCoreDmaManager {
+
+public:
+	/* DMA-style entry points; the definitions in ReferencedDmaManager.cc return the given addresses without copying */
+    virtual void *dma_load(Scheduler *s, memaddr addr, uint32 size, uint32 mask);
+    virtual void *dma_load1(void *buf, memaddr addr, uint32 size, uint32 mask);
+    virtual void *dma_loadList(Scheduler *s, ListDataPtr list, uint32 mask);
+    virtual void *dma_store(void *buf, memaddr addr, uint32 size, uint32 mask);
+    virtual void dma_storeList(ListDataPtr list, void *buff, uint32 mask);
+    virtual void *get_writebuf(Scheduler *s,memaddr addr, uint32 size);
+
+    virtual void free_(void *buff);
+    virtual void bound(ListData *list);
+    virtual void set_mail_waiter(SemPtr w) {	// only mail_queue1 receives the waiter
+        mail_queue1->set_waiter(w);
+    }
+
+} ;
+
+#endif/* REFERENCED_DMA_MANAGER */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TaskManager/include/TaskManagerFactory.h	Sat Jan 04 19:10:32 2014 +0900
@@ -0,0 +1,3 @@
+#include "TaskManagerImpl.h"
+
+extern TaskManagerImpl *create_impl(int num, int num_gpu, int useRefDma);
--- a/TaskManager/kernel/ppe/CpuThreads.cc	Fri Jan 03 19:40:14 2014 +0900
+++ b/TaskManager/kernel/ppe/CpuThreads.cc	Sat Jan 04 19:10:32 2014 +0900
@@ -8,7 +8,6 @@
 #include "SysFunc.h"
 #include "SchedNop.h"
 #include "SpeTaskManagerImpl.h"
-#include "CellScheduler.h"
 #include <fcntl.h>
 
 SchedExternTask(ShowTime);
--- a/TaskManager/kernel/ppe/TaskManager.cc	Fri Jan 03 19:40:14 2014 +0900
+++ b/TaskManager/kernel/ppe/TaskManager.cc	Sat Jan 04 19:10:32 2014 +0900
@@ -1,4 +1,5 @@
 #include "TaskManager.h"
+#include "TaskManagerFactory.h"
 #include "Scheduler.h"
 #include "HTask.h"
 #include "Task.h"
@@ -14,22 +15,15 @@
 }
 
 /**
- * create_impl(int);
- *
- * [cell版] ../../Cell/CellTaskManagerImpl.cpp
- * [fifo版] ../../Fifo/FifoTaskManagerImpl.cpp
- * で定義されています。コンパイル時に
+ * TaskManagerFactory
+ *      create_impl(int);
  *
  * % make cell ってすると cell 版が、
  * % make fifo ってすると fifo 版 がリンクされるようにしているので
  * それに応じて create_impl が返す値を変えています。
  * cell だったら CellManagerImpl, fifo だったら FifoManagerImpl です。
  *
- * 今までは ifdef CELL とか書いてましたわ。どっちがいいかね
- *
- * てか、普通に TaskManagerImpl に関数持たせた方が早いか・・・?
  */
-extern TaskManagerImpl* create_impl(int,int,int);
 
 void
 TaskManager::init(int spuIdle, int export_task_log, int useRefDma=0)