Mercurial > hg > Game > Cerium

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TaskManager/Cell/CellTaskManagerImpl.cc.orig	Thu Jul 19 11:56:35 2012 +0900
@@ -0,0 +1,374 @@
+#define DEBUG
+#include "error.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "CellTaskManagerImpl.h"
+#include "HTask.h"
+#include "QueueInfo.h"
+#include "ExportTaskLog.h"
+#include "SchedTask.h"
+#include "MainScheduler.h"
+#include "types.h"
+#include "SysFunc.h"
+
+static void send_alloc_reply(CellTaskManagerImpl *tm, int id,
+		Threads *speThreads);
+
+CellTaskManagerImpl::~CellTaskManagerImpl() {
+
+	delete speThreads;
+	delete[] speTaskList;
+
+	delete ppeManager;
+}
+
+void CellTaskManagerImpl::init(int spuIdle_,int useRefDma) {
+	spe_running = 0;
+	spuIdle = spuIdle_;
+
+	// 実行される Task 用の パイプライン用のダブルバッファ
+	speTaskList = new QueueInfo<TaskList>*[machineNum]; // spe上の走っている Task の配列
+	taskListInfo = new QueueInfo<TaskList>*[machineNum]; // 次に走る Task の配列
+
+
+	for (int i = 0; i < machineNum; i++) {
+		taskListInfo[i] = new QueueInfo<TaskList> (taskListPool);
+		speTaskList[i] = new QueueInfo<TaskList> (taskListPool);
+	}
+
+	// PPE 側の管理をする Manager
+	ppeManager = new FifoTaskManagerImpl(machineNum);
+	// 大半のTaskQueueInfoは、共有される
+	MainScheduler *mscheduler = new MainScheduler;
+	set_scheduler(mscheduler);
+	ppeManager->init(mscheduler, this, useRefDma); // ここで HTaskInfo が共有される。
+
+	speThreads->init();
+
+	// 実行可能な HTask のリスト。 FifoTaskManager と共有される
+	activeTaskQueue = ppeManager->activeTaskQueue;
+	// HTask の factory。 HTaskInfo ならなんでもいい。
+	htaskImpl = activeTaskQueue; // any HTaskInfo
+
+
+	ppeManager->get_scheduler()->set_manager(this);
+
+	// Task 内からManager->task_create() とかするときに必要なTaskManager。
+	// 現状では ppe 側からしか動かない
+	// spe 側から Task create できない
+	schedTaskManager = new SchedTask();
+	schedTaskManager->init(0, 0, ppeManager->get_scheduler(), 0);
+	ppeManager->schedTaskManager = schedTaskManager;
+}
+
+void CellTaskManagerImpl::append_activeTask(HTaskPtr task) {
+	if (task->cpu_type == CPU_PPE) {
+		ppeManager->append_activeTask(task);
+	} else {
+		activeTaskQueue->addLast(task);
+	}
+}
+
+// SPE_ANY が指定されていた時に
+// これをインクリメントしつつ呼ぶことにする。
+unsigned int cur_anySpeid = 0;
+
+/**
+ * ActiveTaskQueue から Task を
+ * 各 SPE に渡す (backgound) TaskList に入れる
+ *
+ * ここの activeTaskQueue は FifoTaskManagerImpl のと意味が違い、
+ * spe に渡される Task だけ入っている
+ */
+void CellTaskManagerImpl::set_runTaskList(QueueInfo<HTask> *activeTaskQueue) {
+	int speid;
+	HTaskPtr htask = activeTaskQueue->getFirst();
+	while (htask != NULL) {
+
+		if (htask->cpu_type == CPU_PPE) {
+
+			htask = activeTaskQueue->getNext(htask);
+
+		} else {
+			if (htask->cpu_type == SPE_ANY) {
+				speid = cur_anySpeid++;
+			} else {
+				// -1 してるのは
+				// htask->cpu_type - CPU_SPE で
+				// SPE0 = 1, SPE1 = 2, ... SPE5 = 6 ってなってるので
+				// 配列的 (SPE0 = arr[0], SPE1 = arr[1]) にするため
+				speid = htask->cpu_type - CPU_SPE - 1;
+			}
+
+			speid %= machineNum;
+			set_taskList(htask, taskListInfo[speid]);
+
+			HTaskPtr next = activeTaskQueue->getNext(htask);
+			activeTaskQueue->remove(htask);
+			htask = next;
+
+		}
+	}
+}
+
+void CellTaskManagerImpl::sendTaskList() {
+	for (int id = 0; id < machineNum; id++) {
+		mail_check(id);
+		if (!speTaskList[id]->empty()) {
+			continue; // まだ、走ってる
+		}
+		if (!taskListInfo[id]->empty()) {
+			// SPE に送る TaskList の準備
+			send_taskList(id);
+			spe_running++;
+		}
+	}
+}
+
+void CellTaskManagerImpl::poll() {
+	set_runTaskList(activeTaskQueue);
+	// TaskList 待ちの SPE に TaskList を送る
+	sendTaskList();
+}
+
+void CellTaskManagerImpl::debug_check_spe_idle(
+		QueueInfo<HTask> * activeTaskQueue, int spe_running_) {
+	printf("spu_idle! spe_running = %d : activeTaskQueue->length = %d \n",
+			spe_running_, activeTaskQueue->length());
+	HTaskPtr task = activeTaskQueue->getFirst();
+	int tmp_i = 0;
+	do {
+		printf("task_name = %s ,", ppeManager->get_task_name(task));
+		printf("cpu = [%d], count = %d", task->cpu_type, tmp_i);
+		tmp_i++;
+	} while ((task = activeTaskQueue->getNext(task)) != 0);
+	printf("\n");
+}
+
+void CellTaskManagerImpl::run() {
+    int spu_limit = spuIdle;
+    if (machineNum == 0) {
+        ppeManager->run();
+        return;
+    }
+
+    do {
+        // PPE side
+        ppeManager->poll();
+        // SPE side
+        do {
+            poll();
+        } while (ppeManager->activeTaskQueue->empty() && spe_running > 0);
+
+        if (spe_running < spu_limit) {
+            debug_check_spe_idle(ppeManager->activeTaskQueue, spe_running);
+        }
+
+    } while (!ppeManager->activeTaskQueue->empty() || !activeTaskQueue->empty() || spe_running > 0);
+    if (!waitTaskQueue->empty()) {
+        show_dead_lock_info();
+    }
+
+}
+
+static void loop_check(HTask *p, HTask *me, int depth) {
+	if (p == me)
+		printf("*%lx ", (long) p); // loop
+	if (depth == 0)
+		return;
+	QueueInfo<TaskQueue> *w = p->wait_i;
+	if (w) {
+		for (TaskQueue *q = w->getFirst(); q; q = w->getNext(q)) {
+			loop_check(q->task, me, depth - 1);
+		}
+	}
+}
+
+void CellTaskManagerImpl::show_dead_lock_info() {
+	get_scheduler()-> printf("Dead lock detected\n   ppe queue %d\n",
+			ppeManager->activeTaskQueue->length());
+	// 確か waitQueue は共通...
+	// get_scheduler()-> printf("   wait queue %d\n",ppeManager->waitTaskQueue->length());
+	get_scheduler()-> printf("   wait queue %d\n", waitTaskQueue->length());
+	for (HTask *p = waitTaskQueue->getFirst(); p; p = waitTaskQueue->getNext(p)) {
+		printf("  Waiting task%d %lx", p->command, (long) p);
+		QueueInfo<TaskQueue> *w = p->wait_i;
+		if (w) {
+			for (TaskQueue *q = w->getFirst(); q; q = w->getNext(q)) {
+				printf("    waiting task%d %lx", q->task->command,
+						(long) q->task);
+				if (!waitTaskQueue->find(q->task)) {
+					printf("!"); // stray task
+				}
+				loop_check(q->task, p, 10);
+			}
+		}
+		printf("\n");
+	}
+	get_scheduler()-> printf("   spe queue %d\n", activeTaskQueue->length());
+	for (int i = 0; i < machineNum; i++) {
+		get_scheduler()-> printf("   spe %d send %d wait %d\n", i,
+				speTaskList[i]->length(), taskListInfo[i]->length());
+	}
+}
+
+/**
+ * SPE からのメールをチェックする
+ */
+
+void CellTaskManagerImpl::mail_check(int id) {
+	memaddr data;
+
+	// SPE Scheduler からの mail check
+	while (speThreads->has_mail(id, 1, &data)) {
+		if (data == (memaddr) MY_SPE_STATUS_READY) {
+			//  MY_SPE_STATUS_READY: SPE が持ってた Task 全て終了
+			// freeAll する前に循環リストに戻す
+			speTaskList[id]->getLast()->next = speTaskList[id];
+			speTaskList[id]->freeAll();
+			spe_running--;
+			// printf("SPE %d status ready, %d running\n",id, spe_running);
+		} else if (data == (memaddr) MY_SPE_COMMAND_MALLOC) {
+			// MY_SPE_COMMAND_MALLOC   SPE からのmain memory request
+			send_alloc_reply(this, id, speThreads);
+		} else if (data > (memaddr) MY_SPE_NOP) {
+#ifdef TASK_LIST_MAIL
+			TaskListPtr list = (TaskListPtr)data;
+			check_task_list_finish(schedTaskManager, list, waitTaskQueue);
+#else
+			// 終了したタスク(PPEにあるのでアドレス)
+			HTaskPtr task = (HTaskPtr) data;
+#if 0
+			if (task->cpu_type != CPU_SPE) {
+				const char *name = get_task_name(task);
+				if (name != NULL) {
+					printf("[SPE] ");
+					printf("Task id : %d, ", task->command);
+					printf("Task name : %s\n", name);
+				}
+			}
+#endif
+#ifndef NOT_CHECK
+
+			if (task != NULL) {
+				//SPE で処理された Task が返ってくるはず。それがもし、type PPE なら・・・
+				if (task->cpu_type == CPU_PPE) {
+					printf("attention : PPE task run on SPE\n");
+					printf("Task id : %d\n", task->command);
+					const char *name = get_task_name(task);
+					if (name != NULL) {
+						printf("Task name : %s\n", name);
+					}
+				}
+			}
+
+#endif
+
+			task->post_func(schedTaskManager, task->post_arg1, task->post_arg2);
+			check_task_finish(task, waitTaskQueue);
+#endif
+		}
+		// MY_SPE_NOP: 特に意味のないコマンド
+	}
+}
+
+void CellTaskManagerImpl::polling() {
+	// may  call recursively check_task_list_finish()
+	// we need fifo here
+	for (int i = 0; i < machineNum; i++) {
+		mail_check(i);
+	}
+}
+
+static void send_alloc_reply(CellTaskManagerImpl *tm, int id,
+		Threads *speThreads) {
+
+	/**
+	 * info[0] = alloc_id; (CellScheduler::mainMem_alloc 参照)
+	 * info[1] = alloc_addr;
+	 */
+	memaddr alloc_info[2];
+	long alloc_size;
+	long command;
+
+	speThreads->get_mail(id, 2, alloc_info);
+	command = (long) alloc_info[0];
+	alloc_size = (long) alloc_info[1];
+
+	alloc_info[1] = (memaddr) tm->allocate(alloc_size);
+	//__debug_ppe("[PPE] MALLOCED 0x%lx from [SPE %d]\n", alloc_info[1],id);
+	// 今のところ何もしてない。どうも、この allocate を free
+	// するのは、SPE task が返した値を見て行うらしい。それは、
+	// 忘れやすいのではないか?
+	speThreads->add_output_tasklist(command, alloc_info[1], alloc_size);
+
+	speThreads->send_mail(id, 2, alloc_info);
+}
+
+/**
+ * 条件を満たしたら SPE に TaskList を送信する
+ * 条件1. SPE が持ってた TaskList を終了して、次の TaskList を待ってる
+ * 条件2. SPE に送る TaskList に Task がある
+ *
+ * SPE で実行終了した speTaskList  と
+ * これから実行する taskListInfo  のバッファを入れ替える
+ */
+void CellTaskManagerImpl::send_taskList(int id) {
+	// speTaskList は走り終わった ppe の Task の List.
+	// taskListInfo はこれから走る Task の List.
+	// 交換して実行する
+	QueueInfo<TaskList> *tmp = taskListInfo[id];
+	taskListInfo[id] = speTaskList[id];
+	speTaskList[id] = tmp;
+
+	// speTaskList は本来は循環リストなのだけど、実行中は線形リストである。
+	// spe の Task が終了した時点でなおす。
+	tmp->getLast()->next = 0;
+	TaskListPtr p = tmp->getFirst();
+	// printf("SPE %d task list sending\n",id);
+	speThreads->send_mail(id, 1, (memaddr *) &p);
+	// printf("SPE %d task list sent\n",id);
+}
+
+void CellTaskManagerImpl::show_profile() {
+	for (int id = 0; id < machineNum; id++) {
+		HTaskPtr t = schedTaskManager->create_task(ShowTime, 0, 0, 0, 0);
+		t->set_cpu((CPU_TYPE) (id + SPE_0));
+		t->spawn();
+	}
+}
+
+void CellTaskManagerImpl::start_profile() {
+	for (int id = 0; id < machineNum; id++) {
+		HTaskPtr t = schedTaskManager->create_task(StartProfile, 0, 0, 0, 0);
+		t->set_cpu((CPU_TYPE) (id + SPE_0));
+		t->spawn();
+	}
+}
+
+void CellTaskManagerImpl::export_task_log() {
+    ExportTaskLog _export(taskLogQueue);
+    _export.printOut();
+}
+
+void CellTaskManagerImpl::print_arch() {
+	printf("CellTaskManager\n");
+}
+
+TaskListPtr CellTaskManagerImpl::createTaskList()
+{
+        TaskListPtr tl = taskListInfo[0]->create();
+        bzero(tl->tasks,sizeof(Task)*TASK_MAX_SIZE);
+	return tl;
+}
+
+
+#ifdef __CERIUM_CELL__
+TaskManagerImpl*
+create_impl(int num, int useRefDma)
+{
+	Threads *cpus = new SpeThreads(num);
+	return new CellTaskManagerImpl(num,cpus);
+}
+#endif // __CERIUM_CELL
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TaskManager/Cell/SpeThreads.cc.orig	Thu Jul 19 11:56:35 2012 +0900
@@ -0,0 +1,169 @@
+#include <stdlib.h>
+#include "types.h"
+#include "SpeThreads.h"
+#include "Scheduler.h"
+
+
+SpeThreads::SpeThreads(int num) : cpu_num(num) {}
+
+SpeThreads::~SpeThreads(void)
+{
+    memaddr mail = (memaddr)MY_SPE_COMMAND_EXIT;
+    int ret;
+
+    for (int i = 0; i < cpu_num; i++) {
+        send_mail(i, 1, &mail);
+    }
+
+    for (int i = 0; i < cpu_num; i++) {
+        pthread_join(threads[i], NULL);
+        ret = spe_context_destroy(spe_ctx[i]);
+        if (ret) {
+            perror("[~SpeThreads] spe_context_destroy");
+        }
+    }
+
+    spe_image_close(spe_handle);
+
+    delete [] spe_ctx;
+    delete [] threads;
+    delete [] args;
+}
+
+void*
+SpeThreads::spe_thread_run(void *arg)
+{
+    unsigned int entry = SPE_DEFAULT_ENTRY;
+    //spe_context_ptr_t ctx = (spe_context_ptr_t)arg;
+    thread_arg_t *arg_t = (thread_arg_t *)arg;
+
+    spe_stop_info_t stop_info;
+    unsigned long long status;
+
+    spe_context_run(arg_t->ctx, &entry, 0, (void*)arg_t->speid, NULL, &stop_info);
+
+    status = ((stop_info.result.spe_exit_code & 0xff) << 8)
+	| (stop_info.result.spe_signal_code & 0xff);
+
+    switch(stop_info.stop_reason) {
+    case SPE_EXIT:
+	break;
+    case SPE_STOP_AND_SIGNAL:
+	printf("[SPE %d] SPE_STOP_AND_SIGNAL stop_info.result.stop_signal_code=%d\n", arg_t->speid, stop_info.result.spe_signal_code);
+	break;
+    case SPE_RUNTIME_ERROR:
+	printf("[SPE %d] SPE_RUNTIME_ERROR stop_info.result.spe_runtime_error=%d\n", arg_t->speid,  stop_info.result.spe_runtime_error);
+	break;
+    case SPE_RUNTIME_EXCEPTION:
+	printf("[SPE %d] SPE_RUNTIME_EXCEPTION stop_info.result.spe_runtime_exception=%d\n", arg_t->speid,  stop_info.result.spe_runtime_exception);
+	break;
+    }
+
+    pthread_exit(NULL);
+}
+
+void*
+SpeThreads::frontend_thread_run(void *arg)
+{
+    pthread_t thread;
+    thread_arg_t *arg_t = (thread_arg_t *)arg;
+
+    pthread_create(&thread, NULL, &spe_thread_run, (void*)arg_t->ctx);
+
+    // mail read の blocking ができれば
+    // ここで呼んだ方が早い。
+
+    pthread_exit(NULL);
+}
+
+void
+SpeThreads::init(void)
+{
+    spe_handle = spe_image_open(SPE_ELF);
+
+    if (spe_handle == NULL) {
+	perror("spe_image_open");
+	exit(EXIT_FAILURE);
+    }
+
+    spe_ctx = new spe_context_ptr_t[cpu_num];
+    threads = new pthread_t[cpu_num];
+    args    = new thread_arg_t[cpu_num];
+
+    for (int i = 0; i < cpu_num; i++) {
+	args[i].speid = i;
+	spe_ctx[i] = spe_context_create(0, NULL);
+	spe_program_load(spe_ctx[i], spe_handle);
+	args[i].ctx = spe_ctx[i];
+    }
+
+    for (int i = 0; i < cpu_num; i++) {
+	pthread_create(&threads[i], NULL,
+		       &spe_thread_run, (void*)&args[i]);
+    }
+}
+
+
+/**
+ * SPE からのメールを受信する。
+ *
+ * @param [speid] SPE ID
+ *
+ * @return Received 32-bit mailbox messages
+ *         if ([ret] < 0) no data read
+ */
+int
+SpeThreads::get_mail(int speid, int count, memaddr *ret)
+{
+    // only used in CellTaskManagerImpl (should be removed?)
+    return spe_out_mbox_read(spe_ctx[speid], (unsigned int*)ret, count*(sizeof(memaddr)/sizeof(int)));
+}
+
+int
+SpeThreads::has_mail(int speid, int count, memaddr *ret)
+{
+/*
+ *  spe_out_mbox_status return only 1, waiting for multiple length
+ *  does not work.
+ */
+    if (spe_out_mbox_status(spe_ctx[speid]) >= 1) {
+      return spe_out_mbox_read(spe_ctx[speid], (unsigned int*)ret, count*(sizeof(memaddr)/sizeof(int)));
+    } else {
+	return 0;
+    }
+}
+
+/**
+ * Inbound Mailbox
+ * メール送信 PPE -> SPE
+ *
+ * なるべく NONBLOCKING なんだけど、
+ * Inbound Mailbox キューに空きがないと送信できないので
+ * 送信する数だけ空いているか確認してから送る。空いて無い場合は待つ。
+ *
+ * 結局待つんだよな。しかも ALL_BLOCKING って実は busy wait だったりするし
+ *
+ * @param [speid] SPE ID
+ * @param [data] Send 32-bit mailbox messages
+ * @param [num] The number of messages
+ */
+void
+SpeThreads::send_mail(int speid, int num, memaddr *data)
+
+{
+    spe_in_mbox_write(spe_ctx[speid], (unsigned int *)data, num*(sizeof(memaddr)/sizeof(int)), SPE_MBOX_ALL_BLOCKING);
+}
+
+void
+SpeThreads::add_output_tasklist(int command, memaddr buff, int alloc_size)
+{
+    /*
+     * output TaskList が無ければ新しく作る
+     * あれば TaskList に allocate した Task を追加
+     * command に対応した Task の初期化を実行する
+     * SPE に data が書き出し終わった後に PPE 側で初期化
+     */
+
+}
+
+/* end */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TaskManager/Cell/SpeThreads.h.orig	Thu Jul 19 11:56:35 2012 +0900
@@ -0,0 +1,39 @@
+#ifndef INCLUDED_SPE_THREADS
+#define INCLUDED_SPE_THREADS
+
+#include <libspe2.h>
+#include <pthread.h>
+#include "Threads.h"
+#define SPE_ELF "spe-main"
+
+typedef struct arg {
+    int speid;
+    spe_context_ptr_t ctx;
+} thread_arg_t;
+
+
+class SpeThreads : public Threads {
+public:
+    /* constructor */
+    SpeThreads(int num = 1);
+    ~SpeThreads(void);
+
+    /* functions */
+    void init(void);
+    int get_mail(int speid, int count, memaddr *ret); // BLOCKING
+    int has_mail(int speid, int count, memaddr *ret); // NONBLOCK
+    void send_mail(int speid, int num, memaddr *data); // BLOCKING
+    static void *spe_thread_run(void *arg);
+    static void *frontend_thread_run(void *arg);
+    void add_output_tasklist(int command, memaddr buff, int alloc_size);
+
+private:
+    /* variables */
+    spe_program_handle_t *spe_handle;
+    spe_context_ptr_t *spe_ctx;
+    pthread_t *threads;
+    thread_arg_t *args;
+    int cpu_num;
+};
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TaskManager/Cell/spe/MemIterator.cc	Thu Jul 19 11:56:35 2012 +0900
@@ -0,0 +1,214 @@
+#include "MemIterator.h"
+
+/*
+ * Iterator は MemList にまとめる方がいいかも。
+ *
+ * @param [addr_list] main memory 側のアドレスリスト
+ * @param [mem_list]  memory segment のリスト
+ * @param [cmd] Iterator パターンというべきか.
+ *
+ * cmd は、ms, MemList で情報を持っててもできる
+ *
+ */
+
+MemIterator::MemIterator(AddrListPtr addr_list, MemList *mem_list,
+                         int cmd, SchedTask *smanager)
+{
+
+    this->addr_list  = addr_list;
+    this->mem_list  = mem_list;
+    this->smanager  = smanager;
+
+    read_ms  = NULL;
+    exec_ms  = NULL;
+    write_ms = NULL;
+    free_ms  = NULL;
+
+    // コマンドは、ms 自体に持たせるべきなのかも知れない。
+
+    if (cmd == READ_WRITE) {
+
+        // 始め、必要になるであろう、データを load しておく
+        read_ms = smanager->get_segment(addr_list->addr,mem_list);
+        get_ms = &MemIterator::get_read_write_ms;
+
+    } else if (cmd == READ) {
+
+        // 始め、必要になるであろう、データを load しておく
+        read_ms = smanager->get_segment(addr_list->addr,mem_list);
+        get_ms = &MemIterator::get_read_ms;
+
+    } else if(cmd == WRITE) {
+
+        get_ms = &MemIterator::get_write_ms;
+
+    }
+
+
+}
+
+/*
+ * 次の ms があるか返す
+ *
+ * @retval 1 next がある
+ * @retval 0 next がない
+ */
+int
+MemIterator::hasNext(void)
+{
+    return (addr_list->next) ? 1 : 0;
+}
+
+/*
+ * アドレスのリストから次に使う領域は予想できる
+ * read, write する ms に使う。
+ *
+ * @return アドレスのリストに沿って、msに格納し返す
+ */
+MemorySegmentPtr
+MemIterator::get_read_write_ms(void)
+{
+
+    // main memory に書き出し
+    smanager->put_segment(write_ms);
+
+    /*
+     * 前回の put segment を待つ
+     * ms はメモリ領域を使い回しているので、free 部分も dma 完了を待つ
+     */
+    smanager->wait_segment(free_ms);
+
+    // stage 遷移
+    free_ms   = write_ms;
+    write_ms  = exec_ms;
+    exec_ms   = read_ms;
+
+    // アドレスをリストから追っていく
+    addr_list = addr_list->next;
+
+    // 次必要なものを load し始めておく
+    read_ms   = smanager->get_segment(addr_list->addr, mem_list);
+
+
+    // 前回の get segment を待つ
+    smanager->wait_segment(exec_ms);
+
+    return exec_ms;
+}
+
+/*
+ * read only な ms に使う
+ *
+ * @return アドレスのリストに沿って、msに格納し返す
+ */
+MemorySegmentPtr
+MemIterator::get_read_ms(void)
+{
+
+    // stage 遷移
+    exec_ms = read_ms;
+
+    // アドレスをリストから追っていく
+    addr_list = addr_list->next;
+
+
+    // 次必要なものを load し始めておく
+    read_ms = smanager->get_segment(addr_list->addr,mem_list);
+
+    // 前回の get segment を待つ
+    smanager->wait_segment(exec_ms);
+
+    return exec_ms;
+}
+
+/*
+ * アドレスのリストから次に使う領域は予想できる
+ * write only な ms に使うパターン
+ *
+ * @return free な ms 領域を返す
+ */
+MemorySegmentPtr
+MemIterator::get_write_ms(void)
+{
+
+    /*
+     * 前回の put segment を待つ.
+     * ms はメモリ領域を使い回しているので、free 部分も dma 完了を待つ.
+     * 書き出しのタイミングは memory segment の数による
+     * ので、最適な書き込み完了待ちはできない.
+     * MemList 側に書けば、できるはず.
+     */
+    smanager->wait_segment(free_ms);
+
+    // stage 遷移
+    free_ms   = write_ms;
+
+    // 書きこみ開始。次 get_write_ms が呼ばれる時に wait する。
+    smanager->put_segment(free_ms);
+
+    // free な領域を取得する。dma load する必要はない。
+    write_ms = smanager->get_free_segment(addr_list->addr, mem_list);
+
+    // アドレスをリストから追っていく
+    addr_list = addr_list->next;
+
+    return write_ms;
+}
+
+
+/*
+ * 出しておいた segment 命令を待つ.
+ * まだ、書きこまれていないmsを書きこむ
+ * 最後の締め.
+ */
+
+void
+MemIterator::collect_ms(void)
+{
+    smanager->put_segment(write_ms);
+    smanager->wait_segment(free_ms);
+    smanager->wait_segment(write_ms);
+    smanager->wait_segment(read_ms);
+}
+
+
+/*
+ * 走査中のlistの一部を上書きする.
+ * mainMem (CreateSpan) とかやるとき、使う予定
+ */
+void
+MemIterator::overwrite_list(AddrListPtr list)
+{
+
+    /*
+     * すでに次のリストの read を開始している可能性がある
+     * ので、それをいったん中止、同じ ms に 新しく load をかける。
+     *
+     * でも、これは write only な場合いらない..
+     */
+
+    // dma命令キャンセルする方法があるかな. 調べてみる. あるならwaitするよりそっちがいい.
+    smanager->wait_segment(read_ms);
+
+    /* segment に直接上書きする機能が必要かも. 要実装.
+     * 指定した ms に load する
+     * この場合は、read_ms に新しく list->addr を load する
+     */
+    smanager->overwrite_segment(read_ms, list->addr);
+
+
+    /*
+     * 現在のaddr_list を新しい list に上書きする
+     */
+    AddrListPtr tmp = list;
+
+    while(list->next) {
+        list = list->next;
+    }
+
+    list->next = addr_list->next;
+    addr_list = tmp;
+
+}
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TaskManager/Gpu/GpuScheduler.cc.orig	Thu Jul 19 11:56:35 2012 +0900
@@ -0,0 +1,128 @@
+#include "GpuScheduler.h"
+#include "ReferencedDmaManager.h"
+#include "SchedTask.h"
+#include "GpuThreads.h"
+#include "stdio.h"
+#include <fcntl.h>
+#include <sys/stat.h>
+
+GpuScheduler::GpuScheduler()
+{
+    init_impl(0);
+}
+
+void
+GpuScheduler::init_impl(int useRefDma)
+{
+    fifoDmaManager = new ReferencedDmaManager();
+    connector = fifoDmaManager;
+}
+
+
+void
+GpuScheduler::run()
+{
+    for (;;) {
+        memaddr params_addr = connector->task_list_mail_read();
+
+        // Get OpenCL infomation
+        GpuThreads* gputhreads = GpuThreads::getInstance();
+        cl_context context = gputhreads->context;
+        cl_command_queue command_queue = gputhreads->command_queue;
+
+        if ((memaddr)params_addr == (memaddr)MY_SPE_COMMAND_EXIT) {
+            clFinish(command_queue);
+            return ;
+        }
+
+        while (params_addr) {
+            TaskListPtr tasklist = (TaskListPtr)connector->dma_load(this, params_addr,
+                                                                    sizeof(TaskList), DMA_READ_TASKLIST);
+
+
+            for (TaskPtr nextTask = tasklist->tasks; nextTask < tasklist->last(); nextTask = nextTask->next()) {
+                cl_kernel& kernel = *task_list[nextTask->command].kernel;
+                int err = CL_SUCCESS;
+                for(int i=0;i<nextTask->param_count;i++) {
+                    err |= clSetKernelArg(kernel,  i, sizeof(memaddr), (cl_mem*)nextTask->param(i));
+                }
+
+                for(int i=0;i<nextTask->inData_count;i++) {
+                    cl_mem memobj = clCreateBuffer(context, CL_MEM_READ_WRITE, nextTask->inData(i)->size, NULL, NULL);
+                    err |= clEnqueueWriteBuffer(command_queue, memobj, CL_TRUE, 0,
+                                                nextTask->inData(i)->size, nextTask->inData(i)->addr, 0, NULL, NULL);
+                    //clSetKernleArg(kernel, cur_index,);
+                }
+                // カーネル引数の設定
+
+                clEnqueueTask(command_queue, kernel, 0, NULL, NULL);
+
+                for(int i=0;i<nextTask->outData_count;i++) {
+                    cl_mem memobj = clCreateBuffer(context, CL_MEM_READ_WRITE, nextTask->outData(i)->size, NULL, NULL);
+                    err |= clEnqueueReadBuffer(command_queue, memobj, CL_TRUE, 0,
+                                               nextTask->outData(i)->size, nextTask->outData(i)->addr, 0, NULL, NULL);
+                }
+            }
+
+            clFlush(command_queue); // waiting for queued task
+            params_addr = (memaddr)tasklist->next;
+        }
+
+        connector->mail_write((memaddr)(tasklist->waiter));
+    }
+
+    // TaskArrayの処理
+
+}
+
+void
+gpu_register_task(int cmd, const char* filename, const char* functionname)
+{
+    GpuThreads* gputhreads = GpuThreads::getInstance();
+    //gputhreads->init();
+    cl_context context = gputhreads->context;
+    cl_device_id device_id = gputhreads->device_id;
+
+    int fp;
+    char *source_str;
+    size_t source_size;
+
+    fp = open(filename, O_RDONLY);
+
+    if (!fp) {
+        fprintf(stderr, "Failed to load kernel.\n");
+        exit(1);
+    }
+
+    struct stat stats;
+    fstat(fp,&stats);
+    off_t size = stats.st_size;
+
+    if (!size) {
+        fprintf(stderr, "Failed to load kernel.\n");
+        exit(1);
+    }
+
+    source_str = (char*)malloc(size);
+    source_size = read(fp, source_str, size);
+    close(fp);
+
+    cl_program program = NULL;
+    cl_int ret = gputhreads->ret;
+    program = clCreateProgramWithSource(context, 1, (const char **)&source_str,
+                                               (const size_t *)&source_size, &ret);
+    clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
+
+    cl_kernel *kernel = new cl_kernel;
+    *kernel = clCreateKernel(program, functionname, &ret);
+
+    task_list[cmd].run = null_run;
+    task_list[cmd].load = null_loader;
+    task_list[cmd].wait = null_loader;
+    task_list[cmd].name = functionname;
+    task_list[cmd].kernel = kernel;
+
+}
+
+/* end */
+
Binary file TaskManager/Gpu/GpuScheduler.o has changed
Binary file TaskManager/Gpu/GpuTaskManagerImpl.o has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TaskManager/Gpu/GpuThreads.cc.orig	Thu Jul 19 11:56:35 2012 +0900
@@ -0,0 +1,83 @@
+#include "GpuThreads.h"
+#include "GpuScheduler.h"
+#include "TaskManagerImpl.h"
+#include "SpeTaskManagerImpl.h"
+
+GpuThreads::GpuThreads()
+{
+    threads = new pthread_t;
+    args = new gpu_arg;
+}
+
+GpuThreads::~GpuThreads()
+{
+    delete threads;
+    delete args;
+
+    clReleaseCommandQueue(command_queue);
+    clReleaseContext(context);
+}
+
+void
+GpuThreads::init()
+{
+    clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
+    clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, &ret_num_devices);
+    // unavailable GPU
+    if( ret_num_devices == 0) {
+        exit(EXIT_FAILURE);
+    }
+    context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
+    command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
+
+    args->scheduler = new GpuScheduler();
+    args->useRefDma = use_refdma;
+
+    // pthread_create(threads, NULL, &gpu_thread_run, args);
+
+}
+
+void *
+GpuThreads::gpu_thread_run(void *args)
+{
+    gpu_arg *argt = (gpu_arg *) args;
+    Scheduler *g_scheduler = argt->scheduler;
+
+    TaskManagerImpl *manager = new SpeTaskManagerImpl();
+    g_scheduler->init(manager, argt->useRefDma);
+
+    manager->set_scheduler(g_scheduler);
+
+    g_scheduler->run();
+    g_scheduler->finish();
+
+    return NULL;
+}
+
+int
+GpuThreads::get_mail(int speid, int count, memaddr *ret)
+{
+    *ret = args->scheduler->mail_read_from_host();
+    return 1;
+}
+
+int
+GpuThreads::has_mail(int speid, int count, memaddr *ret)
+{
+    if (args->scheduler->has_mail_from_host() != 0) {
+        return get_mail(0, 0, ret);
+    } else {
+        return 0;
+    }
+}
+
+void
+GpuThreads::send_mail(int speid, int num, memaddr *data)
+{
+    args->scheduler->mail_write_from_host(*data);
+}
+
+void
+GpuThreads::add_output_tasklist(int command, memaddr buff, int alloc_size)
+{
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TaskManager/Gpu/GpuThreads.h.orig	Thu Jul 19 11:56:35 2012 +0900
@@ -0,0 +1,52 @@
+#ifndef INCLUDED_GPU_THREADS
+#define INCLUDED_GPU_THREADS
+
+#include <pthread.h>
+#include "Threads.h"
+#include "GpuScheduler.h"
+
+#ifdef __APPLE__
+#include <OpenCL/opencl.h>
+#else
+#include <CL/cl.h>
+#endif
+
+// Singleton Pattern
+struct gpu_arg {
+    GpuScheduler *scheduler;
+    int useRefDma;
+};
+
+class GpuThreads : public Threads {
+public:
+    static GpuThreads* getInstance() {
+      static GpuThreads singleton;
+      return &singleton;
+    }
+    ~GpuThreads();
+
+    void init();
+    static void *gpu_thread_run(void *args);
+
+    int get_mail(int speid, int count, memaddr *ret);
+    int has_mail(int speid, int count, memaddr *ret);
+    void send_mail(int speid, int num, memaddr *data);
+    void add_output_tasklist(int command, memaddr buff, int alloc_size);
+
+public:
+    cl_platform_id platform_id;
+    cl_device_id device_id;
+    cl_uint ret_num_platforms;
+    cl_uint ret_num_devices;
+    cl_context context ;
+    cl_command_queue command_queue;
+    cl_int ret;
+
+private:
+    GpuThreads();
+    gpu_arg *args;
+    pthread_t *threads;
+    int use_refdma;
+};
+
+#endif
Binary file TaskManager/Gpu/GpuThreads.o has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TaskManager/include/types.h.orig	Thu Jul 19 11:56:35 2012 +0900
@@ -0,0 +1,82 @@
+#ifndef INCLUDED_TYPES
+#define INCLUDED_TYPES
+
+#include <stdint.h>
+
+typedef uint16_t uint16;
+typedef uint32_t uint32;
+typedef uint64_t uint64;
+
+//  HOST main memory address
+//       SPU's (void *) is always 32bit (actually 18bit (256kbyte))
+//       memaddr is different from (void *) in SPU.
+//
+#ifdef __SPU__
+#if ABIBIT>32
+typedef uint64_t memaddr;
+#else
+typedef uint32_t memaddr;
+#endif
+#else
+typedef char* memaddr;
+#endif
+
+
+#define Newq(Type,Count) ((Type *)malloc(sizeof(Type)*Count))
+#define ReAlloc(Pointer,Type,Count) ((Type *)realloc((void*)Pointer,sizeof(Type)*Count))
+
+
+#define SPE_ALIGNMENT 16
+#define SPE_ALIGNMENT_FULL 128
+#define SPE_ALIGN __attribute__((aligned(SPE_ALIGNMENT)))
+#define SPE_ALIGN_FULL __attribute__((aligned(SPE_ALIGNMENT_FULL))
+#define ROUND_UP_ALIGN(value, alignment) \
+    (((value) + ((alignment) - 1))&(~((alignment)-1)))
+#define DEFAULT_ALIGNMENT SPE_ALIGNMENT
+//#define DEFAULT_ALIGNMENT SPE_ALIGNMENT_FULL
+
+#define DMA_MAX_SIZE 16384
+
+#define round_up16(value)  ROUND_UP_ALIGN(value, 16)
+#define round_up128(value) ROUND_UP_ALIGN(value, 128)
+
+#define TaskArray (-1)
+#define TaskArray1 (-2)
+
+// SPU 依存 (よろしくないが...)
+
+// ここも typedef しとくか？
+enum {
+// どの方向かで enum 分けるだろjk...
+// PPE -> SPE
+    MY_SPE_NOP = 0,
+    MY_SPE_COMMAND_EXIT,
+    MY_SPE_COMMAND_GO,
+
+// SPE -> PPE
+    MY_SPE_STATUS_BUSY,
+    MY_SPE_STATUS_READY,
+    MY_SPE_COMMAND_MALLOC,
+};
+
+#define MAX_USE_SPE_NUM 32
+
+typedef enum {
+    CPU_PPE = 0, // default
+    GPU_0 = 1,
+    GPU_1 = 2,
+    GPU_2 = 3,
+    GPU_3 = 4,
+    CPU_SPE = 5,
+    SPE_ANY = CPU_SPE,
+    SPE_0 = 6,
+    SPE_1 = 7,
+    SPE_2 = 8,
+    SPE_3 = 9,
+    SPE_4 = 10,
+    SPE_5 = 11,
+
+
+} CPU_TYPE;
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TaskManager/kernel/ppe/CpuThreads.cc.orig	Thu Jul 19 11:56:35 2012 +0900
@@ -0,0 +1,149 @@
+#include <stdlib.h>
+#include "types.h"
+#include "CpuThreads.h"
+#include "MainScheduler.h"
+#include "SysFunc.h"
+#include "SchedNop.h"
+#include "SpeTaskManagerImpl.h"
+#include "CellScheduler.h"
+
+SchedExternTask(ShowTime);
+SchedExternTask(StartProfile);
+
+
+CpuThreads::CpuThreads(int num, int useRefDma, int start_id) : cpu_num(num), use_refdma(useRefDma), id_offset(start_id) {
+#ifdef __CERIUM_GPU__
+    gpu = new GpuThreds;
+#endif
+    threads = new pthread_t[cpu_num];
+    args    = new cpu_thread_arg_t[cpu_num];
+    wait	= new Sem(0);
+
+}
+
+CpuThreads::~CpuThreads()
+{
+    memaddr mail = (memaddr)MY_SPE_COMMAND_EXIT;
+
+    for (int i = 0; i < cpu_num; i++) {
+        send_mail(i, 1, &mail);
+    }
+
+    for (int i = 0; i < cpu_num; i++) {
+		pthread_join(threads[i], NULL);
+	}
+
+    for (int i = 0; i < cpu_num; i++) {
+    	delete args[i].scheduler;
+    }
+
+    delete [] threads;
+    delete [] args;
+#ifdef __CERIUM_GPU__
+    delete gpu;
+#endif
+}
+
+void *
+CpuThreads::cpu_thread_run(void *args)
+{
+    cpu_thread_arg_t *argt = (cpu_thread_arg_t *) args;
+    Scheduler *c_scheduler = argt->scheduler;
+
+    TaskManagerImpl *manager = new SpeTaskManagerImpl();
+    c_scheduler->init(manager,argt->useRefDma);
+    c_scheduler->id = (int)argt->cpuid;
+
+    manager->set_scheduler(c_scheduler);
+
+    SchedRegister(ShowTime);
+    SchedRegister(StartProfile);
+
+    argt->wait->sem_v();	//準備完了したスレッドができるたびに+1していく
+
+    c_scheduler->run(new SchedNop());
+    c_scheduler->finish();
+
+    return NULL;
+}
+
+void
+//CpuThreads::init()
+CpuThreads::init()
+{
+	for (int i = 0; i < cpu_num; i++) {
+		args[i].cpuid = i + id_offset;
+		args[i].scheduler = new MainScheduler();
+		args[i].wait = wait;
+		args[i].useRefDma = use_refdma;
+	}
+
+    for (int i = 0; i < cpu_num; i++) {
+	pthread_create(&threads[i], NULL,
+		      &cpu_thread_run, (void*)&args[i]);
+    }
+
+    for (int i = 0; i < cpu_num; i++) {
+    	wait->sem_p();
+    }
+}
+
+/**
+ * このCPU からのメールを受信する。
+ *
+ * @param [cpuid] SPE ID
+ *
+ * @return Received 32-bit mailbox messages
+ *         if ([ret] < 0) no data read
+ */
+int
+CpuThreads::get_mail(int cpuid, int count, memaddr *ret)
+{
+
+    *ret = args[cpuid-id_offset].scheduler->mail_read_from_host();
+    return 1;
+}
+
+int
+CpuThreads::has_mail(int cpuid, int count, memaddr *ret)
+{
+	if (args[cpuid-id_offset].scheduler->has_mail_from_host() != 0) {
+		return get_mail(cpuid,count,ret);
+	} else {
+		return 0; //mailがないとき0を返す
+	}
+}
+
+/**
+ * Inbound Mailbox
+ * メール送信 Front End -> CPU
+ *
+ * なるべく NONBLOCKING なんだけど、
+ * Inbound Mailbox キューに空きがないと送信できないので
+ * 送信する数だけ空いているか確認してから送る。空いて無い場合は待つ。
+ *
+ * 結局待つんだよな。しかも ALL_BLOCKING って実は busy wait だったりするし
+ *
+ * @param [cpuid] SPE ID
+ * @param [data] Send 32-bit mailbox messages
+ * @param [num] The number of messages
+ */
+void
+CpuThreads::send_mail(int cpuid, int num, memaddr *data)
+{
+    args[cpuid-id_offset].scheduler->mail_write_from_host(*data);
+}
+
+void
+CpuThreads::add_output_tasklist(int command, memaddr buff, int alloc_size)
+{
+    /*
+     * output TaskList が無ければ新しく作る
+     * あれば TaskList に allocate した Task を追加
+     * command に対応した Task の初期化を実行する
+     * SPE に data が書き出し終わった後に PPE 側で初期化
+     */
+
+}
+
+/* end */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TaskManager/kernel/ppe/CpuThreads.h.orig	Thu Jul 19 11:56:35 2012 +0900
@@ -0,0 +1,48 @@
+#ifndef INCLUDED_CPU_THREADS
+#define INCLUDED_CPU_THREADS
+
+#include <pthread.h>
+#include "Threads.h"
+#include "TaskManagerImpl.h"
+#include "MainScheduler.h"
+#include "Sem.h"
+#ifdef __CERIUM_GPU__
+#include "GpuThreads.h"
+#endif
+typedef struct cpu_arg {
+    int cpuid;
+    // should be syncrhonized
+    MainScheduler *scheduler;
+    TaskManagerImpl *manager;
+    SemPtr wait;
+	int useRefDma;
+} cpu_thread_arg_t;
+
+class CpuThreads : public Threads {
+public:
+    /* constructor */
+    CpuThreads(int num = 1, int useRefDma = 0, int start_id = 0);
+    ~CpuThreads();
+    static void *cpu_thread_run(void *args);
+
+    /* functions */
+    virtual void init();
+    virtual int get_mail(int speid, int count, memaddr *ret); // BLOCKING
+    virtual int has_mail(int speid, int count, memaddr *ret); // NONBLOCK
+    virtual void send_mail(int speid, int num, memaddr *data); // BLOCKING
+    virtual void add_output_tasklist(int command, memaddr buff, int alloc_size);
+
+private:
+    /* variables */
+    pthread_t *threads;
+    cpu_thread_arg_t *args;
+    SemPtr wait; //スレッド生成時の待ち用
+    int cpu_num;
+	int use_refdma;
+    int id_offset;
+#ifdef __CERIUM_GPU__
+    GpuThreads gpu;
+#endif
+};
+
+#endif
Binary file TaskManager/test/GpuRegistTaskTest/GpuRegistTaskTest has changed
Binary file TaskManager/test/GpuRegistTaskTest/GpuRegistTaskTest.o has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TaskManager/test/GpuRunTest/GpuRunTest.cc.orig	Thu Jul 19 11:56:35 2012 +0900
@@ -0,0 +1,92 @@
+#include <stdio.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include "TaskManager.h"
+#include "GpuScheduler.h"
+#include "GpuThreads.h"
+#include "Gpufunc.h"
+#include "types.h"
+#define DEFAULT 5
+
+char usr_help_str[] = "GpuRun [length]\n";
+
+extern void gpu_register_task(int, const char*, const char*);
+extern void task_init(void);
+
+void
+print_data(int *data, int size, const char *title)
+{
+    printf("%s ---\n", title);
+    for ( int i = 0; i < size; i++) {
+        printf("%2d ", data[i]);
+    }
+    printf("\n");
+}
+
+
+
+void
+tester(int *indata, int *outdata, int num) {
+
+    //チェック
+    int check = 0;
+    for (int c=0; c<num; c++) {
+        if(outdata[c] == indata[c]*2) {
+            check++;
+        }
+    }
+
+    printf("Computed '%d/%d' correct values\n",check,num);
+
+}
+
+void
+task_init()
+{
+    int cmd = SchedRun;
+
+    GpuThreads* gputhreads = GpuThreads::getInstance();
+    gputhreads->init();
+
+    GpuSchedRegister(cmd, "twice.cl", "twice");
+}
+
+void
+test(TaskManager *manager, long int length) {
+
+    int *indata  = (int *)manager->allocate(sizeof(int)*length);
+    int *outdata = new int[length];
+    int count;
+    for (count=0; count < length ;count++) {
+        indata[count] = count;
+    }
+    print_data(indata, count, "before");
+
+    HTaskPtr schedtask = manager->create_task(SchedRun);
+    schedtask->set_inData(0, indata, sizeof (int)*length);
+    schedtask->set_outData(1, outdata, sizeof (int)*length);
+    schedtask->set_inData(2, &count, sizeof (int));
+    schedtask->set_cpu(SPE_ANY);   // これでは、GPU しか使えないではないか。
+    schedtask->spawn();
+
+}
+
+int
+TMmain(TaskManager *manager, int argc, char* argv[])
+{
+    long int length = DEFAULT;
+
+    if (argc > 1) { //引数が渡されていて、
+        if(atoi(argv[1])) {//数字なら
+            length = atoi(argv[1]);
+        }
+    }
+
+    task_init();
+
+    test(manager, length);
+
+    return 0;
+}
+
+/* end */
Binary file example/Bulk/main.o has changed
Binary file example/Bulk/ppe/Twice.o has changed
Binary file example/Bulk/ppe/task_init.o has changed
Binary file example/Bulk/twice has changed
Binary file example/get_segment/main.o has changed
Binary file example/get_segment/mainMem has changed
Binary file example/get_segment/ppe/Hello.o has changed
Binary file example/get_segment/ppe/Hello1.o has changed
Binary file example/get_segment/ppe/task_init.o has changed