changeset 1477:5ca4e9469c65 draft

remove GpuTaskManagerImpl
author Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
date Thu, 19 Jul 2012 14:03:49 +0900
parents 0094cd28bf41
children beda5f2f88ce
files TaskManager/Cell/CellTaskManagerImpl.cc TaskManager/Cell/CellTaskManagerImpl.cc.orig TaskManager/Cell/SpeThreads.cc.orig TaskManager/Cell/SpeThreads.h.orig TaskManager/Gpu/GpuScheduler.cc TaskManager/Gpu/GpuScheduler.cc.orig TaskManager/Gpu/GpuScheduler.o TaskManager/Gpu/GpuTaskManagerImpl.cc TaskManager/Gpu/GpuTaskManagerImpl.h TaskManager/Gpu/GpuTaskManagerImpl.o TaskManager/Gpu/GpuThreads.cc TaskManager/Gpu/GpuThreads.cc.orig TaskManager/Gpu/GpuThreads.h.orig TaskManager/Gpu/GpuThreads.o TaskManager/include/types.h TaskManager/include/types.h.orig TaskManager/kernel/ppe/CpuThreads.cc TaskManager/kernel/ppe/CpuThreads.cc.orig TaskManager/kernel/ppe/CpuThreads.h TaskManager/kernel/ppe/CpuThreads.h.orig TaskManager/test/GpuRegistTaskTest/GpuRegistTaskTest TaskManager/test/GpuRegistTaskTest/GpuRegistTaskTest.cc TaskManager/test/GpuRegistTaskTest/GpuRegistTaskTest.cc.orig TaskManager/test/GpuRegistTaskTest/GpuRegistTaskTest.o TaskManager/test/GpuRunTest/GpuRunTest.cc.orig example/Bulk/main.o example/Bulk/ppe/Twice.o example/Bulk/ppe/task_init.o example/Bulk/twice example/get_segment/mainMem example/get_segment/ppe/Hello.o example/get_segment/ppe/task_init.o
diffstat 32 files changed, 110 insertions(+), 1672 deletions(-)
--- a/TaskManager/Cell/CellTaskManagerImpl.cc	Thu Jul 19 11:56:35 2012 +0900
+++ b/TaskManager/Cell/CellTaskManagerImpl.cc	Thu Jul 19 14:03:49 2012 +0900
@@ -334,7 +334,7 @@
 void CellTaskManagerImpl::show_profile() {
 	for (int id = 0; id < machineNum; id++) {
 		HTaskPtr t = schedTaskManager->create_task(ShowTime, 0, 0, 0, 0);
-		t->set_cpu((CPU_TYPE) (id + 2));
+		t->set_cpu((CPU_TYPE) (id + SPE_0));
 		t->spawn();
 	}
 }
@@ -342,7 +342,7 @@
 void CellTaskManagerImpl::start_profile() {
 	for (int id = 0; id < machineNum; id++) {
 		HTaskPtr t = schedTaskManager->create_task(StartProfile, 0, 0, 0, 0);
-		t->set_cpu((CPU_TYPE) (id + 2));
+		t->set_cpu((CPU_TYPE) (id + SPE_0));
 		t->spawn();
 	}
 }
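
Both hunks above swap the literal offset 2 for SPE_0: the renumbered CPU_TYPE enum (see the TaskManager/include/types.h hunk below) inserts GPU_0..GPU_3 before the SPE entries, so SPE_0 is now 6 and a hard-coded id + 2 would land on the GPU slots. A minimal, self-contained sketch of the resulting mapping, not part of the changeset, using a hypothetical machineNum:

    #include <cstdio>

    // Only the constant needed for the illustration; the full enum is in the
    // updated TaskManager/include/types.h further down in this changeset.
    enum { SPE_0 = 6 };

    int main() {
        const int machineNum = 3;              // hypothetical number of SPEs
        for (int id = 0; id < machineNum; id++) {
            int cpu_type = id + SPE_0;         // was id + 2 before the renumbering
            printf("profile task %d -> cpu_type %d\n", id, cpu_type);
        }
        return 0;                              // prints cpu_type 6, 7, 8
    }
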
--- a/TaskManager/Cell/CellTaskManagerImpl.cc.orig	Thu Jul 19 11:56:35 2012 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,374 +0,0 @@
-#define DEBUG
-#include "error.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "CellTaskManagerImpl.h"
-#include "HTask.h"
-#include "QueueInfo.h"
-#include "ExportTaskLog.h"
-#include "SchedTask.h"
-#include "MainScheduler.h"
-#include "types.h"
-#include "SysFunc.h"
-
-static void send_alloc_reply(CellTaskManagerImpl *tm, int id,
-		Threads *speThreads);
-
-CellTaskManagerImpl::~CellTaskManagerImpl() {
-
-	delete speThreads;
-	delete[] speTaskList;
-
-	delete ppeManager;
-}
-
-void CellTaskManagerImpl::init(int spuIdle_,int useRefDma) {
-	spe_running = 0;
-	spuIdle = spuIdle_;
-
-	// 実行される Task 用の パイプライン用のダブルバッファ
-	speTaskList = new QueueInfo<TaskList>*[machineNum]; // spe上の走っている Task の配列
-	taskListInfo = new QueueInfo<TaskList>*[machineNum]; // 次に走る Task の配列
-
-
-	for (int i = 0; i < machineNum; i++) {
-		taskListInfo[i] = new QueueInfo<TaskList> (taskListPool);
-		speTaskList[i] = new QueueInfo<TaskList> (taskListPool);
-	}
-
-	// PPE 側の管理をする Manager
-	ppeManager = new FifoTaskManagerImpl(machineNum);
-	// 大半のTaskQueueInfoは、共有される
-	MainScheduler *mscheduler = new MainScheduler;
-	set_scheduler(mscheduler);
-	ppeManager->init(mscheduler, this, useRefDma); // ここで HTaskInfo が共有される。
-
-	speThreads->init();
-
-	// 実行可能な HTask のリスト。 FifoTaskManager と共有される
-	activeTaskQueue = ppeManager->activeTaskQueue;
-	// HTask の factory。 HTaskInfo ならなんでもいい。
-	htaskImpl = activeTaskQueue; // any HTaskInfo
-
-
-	ppeManager->get_scheduler()->set_manager(this);
-
-	// Task 内からManager->task_create() とかするときに必要なTaskManager。
-	// 現状では ppe 側からしか動かない
-	// spe 側から Task create できない
-	schedTaskManager = new SchedTask();
-	schedTaskManager->init(0, 0, ppeManager->get_scheduler(), 0);
-	ppeManager->schedTaskManager = schedTaskManager;
-}
-
-void CellTaskManagerImpl::append_activeTask(HTaskPtr task) {
-	if (task->cpu_type == CPU_PPE) {
-		ppeManager->append_activeTask(task);
-	} else {
-		activeTaskQueue->addLast(task);
-	}
-}
-
-// SPE_ANY が指定されていた時に
-// これをインクリメントしつつ呼ぶことにする。
-unsigned int cur_anySpeid = 0;
-
-/**
- * ActiveTaskQueue から Task を
- * 各 SPE に渡す (backgound) TaskList に入れる
- *
- * ここの activeTaskQueue は FifoTaskManagerImpl のと意味が違い、
- * spe に渡される Task だけ入っている
- */
-void CellTaskManagerImpl::set_runTaskList(QueueInfo<HTask> *activeTaskQueue) {
-	int speid;
-	HTaskPtr htask = activeTaskQueue->getFirst();
-	while (htask != NULL) {
-
-		if (htask->cpu_type == CPU_PPE) {
-
-			htask = activeTaskQueue->getNext(htask);
-
-		} else {
-			if (htask->cpu_type == SPE_ANY) {
-				speid = cur_anySpeid++;
-			} else {
-				// -1 してるのは
-				// htask->cpu_type - CPU_SPE で
-				// SPE0 = 1, SPE1 = 2, ... SPE5 = 6 ってなってるので
-				// 配列的 (SPE0 = arr[0], SPE1 = arr[1]) にするため
-				speid = htask->cpu_type - CPU_SPE - 1;
-			}
-
-			speid %= machineNum;
-			set_taskList(htask, taskListInfo[speid]);
-
-			HTaskPtr next = activeTaskQueue->getNext(htask);
-			activeTaskQueue->remove(htask);
-			htask = next;
-
-		}
-	}
-}
-
-void CellTaskManagerImpl::sendTaskList() {
-	for (int id = 0; id < machineNum; id++) {
-		mail_check(id);
-		if (!speTaskList[id]->empty()) {
-			continue; // まだ、走ってる
-		}
-		if (!taskListInfo[id]->empty()) {
-			// SPE に送る TaskList の準備
-			send_taskList(id);
-			spe_running++;
-		}
-	}
-}
-
-void CellTaskManagerImpl::poll() {
-	set_runTaskList(activeTaskQueue);
-	// TaskList 待ちの SPE に TaskList を送る
-	sendTaskList();
-}
-
-void CellTaskManagerImpl::debug_check_spe_idle(
-		QueueInfo<HTask> * activeTaskQueue, int spe_running_) {
-	printf("spu_idle! spe_running = %d : activeTaskQueue->length = %d \n",
-			spe_running_, activeTaskQueue->length());
-	HTaskPtr task = activeTaskQueue->getFirst();
-	int tmp_i = 0;
-	do {
-		printf("task_name = %s ,", ppeManager->get_task_name(task));
-		printf("cpu = [%d], count = %d", task->cpu_type, tmp_i);
-		tmp_i++;
-	} while ((task = activeTaskQueue->getNext(task)) != 0);
-	printf("\n");
-}
-
-void CellTaskManagerImpl::run() {
-    int spu_limit = spuIdle;
-    if (machineNum == 0) {
-        ppeManager->run();
-        return;
-    }
-
-    do {
-        // PPE side
-        ppeManager->poll();
-        // SPE side
-        do {
-            poll();
-        } while (ppeManager->activeTaskQueue->empty() && spe_running > 0);
-
-        if (spe_running < spu_limit) {
-            debug_check_spe_idle(ppeManager->activeTaskQueue, spe_running);
-        }
-
-    } while (!ppeManager->activeTaskQueue->empty() || !activeTaskQueue->empty() || spe_running > 0);
-    if (!waitTaskQueue->empty()) {
-        show_dead_lock_info();
-    }
-
-}
-
-static void loop_check(HTask *p, HTask *me, int depth) {
-	if (p == me)
-		printf("*%lx ", (long) p); // loop
-	if (depth == 0)
-		return;
-	QueueInfo<TaskQueue> *w = p->wait_i;
-	if (w) {
-		for (TaskQueue *q = w->getFirst(); q; q = w->getNext(q)) {
-			loop_check(q->task, me, depth - 1);
-		}
-	}
-}
-
-void CellTaskManagerImpl::show_dead_lock_info() {
-	get_scheduler()-> printf("Dead lock detected\n   ppe queue %d\n",
-			ppeManager->activeTaskQueue->length());
-	// 確か waitQueue は共通...
-	// get_scheduler()-> printf("   wait queue %d\n",ppeManager->waitTaskQueue->length());
-	get_scheduler()-> printf("   wait queue %d\n", waitTaskQueue->length());
-	for (HTask *p = waitTaskQueue->getFirst(); p; p = waitTaskQueue->getNext(p)) {
-		printf("  Waiting task%d %lx", p->command, (long) p);
-		QueueInfo<TaskQueue> *w = p->wait_i;
-		if (w) {
-			for (TaskQueue *q = w->getFirst(); q; q = w->getNext(q)) {
-				printf("    waiting task%d %lx", q->task->command,
-						(long) q->task);
-				if (!waitTaskQueue->find(q->task)) {
-					printf("!"); // stray task
-				}
-				loop_check(q->task, p, 10);
-			}
-		}
-		printf("\n");
-	}
-	get_scheduler()-> printf("   spe queue %d\n", activeTaskQueue->length());
-	for (int i = 0; i < machineNum; i++) {
-		get_scheduler()-> printf("   spe %d send %d wait %d\n", i,
-				speTaskList[i]->length(), taskListInfo[i]->length());
-	}
-}
-
-/**
- * SPE からのメールをチェックする
- */
-
-void CellTaskManagerImpl::mail_check(int id) {
-	memaddr data;
-
-	// SPE Scheduler からの mail check
-	while (speThreads->has_mail(id, 1, &data)) {
-		if (data == (memaddr) MY_SPE_STATUS_READY) {
-			//  MY_SPE_STATUS_READY: SPE が持ってた Task 全て終了
-			// freeAll する前に循環リストに戻す
-			speTaskList[id]->getLast()->next = speTaskList[id];
-			speTaskList[id]->freeAll();
-			spe_running--;
-			// printf("SPE %d status ready, %d running\n",id, spe_running);
-		} else if (data == (memaddr) MY_SPE_COMMAND_MALLOC) {
-			// MY_SPE_COMMAND_MALLOC   SPE からのmain memory request
-			send_alloc_reply(this, id, speThreads);
-		} else if (data > (memaddr) MY_SPE_NOP) {
-#ifdef TASK_LIST_MAIL
-			TaskListPtr list = (TaskListPtr)data;
-			check_task_list_finish(schedTaskManager, list, waitTaskQueue);
-#else
-			// 終了したタスク(PPEにあるのでアドレス)
-			HTaskPtr task = (HTaskPtr) data;
-#if 0
-			if (task->cpu_type != CPU_SPE) {
-				const char *name = get_task_name(task);
-				if (name != NULL) {
-					printf("[SPE] ");
-					printf("Task id : %d, ", task->command);
-					printf("Task name : %s\n", name);
-				}
-			}
-#endif
-#ifndef NOT_CHECK
-
-			if (task != NULL) {
-				//SPE で処理された Task が返ってくるはず。それがもし、type PPE なら・・・
-				if (task->cpu_type == CPU_PPE) {
-					printf("attention : PPE task run on SPE\n");
-					printf("Task id : %d\n", task->command);
-					const char *name = get_task_name(task);
-					if (name != NULL) {
-						printf("Task name : %s\n", name);
-					}
-				}
-			}
-
-#endif
-
-			task->post_func(schedTaskManager, task->post_arg1, task->post_arg2);
-			check_task_finish(task, waitTaskQueue);
-#endif
-		}
-		// MY_SPE_NOP: 特に意味のないコマンド
-	}
-}
-
-void CellTaskManagerImpl::polling() {
-	// may  call recursively check_task_list_finish()
-	// we need fifo here
-	for (int i = 0; i < machineNum; i++) {
-		mail_check(i);
-	}
-}
-
-static void send_alloc_reply(CellTaskManagerImpl *tm, int id,
-		Threads *speThreads) {
-
-	/**
-	 * info[0] = alloc_id; (CellScheduler::mainMem_alloc 参照)
-	 * info[1] = alloc_addr;
-	 */
-	memaddr alloc_info[2];
-	long alloc_size;
-	long command;
-
-	speThreads->get_mail(id, 2, alloc_info);
-	command = (long) alloc_info[0];
-	alloc_size = (long) alloc_info[1];
-
-	alloc_info[1] = (memaddr) tm->allocate(alloc_size);
-	//__debug_ppe("[PPE] MALLOCED 0x%lx from [SPE %d]\n", alloc_info[1],id);
-	// 今のところ何もしてない。どうも、この allocate を free
-	// するのは、SPE task が返した値を見て行うらしい。それは、
-	// 忘れやすいのではないか?
-	speThreads->add_output_tasklist(command, alloc_info[1], alloc_size);
-
-	speThreads->send_mail(id, 2, alloc_info);
-}
-
-/**
- * 条件を満たしたら SPE に TaskList を送信する
- * 条件1. SPE が持ってた TaskList を終了して、次の TaskList を待ってる
- * 条件2. SPE に送る TaskList に Task がある
- *
- * SPE で実行終了した speTaskList  と
- * これから実行する taskListInfo  のバッファを入れ替える
- */
-void CellTaskManagerImpl::send_taskList(int id) {
-	// speTaskList は走り終わった ppe の Task の List.
-	// taskListInfo はこれから走る Task の List.
-	// 交換して実行する
-	QueueInfo<TaskList> *tmp = taskListInfo[id];
-	taskListInfo[id] = speTaskList[id];
-	speTaskList[id] = tmp;
-
-	// speTaskList は本来は循環リストなのだけど、実行中は線形リストである。
-	// spe の Task が終了した時点でなおす。
-	tmp->getLast()->next = 0;
-	TaskListPtr p = tmp->getFirst();
-	// printf("SPE %d task list sending\n",id);
-	speThreads->send_mail(id, 1, (memaddr *) &p);
-	// printf("SPE %d task list sent\n",id);
-}
-
-void CellTaskManagerImpl::show_profile() {
-	for (int id = 0; id < machineNum; id++) {
-		HTaskPtr t = schedTaskManager->create_task(ShowTime, 0, 0, 0, 0);
-		t->set_cpu((CPU_TYPE) (id + SPE_0));
-		t->spawn();
-	}
-}
-
-void CellTaskManagerImpl::start_profile() {
-	for (int id = 0; id < machineNum; id++) {
-		HTaskPtr t = schedTaskManager->create_task(StartProfile, 0, 0, 0, 0);
-		t->set_cpu((CPU_TYPE) (id + SPE_0));
-		t->spawn();
-	}
-}
-
-void CellTaskManagerImpl::export_task_log() {
-    ExportTaskLog _export(taskLogQueue);
-    _export.printOut();
-}
-
-void CellTaskManagerImpl::print_arch() {
-	printf("CellTaskManager\n");
-}
-
-TaskListPtr CellTaskManagerImpl::createTaskList()
-{
-        TaskListPtr tl = taskListInfo[0]->create();
-        bzero(tl->tasks,sizeof(Task)*TASK_MAX_SIZE);
-	return tl;
-}
-
-
-#ifdef __CERIUM_CELL__
-TaskManagerImpl*
-create_impl(int num, int useRefDma)
-{
-	Threads *cpus = new SpeThreads(num);
-	return new CellTaskManagerImpl(num,cpus);
-}
-#endif // __CERIUM_CELL
--- a/TaskManager/Cell/SpeThreads.cc.orig	Thu Jul 19 11:56:35 2012 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,169 +0,0 @@
-#include <stdlib.h>
-#include "types.h"
-#include "SpeThreads.h"
-#include "Scheduler.h"
-
-
-SpeThreads::SpeThreads(int num) : cpu_num(num) {}
-
-SpeThreads::~SpeThreads(void)
-{
-    memaddr mail = (memaddr)MY_SPE_COMMAND_EXIT;
-    int ret;
-
-    for (int i = 0; i < cpu_num; i++) {
-        send_mail(i, 1, &mail);
-    }
-
-    for (int i = 0; i < cpu_num; i++) {
-        pthread_join(threads[i], NULL);
-        ret = spe_context_destroy(spe_ctx[i]);
-        if (ret) {
-            perror("[~SpeThreads] spe_context_destroy");
-        }
-    }
-
-    spe_image_close(spe_handle);
-
-    delete [] spe_ctx;
-    delete [] threads;
-    delete [] args;
-}
-
-void*
-SpeThreads::spe_thread_run(void *arg)
-{
-    unsigned int entry = SPE_DEFAULT_ENTRY;
-    //spe_context_ptr_t ctx = (spe_context_ptr_t)arg;
-    thread_arg_t *arg_t = (thread_arg_t *)arg;
-
-    spe_stop_info_t stop_info;
-    unsigned long long status;
-    
-    spe_context_run(arg_t->ctx, &entry, 0, (void*)arg_t->speid, NULL, &stop_info);
-    
-    status = ((stop_info.result.spe_exit_code & 0xff) << 8)
-	| (stop_info.result.spe_signal_code & 0xff);
-
-    switch(stop_info.stop_reason) {
-    case SPE_EXIT:	
-	break;
-    case SPE_STOP_AND_SIGNAL:
-	printf("[SPE %d] SPE_STOP_AND_SIGNAL stop_info.result.stop_signal_code=%d\n", arg_t->speid, stop_info.result.spe_signal_code);
-	break;
-    case SPE_RUNTIME_ERROR:
-	printf("[SPE %d] SPE_RUNTIME_ERROR stop_info.result.spe_runtime_error=%d\n", arg_t->speid,  stop_info.result.spe_runtime_error);
-	break;
-    case SPE_RUNTIME_EXCEPTION:
-	printf("[SPE %d] SPE_RUNTIME_EXCEPTION stop_info.result.spe_runtime_exception=%d\n", arg_t->speid,  stop_info.result.spe_runtime_exception);
-	break;
-    }
-
-    pthread_exit(NULL);
-}
-
-void*
-SpeThreads::frontend_thread_run(void *arg)
-{
-    pthread_t thread;
-    thread_arg_t *arg_t = (thread_arg_t *)arg;
-
-    pthread_create(&thread, NULL, &spe_thread_run, (void*)arg_t->ctx);
-
-    // mail read の blocking ができれば
-    // ここで呼んだ方が早い。
-
-    pthread_exit(NULL);
-}
-
-void
-SpeThreads::init(void)
-{
-    spe_handle = spe_image_open(SPE_ELF);
-
-    if (spe_handle == NULL) {
-	perror("spe_image_open");
-	exit(EXIT_FAILURE);
-    }
-
-    spe_ctx = new spe_context_ptr_t[cpu_num];
-    threads = new pthread_t[cpu_num];
-    args    = new thread_arg_t[cpu_num];
-
-    for (int i = 0; i < cpu_num; i++) {
-	args[i].speid = i;
-	spe_ctx[i] = spe_context_create(0, NULL);
-	spe_program_load(spe_ctx[i], spe_handle);
-	args[i].ctx = spe_ctx[i];
-    }
-
-    for (int i = 0; i < cpu_num; i++) {
-	pthread_create(&threads[i], NULL,
-		       &spe_thread_run, (void*)&args[i]);
-    }
-}
-
-
-/**
- * SPE からのメールを受信する。
- *
- * @param [speid] SPE ID
- *
- * @return Received 32-bit mailbox messages
- *         if ([ret] < 0) no data read
- */
-int
-SpeThreads::get_mail(int speid, int count, memaddr *ret)
-{   
-    // only used in CellTaskManagerImpl (should be removed?) 
-    return spe_out_mbox_read(spe_ctx[speid], (unsigned int*)ret, count*(sizeof(memaddr)/sizeof(int)));    
-}
-
-int
-SpeThreads::has_mail(int speid, int count, memaddr *ret)
-{
-/* 
- *  spe_out_mbox_status return only 1, waiting for multiple length
- *  does not work.
- */
-    if (spe_out_mbox_status(spe_ctx[speid]) >= 1) {    
-      return spe_out_mbox_read(spe_ctx[speid], (unsigned int*)ret, count*(sizeof(memaddr)/sizeof(int)));   
-    } else {
-	return 0;            
-    }
-}
-
-/**
- * Inbound Mailbox
- * メール送信 PPE -> SPE
- *
- * なるべく NONBLOCKING なんだけど、
- * Inbound Mailbox キューに空きがないと送信できないので
- * 送信する数だけ空いているか確認してから送る。空いて無い場合は待つ。
- *
- * 結局待つんだよな。しかも ALL_BLOCKING って実は busy wait だったりするし
- *
- * @param [speid] SPE ID
- * @param [data] Send 32-bit mailbox messages
- * @param [num] The number of messages
- */
-void
-SpeThreads::send_mail(int speid, int num, memaddr *data)
-
-{
-    spe_in_mbox_write(spe_ctx[speid], (unsigned int *)data, num*(sizeof(memaddr)/sizeof(int)), SPE_MBOX_ALL_BLOCKING);
-}
-
-void
-SpeThreads::add_output_tasklist(int command, memaddr buff, int alloc_size)
-{
-    /*
-     * output TaskList が無ければ新しく作る
-     * あれば TaskList に allocate した Task を追加
-     * command に対応した Task の初期化を実行する
-     * SPE に data が書き出し終わった後に PPE 側で初期化
-     */
-    
-}
-
-/* end */
--- a/TaskManager/Cell/SpeThreads.h.orig	Thu Jul 19 11:56:35 2012 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-#ifndef INCLUDED_SPE_THREADS
-#define INCLUDED_SPE_THREADS
-
-#include <libspe2.h>
-#include <pthread.h>
-#include "Threads.h"
-#define SPE_ELF "spe-main"
-
-typedef struct arg {
-    int speid;
-    spe_context_ptr_t ctx;
-} thread_arg_t;
-
-
-class SpeThreads : public Threads {
-public:
-    /* constructor */
-    SpeThreads(int num = 1);
-    ~SpeThreads(void);
-
-    /* functions */
-    void init(void);
-    int get_mail(int speid, int count, memaddr *ret); // BLOCKING
-    int has_mail(int speid, int count, memaddr *ret); // NONBLOCK
-    void send_mail(int speid, int num, memaddr *data); // BLOCKING
-    static void *spe_thread_run(void *arg);
-    static void *frontend_thread_run(void *arg);
-    void add_output_tasklist(int command, memaddr buff, int alloc_size);
-
-private:
-    /* variables */
-    spe_program_handle_t *spe_handle;
-    spe_context_ptr_t *spe_ctx;
-    pthread_t *threads;
-    thread_arg_t *args;
-    int cpu_num;
-};
-
-#endif
--- a/TaskManager/Gpu/GpuScheduler.cc	Thu Jul 19 11:56:35 2012 +0900
+++ b/TaskManager/Gpu/GpuScheduler.cc	Thu Jul 19 14:03:49 2012 +0900
@@ -34,50 +34,47 @@
             clFinish(command_queue);
             return ;
         }
-        
-        TaskListPtr tasklist = (TaskListPtr)connector->dma_load(this, params_addr, 
-                                                                sizeof(TaskList), DMA_READ_TASKLIST);
-        
-        for (TaskPtr nextTask = tasklist->tasks; nextTask < tasklist->last(); nextTask = nextTask->next()) {
-            cl_kernel& kernel = *task_list[nextTask->command].kernel;
-            int err = CL_SUCCESS;
-            for(int i=0;i<nextTask->param_count;i++) {
-                err |= clSetKernelArg(kernel,  i, sizeof(memaddr), (cl_mem*)nextTask->param(i));
-            }
+
+        while (params_addr) {
+            TaskListPtr tasklist = (TaskListPtr)connector->dma_load(this, params_addr, 
+                                                                    sizeof(TaskList), DMA_READ_TASKLIST);
+
+
+            for (TaskPtr nextTask = tasklist->tasks; nextTask < tasklist->last(); nextTask = nextTask->next()) {
+                cl_kernel& kernel = *task_list[nextTask->command].kernel;
+                int err = CL_SUCCESS;
+                for(int i=0;i<nextTask->param_count;i++) {
+                    err |= clSetKernelArg(kernel,  i, sizeof(memaddr), (cl_mem*)nextTask->param(i));
+                }
 
-            for(int i=0;i<nextTask->inData_count;i++) {
-                cl_mem memobj = clCreateBuffer(context, CL_MEM_READ_WRITE, nextTask->inData(i)->size, NULL, NULL);
-                err |= clEnqueueWriteBuffer(command_queue, memobj, CL_TRUE, 0, 
-                    nextTask->inData(i)->size, nextTask->inData(i)->addr, 0, NULL, NULL);
-                //clSetKernleArg(kernel, cur_index,);
-            }
-            // set the kernel arguments
+                for(int i=0;i<nextTask->inData_count;i++) {
+                    cl_mem memobj = clCreateBuffer(context, CL_MEM_READ_WRITE, nextTask->inData(i)->size, NULL, NULL);
+                    err |= clEnqueueWriteBuffer(command_queue, memobj, CL_TRUE, 0, 
+                                                nextTask->inData(i)->size, nextTask->inData(i)->addr, 0, NULL, NULL);
+                    //clSetKernleArg(kernel, cur_index,);
+                }
+                // set the kernel arguments
+            
+                clEnqueueTask(command_queue, kernel, 0, NULL, NULL);
             
-            clEnqueueTask(command_queue, kernel, 0, NULL, NULL);
-            
-            for(int i=0;i<nextTask->outData_count;i++) {
-                cl_mem memobj = clCreateBuffer(context, CL_MEM_READ_WRITE, nextTask->outData(i)->size, NULL, NULL);
-                err |= clEnqueueReadBuffer(command_queue, memobj, CL_TRUE, 0, 
-                    nextTask->outData(i)->size, nextTask->outData(i)->addr, 0, NULL, NULL);
+                for(int i=0;i<nextTask->outData_count;i++) {
+                    cl_mem memobj = clCreateBuffer(context, CL_MEM_READ_WRITE, nextTask->outData(i)->size, NULL, NULL);
+                    err |= clEnqueueReadBuffer(command_queue, memobj, CL_TRUE, 0, 
+                                               nextTask->outData(i)->size, nextTask->outData(i)->addr, 0, NULL, NULL);
+                }
             }
+        
+            clFlush(command_queue); // waiting for queued task
+            params_addr = (memaddr)tasklist->next;
         }
         
-        clFlush(command_queue); // waiting for queued task
-        
-#ifdef TASK_LIST_MAIL
         connector->mail_write((memaddr)(tasklist->waiter));
-#else
-        for (TaskPtr nextTask = &tasklist->tasks; nextTask < tasklist->last(); nextTask = nextTask->next()) {
-            connector->mail_write(nextTask->self);
-        }
-#endif
     }
     
     // TaskArrayの処理
+
 }
 
-
-
 void
 gpu_register_task(int cmd, const char* filename, const char* functionname)
 {
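
GpuScheduler::run() now drains a chain of TaskList blocks linked through next rather than handling a single list per mail, flushing the command queue after each block and signalling the waiter once the chain is exhausted. A minimal sketch of that traversal with simplified stand-in types (the real Task/TaskList carry OpenCL parameters and are walked with nextTask->next() instead of pointer arithmetic):

    #include <cstdio>

    struct Task { int command; };                  // stand-in for the real Task
    struct TaskList {                              // stand-in for the real TaskList
        Task tasks[4];
        int length;
        TaskList *next;                            // the chain followed by the new loop
        Task *last() { return tasks + length; }
    };

    static void run_chain(TaskList *list) {
        TaskList *last_seen = nullptr;
        for (TaskList *current = list; current; current = current->next) {
            for (Task *t = current->tasks; t < current->last(); ++t)
                printf("enqueue kernel for task %d\n", t->command);
            // clFlush(command_queue) would go here in the scheduler
            last_seen = current;                   // remember the list just processed
        }
        if (last_seen)
            printf("notify waiter of list %p\n", (void *)last_seen);
    }

    int main() {
        TaskList second = {{{3}, {4}}, 2, nullptr};
        TaskList first  = {{{1}, {2}}, 2, &second};
        run_chain(&first);
        return 0;
    }
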
--- a/TaskManager/Gpu/GpuScheduler.cc.orig	Thu Jul 19 11:56:35 2012 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,128 +0,0 @@
-#include "GpuScheduler.h"
-#include "ReferencedDmaManager.h"
-#include "SchedTask.h"
-#include "GpuThreads.h"
-#include "stdio.h"
-#include <fcntl.h>
-#include <sys/stat.h>
-
-GpuScheduler::GpuScheduler()
-{
-    init_impl(0);
-}
-
-void
-GpuScheduler::init_impl(int useRefDma)
-{
-    fifoDmaManager = new ReferencedDmaManager();
-    connector = fifoDmaManager;
-}
-
-
-void
-GpuScheduler::run()
-{
-    for (;;) {
-        memaddr params_addr = connector->task_list_mail_read();
-        
-        // Get OpenCL infomation
-        GpuThreads* gputhreads = GpuThreads::getInstance();
-        cl_context context = gputhreads->context;
-        cl_command_queue command_queue = gputhreads->command_queue;
-        
-        if ((memaddr)params_addr == (memaddr)MY_SPE_COMMAND_EXIT) {
-            clFinish(command_queue);
-            return ;
-        }
-
-        while (params_addr) {
-            TaskListPtr tasklist = (TaskListPtr)connector->dma_load(this, params_addr, 
-                                                                    sizeof(TaskList), DMA_READ_TASKLIST);
-
-
-            for (TaskPtr nextTask = tasklist->tasks; nextTask < tasklist->last(); nextTask = nextTask->next()) {
-                cl_kernel& kernel = *task_list[nextTask->command].kernel;
-                int err = CL_SUCCESS;
-                for(int i=0;i<nextTask->param_count;i++) {
-                    err |= clSetKernelArg(kernel,  i, sizeof(memaddr), (cl_mem*)nextTask->param(i));
-                }
-
-                for(int i=0;i<nextTask->inData_count;i++) {
-                    cl_mem memobj = clCreateBuffer(context, CL_MEM_READ_WRITE, nextTask->inData(i)->size, NULL, NULL);
-                    err |= clEnqueueWriteBuffer(command_queue, memobj, CL_TRUE, 0, 
-                                                nextTask->inData(i)->size, nextTask->inData(i)->addr, 0, NULL, NULL);
-                    //clSetKernleArg(kernel, cur_index,);
-                }
-                // カーネル引数の設定
-            
-                clEnqueueTask(command_queue, kernel, 0, NULL, NULL);
-            
-                for(int i=0;i<nextTask->outData_count;i++) {
-                    cl_mem memobj = clCreateBuffer(context, CL_MEM_READ_WRITE, nextTask->outData(i)->size, NULL, NULL);
-                    err |= clEnqueueReadBuffer(command_queue, memobj, CL_TRUE, 0, 
-                                               nextTask->outData(i)->size, nextTask->outData(i)->addr, 0, NULL, NULL);
-                }
-            }
-        
-            clFlush(command_queue); // waiting for queued task
-            params_addr = (memaddr)tasklist->next;
-        }
-        
-        connector->mail_write((memaddr)(tasklist->waiter));
-    }
-    
-    // TaskArrayの処理
-
-}
-
-void
-gpu_register_task(int cmd, const char* filename, const char* functionname)
-{
-    GpuThreads* gputhreads = GpuThreads::getInstance();
-    //gputhreads->init();
-    cl_context context = gputhreads->context;
-    cl_device_id device_id = gputhreads->device_id;
-    
-    int fp;
-    char *source_str;
-    size_t source_size;
-    
-    fp = open(filename, O_RDONLY);
-    
-    if (!fp) {
-        fprintf(stderr, "Failed to load kernel.\n");
-        exit(1);
-    }
-    
-    struct stat stats;
-    fstat(fp,&stats);
-    off_t size = stats.st_size;
-    
-    if (!size) {
-        fprintf(stderr, "Failed to load kernel.\n");
-        exit(1);
-    }
-    
-    source_str = (char*)malloc(size);
-    source_size = read(fp, source_str, size);
-    close(fp);
-
-    cl_program program = NULL;
-    cl_int ret = gputhreads->ret;
-    program = clCreateProgramWithSource(context, 1, (const char **)&source_str,
-                                               (const size_t *)&source_size, &ret);
-    clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
-
-    cl_kernel *kernel = new cl_kernel; 
-    *kernel = clCreateKernel(program, functionname, &ret);
-     
-    task_list[cmd].run = null_run;
-    task_list[cmd].load = null_loader;
-    task_list[cmd].wait = null_loader;
-    task_list[cmd].name = functionname;
-    task_list[cmd].kernel = kernel;
-
-}
-
-/* end */
-
Binary file TaskManager/Gpu/GpuScheduler.o has changed
--- a/TaskManager/Gpu/GpuTaskManagerImpl.cc	Thu Jul 19 11:56:35 2012 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,216 +0,0 @@
-#include "GpuTaskManagerImpl.h"
-#include "MainScheduler.h"
-#include "SchedTask.h"
-#include "HTask.h"
-#include "QueueInfo.h"
-#include "ExportTaskLog.h"
-#include "SchedTask.h"
-#include "MainScheduler.h"
-#include "types.h"
-#include "SysFunc.h"
-#include <strings.h>
-
-static void send_alloc_reply(GpuTaskManagerImpl *tm, int id,
-                             GpuThreads *gpuThreads);
-
-extern  QueueInfo<TaskList> *taskListPool;
-
-GpuTaskManagerImpl::~GpuTaskManagerImpl() {
-}
-
-void GpuTaskManagerImpl::init(int spuIdle,int useRefDma) {
-    
-    gpuTaskList = new QueueInfo<TaskList>(taskListPool);
-    taskListInfo  = new QueueInfo<TaskList>(taskListPool);
-
-    ppeManager = new FifoTaskManagerImpl(machineNum);
-    MainScheduler *mscheduler = new MainScheduler;
-    set_scheduler(mscheduler);
-    ppeManager->init(mscheduler, this, useRefDma);
-
-    htaskImpl = activeTaskQueue;
-    mscheduler->set_manager(this);
-
-    //    GpuThreads* gpuThreads = GpuThreads::getInstance();
-    //    gpuThreads->init();
-    
-    schedTaskManager = new SchedTask();
-    schedTaskManager->init(0, 0, ppeManager->get_scheduler(), 0);
-    ppeManager->schedTaskManager = schedTaskManager;
-}
-
-void GpuTaskManagerImpl::run() {
-    do {
-        ppeManager->poll();
-        do {
-            poll();
-        } while (ppeManager->activeTaskQueue->empty());
-        // ちゃんと最後のタスクまで実行される?
-    } while (!ppeManager->activeTaskQueue->empty() || !activeTaskQueue->empty());
-}
-
-void GpuTaskManagerImpl::poll() {
-    set_runTaskList();
-    sendTaskList();
-}
-
-void GpuTaskManagerImpl::set_runTaskList() {
-    HTaskPtr htask = activeTaskQueue->getFirst();
-    while (htask != NULL) {
-        if (htask->cpu_type == CPU_PPE) {
-            htask = activeTaskQueue->getNext(htask);
-        } else {
-            set_taskList(htask, taskListInfo);
-
-            HTaskPtr next = activeTaskQueue->getNext(htask);
-            activeTaskQueue->remove(htask);
-            htask = next;
-        }
-    }
-}
-
-TaskListPtr GpuTaskManagerImpl::createTaskList()
-{
-    TaskListPtr tl = taskListInfo->create();
-    bzero(tl->tasks,sizeof(Task)*TASK_MAX_SIZE);
-    return tl;
-}
-
-
-void GpuTaskManagerImpl::sendTaskList() {
-    mail_check();
-    if (!gpuTaskList->empty()) {
-    }
-    if (!taskListInfo->empty()) {
-        send_taskList();
-    }
-}
-
-void GpuTaskManagerImpl::send_taskList() {
-    // swap gpuTaskList for taskListInfo
-    QueueInfo<TaskList> *tmp = taskListInfo;
-    taskListInfo = gpuTaskList;
-    gpuTaskList = tmp;
-
-    gpuTaskList->getLast()->next = 0;
-    TaskListPtr p = gpuTaskList->getFirst();
-
-    // send taskList
-    gpuThreads->send_mail(0, 0, (memaddr *) &p);
-}
-
-/**
- * メールをチェックする
- */
-
-static void send_alloc_reply(GpuTaskManagerImpl *tm, int id,
-                             GpuThreads *gpuThreads) {
-
-	/**
-	 * info[0] = alloc_id; (CellScheduler::mainMem_alloc 参照)
-	 * info[1] = alloc_addr;
-	 */
-	memaddr alloc_info[2];
-	long alloc_size;
-	long command;
-
-	gpuThreads->get_mail(0, 2, alloc_info);
-	command = (long) alloc_info[0];
-	alloc_size = (long) alloc_info[1];
-
-	alloc_info[1] = (memaddr) tm->allocate(alloc_size);
-	//__debug_ppe("[PPE] MALLOCED 0x%lx from [SPE %d]\n", alloc_info[1],id);
-	// 今のところ何もしてない。どうも、この allocate を free
-	// するのは、SPE task が返した値を見て行うらしい。それは、
-	// 忘れやすいのではないか?
-	gpuThreads->add_output_tasklist(command, alloc_info[1], alloc_size);
-
-	gpuThreads->send_mail(0, 2, alloc_info);
-}
-
-void GpuTaskManagerImpl::mail_check() {
-    GpuThreads* gpuThreads = GpuThreads::getInstance();
-	memaddr data;
-
-	// SPE Scheduler からの mail check
-	while (gpuThreads->has_mail(0, 0, &data)) {
-        
-		if (data == (memaddr) MY_SPE_STATUS_READY) {
-			//  MY_SPE_STATUS_READY: SPE が持ってた Task 全て終了
-			// freeAll する前に循環リストに戻す
-			gpuTaskList->getLast()->next = gpuTaskList;
-			gpuTaskList->freeAll();
-			// printf("SPE %d status ready, %d running\n",id, spe_running);
-		} else if (data == (memaddr) MY_SPE_COMMAND_MALLOC) {
-
-
-			// MY_SPE_COMMAND_MALLOC   SPE からのmain memory request
-			send_alloc_reply(this, 0, gpuThreads);
-		} else if (data > (memaddr) MY_SPE_NOP) {
-            
-#ifdef TASK_LIST_MAIL
-			TaskListPtr list = (TaskListPtr)data;
-			check_task_list_finish(schedTaskManager, list, waitTaskQueue);
-#else
-			// 終了したタスク(PPEにあるのでアドレス)
-			HTaskPtr task = (HTaskPtr) data;
-#if 0
-			if (task->cpu_type != CPU_SPE) {
-				const char *name = get_task_name(task);
-				if (name != NULL) {
-					printf("[SPE] ");
-					printf("Task id : %d, ", task->command);
-					printf("Task name : %s\n", name);
-				}
-			}
-#endif
-#ifndef NOT_CHECK
-
-			if (task != NULL) {
-				//SPE で処理された Task が返ってくるはず。それがもし、type PPE なら・・・
-				if (task->cpu_type == CPU_PPE) {
-					printf("attention : PPE task run on SPE\n");
-					printf("Task id : %d\n", task->command);
-					const char *name = get_task_name(task);
-					if (name != NULL) {
-						printf("Task name : %s\n", name);
-					}
-				}
-			}
-
-#endif
-
-			task->post_func(schedTaskManager, task->post_arg1, task->post_arg2);
-			check_task_finish(task, waitTaskQueue);
-#endif
-
-		}
-		
-        
-    }
-}
-
-
-
-
-/*
-void GpuTaskManagerImpl::mail_check() {
-    memaddr data;
-    while (gpuThreads->has_mail(0, 0, &data)) {
-        //どのメールが必要かよく考える
-        
-        //check_task_finish
-    }
-    }*/
-
-
-
-
-#ifdef __CERIUM_GPU__
-TaskManagerImpl*
-create_impl(int num, int useRefDma)
-{
-    GpuThreads *gpus = GpuThreads::getInstance();
-    return new GpuTaskManagerImpl(num, gpus);
-}
-#endif
--- a/TaskManager/Gpu/GpuTaskManagerImpl.h	Thu Jul 19 11:56:35 2012 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-#ifndef INCLUDED_GPU_TASK_MANAGER_IMPL
-#define INCLUDED_GPU_TASK_MANAGER_IMPL
-
-#include "TaskManagerImpl.h"
-#include "FifoTaskManagerImpl.h"
-#include "GpuThreads.h"
-#include "QueueInfo.h"
-
-class GpuTaskManagerImpl : public TaskManagerImpl {
- public:
-    GpuTaskManagerImpl(){};
- GpuTaskManagerImpl(int num, GpuThreads *gpus) : TaskManagerImpl(num) {/*gpuThreads = gpus;*/}
-    ~GpuTaskManagerImpl();
-    
-    void init(int spuIdle,int useRefDma);
-    void run();
-    void poll();
-    void set_runTaskList();
-    void sendTaskList();
-    void send_taskList();
-    void mail_check();
-
-    void start_profile(){}
-    void show_profile(){} 
-    void polling(){}
-    void print_arch(){}
-    TaskList* createTaskList();
-
-public:
-    QueueInfo<TaskList> *gpuTaskList;
-    QueueInfo<TaskList> *taskListInfo;
-
-    FifoTaskManagerImpl *ppeManager;
-
-    GpuThreads *gpuThreads;
-};
-
-#endif
-
Binary file TaskManager/Gpu/GpuTaskManagerImpl.o has changed
--- a/TaskManager/Gpu/GpuThreads.cc	Thu Jul 19 11:56:35 2012 +0900
+++ b/TaskManager/Gpu/GpuThreads.cc	Thu Jul 19 14:03:49 2012 +0900
@@ -1,7 +1,7 @@
 #include "GpuThreads.h"
 #include "GpuScheduler.h"
 #include "TaskManagerImpl.h"
-#include "GpuTaskManagerImpl.h"
+#include "SpeTaskManagerImpl.h"
 
 GpuThreads::GpuThreads()
 {
@@ -43,7 +43,7 @@
     gpu_arg *argt = (gpu_arg *) args;
     Scheduler *g_scheduler = argt->scheduler;
 
-    TaskManagerImpl *manager = new GpuTaskManagerImpl();
+    TaskManagerImpl *manager = new SpeTaskManagerImpl();
     g_scheduler->init(manager, argt->useRefDma);
 
     manager->set_scheduler(g_scheduler);
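
With GpuTaskManagerImpl removed, the GPU worker thread is wired up the same way as the CPU threads: a lightweight SpeTaskManagerImpl is created per thread, attached to the scheduler, and the scheduler is run to completion. A trimmed sketch of that wiring using stub classes (the real Scheduler and TaskManagerImpl interfaces are richer):

    #include <cstdio>

    struct Scheduler;

    struct TaskManagerImpl {                           // stand-in for SpeTaskManagerImpl
        Scheduler *scheduler = nullptr;
        void set_scheduler(Scheduler *s) { scheduler = s; }
    };

    struct Scheduler {                                 // stand-in for GpuScheduler
        void init(TaskManagerImpl *, int) { printf("scheduler init\n"); }
        void run()    { printf("scheduler run\n"); }
        void finish() { printf("scheduler finish\n"); }
    };

    static void *gpu_thread_run_sketch(void *arg) {
        Scheduler *g_scheduler = (Scheduler *)arg;
        TaskManagerImpl *manager = new TaskManagerImpl();  // per-thread manager
        g_scheduler->init(manager, 0);                     // 0: useRefDma placeholder
        manager->set_scheduler(g_scheduler);
        g_scheduler->run();
        g_scheduler->finish();
        delete manager;
        return nullptr;
    }

    int main() {
        Scheduler sched;
        gpu_thread_run_sketch(&sched);
        return 0;
    }
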
--- a/TaskManager/Gpu/GpuThreads.cc.orig	Thu Jul 19 11:56:35 2012 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,83 +0,0 @@
-#include "GpuThreads.h"
-#include "GpuScheduler.h"
-#include "TaskManagerImpl.h"
-#include "SpeTaskManagerImpl.h"
-
-GpuThreads::GpuThreads()
-{
-    threads = new pthread_t;
-    args = new gpu_arg;
-}
-
-GpuThreads::~GpuThreads()
-{
-    delete threads;
-    delete args;
-
-    clReleaseCommandQueue(command_queue);
-    clReleaseContext(context);
-}
-
-void
-GpuThreads::init()
-{
-    clGetPlatformIDs(1, &platform_id, &ret_num_platforms); 
-    clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, &ret_num_devices);
-    // unavailable GPU
-    if( ret_num_devices == 0) {
-        exit(EXIT_FAILURE);
-    }
-    context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
-    command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
-
-    args->scheduler = new GpuScheduler();
-    args->useRefDma = use_refdma;
-
-    // pthread_create(threads, NULL, &gpu_thread_run, args);
-
-}
-
-void *
-GpuThreads::gpu_thread_run(void *args)
-{
-    gpu_arg *argt = (gpu_arg *) args;
-    Scheduler *g_scheduler = argt->scheduler;
-
-    TaskManagerImpl *manager = new SpeTaskManagerImpl();
-    g_scheduler->init(manager, argt->useRefDma);
-
-    manager->set_scheduler(g_scheduler);
-
-    g_scheduler->run();
-    g_scheduler->finish();
-
-    return NULL;
-}
-
-int
-GpuThreads::get_mail(int speid, int count, memaddr *ret)
-{
-    *ret = args->scheduler->mail_read_from_host();
-    return 1;
-}
-
-int
-GpuThreads::has_mail(int speid, int count, memaddr *ret)
-{
-    if (args->scheduler->has_mail_from_host() != 0) {
-        return get_mail(0, 0, ret);
-    } else {
-        return 0;
-    }
-}
-
-void
-GpuThreads::send_mail(int speid, int num, memaddr *data)
-{
-    args->scheduler->mail_write_from_host(*data);
-}
-
-void
-GpuThreads::add_output_tasklist(int command, memaddr buff, int alloc_size)
-{
-}
--- a/TaskManager/Gpu/GpuThreads.h.orig	Thu Jul 19 11:56:35 2012 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,52 +0,0 @@
-#ifndef INCLUDED_GPU_THREADS
-#define INCLUDED_GPU_THREADS
-
-#include <pthread.h>
-#include "Threads.h"
-#include "GpuScheduler.h"
-
-#ifdef __APPLE__
-#include <OpenCL/opencl.h>
-#else
-#include <CL/cl.h>
-#endif
-
-// Singleton Pattern
-struct gpu_arg {
-    GpuScheduler *scheduler;
-    int useRefDma;
-};
-
-class GpuThreads : public Threads {
-public:
-    static GpuThreads* getInstance() {
-      static GpuThreads singleton;
-      return &singleton;
-    }
-    ~GpuThreads();
-
-    void init();
-    static void *gpu_thread_run(void *args);
-
-    int get_mail(int speid, int count, memaddr *ret);
-    int has_mail(int speid, int count, memaddr *ret);
-    void send_mail(int speid, int num, memaddr *data);
-    void add_output_tasklist(int command, memaddr buff, int alloc_size);
-
-public:
-    cl_platform_id platform_id;
-    cl_device_id device_id;
-    cl_uint ret_num_platforms;
-    cl_uint ret_num_devices;
-    cl_context context ;
-    cl_command_queue command_queue;
-    cl_int ret;
-
-private:
-    GpuThreads();
-    gpu_arg *args;
-    pthread_t *threads;
-    int use_refdma;
-};
-
-#endif
Binary file TaskManager/Gpu/GpuThreads.o has changed
--- a/TaskManager/include/types.h	Thu Jul 19 11:56:35 2012 +0900
+++ b/TaskManager/include/types.h	Thu Jul 19 14:03:49 2012 +0900
@@ -59,18 +59,24 @@
     MY_SPE_COMMAND_MALLOC,
 };
 
-#define MAX_USE_SPE_NUM 6
+#define MAX_USE_SPE_NUM 32
 
 typedef enum {
     CPU_PPE = 0, // default
-    CPU_SPE = 1,
+    GPU_0 = 1,
+    GPU_1 = 2,
+    GPU_2 = 3,
+    GPU_3 = 4,
+    CPU_SPE = 5,
     SPE_ANY = CPU_SPE,
-    SPE_0 = 2,
-    SPE_1 = 3,
-    SPE_2 = 4,
-    SPE_3 = 5,
-    SPE_4 = 6,
-    SPE_5 = 7,
+    SPE_0 = 6,
+    SPE_1 = 7,
+    SPE_2 = 8,
+    SPE_3 = 9,
+    SPE_4 = 10,
+    SPE_5 = 11,
+
+
 } CPU_TYPE;
 
 #endif
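
Because GPU_0..GPU_3 now occupy 1..4 and the SPE entries start at 6 (with MAX_USE_SPE_NUM raised to 32), code that turns a CPU_TYPE into a per-device array index has to anchor on the symbolic names rather than literals. A small self-contained sketch; the helper names are hypothetical and not part of the TaskManager API:

    #include <cassert>

    // Mirrors the updated enum above.
    typedef enum {
        CPU_PPE = 0,
        GPU_0 = 1, GPU_1 = 2, GPU_2 = 3, GPU_3 = 4,
        CPU_SPE = 5, SPE_ANY = CPU_SPE,
        SPE_0 = 6, SPE_1 = 7, SPE_2 = 8, SPE_3 = 9, SPE_4 = 10, SPE_5 = 11
    } CPU_TYPE;

    // Hypothetical helpers illustrating the intended index arithmetic.
    static int gpu_index(CPU_TYPE t) { return t - GPU_0; }   // GPU_0 -> 0
    static int spe_index(CPU_TYPE t) { return t - SPE_0; }   // SPE_0 -> 0

    int main() {
        assert(gpu_index(GPU_2) == 2);
        assert(spe_index(SPE_3) == 3);
        assert(spe_index((CPU_TYPE)(1 + SPE_0)) == 1);        // matches set_cpu(id + SPE_0)
        return 0;
    }
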
--- a/TaskManager/include/types.h.orig	Thu Jul 19 11:56:35 2012 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,82 +0,0 @@
-#ifndef INCLUDED_TYPES
-#define INCLUDED_TYPES
-
-#include <stdint.h>
-
-typedef uint16_t uint16;
-typedef uint32_t uint32;
-typedef uint64_t uint64;
-
-//  HOST main memory address 
-//       SPU's (void *) is always 32bit (actually 18bit (256kbyte))
-//       memaddr is different from (void *) in SPU.
-//
-#ifdef __SPU__
-#if ABIBIT>32
-typedef uint64_t memaddr;
-#else
-typedef uint32_t memaddr;
-#endif
-#else
-typedef char* memaddr;
-#endif
-
-
-#define Newq(Type,Count) ((Type *)malloc(sizeof(Type)*Count))
-#define ReAlloc(Pointer,Type,Count) ((Type *)realloc((void*)Pointer,sizeof(Type)*Count))
-
-
-#define SPE_ALIGNMENT 16
-#define SPE_ALIGNMENT_FULL 128
-#define SPE_ALIGN __attribute__((aligned(SPE_ALIGNMENT)))
-#define SPE_ALIGN_FULL __attribute__((aligned(SPE_ALIGNMENT_FULL))
-#define ROUND_UP_ALIGN(value, alignment) \
-    (((value) + ((alignment) - 1))&(~((alignment)-1)))
-#define DEFAULT_ALIGNMENT SPE_ALIGNMENT
-//#define DEFAULT_ALIGNMENT SPE_ALIGNMENT_FULL
-
-#define DMA_MAX_SIZE 16384
-
-#define round_up16(value)  ROUND_UP_ALIGN(value, 16)
-#define round_up128(value) ROUND_UP_ALIGN(value, 128)
-
-#define TaskArray (-1)
-#define TaskArray1 (-2)
-
-// SPU 依存 (よろしくないが...)
-
-// ここも typedef しとくか?
-enum {
-// どの方向かで enum 分けるだろjk...
-// PPE -> SPE 
-    MY_SPE_NOP = 0,
-    MY_SPE_COMMAND_EXIT,
-    MY_SPE_COMMAND_GO,
-
-// SPE -> PPE
-    MY_SPE_STATUS_BUSY,
-    MY_SPE_STATUS_READY,
-    MY_SPE_COMMAND_MALLOC,
-};
-
-#define MAX_USE_SPE_NUM 32
-
-typedef enum {
-    CPU_PPE = 0, // default
-    GPU_0 = 1,
-    GPU_1 = 2,
-    GPU_2 = 3,
-    GPU_3 = 4,
-    CPU_SPE = 5,
-    SPE_ANY = CPU_SPE,
-    SPE_0 = 6,
-    SPE_1 = 7,
-    SPE_2 = 8,
-    SPE_3 = 9,
-    SPE_4 = 10,
-    SPE_5 = 11,
-
-
-} CPU_TYPE;
-
-#endif
--- a/TaskManager/kernel/ppe/CpuThreads.cc	Thu Jul 19 11:56:35 2012 +0900
+++ b/TaskManager/kernel/ppe/CpuThreads.cc	Thu Jul 19 14:03:49 2012 +0900
@@ -12,7 +12,9 @@
 
 
 CpuThreads::CpuThreads(int num, int useRefDma, int start_id) : cpu_num(num), use_refdma(useRefDma), id_offset(start_id) {
-
+#ifdef __CERIUM_GPU__
+    gpu = new GpuThreads;
+#endif
     threads = new pthread_t[cpu_num];
     args    = new cpu_thread_arg_t[cpu_num];
     wait	= new Sem(0);
@@ -37,6 +39,9 @@
 
     delete [] threads;
     delete [] args;
+#ifdef __CERIUM_GPU__
+    delete gpu;
+#endif
 }
 
 void *
@@ -93,7 +98,8 @@
  */
 int
 CpuThreads::get_mail(int cpuid, int count, memaddr *ret)
-{   
+{  
+    
     *ret = args[cpuid-id_offset].scheduler->mail_read_from_host();
     return 1;
 }
--- a/TaskManager/kernel/ppe/CpuThreads.cc.orig	Thu Jul 19 11:56:35 2012 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,149 +0,0 @@
-#include <stdlib.h>
-#include "types.h"
-#include "CpuThreads.h"
-#include "MainScheduler.h"
-#include "SysFunc.h"
-#include "SchedNop.h"
-#include "SpeTaskManagerImpl.h"
-#include "CellScheduler.h"
-
-SchedExternTask(ShowTime);
-SchedExternTask(StartProfile);
-
-
-CpuThreads::CpuThreads(int num, int useRefDma, int start_id) : cpu_num(num), use_refdma(useRefDma), id_offset(start_id) {
-#ifdef __CERIUM_GPU__
-    gpu = new GpuThreds;
-#endif
-    threads = new pthread_t[cpu_num];
-    args    = new cpu_thread_arg_t[cpu_num];
-    wait	= new Sem(0);
-
-}
-
-CpuThreads::~CpuThreads()
-{
-    memaddr mail = (memaddr)MY_SPE_COMMAND_EXIT;
-
-    for (int i = 0; i < cpu_num; i++) {
-        send_mail(i, 1, &mail);
-    }
-
-    for (int i = 0; i < cpu_num; i++) {
-		pthread_join(threads[i], NULL);
-	}
-
-    for (int i = 0; i < cpu_num; i++) {
-    	delete args[i].scheduler;
-    }
-
-    delete [] threads;
-    delete [] args;
-#ifdef __CERIUM_GPU__
-    delete gpu;
-#endif
-}
-
-void *
-CpuThreads::cpu_thread_run(void *args)
-{
-    cpu_thread_arg_t *argt = (cpu_thread_arg_t *) args;
-    Scheduler *c_scheduler = argt->scheduler;
-
-    TaskManagerImpl *manager = new SpeTaskManagerImpl();
-    c_scheduler->init(manager,argt->useRefDma);
-    c_scheduler->id = (int)argt->cpuid;
-
-    manager->set_scheduler(c_scheduler);
-
-    SchedRegister(ShowTime);
-    SchedRegister(StartProfile);
-
-    argt->wait->sem_v();	//準備完了したスレッドができるたびに+1していく
-
-    c_scheduler->run(new SchedNop());
-    c_scheduler->finish();
-
-    return NULL;
-}
-
-void
-//CpuThreads::init()
-CpuThreads::init()
-{
-	for (int i = 0; i < cpu_num; i++) {
-		args[i].cpuid = i + id_offset;
-		args[i].scheduler = new MainScheduler();
-		args[i].wait = wait;
-		args[i].useRefDma = use_refdma;
-	}
-
-    for (int i = 0; i < cpu_num; i++) {
-	pthread_create(&threads[i], NULL,
-		      &cpu_thread_run, (void*)&args[i]);
-    }
-
-    for (int i = 0; i < cpu_num; i++) {
-    	wait->sem_p();
-    }
-}
-
-/**
- * このCPU からのメールを受信する。
- *
- * @param [cpuid] SPE ID
- *
- * @return Received 32-bit mailbox messages
- *         if ([ret] < 0) no data read
- */
-int
-CpuThreads::get_mail(int cpuid, int count, memaddr *ret)
-{  
-    
-    *ret = args[cpuid-id_offset].scheduler->mail_read_from_host();
-    return 1;
-}
-
-int
-CpuThreads::has_mail(int cpuid, int count, memaddr *ret)
-{
-	if (args[cpuid-id_offset].scheduler->has_mail_from_host() != 0) {
-		return get_mail(cpuid,count,ret);
-	} else {
-		return 0; //mailがないとき0を返す
-	}
-}
-
-/**
- * Inbound Mailbox
- * メール送信 Front End -> CPU
- *
- * なるべく NONBLOCKING なんだけど、
- * Inbound Mailbox キューに空きがないと送信できないので
- * 送信する数だけ空いているか確認してから送る。空いて無い場合は待つ。
- *
- * 結局待つんだよな。しかも ALL_BLOCKING って実は busy wait だったりするし
- *
- * @param [cpuid] SPE ID
- * @param [data] Send 32-bit mailbox messages
- * @param [num] The number of messages
- */
-void
-CpuThreads::send_mail(int cpuid, int num, memaddr *data)
-{
-    args[cpuid-id_offset].scheduler->mail_write_from_host(*data);
-}
-
-void
-CpuThreads::add_output_tasklist(int command, memaddr buff, int alloc_size)
-{
-    /*
-     * output TaskList が無ければ新しく作る
-     * あれば TaskList に allocate した Task を追加
-     * command に対応した Task の初期化を実行する
-     * SPE に data が書き出し終わった後に PPE 側で初期化
-     */
-
-}
-
-/* end */
--- a/TaskManager/kernel/ppe/CpuThreads.h	Thu Jul 19 11:56:35 2012 +0900
+++ b/TaskManager/kernel/ppe/CpuThreads.h	Thu Jul 19 14:03:49 2012 +0900
@@ -6,7 +6,9 @@
 #include "TaskManagerImpl.h"
 #include "MainScheduler.h"
 #include "Sem.h"
-
+#ifdef __CERIUM_GPU__
+#include "GpuThreads.h"
+#endif
 typedef struct cpu_arg {
     int cpuid;
     // should be synchronized
@@ -38,6 +40,9 @@
     int cpu_num;
 	int use_refdma;
     int id_offset;
+#ifdef __CERIUM_GPU__
+    GpuThreads *gpu;
+#endif
 };
 
 #endif
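
The __CERIUM_GPU__ guard added to CpuThreads ties a GpuThreads object to the lifetime of the CPU thread pool. A trimmed sketch of the intended shape, with GpuThreads replaced by a stub; in the real tree GpuThreads is a singleton reached through getInstance() (see GpuThreads.h.orig above), so ownership there would follow that pattern rather than a plain new/delete:

    #include <cstdio>

    struct GpuThreads { };             // stub standing in for TaskManager/Gpu/GpuThreads.h

    class CpuThreads {
    public:
        CpuThreads() {
    #ifdef __CERIUM_GPU__
            gpu = new GpuThreads();    // created only in GPU-enabled builds
    #endif
            printf("CpuThreads up\n");
        }
        ~CpuThreads() {
    #ifdef __CERIUM_GPU__
            delete gpu;                // released together with the CPU threads
    #endif
        }

    private:
    #ifdef __CERIUM_GPU__
        GpuThreads *gpu;               // pointer, so the new/delete above match
    #endif
    };

    int main() {
        CpuThreads threads;            // the GPU member exists only under __CERIUM_GPU__
        return 0;
    }
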
--- a/TaskManager/kernel/ppe/CpuThreads.h.orig	Thu Jul 19 11:56:35 2012 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
-#ifndef INCLUDED_CPU_THREADS
-#define INCLUDED_CPU_THREADS
-
-#include <pthread.h>
-#include "Threads.h"
-#include "TaskManagerImpl.h"
-#include "MainScheduler.h"
-#include "Sem.h"
-#ifdef __CERIUM_GPU__
-#include "GpuThreads.h"
-#endif
-typedef struct cpu_arg {
-    int cpuid;
-    // should be syncrhonized
-    MainScheduler *scheduler;
-    TaskManagerImpl *manager;
-    SemPtr wait;
-	int useRefDma;
-} cpu_thread_arg_t;
-
-class CpuThreads : public Threads {
-public:
-    /* constructor */
-    CpuThreads(int num = 1, int useRefDma = 0, int start_id = 0);
-    ~CpuThreads();
-    static void *cpu_thread_run(void *args);
-
-    /* functions */
-    virtual void init();
-    virtual int get_mail(int speid, int count, memaddr *ret); // BLOCKING
-    virtual int has_mail(int speid, int count, memaddr *ret); // NONBLOCK
-    virtual void send_mail(int speid, int num, memaddr *data); // BLOCKING
-    virtual void add_output_tasklist(int command, memaddr buff, int alloc_size);
-
-private:
-    /* variables */
-    pthread_t *threads;
-    cpu_thread_arg_t *args;
-    SemPtr wait; //スレッド生成時の待ち用
-    int cpu_num;
-	int use_refdma;
-    int id_offset;
-#ifdef __CERIUM_GPU__
-    GpuThreads gpu;
-#endif
-};
-
-#endif
Binary file TaskManager/test/GpuRegistTaskTest/GpuRegistTaskTest has changed
--- a/TaskManager/test/GpuRegistTaskTest/GpuRegistTaskTest.cc	Thu Jul 19 11:56:35 2012 +0900
+++ b/TaskManager/test/GpuRegistTaskTest/GpuRegistTaskTest.cc	Thu Jul 19 14:03:49 2012 +0900
@@ -1,105 +1,101 @@
 #include <stdio.h>
 #include <OpenCL/opencl.h>
-#include <fcntl.h>
-#include <sys/stat.h>
+#include "GpuThreads.h"
 #include "GpuScheduler.h"
 #include "CellTaskManagerImpl.h"
-#include "GpuThreads.h"
+
 #define DEFAULT 5
+extern void gpu_register_task(int, char*, char*);
 
-extern void gpu_register_task(int, const char*, const char*);
 void
-print_data(int *data, int size, const char *title)
-{
+print_data(int *data, int size, const char *title){
     printf("%s ---\n", title);
+
     for ( int i = 0; i < size; i++) {
         printf("%2d ", data[i]);
     }
+
     printf("\n");
 }
 
 void
 tester(int *indata,int *outdata, int num){
-
     // check
-    int check = 0;
-    for (int c=0; c<num; c++) {
-        if(outdata[c] == indata[c]*2) {
+    int check;
+    for (int c=0; c<num; c++){
+        if(outdata[c] == indata[c]*2){
             check++;
         }
+        printf("Computed '%d/%d' correct values\n",check,num);
     }
-    
-    printf("Computed '%d/%d' correct values\n",check,num);
-    
 }
 
 void
 test(int task_array_num){
+    
+    GpuThreads* gpuThreads = GpuThreads::getInstance();
+    gpuThreads->init();
 
-    GpuThreads* gputhreads = GpuThreads::getInstance();
-    gputhreads->init();
-       
-    int cmd = 1;
-    GpuSchedRegister(cmd, "twice.cl", "twice");
-
-
-    int *indata  = new int[task_array_num];
-    int *outdata = new int[task_array_num];
+    int *indata,*outdata;
     int count;
+    indata = (int *)malloc(sizeof(int)*task_array_num);
+    outdata = (int *)malloc(sizeof(int)*task_array_num);
     for (count=0; count < task_array_num ;count++) {
         indata[count] = count;
     }
-    print_data(indata, count, "before");
-    
+    printf("%d",count);
     
-    cl_int ret = gputhreads->ret;
-    cl_context context = gputhreads->context;
-    cl_mem memobj_in  = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int)*count, NULL, &ret);
-    cl_mem memobj_out = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int)*count, NULL, &ret);
-    cl_mem data_count = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int)*count, NULL, &ret);
+    GpuSchedRegister(1,"./twice.cl","Twice");
+
+
+    cl_int ret;
+    cl_context context = gpuThreads->context;
+    cl_mem memobj_in, memobj_out, data_count = NULL;
+    memobj_in  = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int)*count, NULL, &ret);
+    memobj_out = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int)*count, NULL, &ret);
+    data_count = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int)*count, NULL, &ret);
     
-    cl_command_queue command_queue = gputhreads->command_queue;
+    cl_command_queue command_queue = gpuThreads->command_queue;
     ret = clEnqueueWriteBuffer(command_queue, memobj_in, CL_TRUE, 0,
                                sizeof(int)*count, indata, 0, NULL, NULL);
     ret = clEnqueueWriteBuffer(command_queue, data_count, CL_TRUE, 0,
                                sizeof(count), &count, 0, NULL, NULL);
 
-    cl_kernel kernel = *(task_list[cmd].kernel);
-
-    clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobj_in);
+    print_data(indata, count, "before");
 
 
-    clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&memobj_out);
-    clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&data_count);
-    
-    cl_event ev = NULL;
-    clEnqueueTask(command_queue, kernel, 0, NULL,  &ev);
+    cl_kernel *kernel = task_list[1].kernel;    
+    clSetKernelArg(*kernel, 0, sizeof(cl_mem), (void *)&memobj_in);
+    clSetKernelArg(*kernel, 1, sizeof(cl_mem), (void *)&memobj_out);
+    clSetKernelArg(*kernel, 2, sizeof(cl_mem), (void *)&data_count);
+
+    cl_event ev;
+    clEnqueueTask(command_queue, *kernel, 0, NULL,  &ev);
     
     clEnqueueReadBuffer(command_queue, memobj_out, CL_TRUE, 0,
                         sizeof(int)*count, outdata, 1, &ev, NULL);
 
+
     print_data(outdata, count, "after");
+
+    free(indata);
+    free(outdata);
+    clReleaseCommandQueue(command_queue);
+    clReleaseContext(context);
+
     tester(indata,outdata,count);
 
-    delete [] indata;
-    delete [] outdata;
-    clReleaseCommandQueue(command_queue);
-    clReleaseContext(context);
-    clReleaseKernel(kernel);
-
-
+    delete gpuThreads;
 }
 
 int
 main(int argc, char* argv[])
 {   
-    int task_array_num = DEFAULT;
- 
-    if (argc > 1) { // if an argument was passed,
-        if(atoi(argv[1])) {// and it is numeric
-            task_array_num = atoi(argv[1]);
-        }
+    int task_array_num;
+    if ( ((task_array_num = atoi(argv[1])) == 0)  || argc != 1 ){
+        // invalid argument: fall back to the default value of 5
+        task_array_num = DEFAULT;
     }
     test(task_array_num);
-    return 0;
+    printf("regist task succeed\n");
 }
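
For reference, a standalone sketch of the argument handling and verification order the restored test presumably intends: parse argc/argv defensively, count matches from an initialized counter, and release the buffers only after checking them. This is illustration only, not part of the changeset; the OpenCL kernel is stubbed out with the expected doubling:

    #include <cstdio>
    #include <cstdlib>

    int main(int argc, char *argv[]) {
        int n = 5;                                   // DEFAULT
        if (argc > 1 && atoi(argv[1]) > 0)           // only override with a valid number
            n = atoi(argv[1]);

        int *in  = (int *)malloc(sizeof(int) * n);
        int *out = (int *)malloc(sizeof(int) * n);
        for (int i = 0; i < n; i++) {
            in[i]  = i;
            out[i] = in[i] * 2;                      // stand-in for the twice kernel
        }

        int correct = 0;                             // counter starts at zero
        for (int i = 0; i < n; i++)
            if (out[i] == in[i] * 2) correct++;
        printf("Computed '%d/%d' correct values\n", correct, n);

        free(in);                                    // release only after checking
        free(out);
        return 0;
    }
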
--- a/TaskManager/test/GpuRegistTaskTest/GpuRegistTaskTest.cc.orig	Thu Jul 19 11:56:35 2012 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,101 +0,0 @@
-#include <stdio.h>
-#include <OpenCL/opencl.h>
-#include "GpuThreads.h"
-#include "GpuScheduler.h"
-#include "CellTaskManagerImpl.h"
-
-#define DEFAULT 5
-extern void gpu_register_task(int, char*, char*);
-
-void
-print_data(int *data, int size, const char *title){
-    printf("%s ---\n", title);
-
-    for ( int i = 0; i < size; i++) {
-        printf("%2d ", data[i]);
-    }
-
-    printf("\n");
-}
-
-void
-tester(int *indata,int *outdata, int num){
-    //チェック
-    int check;
-    for (int c=0; c<num; c++){
-        if(outdata[c] == indata[c]*2){
-            check++;
-        }
-        printf("Computed '%d/%d' correct values\n",check,num);
-    }
-}
-
-void
-test(int task_array_num){
-    
-    GpuThreads* gpuThreads = GpuThreads::getInstance();
-    gpuThreads->init();
-
-    int *indata,*outdata;
-    int count;
-    indata = (int *)malloc(sizeof(int)*task_array_num);
-    outdata = (int *)malloc(sizeof(int)*task_array_num);
-    for (count=0; count < task_array_num ;count++) {
-        indata[count] = count;
-    }
-    printf("%d",count);
-    
-    GpuSchedRegister(1,"./twice.cl","Twice");
-
-
-    cl_int ret;
-    cl_context context = gpuThreads->context;
-    cl_mem memobj_in, memobj_out, data_count = NULL;
-    memobj_in  = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int)*count, NULL, &ret);
-    memobj_out = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int)*count, NULL, &ret);
-    data_count = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int)*count, NULL, &ret);
-    
-    cl_command_queue command_queue = gpuThreads->command_queue;
-    ret = clEnqueueWriteBuffer(command_queue, memobj_in, CL_TRUE, 0,
-                               sizeof(int)*count, indata, 0, NULL, NULL);
-    ret = clEnqueueWriteBuffer(command_queue, data_count, CL_TRUE, 0,
-                               sizeof(count), &count, 0, NULL, NULL);
-
-    print_data(indata, count, "before");
-
-
-    cl_kernel *kernel = task_list[1].kernel;    
-    clSetKernelArg(*kernel, 0, sizeof(cl_mem), (void *)&memobj_in);
-    clSetKernelArg(*kernel, 1, sizeof(cl_mem), (void *)&memobj_out);
-    clSetKernelArg(*kernel, 2, sizeof(cl_mem), (void *)&data_count);
-
-    cl_event ev;
-    clEnqueueTask(command_queue, *kernel, 0, NULL,  &ev);
-    
-    clEnqueueReadBuffer(command_queue, memobj_out, CL_TRUE, 0,
-                        sizeof(int)*count, outdata, 1, &ev, NULL);
-
-
-    print_data(outdata, count, "after");
-
-    free(indata);
-    free(outdata);
-    clReleaseCommandQueue(command_queue);
-    clReleaseContext(context);
-
-    tester(indata,outdata,count);
-
-    delete gpuThreads;
-}
-
-int
-main(int argc, char* argv[])
-{   
-    int task_array_num;
-    if ( ((task_array_num = atoi(argv[1])) == 0)  || argc != 1 ){
-        // 無効な引数ならデフォルトの値として5を設定
-        task_array_num = DEFAULT;
-    }
-    test(task_array_num);
-    printf("regist task succeed\n");
-}
Binary file TaskManager/test/GpuRegistTaskTest/GpuRegistTaskTest.o has changed
--- a/TaskManager/test/GpuRunTest/GpuRunTest.cc.orig	Thu Jul 19 11:56:35 2012 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,92 +0,0 @@
-#include <stdio.h>
-#include <fcntl.h>
-#include <sys/stat.h>
-#include "TaskManager.h"
-#include "GpuScheduler.h"
-#include "GpuThreads.h"
-#include "Gpufunc.h"
-#include "types.h"
-#define DEFAULT 5
-
-char usr_help_str[] = "GpuRun [length]\n";
-
-extern void gpu_register_task(int, const char*, const char*);
-extern void task_init(void);
-
-void
-print_data(int *data, int size, const char *title)
-{
-    printf("%s ---\n", title);
-    for ( int i = 0; i < size; i++) {
-        printf("%2d ", data[i]);
-    }
-    printf("\n");
-}
-
-
-
-void
-tester(int *indata, int *outdata, int num) {
-    
-    //チェック
-    int check = 0;
-    for (int c=0; c<num; c++) {
-        if(outdata[c] == indata[c]*2) {
-            check++;
-        }
-    }
-    
-    printf("Computed '%d/%d' correct values\n",check,num);
-    
-}
-
-void
-task_init()
-{
-    int cmd = SchedRun;
-
-    GpuThreads* gputhreads = GpuThreads::getInstance();
-    gputhreads->init();
-
-    GpuSchedRegister(cmd, "twice.cl", "twice");
-}
-
-void
-test(TaskManager *manager, long int length) {
-    
-    int *indata  = (int *)manager->allocate(sizeof(int)*length);
-    int *outdata = new int[length];
-    int count;
-    for (count=0; count < length ;count++) {
-        indata[count] = count;
-    }
-    print_data(indata, count, "before");
-    
-    HTaskPtr schedtask = manager->create_task(SchedRun);
-    schedtask->set_inData(0, indata, sizeof (int)*length);
-    schedtask->set_outData(1, outdata, sizeof (int)*length);
-    schedtask->set_inData(2, &count, sizeof (int));
-    schedtask->set_cpu(SPE_ANY);   // これでは、GPU しか使えないではないか。
-    schedtask->spawn();
-    
-}
-    
-int
-TMmain(TaskManager *manager, int argc, char* argv[])
-{
-    long int length = DEFAULT;
- 
-    if (argc > 1) { //引数が渡されていて、
-        if(atoi(argv[1])) {//数字なら
-            length = atoi(argv[1]);
-        }
-    }
-
-    task_init();
-
-    test(manager, length);
-
-    return 0;
-}
-
-/* end */
Binary file example/Bulk/main.o has changed
Binary file example/Bulk/ppe/Twice.o has changed
Binary file example/Bulk/ppe/task_init.o has changed
Binary file example/Bulk/twice has changed
Binary file example/get_segment/mainMem has changed
Binary file example/get_segment/ppe/Hello.o has changed
Binary file example/get_segment/ppe/task_init.o has changed