Mercurial > hg > Members > kono > Cerium
view Renderer/Engine/spe/DrawSpan.cc @ 775:97a514cf6ad3 before-simple-task
simd
author | yutaka@henri.cr.ie.u-ryukyu.ac.jp |
---|---|
date | Tue, 16 Feb 2010 15:09:54 +0900 |
parents | dc26593f8c40 |
children | 4455e7b0caf3 |
line wrap: on
line source
#include <stdlib.h> #include <string.h> #include "DrawSpan.h" #include "polygon_pack.h" #include "texture.h" #include "viewer_types.h" #include "Func.h" #include "sys.h" #include "global_alloc.h" #include "SchedTask.h" #include "Tapestry.h" #include "SpanPack.h" #include <spu_intrinsics.h> #include <math.h> #if (__LITTLE_ENDIAN__) #define LITTLEENDIAN 1 #else #define LITTLEENDIAN 0 #endif SchedDefineTask(DrawSpan); #define TEX_LOAD1 0 #define TEX_LOAD2 1 #define SPAN_PACK_LOAD 2 #define FB_STORE 3 typedef struct { int **linebuf; float *zRow; TileListPtr tileList; int doneWrite; } G, *Gptr; static int** linebuf_init(SchedTask *smanager, int width, int height, int rgb); static float* zRow_init(SchedTask *smanager, int width, int height); // static TilePtr set_rgb(memaddr addr, int wait_tag); // static void set_rgbs(memaddr addr, uint32 *max_addr, int wait_tag); static uint32 get_rgb(int tx, int ty, TilePtr tile); // static TilePtr isAvailableTile(memaddr addr); static memaddr getTile(int tx, int ty, int tw, memaddr tex_addr_top); static int getTexBlock(int tx, int ty, int twidth); static void updateBuffer(Gptr g, float zpos, int rangex, int loaclx, int localy, int tex_x, int tex_y, float normal_x, float nomral_x, float normal_z, TilePtr tile, int world_x, int world_y, float world_z,SchedTask *smanager); // static void reboot(SpanPackPtr spack, int cur_span_x); static int drawDot1(SchedTask *smanager, Gptr g, SpanPtr span, int startx, int endx, int wait_tag); //static void drawDot2(SchedTask *smanager, SpanPtr span, int startx, int endx, int js, int wait_tag); static int drawLine1(SchedTask *smanager, Gptr g, SpanPtr span, int startx, int endx, int wait_tag); // static void drawLine2(SchedTask *smanager, SpanPtr span, int startx, int endx, int js, int wait_tag); static int infinity_light_calc(int color,float normal_x, float normal_y, float normal_z, SchedTask *smanager,int x, int y, float z, int world_x, int world_y, float world_z); static inline void normalize(vector float& v0, vector float& v1) { float norm; vector float ret __attribute__((aligned(16))) = {0,0,0,0}; ret = spu_mul(v0,v1); norm = (ret[0] + ret[1] + ret[2]); ret = (vector float)spu_splats(norm); ret = spu_rsqrte(ret); v0 = spu_mul(v1,ret); } static inline float innerProduct1(vector float& v0, vector float& v1) { vector float ret __attribute__((aligned(16))) = {0,0,0,0}; float inner; ret = spu_mul(v0,v1); inner = (ret[0] + ret[1] + ret[2]); if (inner < 0) { inner = 0; } return inner; } /** * テクスチャは、TEXTURE_SPLIT_PIXEL^2 のブロックに分割する * * +---+---+---+---+---+---+ * | 0 | 1 | 2 | 3 | 4 | 5 | * +---+---+---+---+---+---+ * | | | | | |11 | * +---+---+---+---+---+---+ * | | | | | |17 | * +---+---+---+---+---+---+ * | | | | | |23 | * +---+---+---+---+---+---+ * | | | | | |29 | * +---+---+---+---+---+---+ * | | | | | |35 | * +---+---+---+---+---+---+ * * 一辺を TEXTURE_SPLIT とする * 各ブロックの数字がブロックIDとなる。 */ /** * テクスチャの座標から、 * テクスチャのどのブロックかを求める * * @param[in] tx X coordinates of texture * @param[in] tx Y coordinates of texture * @param[in] twidth Width of texture * @return block ID */ static int getTexBlock(int tx, int ty, int twidth) { int blockX, blockY; blockX = tx / TEXTURE_SPLIT_PIXEL; blockY = ty / TEXTURE_SPLIT_PIXEL; return blockX + (twidth/TEXTURE_SPLIT_PIXEL)*blockY; } /** * block ID と、テクスチャの TOP address から * (tx,ty) で使われるテクスチャの Tile addres を求める * * @param[in] tx X coordinates of texture * @param[in] tx Y coordinates of texture * @param[in] tw Width of texture * @param[in] tex_addr_top (tx,ty) で使うテクスチャの先頭address * @return block ID */ static memaddr getTile(int tx, int ty, int tw, memaddr tex_addr_top) { int block = getTexBlock(tx, ty, tw); return tex_addr_top + block * TEXTURE_BLOCK_SIZE * sizeof(uint32); } /** * FrameBuffer に書き込む rgb の領域初期化 * * @param width Width of Buffer * @param height Height of Buffer * @param rgb Initial value of RGB at Buffer * @return Buffer */ static int ** linebuf_init(SchedTask *smanager, int width, int height, int rgb) { int **linebuf = (int**)smanager->allocate(height*sizeof(int*)); for(int h = 0;h<height;h++) { int *buf = linebuf[h] = (int*)smanager->get_output(h); for (int i = 0; i < width; i++) { buf[i] = rgb; } } return linebuf; } /** * Z-Buffer の初期化 * * @param width Width of Z-Buffer * @param height Height of Z-Buffer * @return Z-Buffer */ static float* zRow_init(SchedTask *smanager, int width, int height) { float *buf = (float*)smanager->allocate(sizeof(float)*width*height); float def = 65535.0f; for (int i = 0; i < width*height; i++) { buf[i] = def; } return buf; } static uint32 get_rgb(int tx, int ty, TilePtr tile) { uint32 *data = (uint32 *)tile->data; return data[(TEXTURE_SPLIT_PIXEL)*ty+tx]; } /** * zRow と Linebuf を更新する * * @param zpos 更新する pixel のZ座標 * @param rangex このタスクが処理する描画領域の x の長さ * @param x pixel の、描画領域内での x 座標 * @param y 〃 の、y 座標 * @param tex_x pixel が使用するテクスチャの、Tile (8x8) 内での x 座標 * @param tex_y 〃 の y 座標 * @param tex_addr テクスチャのアドレス(MainMemory) */ static void updateBuffer(Gptr g, float zpos, int rangex, int localx, int localy, int tex_x, int tex_y, float normal_x, float normal_y, float normal_z, TilePtr tile, int world_x, int world_y, float world_z, SchedTask *smanager) { int color = get_rgb(tex_x, tex_y, tile); /*下位4bitを抽出*/ #if LITTLEENDIAN int alpha = color & 0x000000ff; #else int alpha = color & 0xff000000; #endif /*完全に透けているか判断*/ int flag = (alpha != 0); color = infinity_light_calc(color,normal_x,normal_y,normal_z, smanager,localx,localy,zpos, world_x,world_y,world_z); g->zRow[localx + (rangex*localy)] = zpos*flag + g->zRow[localx + (rangex*localy)]*(1-flag); int *point = &g->linebuf[localy][localx] ; *point = color*flag + *point *(1-flag); } /** * 長さが 1 の Span の描画 (要するに 1 pixel) * * @param span Span * @param startx 描画開始範囲 * @param endx 描画終了範囲 */ static int drawDot1(SchedTask *smanager, Gptr g, SpanPtr span, int startx, int endx, int wait_tag) { int rangex = endx - startx + 1; float normal_x = span->normal_x; float normal_y = span->normal_y; float normal_z = span->normal_z; /* span->x に対応する Texture の座標 (tex_xpos, tex_ypos) */ int tex_xpos, tex_ypos; // span の始点に対応する Texture の座標 (tex1, tey1) float tex = span->tex_x1; float tey = span->tex_y1; // span の始点に対応する z 座標 float zpos = span->start_z; /* Tile 内での座標 */ int localx = getLocalX(span->x-1); int localy = getLocalY(span->y-1); /** * (tex_xpos, tex_ypos) の、Tile 内(上の図参照)での座標と * そのブロックのアドレス(MainMemory) */ int tex_localx; int tex_localy; memaddr tex_addr; if (span->x < startx || endx < span->x) { return -1; } tex_xpos = (int)((span->tex_width-1) * tex); tex_ypos = (int)((span->tex_height-1) * tey); if (zpos < g->zRow[localx + (rangex*localy)]) { tex_addr = getTile(tex_xpos, tex_ypos, span->tex_width, (memaddr)span->tex_addr); tex_localx = tex_xpos % TEXTURE_SPLIT_PIXEL; tex_localy = tex_ypos % TEXTURE_SPLIT_PIXEL; TilePtr tile = smanager->get_segment(tex_addr,g->tileList); smanager->wait_segment(tile); updateBuffer(g, zpos, rangex, localx, localy, tex_localx, tex_localy, normal_x,normal_y,normal_z,tile, span->x, span->y, zpos, smanager); } return -1; } #if 0 static void drawDot2(SchedTask *smanager, SpanPtr span, int startx, int end, int js, int wait_tag) { //printf("%d\n", js); } #endif /** * 長さが 1 より大きい Span の描画 * * 本来の目的として、この関数(drawLine1) では * : 既に SPE 上に Tile のある pixel だけ描画 * : それ以外は、ここで予め DMA load しておき、 * : drawLine2 で一気に描画する * ってものだったんだけど、どうも上手く行かなかったので * 今は drawLine1 で load -> wait -> rendering を全部やってます * (rendering といっても、rendering buffer に書き込むだけで * まだ main memory (frame buffer) に dma store してるわけではない) * * @param span Span * @param startx 描画開始範囲 * @param endx 描画終了範囲 * @return 「span のどの位置まで rendering が終わったか」の x 座標 */ static int drawLine1(SchedTask *smanager, Gptr g, SpanPtr span, int startx, int endx, int wait_tag) { int x = span->x; int rangex = endx - startx + 1; int x_len = span->length_x; float normal_x = span->normal_x; float normal_y = span->normal_y; float normal_z = span->normal_z; int js = (x < startx) ? startx - x : 0; int je = (x + x_len > endx) ? endx - x : x_len; /* span->x に対応する Texture の座標 (tex_xpos, tex_ypos) */ int tex_xpos, tex_ypos; // span の始点に対応する座標 (tex1, tey1) float tex1 = span->tex_x1; float tey1 = span->tex_y1; // span の終点に対応する座標 (tex2, tey2) float tex2 = span->tex_x2; float tey2 = span->tex_y2; // span の始点、終点に対応する z 座標 float zpos1 = span->start_z; float zpos2 = span->end_z; //spanを右から左に見ていくうちに、zが下がるのか、上がっていくのか。 float z_inclination = (zpos2 - zpos1) / x_len; float world_z = zpos2; // Tile 内での座標 int localx, localy = getLocalY(span->y-1); int ret = je+1; //for (int j = js; j <= je; j++) { for (int j = je; j >= js; j--) { float tex_x, tex_y, tex_z; world_z -= z_inclination; localx = getLocalX(x-1+j); tex_z = zpos1*(x_len-1-j)/(x_len-1) + zpos2*j/(x_len-1); tex_x = tex1*(x_len-1-j)/(x_len-1) + tex2*j/(x_len-1); tex_y = tey1*(x_len-1-j)/(x_len-1) + tey2*j/(x_len-1); if (tex_x > 1) tex_x = 1; if (tex_x < 0) tex_x = 0; if (tex_y > 1) tex_y = 1; if (tex_y < 0) tex_y = 0; tex_xpos = (int)((span->tex_width-1) * tex_x); tex_ypos = (int)((span->tex_height-1) * tex_y); if (tex_z < g->zRow[localx + (rangex*localy)]) { // (tex_xpos, tex_ypos) の、Tile 内(上の図参照)での座標と // そのブロックのアドレス(MainMemory) memaddr tex_addr; int tex_localx; int tex_localy; tex_addr = getTile(tex_xpos, tex_ypos, span->tex_width, (memaddr)span->tex_addr); tex_localx = tex_xpos % TEXTURE_SPLIT_PIXEL; tex_localy = tex_ypos % TEXTURE_SPLIT_PIXEL; TilePtr tile = smanager->get_segment(tex_addr,g->tileList); smanager->wait_segment(tile); updateBuffer(g, tex_z, rangex, localx, localy, tex_localx, tex_localy, normal_x, normal_y, normal_z, tile, span->x+j, span->y, world_z, smanager); } } return ret; } static int infinity_light_calc(int color,float normal_x, float normal_y, float normal_z, SchedTask *smanager, int x, int y, float z, int world_x, int world_y, float world_z) { unsigned char rgb[4]; // 引数で受け取った color の rgb 情報の抜き出し #if LITTLEENDIAN rgb[0] = (color & 0xff000000) >> 24; rgb[1] = (color & 0x00ff0000) >> 16; rgb[2] = (color & 0x0000ff00) >> 8; rgb[3] = (color & 0x000000ff); #else rgb[3] = (color & 0xff000000) >> 24; rgb[2] = (color & 0x00ff0000) >> 16; rgb[1] = (color & 0x0000ff00) >> 8; rgb[0] = (color & 0x000000ff); #endif vector float *light_xyz = (vector float*)smanager->global_get(Light); vector float v_rgb __attribute__((aligned(16))) = {(float)rgb[0],(float)rgb[1],(float)rgb[2],0}; vector float normal_vector __attribute__((aligned(16))) = {normal_x,normal_y,normal_z,0}; vector float light_vector __attribute__((aligned(16))) = {0,0,0,0}; vector float v_inner __attribute__((aligned(16))); vector float v_world[4] __attribute__((aligned(16))) = {{world_x, world_y, -world_z, 0}, {world_x, world_y, -world_z, 0}, {world_x, world_y, -world_z, 0}, {0, 0, 0, 0}}; int light_rgb; float inner_product; normalize(normal_vector, normal_vector); vector float vtmp_rgb __attribute__((aligned(16))) = {0,0,0,0}; int light_num = 4; for (int i = 0; i < light_num; i++) { light_vector = spu_sub(v_world[i],light_xyz[i]); normalize(light_vector, light_vector); inner_product = innerProduct1(normal_vector,light_vector); v_inner = spu_splats(inner_product); vtmp_rgb = spu_madd(v_rgb,v_inner,vtmp_rgb); } vector unsigned int v_flag __attribute__((aligned(16))); vector float max_rgb __attribute__((aligned(16))) = (vector float)spu_splats((float)255.0f); v_flag = spu_cmpgt(max_rgb,vtmp_rgb); vtmp_rgb = spu_sel(max_rgb,vtmp_rgb,v_flag); vector unsigned int last_rgb __attribute__((aligned(16))); last_rgb = spu_convtu(vtmp_rgb,0); //計算した rgb を light_rgb にまとめる。 #if LITTLEENDIAN light_rgb = (last_rgb[0] << 24) + (last_rgb[1] << 16) + (last_rgb[2] << 8) + (last_rgb[3]); #else light_rgb = (last_rgb[3] << 24) + (last_rgb[2] << 16) + (last_rgb[1] << 8) + (last_rgb[0]); #endif return light_rgb; } static int run(SchedTask *smanager, void *rbuf, void *wbuf) { G g0; Gptr g = &g0; SpanPackPtr spack = (SpanPackPtr)smanager->get_input(0); SpanPackPtr next_spack = (SpanPackPtr)smanager->allocate(sizeof(SpanPack)); SpanPackPtr free_spack = next_spack; // next_spack の free() 用 Span *span; Span nop_span; nop_span.length_x = 1; // uint32 display = (long)smanager->get_param(0); // int screen_width = (long)smanager->get_param(1); int rangex_start = (long)smanager->get_param(2); int rangex_end = (long)smanager->get_param(3); // このタスクが担当する x の範囲 int rangex = rangex_end - rangex_start + 1; // y の範囲 int rangey = (long)smanager->get_param(4); g->tileList = (TileListPtr)smanager->global_get(GLOBAL_TILE_LIST); g->zRow = zRow_init(smanager, rangex, rangey); g->linebuf = linebuf_init(smanager, rangex, rangey, 0); int tl_tag[2] = {TEX_LOAD1, TEX_LOAD2}; int tl_tag_flg1 = 0; int tl_tag_flg2 = 1; do { /** * SpanPack->next が存在する場合、 * 現在の SpanPack を処理してる間に * 次の SpanPack の DMA 転送を行う */ if (spack->next != NULL) { smanager->dma_load(next_spack, (memaddr)spack->next, sizeof(SpanPack), SPAN_PACK_LOAD); } else { next_spack = NULL; } SpanPtr resume_span = &nop_span; for (int t = 0; t < spack->info.size; t++) { SpanPtr next_span; span = &spack->span[t]; if (span->length_x != 1) { drawLine1( smanager, g, span, rangex_start, rangex_end, tl_tag[tl_tag_flg1]); } else { drawDot1( smanager, g, span, rangex_start, rangex_end, tl_tag[tl_tag_flg1]); } next_span = span; resume_span = next_span; tl_tag_flg1 ^= 1; tl_tag_flg2 ^= 1; } smanager->dma_wait(SPAN_PACK_LOAD); SpanPackPtr tmp_spack = spack; spack = next_spack; next_spack = tmp_spack; } while (spack); free(g->zRow); free(g->linebuf); //FINISH: /** * goto FINISH; の時は reboot なので * linebuf, zRow は free() しない */ free(free_spack); return 0; } /* end */