Mercurial > hg > Game > Cerium
changeset 764:f1d92ef5cbaa draft
simd
author | yutaka@henri.cr.ie.u-ryukyu.ac.jp |
---|---|
date | Wed, 10 Feb 2010 19:33:27 +0900 |
parents | 4cc05c023ff9 |
children | be8ffe653840 |
files | Renderer/Engine/SceneGraphRoot.cc Renderer/Engine/spe/DrawSpan.cc |
diffstat | 2 files changed, 70 insertions(+), 56 deletions(-) [+] |
line wrap: on
line diff
--- a/Renderer/Engine/SceneGraphRoot.cc Sun Feb 07 17:52:52 2010 +0900
+++ b/Renderer/Engine/SceneGraphRoot.cc Wed Feb 10 19:33:27 2010 +0900
@@ -382,6 +382,10 @@
         light_vector[i*4] /= light_vector[i*4+2];
         light_vector[i*4+1] /= light_vector[i*4+2];
+        /*SIMD演算のため*/
+        light_vector[i*4+2] *= -1;
+        light_vector[i*4+3] *= -1;
+
     }
--- a/Renderer/Engine/spe/DrawSpan.cc Sun Feb 07 17:52:52 2010 +0900
+++ b/Renderer/Engine/spe/DrawSpan.cc Wed Feb 10 19:33:27 2010 +0900
@@ -10,6 +10,7 @@
 #include "SchedTask.h"
 #include "Tapestry.h"
 #include "SpanPack.h"
+#include <spu_intrinsics.h>
 #include <math.h>
 #if (__LITTLE_ENDIAN__)
@@ -57,27 +58,39 @@
 void
-normalize(float *v0, float *v1)
+normalize(vector float *v0, vector float *v1)
 {
-    float norm, dnorm;
+    float norm;
+    float ret[4] __attribute__((aligned(16)));
+    vector float *vret = (vector float *) ret;
+    *vret = spu_mul(*v0,*v1);
 
-    norm = sqrt(v1[0]*v1[0] + v1[1]*v1[1] + v1[2]*v1[2]);
-    if (norm > 0) {
-        dnorm = 1.0/norm;
-        v0[0] = v1[0]*dnorm;
-        v0[1] = v1[1]*dnorm;
-        v0[2] = v1[2]*dnorm;
-        v0[3] = v1[3]*dnorm;
-    }
+    norm = (ret[0] + ret[1] + ret[2]);
+
+    *vret = (vector float)spu_splats(norm);
+    *vret = spu_rsqrte(*vret);
+    *v0 = spu_mul(*v1,*vret);
 }
 static float
-innerProduct1(float *v0, float *v1)
+innerProduct1(vector float *v0, vector float *v1)
 {
-    return (v0[0]*v1[0] + v0[1]*v1[1] + v0[2]*v1[2]);
+
+    float ret[4] __attribute__((aligned(16)));
+    float inner;
+    vector float *vret = (vector float *) ret;
+    *vret = spu_mul(*v0,*v1);
+
+    inner = (ret[0] + ret[1] + ret[2]);
+    if (inner < 0) {
+        inner = 0;
+    }
+
+    return inner;
 }
+
 /**
  * テクスチャは、TEXTURE_SPLIT_PIXEL^2 のブロックに分割する
  *
@@ -403,17 +416,8 @@
         int world_x, int world_y, float world_z)
 {
+
     unsigned char rgb[4];
-    int light_rgb;
-    int flag;
-    float normal_vector[4] = {normal_x,normal_y,normal_z,0};
-    // 光のベクトル,きめうちしちゃった。どうにかする
-    //float light_vector[4] = {0,0,-1,0};
-    float light_vector[4];
-    float inner_product;
-    float *light_xyz = (float*)smanager->global_get(Light);
-
-    normalize(normal_vector, normal_vector);
     // 引数で受け取った color の rgb 情報の抜き出し
 #if LITTLEENDIAN
@@ -428,52 +432,58 @@
     rgb[0] = (color & 0x000000ff);
 #endif
-    int tmp_rgb[3] = {0,0,0};
+
+    vector float v_rgb __attribute__((aligned(16))) = {(float)rgb[0],(float)rgb[1],(float)rgb[2],0};
+    int light_rgb;
+    float normal_vector[4] __attribute__((aligned(16))) = {normal_x,normal_y,normal_z,0};
+    vector float *vnormal_vector = (vector float *) normal_vector;
+    float light_vector[4];
+    vector float *vlight_vector = (vector float *) light_vector;
+    float inner_product;
+    float *light_xyz = (float*)smanager->global_get(Light);
+    vector float *vlight_xyz = (vector float *) light_xyz;
+    vector float v_inner __attribute__((aligned(16)));
+
+    vector float v_world[4] __attribute__((aligned(16))) = {{world_x, world_y, -world_z, 0},
+                                                            {world_x, world_y, -world_z, 0},
+                                                            {world_x, world_y, -world_z, 0},
+                                                            {0, 0, 0, 0}};
+
+
+    normalize(vnormal_vector, vnormal_vector);
+
+
+    float tmp_rgb[4] __attribute__((aligned(16))) = {0,0,0,0};
+    vector float *vtmp_rgb = (vector float *) tmp_rgb;
+
     int light_num = 4;
+
     for (int i = 0; i < light_num; i++) {
-        //printf("light_xyz[%d] %f\n",i*4,light_xyz[i*4]);
-        //printf("light_xyz[%d] %f\n",i*4+1,light_xyz[i*4+1]);
-        //printf("light_xyz[%d] %f\n",i*4+2,light_xyz[i*4+2]);
-        //printf("light_xyz[%d] %f\n",i*4+3fg,light_xyz[i*4+3]);
-
-        light_vector[0] = world_x - light_xyz[i*4];
-        light_vector[1] = world_y - light_xyz[i*4+1];
-        light_vector[2] = light_xyz[i*4+2] - world_z;
-        light_vector[3] = light_xyz[i*4+3];
-
-        normalize(light_vector, light_vector);
-
-        // 法線ベクトルと光源ベクトルとの内積をとる
-        inner_product = innerProduct1(normal_vector,light_vector);
-
-        //printf("inner_product %f\n",inner_product);
-
-        // 内積がマイナスの場合は色がない。
-        flag = (inner_product > 0);
-
-        // 内積を rgb にかけていく
-        tmp_rgb[0] += (unsigned char)(rgb[0]*inner_product*flag);
-        tmp_rgb[1] += (unsigned char)(rgb[1]*inner_product*flag);
-        tmp_rgb[2] += (unsigned char)(rgb[2]*inner_product*flag);
+        *vlight_vector = spu_sub(v_world[i],vlight_xyz[i]);
+        normalize(vlight_vector, vlight_vector);
+        inner_product = innerProduct1(vnormal_vector,vlight_vector);
+        v_inner = spu_splats(inner_product);
+        *vtmp_rgb = spu_madd(v_rgb,v_inner,*vtmp_rgb);
     }
-    int rgb_flag[3];
-    for (int i = 0; i < 3; i++) {
-        rgb_flag[i] = (tmp_rgb[i] > 255);
-    }
+    vector unsigned int v_flag __attribute__((aligned(16)));
+    vector float max_rgb __attribute__((aligned(16))) = (vector float)spu_splats((float)255);
 
-    rgb[0] = tmp_rgb[0]*(1 - rgb_flag[0]) + 255*(rgb_flag[0]);
-    rgb[1] = tmp_rgb[1]*(1 - rgb_flag[1]) + 255*(rgb_flag[1]);
-    rgb[2] = tmp_rgb[2]*(1 - rgb_flag[2]) + 255*(rgb_flag[2]);
+    v_flag = spu_cmpgt(max_rgb,*vtmp_rgb);
+    *vtmp_rgb = spu_sel(max_rgb,*vtmp_rgb,v_flag);
+
+    vector unsigned int vlast_rgb __attribute__((aligned(16)));
+    vlast_rgb = spu_convtu(*vtmp_rgb,0);
+    unsigned int *last_rgb = (unsigned int*) &vlast_rgb;
     //計算した rgb を light_rgb にまとめる。
 #if LITTLEENDIAN
-    light_rgb = (rgb[0] << 24) + (rgb[1] << 16) + (rgb[2] << 8) + (rgb[3]);
+    light_rgb = (last_rgb[0] << 24) + (last_rgb[1] << 16) + (last_rgb[2] << 8) + (last_rgb[3]);
 #else
-    light_rgb = (rgb[3] << 24) + (rgb[2] << 16) + (rgb[1] << 8) + (rgb[0]);
+    light_rgb = (last_rgb[3] << 24) + (last_rgb[2] << 16) + (last_rgb[1] << 8) + (last_rgb[0]);
 #endif
     return light_rgb;