Mercurial > hg > Members > kono > Cerium
changeset 775:97a514cf6ad3 before-simple-task
simd
author | yutaka@henri.cr.ie.u-ryukyu.ac.jp |
---|---|
date | Tue, 16 Feb 2010 15:09:54 +0900 |
parents | 5102e1751d6f |
children | 4455e7b0caf3 |
files | Renderer/Engine/spe/DrawSpan.cc |
diffstat | 1 files changed, 43 insertions(+), 48 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/Renderer/Engine/spe/DrawSpan.cc Tue Feb 16 15:00:50 2010 +0900
+++ b/Renderer/Engine/spe/DrawSpan.cc Tue Feb 16 15:09:54 2010 +0900
@@ -57,40 +57,40 @@
 SchedTask *smanager,int x, int y, float z, int world_x, int world_y, float world_z);
-void
-normalize(vector float *v0, vector float *v1)
+
+static inline void
+normalize(vector float& v0, vector float& v1)
 {
-    float norm;
-    float ret[4] __attribute__((aligned(16)));
-    vector float *vret = (vector float *) ret;
-    *vret = spu_mul(*v0,*v1);
+    float norm;
+    vector float ret __attribute__((aligned(16))) = {0,0,0,0};
-    norm = (ret[0] + ret[1] + ret[2]);
-
-    *vret = (vector float)spu_splats(norm);
-    *vret = spu_rsqrte(*vret);
-    *v0 = spu_mul(*v1,*vret);
+    ret = spu_mul(v0,v1);
+    norm = (ret[0] + ret[1] + ret[2]);
+
+    ret = (vector float)spu_splats(norm);
+    ret = spu_rsqrte(ret);
+    v0 = spu_mul(v1,ret);
 }
-static float
-innerProduct1(vector float *v0, vector float *v1)
+static inline float
+innerProduct1(vector float& v0, vector float& v1)
 {
-    float ret[4] __attribute__((aligned(16)));
-    float inner;
-    vector float *vret = (vector float *) ret;
-    *vret = spu_mul(*v0,*v1);
-
-    inner = (ret[0] + ret[1] + ret[2]);
-    if (inner < 0) {
-        inner = 0;
-    }
-
-    return inner;
+    vector float ret __attribute__((aligned(16))) = {0,0,0,0};
+    float inner;
+    ret = spu_mul(v0,v1);
+
+    inner = (ret[0] + ret[1] + ret[2]);
+    if (inner < 0) {
+        inner = 0;
+    }
+
+    return inner;
 }
+
 /**
  * テクスチャは、TEXTURE_SPLIT_PIXEL^2 のブロックに分割する
  *
@@ -433,50 +433,45 @@
 #endif
+    vector float *light_xyz = (vector float*)smanager->global_get(Light);
+
     vector float v_rgb __attribute__((aligned(16))) = {(float)rgb[0],(float)rgb[1],(float)rgb[2],0};
-    int light_rgb;
-    float normal_vector[4] __attribute__((aligned(16))) = {normal_x,normal_y,normal_z,0};
-    vector float *vnormal_vector = (vector float *) normal_vector;
-    float light_vector[4];
-    vector float *vlight_vector = (vector float *) light_vector;
-    float inner_product;
-    float *light_xyz = (float*)smanager->global_get(Light);
-    vector float *vlight_xyz = (vector float *) light_xyz;
+    vector float normal_vector __attribute__((aligned(16))) = {normal_x,normal_y,normal_z,0};
+    vector float light_vector __attribute__((aligned(16))) = {0,0,0,0};
     vector float v_inner __attribute__((aligned(16)));
-
     vector float v_world[4] __attribute__((aligned(16))) = {{world_x, world_y, -world_z, 0}, {world_x, world_y, -world_z, 0}, {world_x, world_y, -world_z, 0}, {0, 0, 0, 0}};
-    normalize(vnormal_vector, vnormal_vector);
-
+    int light_rgb;
+    float inner_product;
-    float tmp_rgb[4] __attribute__((aligned(16))) = {0,0,0,0};
-    vector float *vtmp_rgb = (vector float *) tmp_rgb;
+    normalize(normal_vector, normal_vector);
+
+    vector float vtmp_rgb __attribute__((aligned(16))) = {0,0,0,0};
     int light_num = 4;
     for (int i = 0; i < light_num; i++) {
-        *vlight_vector = spu_sub(v_world[i],vlight_xyz[i]);
-        normalize(vlight_vector, vlight_vector);
-        inner_product = innerProduct1(vnormal_vector,vlight_vector);
+        light_vector = spu_sub(v_world[i],light_xyz[i]);
+        normalize(light_vector, light_vector);
+        inner_product = innerProduct1(normal_vector,light_vector);
         v_inner = spu_splats(inner_product);
-        *vtmp_rgb = spu_madd(v_rgb,v_inner,*vtmp_rgb);
+        vtmp_rgb = spu_madd(v_rgb,v_inner,vtmp_rgb);
     }
     vector unsigned int v_flag __attribute__((aligned(16)));
-    vector float max_rgb __attribute__((aligned(16))) = (vector float)spu_splats((float)255);
+    vector float max_rgb __attribute__((aligned(16))) = (vector float)spu_splats((float)255.0f);
-    v_flag = spu_cmpgt(max_rgb,*vtmp_rgb);
-    *vtmp_rgb = spu_sel(max_rgb,*vtmp_rgb,v_flag);
-
-    vector unsigned int vlast_rgb __attribute__((aligned(16)));
-    vlast_rgb = spu_convtu(*vtmp_rgb,0);
-    unsigned int *last_rgb = (unsigned int*) &vlast_rgb;
+    v_flag = spu_cmpgt(max_rgb,vtmp_rgb);
+    vtmp_rgb = spu_sel(max_rgb,vtmp_rgb,v_flag);
+
+    vector unsigned int last_rgb __attribute__((aligned(16)));
+    last_rgb = spu_convtu(vtmp_rgb,0);
     //計算した rgb を light_rgb にまとめる。
```