Mercurial > hg > Members > kono > Cerium
comparison Renderer/Engine/spe/DrawSpan.cc @ 775:97a514cf6ad3 before-simple-task
simd
author | yutaka@henri.cr.ie.u-ryukyu.ac.jp |
---|---|
date | Tue, 16 Feb 2010 15:09:54 +0900 |
parents | dc26593f8c40 |
children | 4455e7b0caf3 |
comparison
equal
deleted
inserted
replaced
774:5102e1751d6f | 775:97a514cf6ad3 |
---|---|
55 | 55 |
56 static int infinity_light_calc(int color,float normal_x, float normal_y, float normal_z, | 56 static int infinity_light_calc(int color,float normal_x, float normal_y, float normal_z, |
57 SchedTask *smanager,int x, int y, float z, int world_x, int world_y, float world_z); | 57 SchedTask *smanager,int x, int y, float z, int world_x, int world_y, float world_z); |
58 | 58 |
59 | 59 |
60 void | 60 |
61 normalize(vector float *v0, vector float *v1) | 61 static inline void |
62 { | 62 normalize(vector float& v0, vector float& v1) |
63 float norm; | 63 { |
64 float ret[4] __attribute__((aligned(16))); | 64 float norm; |
65 vector float *vret = (vector float *) ret; | 65 vector float ret __attribute__((aligned(16))) = {0,0,0,0}; |
66 *vret = spu_mul(*v0,*v1); | 66 |
67 | 67 ret = spu_mul(v0,v1); |
68 norm = (ret[0] + ret[1] + ret[2]); | 68 norm = (ret[0] + ret[1] + ret[2]); |
69 | 69 |
70 *vret = (vector float)spu_splats(norm); | 70 ret = (vector float)spu_splats(norm); |
71 *vret = spu_rsqrte(*vret); | 71 ret = spu_rsqrte(ret); |
72 *v0 = spu_mul(*v1,*vret); | 72 v0 = spu_mul(v1,ret); |
73 } | 73 } |
74 | 74 |
75 static float | 75 static inline float |
76 innerProduct1(vector float *v0, vector float *v1) | 76 innerProduct1(vector float& v0, vector float& v1) |
77 { | 77 { |
78 | 78 |
79 float ret[4] __attribute__((aligned(16))); | 79 vector float ret __attribute__((aligned(16))) = {0,0,0,0}; |
80 float inner; | 80 float inner; |
81 vector float *vret = (vector float *) ret; | 81 ret = spu_mul(v0,v1); |
82 *vret = spu_mul(*v0,*v1); | 82 |
83 | 83 inner = (ret[0] + ret[1] + ret[2]); |
84 inner = (ret[0] + ret[1] + ret[2]); | 84 if (inner < 0) { |
85 if (inner < 0) { | 85 inner = 0; |
86 inner = 0; | 86 } |
87 } | 87 |
88 | 88 return inner; |
89 return inner; | 89 } |
90 } | 90 |
91 | 91 |
92 | 92 |
93 | 93 |
94 /** | 94 /** |
95 * テクスチャは、TEXTURE_SPLIT_PIXEL^2 のブロックに分割する | 95 * テクスチャは、TEXTURE_SPLIT_PIXEL^2 のブロックに分割する |
431 rgb[1] = (color & 0x0000ff00) >> 8; | 431 rgb[1] = (color & 0x0000ff00) >> 8; |
432 rgb[0] = (color & 0x000000ff); | 432 rgb[0] = (color & 0x000000ff); |
433 #endif | 433 #endif |
434 | 434 |
435 | 435 |
436 vector float *light_xyz = (vector float*)smanager->global_get(Light); | |
437 | |
436 vector float v_rgb __attribute__((aligned(16))) = {(float)rgb[0],(float)rgb[1],(float)rgb[2],0}; | 438 vector float v_rgb __attribute__((aligned(16))) = {(float)rgb[0],(float)rgb[1],(float)rgb[2],0}; |
437 int light_rgb; | 439 vector float normal_vector __attribute__((aligned(16))) = {normal_x,normal_y,normal_z,0}; |
438 float normal_vector[4] __attribute__((aligned(16))) = {normal_x,normal_y,normal_z,0}; | 440 vector float light_vector __attribute__((aligned(16))) = {0,0,0,0}; |
439 vector float *vnormal_vector = (vector float *) normal_vector; | |
440 float light_vector[4]; | |
441 vector float *vlight_vector = (vector float *) light_vector; | |
442 float inner_product; | |
443 float *light_xyz = (float*)smanager->global_get(Light); | |
444 vector float *vlight_xyz = (vector float *) light_xyz; | |
445 vector float v_inner __attribute__((aligned(16))); | 441 vector float v_inner __attribute__((aligned(16))); |
446 | |
447 vector float v_world[4] __attribute__((aligned(16))) = {{world_x, world_y, -world_z, 0}, | 442 vector float v_world[4] __attribute__((aligned(16))) = {{world_x, world_y, -world_z, 0}, |
448 {world_x, world_y, -world_z, 0}, | 443 {world_x, world_y, -world_z, 0}, |
449 {world_x, world_y, -world_z, 0}, | 444 {world_x, world_y, -world_z, 0}, |
450 {0, 0, 0, 0}}; | 445 {0, 0, 0, 0}}; |
451 | 446 |
452 | 447 |
453 normalize(vnormal_vector, vnormal_vector); | 448 int light_rgb; |
454 | 449 float inner_product; |
455 | 450 |
456 float tmp_rgb[4] __attribute__((aligned(16))) = {0,0,0,0}; | 451 normalize(normal_vector, normal_vector); |
457 vector float *vtmp_rgb = (vector float *) tmp_rgb; | 452 |
453 vector float vtmp_rgb __attribute__((aligned(16))) = {0,0,0,0}; | |
458 | 454 |
459 int light_num = 4; | 455 int light_num = 4; |
460 | 456 |
461 for (int i = 0; i < light_num; i++) { | 457 for (int i = 0; i < light_num; i++) { |
462 | 458 |
463 *vlight_vector = spu_sub(v_world[i],vlight_xyz[i]); | 459 light_vector = spu_sub(v_world[i],light_xyz[i]); |
464 normalize(vlight_vector, vlight_vector); | 460 normalize(light_vector, light_vector); |
465 inner_product = innerProduct1(vnormal_vector,vlight_vector); | 461 inner_product = innerProduct1(normal_vector,light_vector); |
466 v_inner = spu_splats(inner_product); | 462 v_inner = spu_splats(inner_product); |
467 *vtmp_rgb = spu_madd(v_rgb,v_inner,*vtmp_rgb); | 463 vtmp_rgb = spu_madd(v_rgb,v_inner,vtmp_rgb); |
468 | 464 |
469 } | 465 } |
470 | 466 |
471 vector unsigned int v_flag __attribute__((aligned(16))); | 467 vector unsigned int v_flag __attribute__((aligned(16))); |
472 vector float max_rgb __attribute__((aligned(16))) = (vector float)spu_splats((float)255); | 468 vector float max_rgb __attribute__((aligned(16))) = (vector float)spu_splats((float)255.0f); |
473 | 469 |
474 v_flag = spu_cmpgt(max_rgb,*vtmp_rgb); | 470 v_flag = spu_cmpgt(max_rgb,vtmp_rgb); |
475 *vtmp_rgb = spu_sel(max_rgb,*vtmp_rgb,v_flag); | 471 vtmp_rgb = spu_sel(max_rgb,vtmp_rgb,v_flag); |
476 | 472 |
477 vector unsigned int vlast_rgb __attribute__((aligned(16))); | 473 vector unsigned int last_rgb __attribute__((aligned(16))); |
478 vlast_rgb = spu_convtu(*vtmp_rgb,0); | 474 last_rgb = spu_convtu(vtmp_rgb,0); |
479 unsigned int *last_rgb = (unsigned int*) &vlast_rgb; | |
480 | 475 |
481 | 476 |
482 //計算した rgb を light_rgb にまとめる。 | 477 //計算した rgb を light_rgb にまとめる。 |
483 #if LITTLEENDIAN | 478 #if LITTLEENDIAN |
484 light_rgb = (last_rgb[0] << 24) + (last_rgb[1] << 16) + (last_rgb[2] << 8) + (last_rgb[3]); | 479 light_rgb = (last_rgb[0] << 24) + (last_rgb[1] << 16) + (last_rgb[2] << 8) + (last_rgb[3]); |