comparison Renderer/Engine/spe/DrawSpan.cc @ 775:97a514cf6ad3 before-simple-task

simd
author yutaka@henri.cr.ie.u-ryukyu.ac.jp
date Tue, 16 Feb 2010 15:09:54 +0900
parents dc26593f8c40
children 4455e7b0caf3
comparison
equal deleted inserted replaced
774:5102e1751d6f 775:97a514cf6ad3
55 55
56 static int infinity_light_calc(int color,float normal_x, float normal_y, float normal_z, 56 static int infinity_light_calc(int color,float normal_x, float normal_y, float normal_z,
57 SchedTask *smanager,int x, int y, float z, int world_x, int world_y, float world_z); 57 SchedTask *smanager,int x, int y, float z, int world_x, int world_y, float world_z);
58 58
59 59
60 void 60
61 normalize(vector float *v0, vector float *v1) 61 static inline void
62 { 62 normalize(vector float& v0, vector float& v1)
63 float norm; 63 {
64 float ret[4] __attribute__((aligned(16))); 64 float norm;
65 vector float *vret = (vector float *) ret; 65 vector float ret __attribute__((aligned(16))) = {0,0,0,0};
66 *vret = spu_mul(*v0,*v1); 66
67 67 ret = spu_mul(v0,v1);
68 norm = (ret[0] + ret[1] + ret[2]); 68 norm = (ret[0] + ret[1] + ret[2]);
69 69
70 *vret = (vector float)spu_splats(norm); 70 ret = (vector float)spu_splats(norm);
71 *vret = spu_rsqrte(*vret); 71 ret = spu_rsqrte(ret);
72 *v0 = spu_mul(*v1,*vret); 72 v0 = spu_mul(v1,ret);
73 } 73 }
74 74
75 static float 75 static inline float
76 innerProduct1(vector float *v0, vector float *v1) 76 innerProduct1(vector float& v0, vector float& v1)
77 { 77 {
78 78
79 float ret[4] __attribute__((aligned(16))); 79 vector float ret __attribute__((aligned(16))) = {0,0,0,0};
80 float inner; 80 float inner;
81 vector float *vret = (vector float *) ret; 81 ret = spu_mul(v0,v1);
82 *vret = spu_mul(*v0,*v1); 82
83 83 inner = (ret[0] + ret[1] + ret[2]);
84 inner = (ret[0] + ret[1] + ret[2]); 84 if (inner < 0) {
85 if (inner < 0) { 85 inner = 0;
86 inner = 0; 86 }
87 } 87
88 88 return inner;
89 return inner; 89 }
90 } 90
91 91
92 92
93 93
94 /** 94 /**
95 * テクスチャは、TEXTURE_SPLIT_PIXEL^2 のブロックに分割する 95 * テクスチャは、TEXTURE_SPLIT_PIXEL^2 のブロックに分割する
431 rgb[1] = (color & 0x0000ff00) >> 8; 431 rgb[1] = (color & 0x0000ff00) >> 8;
432 rgb[0] = (color & 0x000000ff); 432 rgb[0] = (color & 0x000000ff);
433 #endif 433 #endif
434 434
435 435
436 vector float *light_xyz = (vector float*)smanager->global_get(Light);
437
436 vector float v_rgb __attribute__((aligned(16))) = {(float)rgb[0],(float)rgb[1],(float)rgb[2],0}; 438 vector float v_rgb __attribute__((aligned(16))) = {(float)rgb[0],(float)rgb[1],(float)rgb[2],0};
437 int light_rgb; 439 vector float normal_vector __attribute__((aligned(16))) = {normal_x,normal_y,normal_z,0};
438 float normal_vector[4] __attribute__((aligned(16))) = {normal_x,normal_y,normal_z,0}; 440 vector float light_vector __attribute__((aligned(16))) = {0,0,0,0};
439 vector float *vnormal_vector = (vector float *) normal_vector;
440 float light_vector[4];
441 vector float *vlight_vector = (vector float *) light_vector;
442 float inner_product;
443 float *light_xyz = (float*)smanager->global_get(Light);
444 vector float *vlight_xyz = (vector float *) light_xyz;
445 vector float v_inner __attribute__((aligned(16))); 441 vector float v_inner __attribute__((aligned(16)));
446
447 vector float v_world[4] __attribute__((aligned(16))) = {{world_x, world_y, -world_z, 0}, 442 vector float v_world[4] __attribute__((aligned(16))) = {{world_x, world_y, -world_z, 0},
448 {world_x, world_y, -world_z, 0}, 443 {world_x, world_y, -world_z, 0},
449 {world_x, world_y, -world_z, 0}, 444 {world_x, world_y, -world_z, 0},
450 {0, 0, 0, 0}}; 445 {0, 0, 0, 0}};
451 446
452 447
453 normalize(vnormal_vector, vnormal_vector); 448 int light_rgb;
454 449 float inner_product;
455 450
456 float tmp_rgb[4] __attribute__((aligned(16))) = {0,0,0,0}; 451 normalize(normal_vector, normal_vector);
457 vector float *vtmp_rgb = (vector float *) tmp_rgb; 452
453 vector float vtmp_rgb __attribute__((aligned(16))) = {0,0,0,0};
458 454
459 int light_num = 4; 455 int light_num = 4;
460 456
461 for (int i = 0; i < light_num; i++) { 457 for (int i = 0; i < light_num; i++) {
462 458
463 *vlight_vector = spu_sub(v_world[i],vlight_xyz[i]); 459 light_vector = spu_sub(v_world[i],light_xyz[i]);
464 normalize(vlight_vector, vlight_vector); 460 normalize(light_vector, light_vector);
465 inner_product = innerProduct1(vnormal_vector,vlight_vector); 461 inner_product = innerProduct1(normal_vector,light_vector);
466 v_inner = spu_splats(inner_product); 462 v_inner = spu_splats(inner_product);
467 *vtmp_rgb = spu_madd(v_rgb,v_inner,*vtmp_rgb); 463 vtmp_rgb = spu_madd(v_rgb,v_inner,vtmp_rgb);
468 464
469 } 465 }
470 466
471 vector unsigned int v_flag __attribute__((aligned(16))); 467 vector unsigned int v_flag __attribute__((aligned(16)));
472 vector float max_rgb __attribute__((aligned(16))) = (vector float)spu_splats((float)255); 468 vector float max_rgb __attribute__((aligned(16))) = (vector float)spu_splats((float)255.0f);
473 469
474 v_flag = spu_cmpgt(max_rgb,*vtmp_rgb); 470 v_flag = spu_cmpgt(max_rgb,vtmp_rgb);
475 *vtmp_rgb = spu_sel(max_rgb,*vtmp_rgb,v_flag); 471 vtmp_rgb = spu_sel(max_rgb,vtmp_rgb,v_flag);
476 472
477 vector unsigned int vlast_rgb __attribute__((aligned(16))); 473 vector unsigned int last_rgb __attribute__((aligned(16)));
478 vlast_rgb = spu_convtu(*vtmp_rgb,0); 474 last_rgb = spu_convtu(vtmp_rgb,0);
479 unsigned int *last_rgb = (unsigned int*) &vlast_rgb;
480 475
481 476
482 //計算した rgb を light_rgb にまとめる。 477 //計算した rgb を light_rgb にまとめる。
483 #if LITTLEENDIAN 478 #if LITTLEENDIAN
484 light_rgb = (last_rgb[0] << 24) + (last_rgb[1] << 16) + (last_rgb[2] << 8) + (last_rgb[3]); 479 light_rgb = (last_rgb[0] << 24) + (last_rgb[1] << 16) + (last_rgb[2] << 8) + (last_rgb[3]);