diff TaskManager/Test/simple_render/spe/CreatePolygonPack.cpp @ 106:c9efdb17e8d2

*** empty log message ***
author gongo
date Tue, 04 Mar 2008 15:33:23 +0900
parents 3e331f7576a1
children 5c194c71eca8
line wrap: on
line diff
--- a/TaskManager/Test/simple_render/spe/CreatePolygonPack.cpp	Mon Mar 03 18:55:28 2008 +0900
+++ b/TaskManager/Test/simple_render/spe/CreatePolygonPack.cpp	Tue Mar 04 15:33:23 2008 +0900
@@ -1,108 +1,152 @@
 #include <stdio.h>
-#include <stdlib.h>
-#include <malloc.h>
+#include <spu_intrinsics.h>
 #include "CreatePolygonPack.h"
 #include "polygon_pack.h"
 #include "scene_graph_pack.h"
+#include "sys.h"
 
 
 void
 CreatePolygonPack::read(void)
 {
-     SchedTask::read();
+    printf("CreatePolgonPack\n");
+
+    SchedTask::read();
+}
+
+//create_pp(SceneGraphPack *sgp, PolygonPack *pp)
+//create_pp(void *read, void *write)
+//CreatePolygonPack::run(SceneGraphPack *sgp, PolygonPack *pp)
+inline float
+CreatePolygonPack::sum_across_float4(vector float v)
+{
+    vector float c12, c2, c3, c4, c34;
+    vector float result;
+
+    c2 = spu_rlqwbyte(v, 4);
+    c3 = spu_rlqwbyte(v, 8);
+    c4 = spu_rlqwbyte(v, 12);
+    c12 = spu_add(v,  c2);
+    c34 = spu_add(c3, c4);
+
+    result = spu_add(c12, c34);
+    return (spu_extract(result, 0));
 }
 
 int
 CreatePolygonPack::run(void *rbuf, void *wbuf)
 {
-     SceneGraphPack *sgp = (SceneGraphPack*)rbuf;
-     PolygonPack *pp = (PolygonPack*)wbuf;
+    SceneGraphPack *sgp = (SceneGraphPack*)rbuf;
+    PolygonPack *pp = (PolygonPack*)wbuf;
 
-     float xyz1[4],xyz2[4],xyz3[4];
+    float xyz1[4],xyz2[4],xyz3[4];
 
-     for (int i = 0; i < sgp->info.size; i++) {
-	  SceneGraphNodePtr node = &sgp->node[i];
+    for (int i = 0; i < sgp->info.size; i++) {
+	SceneGraphNodePtr node = &sgp->node[i];
     
-	  int n,nt,pt;
-	  for(n=0,nt=0,pt=0; n<node->size*3; n+=9,nt+=6,pt++) {
-	       xyz1[0] = node->vertex[n];
-	       xyz1[1] = node->vertex[n+1];
-	       xyz1[2] = node->vertex[n+2]*-1;
-	       xyz1[3] = 1;
-	       xyz2[0] = node->vertex[n+3];
-	       xyz2[1] = node->vertex[n+3+1];
-	       xyz2[2] = node->vertex[n+3+2]*-1;
-	       xyz2[3] = 1;
-	       xyz3[0] = node->vertex[n+6];
-	       xyz3[1] = node->vertex[n+6+1];
-	       xyz3[2] = node->vertex[n+6+2]*-1;
-	       xyz3[3] = 1;
+	int n,nt,pt;
+	for(n=0,nt=0,pt=0; n<node->size*3; n+=9,nt+=6,pt++) {
+	    xyz1[0] = node->vertex[n];
+	    xyz1[1] = node->vertex[n+1];
+	    xyz1[2] = node->vertex[n+2]*-1;
+	    xyz1[3] = 1;
+	    xyz2[0] = node->vertex[n+3];
+	    xyz2[1] = node->vertex[n+3+1];
+	    xyz2[2] = node->vertex[n+3+2]*-1;
+	    xyz2[3] = 1;
+	    xyz3[0] = node->vertex[n+6];
+	    xyz3[1] = node->vertex[n+6+1];
+	    xyz3[2] = node->vertex[n+6+2]*-1;
+	    xyz3[3] = 1;
 
-	       rotate(xyz1, node->translation);
-	       rotate(xyz2, node->translation);
-	       rotate(xyz3, node->translation);
+	    rotate(xyz1, node->translation);
+	    rotate(xyz2, node->translation);
+	    rotate(xyz3, node->translation);
 
-	       pp->tri[pt].ver1.x = xyz1[0];
-	       pp->tri[pt].ver1.y = xyz1[1];
-	       pp->tri[pt].ver1.z = xyz1[2];
-	       pp->tri[pt].ver1.tex_x = node->texture[nt];
-	       pp->tri[pt].ver1.tex_y = node->texture[nt+1];
+	    pp->tri[pt].ver1.x = xyz1[0];
+	    pp->tri[pt].ver1.y = xyz1[1];
+	    pp->tri[pt].ver1.z = xyz1[2];
+	    pp->tri[pt].ver1.tex_x = node->texture[nt];
+	    pp->tri[pt].ver1.tex_y = node->texture[nt+1];
 
-	       pp->tri[pt].ver2.x = xyz2[0];
-	       pp->tri[pt].ver2.y = xyz2[1];
-	       pp->tri[pt].ver2.z = xyz2[2];
-	       pp->tri[pt].ver2.tex_x = node->texture[nt+2];
-	       pp->tri[pt].ver2.tex_y = node->texture[nt+2+1];
+	    pp->tri[pt].ver2.x = xyz2[0];
+	    pp->tri[pt].ver2.y = xyz2[1];
+	    pp->tri[pt].ver2.z = xyz2[2];
+	    pp->tri[pt].ver2.tex_x = node->texture[nt+2];
+	    pp->tri[pt].ver2.tex_y = node->texture[nt+2+1];
 
-	       pp->tri[pt].ver3.x = xyz3[0];
-	       pp->tri[pt].ver3.y = xyz3[1];
-	       pp->tri[pt].ver3.z = xyz3[2];
-	       pp->tri[pt].ver3.tex_x = node->texture[nt+4];
-	       pp->tri[pt].ver3.tex_y = node->texture[nt+4+1];
+	    pp->tri[pt].ver3.x = xyz3[0];
+	    pp->tri[pt].ver3.y = xyz3[1];
+	    pp->tri[pt].ver3.z = xyz3[2];
+	    pp->tri[pt].ver3.tex_x = node->texture[nt+4];
+	    pp->tri[pt].ver3.tex_y = node->texture[nt+4+1];
 
-	       pp->tri[pt].tex_width = node->tex_width;
-	       pp->tri[pt].tex_height = node->tex_height;
-	  }
-	  pp->info.size = pt;
-	  pp->ssl = sgp->ssl;
-     }
+	    pp->tri[pt].tex_width = node->tex_width;
+            pp->tri[pt].tex_height = node->tex_height;
+	}
+	pp->info.size = pt;
+	pp->ssl = sgp->ssl;
+    }
 
-     return sizeof(PolygonPack);
+    return sizeof(PolygonPack);
 }
 
-void
-CreatePolygonPack::write(void)
-{
-     SchedTask::write();
-
-     free(readbuf);
-     free(writebuf);
-}
 
 void
 CreatePolygonPack::rotate(float *xyz, float *matrix)
 {
-     float abc[4];
+#if 1
+    float abc[4];
 
-     abc[0] = xyz[0];
-     abc[1] = xyz[1];
-     abc[2] = xyz[2];
-     abc[3] = xyz[3];
+    abc[0] = xyz[0];
+    abc[1] = xyz[1];
+    abc[2] = xyz[2];
+    abc[3] = xyz[3];
+    
+    // SIMD 使えるよね
+    for (int i=0; i<4; i++)
+    {
+	xyz[i] = abc[0]*matrix[i] + abc[1]*matrix[i+4] + abc[2]*matrix[i+8] + abc[3]*matrix[i+12];
+    }
+#else
+    vector float *abc = (vector float *)xyz;
+    float tmp[4];
     
-     // SIMD 使えるよね
-     for (int i=0; i<4; i++)
-     {
-	  xyz[i] = abc[0]*matrix[i] + abc[1]*matrix[i+4]
-	       + abc[2]*matrix[i+8] + abc[3]*matrix[i+12];
-     }
+    vector float matrixT0 = (vector float){matrix[0], matrix[4], matrix[8], matrix[12]};
+    vector float matrixT1 = (vector float){matrix[1], matrix[5], matrix[9], matrix[13]};
+    vector float matrixT2 = (vector float){matrix[2], matrix[6], matrix[10], matrix[14]};
+    vector float matrixT3 = (vector float){matrix[3], matrix[7], matrix[11], matrix[15]};
+
+#if 1
+    vector float *v_tmp = (vector float *)tmp;
+
+    *v_tmp = spu_mul(*abc, matrixT0);
+    xyz[0] = tmp[0] + tmp[1] + tmp[2] + tmp[3];
+    *v_tmp = spu_mul(*abc, matrixT1);
+    xyz[1] = tmp[0] + tmp[1] + tmp[2] + tmp[3];
+    *v_tmp = spu_mul(*abc, matrixT2);
+    xyz[2] = tmp[0] + tmp[1] + tmp[2] + tmp[3];
+    *v_tmp = spu_mul(*abc, matrixT3);
+    xyz[3] = tmp[0] + tmp[1] + tmp[2] + tmp[3];
+#else
+    vector float v_tmp;
+
+    v_tmp = spu_mul(*abc, matrixT0);
+    xyz[0] = sum_across_float4(v_tmp);
+    v_tmp = spu_mul(*abc, matrixT1);
+    xyz[1] = sum_across_float4(v_tmp);
+    v_tmp = spu_mul(*abc, matrixT2);
+    xyz[2] = sum_across_float4(v_tmp);
+    v_tmp = spu_mul(*abc, matrixT3);
+    xyz[3] = sum_across_float4(v_tmp);
+#endif
+#endif
+
 }
 
 SchedTask*
 createTask_createPolygonPack(TaskListPtr _taskList, TaskPtr _task,
 			     void *rbuff, void *wbuff, DmaManager *dma)
 {
-     rbuff = memalign(16, sizeof(SceneGraphPack));
-     wbuff = memalign(16, sizeof(PolygonPack));
-
-     return new CreatePolygonPack(_taskList, _task, rbuff, wbuff, dma);
+    return new CreatePolygonPack(_taskList, _task, rbuff, wbuff, dma);
 }