Mercurial > hg > Papers > 2012 > aplas

--- a/paper/rectype.ind	Fri Jun 15 03:55:36 2012 +0900
+++ b/paper/rectype.ind	Fri Jun 15 05:08:00 2012 +0900
@@ -288,14 +288,179 @@

 \section{Comparision}

+Here is our bench mark program.
+{\small
+    f0(int i) {
+        int k,j;
+        k = 3+i;
+        j = g0(i+3);
+        return k+4+j;
+    }
+
+    g0(int i) {
+        return h0(i+4)+i;
+    }
+
+    h0(int i) {
+        return i+4;
+    }
+}
+
+It is written in C, we perform CPS transformation in several
+steps by hands. There are several optimization is possible.
+
+{\small
+    /* straight conversion case (1) */
+
+    typedef char *stack;
+
+    struct cont_interface {
+      // General Return Continuation
+        __code (*ret)();
+    };
+
+    __code f(int i,stack sp) {
+        int k,j;
+        k = 3+i;
+        goto f_g0(i,k,sp);
+    }
+
+    struct f_g0_interface {
+       // Specialized Return Continuation
+        __code (*ret)();
+        int i_,k_,j_;
+    };
+
+    __code f_g1(int j,stack sp);
+
+    __code f_g0(int i,int k,stack sp) { // Caller
+        struct f_g0_interface *c =
+            (struct f_g0_interface *)(
+         sp -= sizeof(struct f_g0_interface));
+
+        c->ret = f_g1;
+        c->k_ = k;
+        c->i_ = i;
+
+        goto g(i+3,sp);
+    }
+
+    __code f_g1(int j,stack sp) {  // Continuation
+        struct f_g0_interface *c =
+          (struct f_g0_interface *)sp;
+        int k = c->k_;
+        sp+=sizeof(struct f_g0_interface);
+        c = (struct f_g0_interface *)sp;
+        goto (c->ret)(k+4+j,sp);
+    }
+
+    __code g_h1(int j,stack sp);
+
+    __code g(int i,stack sp) { // Caller
+        struct f_g0_interface *c =
+            (struct f_g0_interface *)(
+           sp -= sizeof(struct f_g0_interface));
+
+        c->ret = g_h1;
+        c->i_ = i;
+
+        goto h(i+3,sp);
+    }
+
+    __code g_h1(int j,stack sp) {
+        // Continuation
+        struct f_g0_interface *c =
+          (struct f_g0_interface *)sp;
+        int i = c->i_;
+        sp+=sizeof(struct f_g0_interface);
+        c = (struct f_g0_interface *)sp;
+        goto (c->ret)(j+i,sp);
+    }
+
+    __code h(int i,stack sp) {
+        struct f_g0_interface *c =
+          (struct f_g0_interface *)sp;
+        goto (c->ret)(i+4,sp);
+    }
+
+    struct main_continuation {
+        // General Return Continuation
+        __code (*ret)();
+        __code (*main_ret)();
+        void *env;
+    };
+
+    __code main_return(int i,stack sp) {
+        if (loop-->0)
+            goto f(233,sp);
+        printf("#0103:%d\n",i);
+        goto (( (struct main_continuation *)sp)->main_ret)(0),
+               ((struct main_continuation *)sp)->env;
+    }
+}
+
+This is awfully long, but it is straight forward. Several forward
+prototyping is necessary, and we find strict prototyping is
+painful in CbC, because we have to use many code segments to
+perform simple thing.  CbC is not a language for human, but for
+automatic generation, verification or IDE directed programming.
+
+We can shorten the result in this way.
+{\small
+    /* little optimized case (3) */
+
+    __code f2_1(int i,char *sp) {
+        int k,j;
+        k = 3+i;
+        goto g2_1(k,i+3,sp);
+    }
+
+    __code g2_1(int k,int i,char *sp) {
+        goto h2_11(k,i+4,sp);
+    }
+
+    __code f2_0_1(int k,int j,char *sp);
+    __code h2_1_1(int i,int k,int j,char *sp) {
+        goto f2_0_1(k,i+j,sp);
+    }
+
+    __code h2_11(int i,int k,char *sp) {
+        goto h2_1_1(i,k,i+4,sp);
+    }
+
+    __code f2_0_1(int k,int j,char *sp) {
+        goto (( (struct cont_interface *)
+           sp)->ret)(k+4+j,sp);
+    }
+
+    __code main_return2_1(int i,stack sp) {
+        if (loop-->0)
+            goto f2_1(233,sp);
+        printf("#0165:%d\n",i);
+        goto (( (struct main_continuation *)sp)->main_ret)(0),
+               ((struct main_continuation *)sp)->env;
+    }
+}
+
+In this example, CPS transformed source is faster than
+original function call form. There are not so much area for the
+optimization in function call form, because function call API
+have to be strict.
+CbC does not need standard call API other than interface which
+is simply a struct and there are no need for register save. (This
+bench mark is designed to require the register save).
+
+Here is the result in IA32 architecture (Table.\ref{tab:mc,gcc,compare}).
+Micro-C is our previous implementation in tiny C. \verb+conv1 1+
+is function call. \verb+conv1 2+, \verb+conv1 3+ is optimized CPS transformed
+source.
+

 \begin{table}[htpb]
 \centering
 \small
 \begin{tabular}{|l|r|r|r|} \hline
 (unit: s) & ./conv1 1 & ./conv1 2 &  ./conv1 3 \\ \hline
-Micro-C(32bit)       & 9.93 & 6.31 & 7.18 \\ \hline
-Micro-C(64bit)       & 5.03 & 5.12 & 5.00 \\ \hline \hline
 GCC -O3(32bit)       & 2.52 & 2.34 & 1.53 \\ \hline
 GCC -O3(64bit)       & 1.80 & 1.20 & 1.44 \\ \hline
 \end{tabular}
@@ -305,3 +470,6 @@


+
+--Conclusion
+