Mercurial > hg > Papers > 2012 > aplas
changeset 19:dc62dc1fe059
modify compression
author | Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 15 Jun 2012 05:08:00 +0900 |
parents | 33b7a54edaa9 |
children | 203699cf5384 |
files | paper/rectype.ind |
diffstat | 1 files changed, 170 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- a/paper/rectype.ind Fri Jun 15 03:55:36 2012 +0900 +++ b/paper/rectype.ind Fri Jun 15 05:08:00 2012 +0900 @@ -288,14 +288,179 @@ \section{Comparision} +Here is our bench mark program. +{\small + f0(int i) { + int k,j; + k = 3+i; + j = g0(i+3); + return k+4+j; + } + + g0(int i) { + return h0(i+4)+i; + } + + h0(int i) { + return i+4; + } +} + +It is written in C, we perform CPS transformation in several +steps by hands. There are several optimization is possible. + +{\small + /* straight conversion case (1) */ + + typedef char *stack; + + struct cont_interface { + // General Return Continuation + __code (*ret)(); + }; + + __code f(int i,stack sp) { + int k,j; + k = 3+i; + goto f_g0(i,k,sp); + } + + struct f_g0_interface { + // Specialized Return Continuation + __code (*ret)(); + int i_,k_,j_; + }; + + __code f_g1(int j,stack sp); + + __code f_g0(int i,int k,stack sp) { // Caller + struct f_g0_interface *c = + (struct f_g0_interface *)( + sp -= sizeof(struct f_g0_interface)); + + c->ret = f_g1; + c->k_ = k; + c->i_ = i; + + goto g(i+3,sp); + } + + __code f_g1(int j,stack sp) { // Continuation + struct f_g0_interface *c = + (struct f_g0_interface *)sp; + int k = c->k_; + sp+=sizeof(struct f_g0_interface); + c = (struct f_g0_interface *)sp; + goto (c->ret)(k+4+j,sp); + } + + __code g_h1(int j,stack sp); + + __code g(int i,stack sp) { // Caller + struct f_g0_interface *c = + (struct f_g0_interface *)( + sp -= sizeof(struct f_g0_interface)); + + c->ret = g_h1; + c->i_ = i; + + goto h(i+3,sp); + } + + __code g_h1(int j,stack sp) { + // Continuation + struct f_g0_interface *c = + (struct f_g0_interface *)sp; + int i = c->i_; + sp+=sizeof(struct f_g0_interface); + c = (struct f_g0_interface *)sp; + goto (c->ret)(j+i,sp); + } + + __code h(int i,stack sp) { + struct f_g0_interface *c = + (struct f_g0_interface *)sp; + goto (c->ret)(i+4,sp); + } + + struct main_continuation { + // General Return Continuation + __code (*ret)(); + __code (*main_ret)(); + void *env; + }; + + __code main_return(int i,stack sp) { + if (loop-->0) + goto f(233,sp); + printf("#0103:%d\n",i); + goto (( (struct main_continuation *)sp)->main_ret)(0), + ((struct main_continuation *)sp)->env; + } +} + +This is awfully long, but it is straight forward. Several forward +prototyping is necessary, and we find strict prototyping is +painful in CbC, because we have to use many code segments to +perform simple thing. CbC is not a language for human, but for +automatic generation, verification or IDE directed programming. + +We can shorten the result in this way. +{\small + /* little optimized case (3) */ + + __code f2_1(int i,char *sp) { + int k,j; + k = 3+i; + goto g2_1(k,i+3,sp); + } + + __code g2_1(int k,int i,char *sp) { + goto h2_11(k,i+4,sp); + } + + __code f2_0_1(int k,int j,char *sp); + __code h2_1_1(int i,int k,int j,char *sp) { + goto f2_0_1(k,i+j,sp); + } + + __code h2_11(int i,int k,char *sp) { + goto h2_1_1(i,k,i+4,sp); + } + + __code f2_0_1(int k,int j,char *sp) { + goto (( (struct cont_interface *) + sp)->ret)(k+4+j,sp); + } + + __code main_return2_1(int i,stack sp) { + if (loop-->0) + goto f2_1(233,sp); + printf("#0165:%d\n",i); + goto (( (struct main_continuation *)sp)->main_ret)(0), + ((struct main_continuation *)sp)->env; + } +} + +In this example, CPS transformed source is faster than +original function call form. There are not so much area for the +optimization in function call form, because function call API +have to be strict. +CbC does not need standard call API other than interface which +is simply a struct and there are no need for register save. (This +bench mark is designed to require the register save). + +Here is the result in IA32 architecture (Table.\ref{tab:mc,gcc,compare}). +Micro-C is our previous implementation in tiny C. \verb+conv1 1+ +is function call. \verb+conv1 2+, \verb+conv1 3+ is optimized CPS transformed +source. + \begin{table}[htpb] \centering \small \begin{tabular}{|l|r|r|r|} \hline (unit: s) & ./conv1 1 & ./conv1 2 & ./conv1 3 \\ \hline -Micro-C(32bit) & 9.93 & 6.31 & 7.18 \\ \hline -Micro-C(64bit) & 5.03 & 5.12 & 5.00 \\ \hline \hline GCC -O3(32bit) & 2.52 & 2.34 & 1.53 \\ \hline GCC -O3(64bit) & 1.80 & 1.20 & 1.44 \\ \hline \end{tabular} @@ -305,3 +470,6 @@ + +--Conclusion +