changeset 668:adbb9c25eb1a

non push version of function call (half done)
author kono
date Tue, 01 May 2007 16:46:21 +0900
parents dbfbeb05210c
children 1530b1a636ac
files Makefile mc-code-ia32.c
diffstat 2 files changed, 371 insertions(+), 180 deletions(-) [+]
line wrap: on
line diff
--- a/Makefile	Tue May 01 13:26:42 2007 +0900
+++ b/Makefile	Tue May 01 16:46:21 2007 +0900
@@ -1,12 +1,12 @@
 # CC = gcc -std=c99
 CC = cc 
 # -O3
-CFLAGS = -g -Wall -I. -DUSE_CODE_KEYWORD
+CFLAGS = -g -O -Wall -I. -DUSE_CODE_KEYWORD
 # CFLAGS = -g -Wall -I. -pg -fprofile-arcs -ftest-coverage
 # LDFLAGS = -pg
 # for Linux Zaurus
 # CFLAGS = -fsigned-char -pipe -g -I. -I/home/zaurus/develop/include
-CFLAGS1 = -g  -I.
+CFLAGS1 = -g -O -I.
 BASE=0
 STAGE=1
 MFLAGS=$(MFALGS) BASE=$(BASE) STAGE=$(STAGE)
--- a/mc-code-ia32.c	Tue May 01 13:26:42 2007 +0900
+++ b/mc-code-ia32.c	Tue May 01 16:46:21 2007 +0900
@@ -329,17 +329,6 @@
 static int goffset_label;
 #endif
 
-static int
-lvar(int l)
-{
-    if (is_code(fnptr)) {
-	return l+code_disp_offset;
-    } else if (l<0) {
-	return l+disp_offset;
-    } else {
-	return l+arg_offset;
-    }
-}
 
 /*
     creg   current register
@@ -391,6 +380,7 @@
 
 //  defalut current register
 #define CREG_REGISTER   REG_ECX
+#define FREG_FREGISTER  0
 
 static char *reg_name[8+1]; 
 static char *reg_name_l[4+1];
@@ -406,7 +396,7 @@
 static int get_data_register(void);
 
 static void local_table(void);
-static int push_struct(int e4,int t) ;
+static int push_struct(int e4,int t, int arg) ;
 static void code_clear_stack_reg(int reg1);
 #if FLOAT_CODE
 static char * fload(int d);
@@ -420,6 +410,84 @@
 static int code_l2(long long d);
 #endif
 
+#define round16(i)   ((i+0xf)&~0xf)
+#define round4(i)   ((i+0x3)&~0x3)
+
+
+#define arg_offset  24
+#define arg_offset1  24
+#define ARG_LVAR_OFFSET 0x10000000
+
+#define code_disp_offset0 (0)
+
+#define CODE_LVAR(l) ((l)+code_disp_offset0)
+#define CODE_CALLER_ARG(l) ((l)+arg_offset1)
+#define FUNC_LVAR(l) ((l)+disp_offset)
+#define CALLER_ARG(l) ((l)+arg_offset1)
+#define CALLEE_ARG(l) ((l)+arg_offset)
+static int r1_offset_label;
+static const char lpfx[] = "_";
+static int lvar_offset_label;
+static int max_func_args,max_func_arg_label;
+void
+code_offset_set()
+{
+#if 0
+    int l;
+#endif
+    int lvar_offsetv = 
+        round16(-disp+max_func_args*SIZE_OF_INT+func_disp_offset);
+    int r1_offsetv = round16(lvar_offsetv);
+    printf(".set %s%d,%d\n",lpfx,lvar_offset_label,r1_offsetv-lvar_offsetv);
+    if (r1_offsetv-lvar_offsetv > 65000) error(-1);
+        // too large function arguments?
+    printf(".set %s%d,%d\n",lpfx,r1_offset_label,r1_offsetv);
+    if (max_func_arg_label) {
+        printf(".set %s%d,%d\n",lpfx,max_func_arg_label,
+            round16(max_func_args*SIZE_OF_INT)+24);
+        max_func_arg_label = 0;
+    }
+
+#if 0
+printf("## reg_save %d\n",reg_save);
+printf("## function %s\n",fnptr->nm);
+    l = ARG_LVAR_OFFSET;
+printf("## offset call0\t%d\n",CALLER_ARG);
+    l = ARG_LVAR_OFFSET+max_func_args*SIZE_OF_INT;
+printf("## offset calln\t%d %d\n",CALLER_ARG,max_func_args*SIZE_OF_INT);
+    l = disp;
+printf("## offset lvarn\t%d %d\n",FUNC_LVAR+lvar_offsetv,disp);
+    l = 0;
+printf("## offset lvar0\t%d\n",FUNC_LVAR+lvar_offsetv);
+    l = -reg_save;
+printf("## offset regs\t%d\n",FUNC_LVAR+lvar_offsetv);
+printf("## offset r1off\t%d\n",r1_offsetv);
+    l = 0;
+printf("## offset carg0\t%d\n",CALLEE_ARG+r1_offsetv);
+    l = my_func_args;
+printf("## offset cargn\t%d %d\n",CALLEE_ARG+r1_offsetv,my_func_args);
+#endif
+}
+
+
+static void
+lvar(int l)
+{
+    if (is_code(fnptr)) {
+        if (l>=ARG_LVAR_OFFSET) {  /* caller's arguments */
+            printf("%d(%%esp)",CODE_CALLER_ARG(l-ARG_LVAR_OFFSET));
+        } else
+            printf("%d(%%ebp)",CODE_LVAR(l));
+    } else if (l<0) {  /* local variable */
+        printf("%d+_%d(%%ebp)",FUNC_LVAR(l),lvar_offset_label);
+    } else if (l>=ARG_LVAR_OFFSET) {  /* caller's arguments */
+        printf("%d(%%esp)",CALLER_ARG(l-ARG_LVAR_OFFSET));
+    } else { /* callee's arguments */
+        printf("%d(%%ebp)",CALLEE_ARG(l));
+    }
+}
+
+
 #define use_int(reg)   if (reg==-1) reg=use_int0()
 
 static int
@@ -494,6 +562,11 @@
 }
 
 static void
+set_freg(int reg,int mode)
+{
+}
+
+static void
 set_ireg(int reg,int mode)
 {
     if (!is_int_reg(reg)) error(-1);
@@ -568,7 +641,7 @@
     size_of_longlong = SIZE_OF_LONGLONG;
     endian = ENDIAN;
 
-    arg_offset = 8;
+
     // MAX_REGISTER=6;
     MAX_DATA_REG=4;    
     MAX_POINTER=3;    
@@ -1088,7 +1161,8 @@
 void
 code_lvar(int e2,int creg) {
     use_int(creg);
-    printf("\tlea %d(%%ebp),%s\n",lvar(e2),register_name(creg,0));
+    printf("\tlea "); lvar(e2);
+    printf(",%s\n",register_name(creg,0));
 }
 
 
@@ -1103,7 +1177,8 @@
 void
 code_rlvar(int e2,int reg) {
     use_int(reg);
-    printf("\tmovl %d(%%ebp),%s\n",lvar(e2),register_name(reg,0));
+    printf("\tmovl "); lvar(e2);
+    printf(",%s\n",register_name(reg,0));
 }
 
 extern void
@@ -1141,7 +1216,8 @@
 void
 code_crlvar(int e2,int reg,int sign,int sz) {
     use_int(reg);
-    printf("\t%s %d(%%ebp),%s\n",cload(sign,sz),lvar(e2),register_name(reg,0));
+    printf("\t%s ",cload(sign,sz)); lvar(e2);
+    printf(",%s\n",register_name(reg,0));
 
 }
 
@@ -1347,10 +1423,11 @@
 void
 code_cmp_crlvar(int e1,int reg,int sz,int label,int cond) {
     use_int(reg);
-    if (sz==1)
-	printf("\tcmpb $0,%d(%%ebp)\n",lvar(e1));
-    else if (sz==SIZE_OF_SHORT)
-	printf("\tcmpw $0,%d(%%ebp)\n",lvar(e1));
+    if (sz==1) {
+	printf("\tcmpb $0,"); lvar(e1); printf("\n");
+    } else if (sz==SIZE_OF_SHORT) {
+	printf("\tcmpw $0,"); lvar(e1); printf("\n"); 
+    }
     jcond(label,cond);
 }
 
@@ -1377,7 +1454,7 @@
 void
 code_cmp_rlvar(int e1,int reg,int label,int cond) {
     use_int(reg);
-    printf("\tcmpl $0,%d(%%ebp)\n",lvar(e1));
+    printf("\tcmpl $0,"); lvar(e1); printf("\n");
     jcond(label,cond);
 }
 
@@ -1526,190 +1603,297 @@
 }
 
 static int
-push_struct(int e4,int t) 
+push_struct(int e4,int t, int arg) 
 {
-    int length,count;
+    int length,dreg;
     g_expr(e4);
     length=size(t); 
     if(length%SIZE_OF_INT) {
 	length += SIZE_OF_INT - (length%SIZE_OF_INT);
     }
+    emit_push();
+    code_lvar(cadr(arg),USE_CREG);
+    dreg = emit_pop(0);
+
+    // copy dreg to creg with length
     // try small amount copy
-    for(count=0;length<MAX_COPY_LEN;count++,length-=SIZE_OF_INT) {
-	if (length==0) return count;
-	else {
-	    printf("\tpushl %d(%s)\n",
-		length-SIZE_OF_INT,register_name(creg,0));
-	    stack_depth += SIZE_OF_INT;
-	}
+    /* downward direction copy */
+    emit_copy(dreg,creg,length,0,0,1);
+    /* we have value in creg, it may be changed */
+    return length/SIZE_OF_INT;
+}
+
+static int
+simple_arg(int e3)
+{
+    return !contains_p(e3,not_simple_p);
+}
+
+#define caller_arg_offset_v(arg) (ARG_LVAR_OFFSET+(arg)*SIZE_OF_INT)
+
+static int
+compute_complex_arg(int e3,int reg_arg_list,int arg) {
+    int t=caddr(e3);
+    int e4 = car(e3);
+    reg_arg_list = list2(arg,reg_arg_list);
+    g_expr_u(assign_expr0(arg,e4,t,t));
+
+
+
+    car(e3) = arg;
+    return reg_arg_list;
+}
+
+
+static void
+increment_function_arg(int e3,int *pnargs,int *preg_arg,int *pfreg_arg) {
+    int nargs=0,reg_arg=0,freg_arg=0;
+    int t=caddr(e3);
+    if(scalar(t)) {
+        nargs ++ ; reg_arg++; freg_arg++;
+    } else if (t==LONGLONG||t==ULONGLONG||t==DOUBLE) {
+        nargs ++ ; reg_arg++;
+        nargs ++ ; reg_arg++;
+    } else if (t==FLOAT) {
+        reg_arg ++ ; freg_arg++;
+        nargs += size(t)/SIZE_OF_INT;
+    } else if (t>=0&&(car(t)==STRUCT||car(t)==UNION)) {
+        nargs += round4(size(t))/SIZE_OF_INT;
+    } else {
+        error(TYERR);
+        nargs ++ ;
     }
-    // alignment may remain
-    printf("\tsubl $%d,%%esp\n",length);
-    /* downward direction copy */
-    emit_copy(creg,REG_ESP,length,0,0,1);
-    /* we have value in creg, it may be changed */
-    stack_depth += length;
-    return length/SIZE_OF_INT;
+    *pnargs += nargs;
+    *preg_arg += reg_arg;
+    *pfreg_arg += freg_arg;
+}
+
+#define AS_SAVE 1
+#define AS_ARG  0
+
+static int
+get_input_arg(int t,int mode,int nargs,int reg_arg,int freg_arg) 
+{
+    if(scalar(t)) {
+        if (mode==AS_SAVE) {
+            return get_register_var(0);
+        } else 
+            return list3(LVAR,caller_arg_offset_v(nargs),0);
+    } else if (t==LONGLONG||t==ULONGLONG) {
+        if (mode==AS_SAVE) {
+            return get_lregister_var(0);
+        } else 
+            return list3(LVAR,caller_arg_offset_v(nargs),0);
+    } else if (t==FLOAT) {
+        if (mode==AS_SAVE) {
+            return get_dregister_var(0,0);
+        } else
+            return list3(LVAR,caller_arg_offset_v(nargs),0);
+    } else if (t==DOUBLE) {
+        if (mode==AS_SAVE) {
+            return get_dregister_var(0,1);
+        } else
+            return list3(LVAR,caller_arg_offset_v(nargs),0);
+    } else if (t>=0&&(car(t)==STRUCT||car(t)==UNION)) {
+        if (mode==AS_SAVE) {
+            return get_register_var(0);
+        } else
+            return list3(LVAR,caller_arg_offset_v(nargs),0);
+    } else {
+        error(-1);
+        return get_register_var(0);
+    }
+}
+
+static void
+code_call(int e2,NMTBL *fn,int jmp)
+{
+    if (car(e2) == FNAME) {     
+#ifdef __APPLE__
+        printf("\tcall\tL_%s$stub\n",fn->nm);
+#else
+        printf("\tcall\t%s\n",fn->nm);
+#endif
+    } else {
+        printf("\tcall\t*%s\n",register_name(REG_EAX,0));
+    }
 }
 
 int
 function(int e1)
 {
-    int e2,e3,e4,nargs,t,ret_type;
-    NMTBL *n=0;
-    int stack_depth_save = stack_depth;
-
-    ret_type = cadr(cadddr(e1));
-    if (ret_type==CHAR) ret_type=INT;
-
-
+
+    int e2,e3,e4,e5,nargs,t;
+    int arg,reg_arg,freg_arg,arg_assign;
+    int dots;
+    int reg_arg_list=0,ret_type,special_lvar;
+    NMTBL *fn = 0;
+    int jmp = 0;
+    int complex_;
+    int pnargs,preg_arg,pfreg_arg;
+    int stargs;
+    int half_register = 0;
+
+    special_lvar = -1;
+    ret_type = function_type(cadddr(e1),&dots);
+    if (caddr(cadddr(e1))==0) dots=1;
+
+    arg_assign = 0;
+    e2 = cadr(e1);
+    if (car(e2) == FNAME) {     
+        fn=(NMTBL *)cadr(e2);
+    } else {    
+        if (car(e2)==INDIRECT) e2=cadr(e2); // (*func)(i) case
+        jmp = get_register_var(0);
+
+        if (!simple_arg(e2)) {
+            e3=get_register_var(0);
+            reg_arg_list = list2(e3,reg_arg_list);
+            g_expr_u(assign_expr0(e3,e2,INT,INT));
+            e2=e3;
+        }
+        reg_arg_list = list2(jmp,reg_arg_list);
+        arg_assign = list2(assign_expr0(jmp,e2,INT,INT),arg_assign);
+    }
+    /* First we evaluate the complex arguments to avoid interaction with
+       input variables. The last complex argument is left in complex_. */
+    stargs = 0;
+    complex_ = 0;
+    nargs = reg_arg = freg_arg = 0;
+    pnargs = preg_arg = pfreg_arg = 0;
+    for (e3 = e1 = reverse0(caddr(e1)); e3; e3 = cadr(e3)) {    
+        t=caddr(e3);
+        if (reg_arg==3 && (t==DOUBLE||t==LONGLONG||t==ULONGLONG)) {
+            half_register=1;
+        }
+        if ((e5= !simple_arg(car(e3)))) {
+            if (complex_) {
+                arg = get_input_arg(caddr(complex_),AS_SAVE,
+                                        pnargs,preg_arg,pfreg_arg);
+                reg_arg_list = compute_complex_arg(complex_,reg_arg_list,arg);
+            }
+            // memorise last complex arg parameter
+            pnargs=nargs;preg_arg=reg_arg;pfreg_arg=freg_arg;
+            complex_ = e3;
+        }
+        if (t>=0&&(car(t)==STRUCT||car(t)==UNION)) {
+            // The struct should be pushed after complex arguments.
+            if (e5) { // compute address only, complex_ is me now. Clear it.
+                complex_ = 0;
+                e4 = car(e3);
+                if (car(e4)!=RSTRUCT) error(-1);
+                if (!simple_arg(cadr(e4))) {
+                    // Calculate complex struct address here.
+                    // If simple, leave it.
+                    arg = get_register_var(0);
+                    g_expr_u(assign_expr0(arg,list2(ADDRESS,car(e3)),INT,INT));
+                    car(e3)=arg;
+                    reg_arg_list = list2(arg,reg_arg_list);
+
+                    car(e3) = rvalue_t(arg,INT);
+                }
+            }
+            stargs = list4(e3,stargs,nargs,reg_arg);
+        }
+        increment_function_arg(e3,&nargs,&reg_arg,&freg_arg);
+    }
+
+    /* now all input register vars are free */
     code_save_stacks();
 #if FLOAT_CODE
     code_save_fstacks();
 #endif
 
-    e2 = cadr(e1);
-#ifdef __APPLE__
-    //   esp must align 16byte for mmx operation
-    nargs = 0;
-    int length;
-    for (e3 = caddr(e1); e3; e3 = cadr(e3)) {	
-	t=caddr(e3);
-	if(scalar(t)) {
-	} else if (t==LONGLONG||t==ULONGLONG) {
-	    ++nargs;
-	} else if (t==DOUBLE) {
-	    nargs += SIZE_OF_DOUBLE/SIZE_OF_INT;
-	    continue;
-	} else if (t==FLOAT) {
-	    nargs += SIZE_OF_FLOAT/SIZE_OF_INT;
-	    continue;
-	} else if (car(t)==STRUCT||car(t)==UNION) {
-	    // struct must align 16 (but how?)
-	    length = size(t);
-	    if (length%SIZE_OF_INT)
-		length += SIZE_OF_INT - (length%SIZE_OF_INT);
-	    nargs += length/SIZE_OF_INT;
-	    continue;
-	} else {
-	    error(TYERR);
-	}
-	++nargs;
-    }
-    length = 16-((nargs*4+stack_depth)%16);
-    if (length) {
-	stack_depth += length;
-	printf("\tleal %d(%%esp),%%esp\n",-length);
+    // set_lreg(LREG_LREGISTER,0);
+    set_freg(FREG_FREGISTER,0);
+    set_ireg(CREG_REGISTER,0);
+
+    //  Struct arguments need emit_copy, which destroys 3 input registers
+    //  but returns no value in a register, so do the copy here.
+    //  We cannot do this in the previous loop, because the copied struct may be
+    //  overwritten by other complex arguments. But before this we have to
+    //  evaluate any pending complex_ argument.
+
+    if (stargs) {
+        if (complex_) {
+            arg = get_input_arg(caddr(complex_),AS_SAVE,
+                                    pnargs,preg_arg,pfreg_arg);
+            reg_arg_list = compute_complex_arg(complex_,reg_arg_list,arg);
+        }
+        for(stargs=reverse0(stargs);stargs;stargs = cadr(stargs)) {
+            e3 = car(stargs);
+            e4 = car(e3);
+            t  = caddr(e3);
+            arg = get_input_arg(t,AS_ARG,caddr(stargs),cadddr(stargs),0);
+            push_struct(e4,t,arg);
+            car(e3)=0;  // done
+
+
+        }
+    } else {
+        //  last complex argument can use input register
+        if (complex_) {
+            arg = get_input_arg(caddr(complex_),AS_ARG,pnargs,preg_arg,pfreg_arg)
+;
+            reg_arg_list = compute_complex_arg(complex_,reg_arg_list,arg);
+
+            car(complex_) = 0; // done.
+
+
+        }
     }
-#endif
-    nargs = 0;
-    for (e3 = caddr(e1); e3; e3 = cadr(e3)) {	
-	t=caddr(e3);
-	e4 = car(e3);
-	if(scalar(t)) {
-	    if (car(e4)==REGISTER) {
-		printf("\tpushl %s\n",register_name(cadr(e4),0));
-	    } else if (car(e4)==CONST) {
-		use_int0();
-		printf("\tpushl $%d\n",cadr(e4));
-	    } else {
-		g_expr(e4);
-		printf("\tpushl %s\n",register_name(creg,0));
-	    }
-	    stack_depth += SIZE_OF_INT;
-	} else if (t==LONGLONG||t==ULONGLONG) {
-	    if (car(e4)==LREGISTER) {
-		printf("\tpushl %s\n\tpushl %s\n",
-			l_edx(cadr(e4)),l_eax(cadr(e4)));
-	    } else if (car(e4)==LCONST) {
-		use_longlong0(USE_CREG);
-		printf("\tpushl $%d\n\tpushl $%d\n",
-			code_l2(lcadr(e4)),code_l1(lcadr(e4)));
-	    } else {
-		g_expr(e4);
-		printf("\tpushl %%edx\n\tpushl %%eax\n");
-	    }
-	    ++nargs;
-	    stack_depth += SIZE_OF_INT*2;
-	} else if (t==DOUBLE) {
-	    g_expr(e4);
-	    printf("\tleal\t-8(%%esp),%%esp\n\tfstpl\t(%%esp)\n");
-	    nargs += SIZE_OF_DOUBLE/SIZE_OF_INT;
-	    stack_depth += SIZE_OF_DOUBLE;
-	    continue;
-	} else if (t==FLOAT) {
-	    g_expr(e4);
-	    printf("\tleal\t-4(%%esp),%%esp\n\tfstps\t(%%esp)\n");
-	    nargs += SIZE_OF_FLOAT/SIZE_OF_INT;
-	    stack_depth += SIZE_OF_FLOAT;
-	    continue;
-	} else if (car(t)==STRUCT||car(t)==UNION) {
-	    nargs += push_struct(e4,t);
-	    continue;
-	} else {
-	    error(TYERR);
-	}
-	++nargs;
+
+    nargs = reg_arg = freg_arg = 0;
+    // calc stack arguments first, it may require extra registers,
+    // and we can still use input registers now.
+    for (e3 = e1; e3; 
+                increment_function_arg(e3,&nargs,&reg_arg,&freg_arg),
+                e3 = cadr(e3)) {        
+        if (!(e4=car(e3))) continue;
+        t=caddr(e3);
+        arg = get_input_arg(t,AS_ARG,nargs,reg_arg,freg_arg);
+        if (car(arg)!=LVAR) continue;
+        g_expr_u(assign_expr0(arg,e4,t,t));
+        car(e3)=0;  // done
     }
-    if (car(e2) == FNAME) {	
-	n=(NMTBL *)cadr(e2);
-	// use_register(creg,REG_EAX,0);  /* will be destroyed */
-    } else {	
-	g_expr(e2);
-	use_register(creg,REG_EAX,1);  /* will be destroyed */
+    if (max_func_args<nargs) max_func_args=nargs;
+    for(;arg_assign;arg_assign=cadr(arg_assign)) {
+        g_expr_u(car(arg_assign));
     }
-#ifdef __APPLE__
     clear_ptr_cache();
-#endif
-    if (car(e2) == FNAME) {	
-#ifdef __APPLE__
-	printf("\tcall\tL_%s$stub\n",n->nm);
-#else
-	printf("\tcall\t%s\n",n->nm);
-#endif
-    } else {
-	printf("\tcall\t*%s\n",register_name(REG_EAX,0));
+    code_call(e2,fn,jmp);
+    for(;reg_arg_list;reg_arg_list=cadr(reg_arg_list)) {
+        arg = car(reg_arg_list);
+        if (car(arg)==REGISTER||car(arg)==DREGISTER||car(arg)==FREGISTER
+                ||car(arg)==LREGISTER) 
+            free_register(cadr(arg));
+        else if (car(arg)==LVAR&&cadr(arg)<0) free_lvar(cadr(arg));
     }
-    if (stack_depth!=stack_depth_save)
-	printf("\taddl $%d,%%esp\n",stack_depth-stack_depth_save);
     if (ret_type==DOUBLE||ret_type==FLOAT) {
     } else if (ret_type==LONGLONG||ret_type==ULONGLONG) {
-	use_longlong0(USE_CREG);
+        use_longlong0(USE_CREG);
     } else if (ret_type==VOID) {
     } else {
-	if (use)
-	    set_ireg(RET_REGISTER,0);
-	else
-	    set_ireg(CREG_REGISTER,0);
+        if (use)
+            set_ireg(RET_REGISTER,0);
+        else
+            set_ireg(CREG_REGISTER,0);
     }
-    stack_depth = stack_depth_save;
     return ret_type;
 }
 
 void
 code_alloca(int e1,int reg)
 {
-    char *crn,*drn;
-    int edx;
+    char *crn;
   
     g_expr(list3(BAND,list3(ADD,e1,list2(CONST,15)),list2(CONST,~15))); 
     use_int(reg);
-    if (stack_depth>0) {
-	edx = get_register();
-	crn = register_name(reg,0);
-	drn = register_name(edx,0);
-	printf("\tmovl %%esp,%s\n",drn);
-	printf("\tsubl %s,%%esp\n",crn);
-	printf("\tmovl %%esp,%s\n",crn);
-	emit_copy(edx,creg,stack_depth,0,1,1);
-	free_register(edx);  // should be free before emit copy but...
-	printf("\taddl $%d,%s\n",stack_depth,register_name(creg,0));
-
-    } else {
-	crn = register_name(reg,0);
-	printf("\tsubl %s,%%esp\n",crn);
-	printf("\tmovl %%esp,%s\n",crn);
-    }
+    crn = register_name(reg,0);
+    printf("\tsubl\t%s, %%esp\n",crn);
+    if (!max_func_arg_label) max_func_arg_label = fwdlabel();
+    printf("\tmovl $%s%d,%s\n",lpfx,max_func_arg_label ,crn);
+    printf("\tadd\t%%esp, %s\n",crn);
 }
 
 void
@@ -1824,7 +2008,8 @@
 code_assign_lvar(int e2,int creg,int byte) {
     if (byte) { use_data_reg(creg,1); 
     } else { use_int(creg); }
-    printf("\t%s %s,%d(%%ebp)\n",move(byte),register_name(creg,byte),lvar(e2));
+    printf("\t%s %s,",move(byte),register_name(creg,byte));
+    lvar(e2); printf("\n");
 }
 
 void
@@ -2229,6 +2414,11 @@
 	printf(".globl _%s\n",name);
     printf("_%s:\n",name);
 #endif
+
+    lvar_offset_label = fwdlabel();
+    r1_offset_label = fwdlabel();
+    max_func_args = 0;
+
     printf("\tpushl %%ebp\n");
     printf("\tmovl %%esp,%%ebp\n");
     printf("\tpushl %%ebx\n");
@@ -2300,6 +2490,7 @@
         }
     }
     fwddef(retlabel);
+    code_offset_set(fnptr);
 
     printf("\tlea %d(%%ebp),%%esp\n",disp_offset);
     printf("\tpopl %%edi\n");
@@ -2773,7 +2964,7 @@
 
 void code_dassign_lvar(int e2,int freg,int d)
 { 
-    printf("\t%s %d(%%ebp)\n",fstore(d),lvar(e2));
+    printf("\t%s ",fstore(d)); lvar(e2); printf("\n");
 }
 
 void code_dassign_dregister(int e,int d,int freg)
@@ -2929,7 +3120,7 @@
 
 void code_drlvar(int e2,int d,int freg)
 { 
-    printf("\t%s %d(%%ebp)\n",fload(d),lvar(e2));
+    printf("\t%s ",fload(d)); lvar(e2); printf("\n");
 }
 
 void code_cmp_drgvar(int e2,int reg,int d,int label,int cond)
@@ -2951,7 +3142,7 @@
 
 void code_cmp_drlvar(int e2,int reg,int d,int label,int cond)
 { 
-    printf("\tfcomp %d(%%ebp)\n",lvar(e2));
+    printf("\tfcomp "); lvar(e2); printf("\n");
     jcond(label,cond);
 }
 
@@ -3335,8 +3526,8 @@
     char *crn;
     use_int(e2);
     crn = register_name(e2,0);
-    printf("\tmovl %d(%%ebp),%s\n",lvar(e1),crn);
-    printf("\torl  %d(%%ebp),%s\n",lvar(e1)+4,crn);
+    printf("\tmovl "); lvar(e1); printf(",%s\n",crn);
+    printf("\torl  "); lvar(e1+4); printf(",%s\n",crn);
     printf("\ttestl %s,%s\n",crn,crn);
     jcond(label,cond);
 }
@@ -3385,8 +3576,8 @@
 {
     use_longlong(e2);
 #if ENDIAN_L==0
-    printf("\tmovl %s,%d(%%ebp)\n",l_eax(e2),lvar(e1));
-    printf("\tmovl %s,%d(%%ebp)\n",l_edx(e2),lvar(e1)+4);
+    printf("\tmovl %s,",l_eax(e2)); lvar(e1); printf("\n");
+    printf("\tmovl %s,",l_edx(e2)); lvar(e1+4); printf("\n");
 #endif
 }
 
@@ -3450,8 +3641,8 @@
 {
     use_longlong(e2);
 #if ENDIAN_L==0
-    printf("\tmovl %d(%%ebp),%s\n",lvar(e1),l_eax(e2));
-    printf("\tmovl %d(%%ebp),%s\n",lvar(e1)+4,l_edx(e2));
+    printf("\tmovl "); lvar(e1); printf(",%s\n",l_eax(e2));
+    printf("\tmovl "); lvar(e1+4); printf(",%s\n",l_edx(e2));
 #endif
 }