view mc-code-ia32.c @ 705:0554b7f985ee parse-mode

parse mode done.
author kono
date Wed, 24 Oct 2007 10:39:57 +0900
parents 22e0330a6d5b
children 8b54c40081de
line wrap: on
line source

/* Micro-C Code Generation Part for intel386 */

/************************************************************************
** Copyright (C) 2006 Shinji Kono
** 連絡先: 琉球大学情報工学科 河野 真治  
** (E-Mail Address: kono@ie.u-ryukyu.ac.jp)
**
**    このソースのいかなる複写,改変,修正も許諾します。ただし、
**    その際には、誰が貢献したを示すこの部分を残すこと。
**    再配布や雑誌の付録などの問い合わせも必要ありません。
**    営利利用も上記に反しない範囲で許可します。
**    バイナリの配布の際にはversion messageを保存することを条件とします。
**    このプログラムについては特に何の保証もしない、悪しからず。
**
**    Everyone is permitted to do anything on this program 
**    including copying, modifying, improving,
**    as long as you don't try to pretend that you wrote it.
**    i.e., the above copyright notice has to appear in all copies.  
**    Binary distribution requires original version messages.
**    You don't have to ask before copying, redistribution or publishing.
**    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
***********************************************************************/



#include <stdio.h>
#include "mc.h"
#include "mc-parse.h"
#include "mc-codegen.h"
#include "mc-code.h"

#ifdef __APPLE__
#define USE_SSE2
#define USE_PIC
#endif

#if defined(__GNUC__) && __GNUC__ >= 4

#include "mc-include.c"

/*
 * Macro prelude used when this compiler is itself built with GCC 4 or
 * later; code_init() stores this string in init_src.
 * NOTE(review): the list looks like two macro dumps concatenated: several
 * names are defined twice, and __SIZE_TYPE__ / __WINT_TYPE__ are given
 * two different definitions -- confirm which one is intended.
 * NOTE(review): the __APPLE__ tail defines __BIG_ENDIAN__ although
 * __LITTLE_ENDIAN__ is already defined earlier in the string -- confirm.
 */
static
char *init_src0 = "\
#define __builtin_va_list int\n\
#define __builtin_va_start(ap,arg) ap=(((int)(&arg))+sizeof(arg))\n\
#define __builtin_va_arg(ap,type)  (*((type *)ap)++)\n\
#define alloca __builtin_alloca\n\
#define __STDC__ 1\n\
#define __extension__\n\
#define __flexarr\n\
#define __const const\n\
#define __THORW\n\
#define __inline__ inline\n\
#define __inline inline\n\
\n\
#define __DBL_MIN_EXP__ (-1021)\n\
#define __FLT_MIN__ 1.17549435e-38F\n\
#define __CHAR_BIT__ 8\n\
#define __WCHAR_MAX__ 2147483647\n\
#define __DBL_DENORM_MIN__ 4.9406564584124654e-324\n\
#define __FLT_EVAL_METHOD__ 0\n\
#define __DBL_MIN_10_EXP__ (-307)\n\
#define __FINITE_MATH_ONLY__ 0\n\
#define __SHRT_MAX__ 32767\n\
#define __LDBL_MAX__ 1.18973149535723176502e+4932L\n\
#define __APPLE_CC__ 5367\n\
#define __UINTMAX_TYPE__ long long unsigned int\n\
#define __SCHAR_MAX__ 127\n\
#define __USER_LABEL_PREFIX__ _\n\
#define __STDC_HOSTED__ 1\n\
#define __DBL_DIG__ 15\n\
#define __FLT_EPSILON__ 1.19209290e-7F\n\
#define __LDBL_MIN__ 3.36210314311209350626e-4932L\n\
#define __strong \n\
#define __DECIMAL_DIG__ 21\n\
#define __LDBL_HAS_QUIET_NAN__ 1\n\
#define __DYNAMIC__ 1\n\
#define __GNUC__ 4\n\
#define __MMX__ 1\n\
#define __DBL_MAX__ 1.7976931348623157e+308\n\
#define __DBL_HAS_INFINITY__ 1\n\
#define __weak \n\
#define __DBL_MAX_EXP__ 1024\n\
#define __SSE2_MATH__ 1\n\
#define __LONG_LONG_MAX__ 9223372036854775807LL\n\
#define __GXX_ABI_VERSION 1002\n\
#define __FLT_MIN_EXP__ (-125)\n\
#define __DBL_MIN__ 2.2250738585072014e-308\n\
#define __DBL_HAS_QUIET_NAN__ 1\n\
#define __REGISTER_PREFIX__ \n\
#define __NO_INLINE__ 1\n\
#define __i386 1\n\
#define __FLT_MANT_DIG__ 24\n\
#define __tune_nocona__ 1\n\
#define i386 1\n\
#define __i386__ 1\n\
#define __SIZE_TYPE__ long unsigned int\n\
#define __FLT_RADIX__ 2\n\
#define __LDBL_EPSILON__ 1.08420217248550443401e-19L\n\
#define __SSE_MATH__ 1\n\
#define __FLT_HAS_QUIET_NAN__ 1\n\
#define __FLT_MAX_10_EXP__ 38\n\
#define __LONG_MAX__ 2147483647L\n\
#define __FLT_HAS_INFINITY__ 1\n\
#define __LITTLE_ENDIAN__ 1\n\
#define __LDBL_MANT_DIG__ 64\n\
#define __CONSTANT_CFSTRINGS__ 1\n\
#define __WCHAR_TYPE__ int\n\
#define __FLT_DIG__ 6\n\
#define __INT_MAX__ 2147483647\n\
#define __nocona 1\n\
#define __FLT_MAX_EXP__ 128\n\
#define __DBL_MANT_DIG__ 53\n\
#define __WINT_TYPE__ int\n\
#define __SSE__ 1\n\
#define __LDBL_MIN_EXP__ (-16381)\n\
#define __MACH__ 1\n\
#define __LDBL_MAX_EXP__ 16384\n\
#define __LDBL_MAX_10_EXP__ 4932\n\
#define __DBL_EPSILON__ 2.2204460492503131e-16\n\
#define __GNUC_PATCHLEVEL__ 1\n\
#define __LDBL_HAS_INFINITY__ 1\n\
#define __INTMAX_MAX__ 9223372036854775807LL\n\
#define __FLT_DENORM_MIN__ 1.40129846e-45F\n\
#define __PIC__ 1\n\
#define __FLT_MAX__ 3.40282347e+38F\n\
#define __SSE2__ 1\n\
#define __FLT_MIN_10_EXP__ (-37)\n\
#define __INTMAX_TYPE__ long long int\n\
#define __nocona__ 1\n\
#define __GNUC_MINOR__ 0\n\
#define __DBL_MAX_10_EXP__ 308\n\
#define __LDBL_DENORM_MIN__ 3.64519953188247460253e-4951L\n\
#define __PTRDIFF_TYPE__ int\n\
#define __LDBL_MIN_10_EXP__ (-4931)\n\
#define __LDBL_DIG__ 18\n\
#define __i386 1\n\
#define __FLT_MANT_DIG__ 24\n\
#define __VERSION__ \"micro-c mc-code-ia32.c kono Exp $\"\n\
#define i386 1\n\
#define __i486__ 1\n\
#define unix 1\n\
#define __i386__ 1\n\
#define __SIZE_TYPE__ unsigned int\n\
#define __ELF__ 1\n\
#define __FLT_RADIX__ 2\n\
#define __FLT_HAS_QUIET_NAN__ 1\n\
#define __FLT_MAX_10_EXP__ 38\n\
#define __LONG_MAX__ 2147483647L\n\
#define __FLT_HAS_INFINITY__ 1\n\
#define linux 1\n\
#define __LDBL_MANT_DIG__ 64\n\
#define __WCHAR_TYPE__ int\n\
#define __FLT_DIG__ 6\n\
#define __INT_MAX__ 2147483647\n\
#define __i486 1\n\
#define __FLT_MAX_EXP__ 128\n\
#define __DBL_MANT_DIG__ 53\n\
#define __WINT_TYPE__ unsigned int\n\
#define __LDBL_MIN_EXP__ (-16381)\n\
#define __LDBL_MAX_10_EXP__ 4932\n\
#define __DBL_EPSILON__ 2.2204460492503131e-16\n\
#define __tune_i486__ 1\n\
#define __INTMAX_MAX__ 9223372036854775807LL\n\
#define __FLT_DENORM_MIN__ 1.40129846e-45F\n\
#define __FLT_MAX__ 3.40282347e+38F\n\
#define __FLT_MIN_10_EXP__ (-37)\n\
#define __INTMAX_TYPE__ long long int\n\
#define __GNUC_MINOR__ 0\n\
#define __DBL_MAX_10_EXP__ 308\n\
#define __PTRDIFF_TYPE__ int\n\
#define __LDBL_MIN_10_EXP__ (-4931)\n\
#define __LDBL_DIG__ 18\n\
"
#ifdef __APPLE__
"#define __APPLE__ 1\n"
"#define __GNUC__ 4\n"
"#define __BIG_ENDIAN__ 1\n"
#endif
;

/*

  #define size_t int\n\

*/

#else

#include "mc-include.c"

/*
 * Macro prelude for hosts that are not GCC 4 or later.  The first variant
 * (host is GCC 3) provides the __builtin_va_* spellings and typedefs
 * __SIZE_TYPE__; the fallback variant defines plain va_list/va_start/
 * va_arg/va_end and a __SIZE_TYPE__ macro.  The lines starting with "//"
 * are part of the string, not comments of this file.  The terminating
 * ';' after the #endif is shared by both variants.
 */
#if defined(__GNUC__) && __GNUC__ >= 3
static
char *init_src0 = "\
#define __builtin_va_start(ap,arg) ap=(((int)(&arg))+sizeof(arg))\n\
#define __builtin_va_arg(ap,type)  (*((type *)ap)++)\n\
#define __builtin_va_end\n\
#define __i386__ 1\n\
#define __LITTLE_ENDIAN__ 1\n\
#define __STDC__ 1\n\
#define __extension__\n\
// #define __restrict\n\
#define __flexarr\n\
#define __const const\n\
#define __THORW\n\
// #define __attribute__(a)\n\
#define __inline__ inline\n\
#define __inline inline\n\
#define __GNUC__ 3\n\
#define __builtin_va_list int\n\
typedef long unsigned int __SIZE_TYPE__ ;\n\
"

#else
static
char *init_src0 = "\
#define va_list int\n\
#define va_start(ap,arg) ap=(((int)(&arg))+sizeof(arg))\n\
#define va_arg(ap,type)  (*((type *)ap)++)\n\
#define va_end\n\
#define __i386__ 1\n\
#define __LITTLE_ENDIAN__ 1\n\
#define __STDC__ 1\n\
#define __extension__\n\
// #define __restrict\n\
#define __flexarr\n\
#define __const const\n\
#define __THORW\n\
// #define __attribute__(a)\n\
#define __inline__ inline\n\
#define __SIZE_TYPE__ long unsigned int\n\
#define __GNUC__ 3\n\
"

#endif


 ;

#endif

/* Target description constants and the current output section mode. */
int data_alignment = 0;

/* Object sizes in bytes on this target; code_init() copies them into
   the size_of_* variables. */
#define    SIZE_OF_INT  4
#define    SIZE_OF_SHORT  2
#define    SIZE_OF_FLOAT  4
#define    SIZE_OF_DOUBLE  8
#define    SIZE_OF_LONGLONG  8
/* ENDIAN is copied into `endian` by code_init().
   NOTE(review): presumably 0 means little endian -- confirm. */
#define    ENDIAN  0
#define    ENDIAN_L  0
#define    ENDIAN_D  0

int eval_order = REVERSE;

/* Values stored in output_mode. */
#define TEXT_EMIT_MODE 0
#define DATA_EMIT_MODE 1
#define RODATA_EMIT_MODE 2

/* DOT_SIZE is defined only when not building on Apple. */
#ifdef __APPLE__
#else
#define DOT_SIZE 1
#endif

static int output_mode = TEXT_EMIT_MODE;

/* creg: current register; ireg: current register in int mode;
   lreg: current register in long long mode. */
static int creg;
static int ireg;
static int lreg;

/*
 * Real registers backing a long long pseudo register: REG_L maps to
 * %esi (low) / %edi (high); anything else maps to %eax (low) / %edx (high).
 * The argument is parenthesized so that an expression argument is
 * compared as a whole.
 */
#define regv_l(r)    ((r)==REG_L?REG_ESI:REG_EAX)
#define regv_h(r)    ((r)==REG_L?REG_EDI:REG_EDX)

int code_lassop_p = 0;

#define MAX_REGISTER 6            /* use at most 6 of the i386 registers */
#define REAL_MAX_REGISTER 8    /* the i386 has 8 registers */
/* These three are re-assigned with the same values in code_init(). */
static int MAX_DATA_REG=4;    
static int MAX_POINTER=3;    
int MAX_REGISTER_VAR=2;    
// static int MAX_FREGISTER=1;

#define MAX_FPU_STACK 7

// static int MAX_INPUT_REGISTER_VAR = 0;
/* Upper bound on register-passed inputs of a code segment
   (checked in get_input_register_var()). */
int MAX_CODE_INPUT_REGISTER_VAR = 2;
// static int MAX_INPUT_DREGISTER_VAR = 0;
// static int MAX_INPUT_FREGISTER_VAR = 0;
// static int MAX_CODE_INPUT_DREGISTER_VAR = 0;

static int  reg_sp;   /* REGister Stack-Pointer */
static int reg_stack[MAX_MAX];  /* storage of the register stack */
static int stack_depth = 0;

/* floating point registers */

static int  freg_sp;  /* floating point REGister Stack-Pointer */
static int freg_stack[MAX_MAX]; /* storage of the floating point register stack */

/* Count of registers in use as register variables (see free_register()
   and get_lregister_var()). */
static int reg_var;


/*
                                           -28  -8 local2
                                           -24  -4 local1
                                           -20  8  arg3
                                           -16  4  arg2
                                           -12  0  arg1
         local2     -20 4                   -8    (%edi)
         local1 <-- -16 0 local variable    -4    (%esi)
        %edi        -12  <- disp_offset          %ebp
        %esi         -8
        %ebx         -4
        %ebp = %esp   0
        %eip          4   <- arg_offset
          arg1        8 0
          arg2       12 4
            see enter/enter1/leave           see code_enter
 */
// static int arg_offset;

/* Label number whose value code_offset_set() defines with ".set" for
   code segments. */
static int code_disp_label;

#ifdef __APPLE__
/* Label number subtracted from symbol addresses in the %ebx-relative
   addressing emitted on Apple builds. */
static int goffset_label;
#endif


/*
    creg   current register
	ireg  current register for integer (int mode)
	lreg  current register for long long (long long mode)

    regs[]        register usage

    freg    current floating point register
	kept in FPU stack (no register)
 */

/* Number of long long pseudo registers (REG_LCREG and REG_L). */
#define REAL_MAX_LREGISTER 2
/* Usage table indexed by register number; slot 0 is unused because
   register numbers start at 1. */
static int ia32regs[1+REAL_MAX_REGISTER+REAL_MAX_LREGISTER];


static int *regs  = ia32regs;

/* Floating point usage table with a single slot. */
static int ia32fregs[1];
static int *fregs  = ia32fregs;
static int freg;

// register number should start 1
// regs[] value
//    0 for not ready
//   -1 use current register
//    1 used
//    2 (REG_VAR) register variable
//    3 pointer cache (not used in ia32)

#define REG_EAX   1
#define REG_EBX   2
#define REG_ECX   3    // for strange reason (code_assop)
#define REG_EDX   4
#define REG_ESI   5
#define REG_EDI   6
#define REG_EBP   7
#define REG_ESP   8
// true for REG_EAX..REG_EDI; the argument is parenthesized so that an
// expression argument is tested as a whole (it is evaluated twice)
#define is_int_reg(reg) (1<=(reg)&&(reg)<REG_EBP)
// long long pseudo registers (see regv_l/regv_h for the backing pairs)
#define REG_LCREG     9
#define REG_L     10
#define REG_fp REG_EBP

//  return value register
//  (RET_FREGISTER and RET_DREGISTER expand to nothing)
#define RET_FREGISTER
#define RET_DREGISTER
#define RET_LREGISTER   REG_LCREG
#define RET_REGISTER    REG_EAX

//  default current register
#define CREG_REGISTER   REG_ECX
#define FREG_FREGISTER  0

// Assembler names indexed by register number, filled in by code_init().
// reg_name covers REG_EAX..REG_ESP; the byte (_l) and word (_w) tables
// only cover REG_EAX..REG_EDX.
static char *reg_name[8+1]; 
static char *reg_name_l[4+1];
static char *reg_name_w[4+1];

/* Forward declarations of file-local helpers.  Only use_register() and
   get_data_register() are defined in the part of the file seen so far;
   the others are presumably defined further down. */
static void ascii(char *s);

static int use_register(int virt, int real, int move);
static void shift(char *op, int reg,int creg);
static void ld_indexx(int byte, int n, int xreg,int reg,int sign);
//static void data_mode(char *name);
// static void text_mode(int align);
static int get_data_register(void);

static void local_table(void);
static int push_struct(int e4,int t, int arg) ;
static void code_clear_stack_reg(int reg1);
/* floating point helpers, only when FLOAT_CODE is enabled */
#if FLOAT_CODE
static char * fload(int d);
static int code_d1(double d);
static int code_d2(double d);
static void code_save_fstacks();
#endif
static void jcond(int l, char cond);
/* long long helpers, only when LONGLONG_CODE is enabled */
#if LONGLONG_CODE
static int code_l1(long long d);
static int code_l2(long long d);
#endif

/* Round i up to the next multiple of 16 (round16) or 4 (round4).
   The argument is parenthesized so that an expression argument such as
   a conditional is rounded as a whole. */
#define round16(i)   (((i)+0xf)&~0xf)
#define round4(i)   (((i)+0x3)&~0x3)


/* Displacements used by the *_LVAR macros below. */
#define func_disp_offset (16)
#define code_disp_offset (16)

/* Offsets added to callee (arg_offset) and caller (arg_offset1)
   argument positions. */
#define arg_offset  8
#define arg_offset1  0
/* Local-variable numbers at or above this value are addressed relative
   to %esp as caller arguments (see lvar()). */
#define ARG_LVAR_OFFSET 0x10000000

#define code_disp_offset0 (-16)
// disp_offset
int disp_offset = code_disp_offset0;

/* Convert a local-variable number into the displacement printed by
   lvar(): CODE_* are used inside code segments, the others inside
   ordinary functions. */
#define CODE_LVAR(l) ((l)+code_disp_offset0)
#define CODE_CALLER_ARG(l) ((l)+arg_offset1)
#define FUNC_LVAR(l) ((l)-func_disp_offset)
#define CALLER_ARG(l) ((l)+arg_offset1)
#define CALLEE_ARG(l) ((l)+arg_offset)
/* Label numbers / values emitted by code_offset_set(); lpfx is the
   prefix printed in front of those label numbers. */
static int r1_offset_label;
static const char lpfx[] = "_";
// static int lvar_offset_label;
static int max_func_args,max_func_arg_label;

/*
    function call stack frame
                     prev esp
                      <-------r1_offset------------------------------>
            <-----                   ebp-->                  <----- esp
 r+  +------------+---+---------------+----------+-------------------+    -
      callee arg   xx   register save   local      caller arg  
                          reg_save      disp       max_func_args*SIZE_OF_INT
        lvar>0                         lvar<0       lvar>0x1000 0000

code segment stack frame

                 * r1 of the function that invoked goto ! r1 (r1 before the goto)
                                                disp_offset
   #             *                           ebp <---r1_offset---------> esp
r+ +----------+--+----------+----------------+-----------+----------+----+
    cousin arg xx  reg save !callee arg      !code local  caller arg  xx
                   r20-r29     lvar>0         lvar<0      lvar>0x1000 000
                   f20-f31  <-my_func_args--><--disp-----><-max_func_arg->
                              *SIZE_OF_INT                  *SIZE_OF_INT

  %esp should be alignment 16
 
 */

/*
 * Emit the ".set" directives that give the frame-size labels of the
 * function or code segment fnptr their values.
 *
 * lvar_offsetv is the local variable area (-disp) rounded up to 16;
 * r1_offsetv adds the outgoing argument area and func_disp_offset,
 * rounds to 16 again and adds 8.  For a code segment the value goes to
 * code_disp_label, otherwise to r1_offset_label.  If a
 * max_func_arg_label is pending it is defined as the rounded argument
 * area size and then reset to 0.
 */
void
code_offset_set(NMTBL *fnptr)
{
#if 0
    int l;
#endif
    int code_f = is_code(fnptr);
    int lvar_offsetv = round16(-disp);
    int r1_offsetv = round16(lvar_offsetv+max_func_args*SIZE_OF_INT+func_disp_offset)+8;

    if (code_f) {
	printf("\t.set _%d,%d\n",code_disp_label,r1_offsetv);
    } else {
	//  +8 makes esp alignment 16
    //    printf(".set %s%d,%d\n",lpfx,lvar_offset_label,lvar_offsetv);
	if (r1_offsetv-lvar_offsetv > 65000) error(-1);
	    // too large function arguments?
	printf(".set %s%d,%d\n",lpfx,r1_offset_label,r1_offsetv);
    }
    if (max_func_arg_label) {
        printf(".set %s%d,%d\n",lpfx,max_func_arg_label,
            round16(max_func_args*SIZE_OF_INT));
        max_func_arg_label = 0;
    }

/* Disabled debug dump of the computed frame offsets. */
#if 0
printf("## reg_save %d\n",reg_save);
printf("## function %s\n",fnptr->nm);
    l = ARG_LVAR_OFFSET;
printf("## offset call0\t%d\n",CALLER_ARG);
    l = ARG_LVAR_OFFSET+max_func_args*SIZE_OF_INT;
printf("## offset calln\t%d %d\n",CALLER_ARG,max_func_args*SIZE_OF_INT);
    l = disp;
printf("## offset lvarn\t%d %d\n",FUNC_LVAR+lvar_offsetv,disp);
    l = 0;
printf("## offset lvar0\t%d\n",FUNC_LVAR+lvar_offsetv);
    l = -reg_save;
printf("## offset regs\t%d\n",FUNC_LVAR+lvar_offsetv);
printf("## offset r1off\t%d\n",r1_offsetv);
    l = 0;
printf("## offset carg0\t%d\n",CALLEE_ARG+r1_offsetv);
    l = my_func_args;
printf("## offset cargn\t%d %d\n",CALLEE_ARG+r1_offsetv,my_func_args);
#endif
}


/*
 * Print the memory operand for local-variable number l.
 * Numbers at or above ARG_LVAR_OFFSET are outgoing (caller) arguments
 * and are addressed from %esp; everything else is addressed from %ebp.
 * Inside a code segment the remaining numbers use CODE_LVAR; inside a
 * function negative numbers are locals and non-negative ones are the
 * callee's own arguments.
 */
static void
lvar(int l)
{
    int via_esp = 0;
    int offset;

    if (is_code(fnptr)) {
        if (l>=ARG_LVAR_OFFSET) {
            via_esp = 1;
            offset = CODE_CALLER_ARG(l-ARG_LVAR_OFFSET);
        } else {
            offset = CODE_LVAR(l);
        }
    } else {
        if (l<0) {
            offset = FUNC_LVAR(l);
        } else if (l>=ARG_LVAR_OFFSET) {
            via_esp = 1;
            offset = CALLER_ARG(l-ARG_LVAR_OFFSET);
        } else {
            offset = CALLEE_ARG(l);
        }
    }

    if (via_esp)
        printf("%d(%%esp)",offset);
    else
        printf("%d(%%ebp)",offset);
}


/* Replace a register argument of -1 by the current integer register.
   Expands to a bare `if` statement, so it must be used as a statement
   of its own. */
#define use_int(reg)   if (reg==-1) reg=use_int0()

/*
 * Switch to integer mode and return the current integer register.
 * If creg is unset, ireg is unset, or creg is not an integer register,
 * any long long register is released and ireg (allocated with
 * get_register() when missing) becomes the current register.
 * The chosen register is marked USING_REG if it was free.
 */
static int
use_int0() { 
    int i = creg;
    if (!i||!ireg||!is_int_reg(i)) {
        if (lreg) { if (regs[lreg]) free_register(lreg); lreg = 0; }
        if (!ireg) {
	    ireg = get_register();
	}
        i = ireg;
    }
    if (!regs[i]) regs[i]=USING_REG;
    creg = ireg = i;
    return i;
}

/* REG_EAX..REG_EDX (the registers with byte/word names) and
   REG_ESI..REG_EBP respectively.  The arguments are not parenthesized,
   so pass plain identifiers only. */
#define is_data_reg(reg) (REG_EAX<=reg&&reg<=REG_EDX)
#define is_pointer_reg(reg) (REG_ESI<=reg&&reg<=REG_EBP)

/*
 * Emit the instruction that makes the value of reg0 available in reg1
 * when `move` is set and the two differ: xchg if reg1 is in use,
 * movl otherwise.  reg1 is marked USING_REG if it was free.
 * Always returns reg0.
 */
static int 
use_register(int reg0, int reg1, int move)
{
    /*
	reg0 becomes reg1, if (move) copy the content.
	if reg1 is used, reg0 contains old value.
     */

    char *move_op;
    code_clear_stack_reg(reg1); 
    /* NOTE(review): both operands of || are regs[reg1]; one of them was
       possibly meant to be regs[reg0] -- confirm. */
    move_op = (regs[reg1]||regs[reg1])?"\txchg %s,%s\n":"\tmovl %s,%s\n";
    if (move && reg0!=reg1) {
	printf(move_op,reg_name[reg0],reg_name[reg1]);
	if (!regs[reg1]) regs[reg1]=USING_REG;
    } 
    return reg0;
}

/* Replace reg by a data register (REG_EAX..REG_EDX) when it is -1 or
   not a data register.  Expands to a bare `if` statement. */
#define use_data_reg(reg,keep)   \
    if (reg==-1||!is_data_reg(reg)) reg=use_data_reg0(keep,reg)

/*
 * Make the current integer register a data register and return it.
 * A pointer register held in creg/ireg is released first; when `keep`
 * is set and creg was such a pointer register, its value is copied
 * into the newly chosen data register.
 */
int 
use_data_reg0(int keep,int reg)
{
    int ptreg =0;
    int i;
    if (is_pointer_reg(creg)) {
	free_register(ptreg=creg); 
	ireg = creg = 0;
    }
    if (is_pointer_reg(ireg)) {
	free_register(ireg); 
	ireg = 0;
    }
    /* NOTE(review): this compares against USING_REG while use_longlong0()
       also compares its argument against USE_CREG; confirm which constant
       is intended here.  On Apple builds `i` is used as an index into
       regs[] right below, before it is validated. */
    i = reg==USING_REG?creg:reg;
#ifdef __APPLE__
    if (regs[i]==PTRC_REG) clear_ptr_cache_reg(i);
#endif
    if (!i||!ireg||!is_data_reg(i)) {
        if (lreg) { if (regs[lreg]) free_register(lreg); lreg = 0; }
        if (!ireg) {
	    ireg = get_data_register();
	}
        i = ireg;
    }
    if (!regs[i]) regs[i]=USING_REG;
    creg = ireg = i;
    if (ptreg && keep) {
	printf("\tmovl %s,%s\n",reg_name[ptreg],reg_name[creg]);
    }
    return i;
}

/* Select the current floating point register.  Nothing to do on this
   target; both arguments are ignored. */
static void
set_freg(int reg,int mode)
{
    (void)reg;
    (void)mode;
}

/*
 * Make `reg` the current integer register (creg and ireg).
 * Raises error(-1) if reg is not an integer register.  When reg differs
 * from creg, the previous ireg and creg are released unless they are
 * register variables; if `mode` is set the value of the previous ireg
 * is copied into reg with a movl.
 */
static void
set_ireg(int reg,int mode)
{
    if (!is_int_reg(reg)) error(-1);
    if (reg!=creg) {
#ifdef __APPLE__
	if (regs[reg]==PTRC_REG)
	    clear_ptr_cache_reg(reg);
#endif
        if (ireg && reg!=ireg ) {
            if (regs[ireg]!=REG_VAR) free_register(ireg);
            if (mode) {
                printf("\tmovl %s,%s\n",reg_name[ireg],reg_name[reg]);
            }
        }
        if (creg>0 && regs[creg]!=REG_VAR) free_register(creg);
        if (creg==lreg) lreg = 0;
    }
    creg = ireg = reg;
    if (!regs[reg]) regs[reg]=USING_REG;
}

/* True for the two long long pseudo registers. */
#define is_long_reg(reg)   (reg==REG_LCREG||reg==REG_L)
/* Switch to long long mode when reg is -1 or a long long register.
   Expands to a bare `if` statement. */
#define use_longlong(reg)   \
    if (reg==-1||is_long_reg(reg)) reg=use_longlong0(reg)

/*
 * Switch to long long mode and return the current long long register:
 * REG_LCREG when reg is USE_CREG, otherwise reg itself.
 * The integer register is released unless it is a register variable,
 * and the pseudo register plus its two backing registers are marked
 * USING_REG when free.
 */
static int
use_longlong0(int reg)
{
    /* this initial value is overwritten below before it is read */
    int i = reg==USING_REG?creg:reg;
    if (ireg) { if (regs[ireg]!=REG_VAR) free_register(ireg); ireg=0; }
    if (!lreg||!regs[lreg]) {
	// long long mode use all registers
	code_save_stacks();
#ifdef __APPLE__
	clear_ptr_cache();
#endif
    }
    i = lreg = (reg==USE_CREG)?REG_LCREG:reg;
    if (!regs[i]) regs[i]=USING_REG;
    if (!regs[regv_l(i)]) regs[regv_l(i)]=USING_REG;
    if (!regs[regv_h(i)]) regs[regv_h(i)]=USING_REG;
    creg = lreg = i;
    return i;
}

/* Select `reg` as the current long long register when it is -1 or one
   of the long long pseudo registers; `mode` is ignored. */
static void
set_lreg(int reg,int mode)
{
    if (reg==-1||is_long_reg(reg)) {
	reg = use_longlong0(reg);
    }
}

/* Name of the high-half register of long long pseudo register i. */
char *
l_edx(int i) {
    if (i==REG_L) {
	return "%edi";
    }
    return "%edx";
}
/* Name of the low-half register of long long pseudo register i. */
char *
l_eax(int i) {
    if (i==REG_L) {
	return "%esi";
    }
    return "%eax";
}

/*
 * One-time back-end initialisation: publish the macro prelude and the
 * target type sizes, reset the register limits, and fill the register
 * name tables (32-bit names for all eight registers, byte and word
 * names for REG_EAX..REG_EDX only).
 */
extern void
code_init(void)
{
    /* called only once */

    init_src = init_src0;

    /* target sizes and byte order */
    endian = ENDIAN;
    size_of_int = SIZE_OF_INT;
    size_of_short = SIZE_OF_SHORT;
    size_of_float = SIZE_OF_FLOAT;
    size_of_double = SIZE_OF_DOUBLE;
    size_of_longlong = SIZE_OF_LONGLONG;

    // MAX_REGISTER=6;
    MAX_REGISTER_VAR=2;
    MAX_POINTER=3;
    MAX_DATA_REG=4;

    /* data registers: 32-bit, 16-bit and 8-bit names */
    reg_name[REG_EAX] = "%eax";
    reg_name_w[REG_EAX] = "%ax";
    reg_name_l[REG_EAX] = "%al";

    reg_name[REG_EBX] = "%ebx";
    reg_name_w[REG_EBX] = "%bx";
    reg_name_l[REG_EBX] = "%bl";

    reg_name[REG_ECX] = "%ecx";
    reg_name_w[REG_ECX] = "%cx";
    reg_name_l[REG_ECX] = "%cl";

    reg_name[REG_EDX] = "%edx";
    reg_name_w[REG_EDX] = "%dx";
    reg_name_l[REG_EDX] = "%dl";

    /* remaining registers only have a 32-bit name here */
    reg_name[REG_ESI] = "%esi";
    reg_name[REG_EDI] = "%edi";
    reg_name[REG_EBP] = "%ebp";
    reg_name[REG_ESP] = "%esp";
}

/* Per-file reset: output_mode is set to -1, a value that matches none
   of the *_EMIT_MODE constants; Apple builds also re-initialise the
   pointer cache. */
extern void
emit_reinit()
{
    /* called for each file */
    output_mode = -1;
#ifdef __APPLE__
    init_ptr_cache();
#endif
}


/*
 * Return the assembler name of register i for an operand of `byte`
 * bytes.  Byte (1) and word (SIZE_OF_SHORT) names exist only for
 * registers up to REG_EDX; every other combination yields the 32-bit
 * name.  A register number <= 0 reports REG_ERR and yields "%eax".
 */
char *
register_name(int i,int byte)
{
    if (i<=0) {
	error(REG_ERR);
	return "%eax";
    }
    if (i <= REG_EDX) {
	if (byte==1)
	    return reg_name_l[i];
	if (byte==SIZE_OF_SHORT)
	    return reg_name_w[i];
    }
    return reg_name[i]; /* 0 or 4 means int */
}

/* Make CREG_REGISTER the current integer register without emitting a
   copy (mode 0). */
void
gexpr_code_init(void){
    // use_register(creg,REG_EAX,0);
    set_ireg(CREG_REGISTER,0);
}

/*
 * Forget the current register when it is a register variable, so that
 * a fresh one is chosen next time: integer registers clear creg/ireg,
 * REG_L clears creg/lreg.  The argument e is not used.
 */
void
code_gexpr(int e){
    if (is_int_reg(creg)) {
	if (regs[creg]==REG_VAR) {
	    ireg = 0;
	    creg = 0;
	}
    } else if (creg==REG_L) {
	if (regs[creg]==REG_VAR) {
	    lreg = 0;
	    creg = 0;
	}
    }
}

/*
 * Allocate an integer register and return its number.
 * Order of attempts: a free slot among registers 1..MAX_REGISTER; on
 * Apple builds the register returned by last_ptr_cache(); finally a
 * register taken from the register stack, whose value is first saved
 * into a new local variable.  Reports RGERR and returns -1 if all fail.
 */
int 
get_register(void)
{    /* look for an unused register */
    int i,reg,j;
    for(i=1;i<MAX_REGISTER+1;i++) {
	if (! regs[i]) {    /* if it is not in use */
	    regs[i]=1;      /* declare that the register is now used and */
	    return i;       /* return the number identifying it */
	}
    }
#ifdef __APPLE__
    /* evict a PTR_CACHE entry */
    if ((i=last_ptr_cache())) {
        clear_ptr_cache_reg(i);
        regs[i]=USING_REG;      /* declare that the register is now used and */
        return i;   /* return the number identifying it */
    }
#endif
    /* search register stack */
    for(i=0;i<reg_sp;i++) {
        if ((reg=reg_stack[i])>=0) {
            code_assign_lvar(
                (j=new_lvar(SIZE_OF_INT)),reg,0); 
            /* the stack slot now records the local variable instead */
            reg_stack[i]= j-REG_LVAR_OFFSET;
            return reg;
        }
    }
    error(RGERR);
    return -1;    /* nothing is available: return -1 to signal that */
}

/*
 * Allocate a data register (REG_EAX..REG_EDX) and return its number.
 * Order of attempts: a free data register; on Apple builds pointer
 * cache entries are evicted until one of them frees a data register;
 * finally a data register taken from the register stack, whose value
 * is first saved into a new local variable.  Reports error(-1) and
 * returns -1 if all fail.
 */
static int 
get_data_register(void)
{    /* look for an unused register */
    int i,reg,j;
    for(i=REG_EAX;i<=REG_EDX;i++) {
	if (! regs[i]) {    /* if it is not in use */
	    regs[i]=1;      /* declare that the register is now used and */
	    return i;       /* return the number identifying it */
	}
    }
#ifdef __APPLE__
    /* evict PTR_CACHE entries */
    while ((i=last_ptr_cache())) {
        clear_ptr_cache_reg(i);
	if (is_data_reg(i)) {
	    regs[i]=USING_REG;      /* declare that the register is now used and */
	    return i;   /* return the number identifying it */
	}
    }
#endif
    /* search register stack */
    for(i=0;i<reg_sp;i++) {
        /* The data-register test must look at the register stored in the
           slot, not at the slot index i. */
        reg = reg_stack[i];
        if (reg>=0 && is_data_reg(reg)) {
            code_assign_lvar(
                (j=new_lvar(SIZE_OF_INT)),reg,0); 
            /* the stack slot now records the local variable instead */
            reg_stack[i]= j-REG_LVAR_OFFSET;
            return reg;
        }
    }
    error(-1);
    return -1;    /* nothing is available: return -1 to signal that */
}

/*
 * Release register i.
 * REG_L also releases %esi/%edi and resets the register variable count;
 * a register variable decrements that count; REG_LCREG also releases
 * %eax/%edx.  In every case regs[i] itself is cleared.
 */
void 
free_register(int i) {    /* release a register that is no longer needed */
    if (i==REG_L) {
	reg_var=0;
	regs[REG_ESI]=regs[REG_EDI]=0;
    } else if (regs[i]==REG_VAR) { reg_var--;
    } else if(i==REG_LCREG) { //? REG_L?
	regs[REG_EAX]=regs[REG_EDX]=0;
    }
    regs[i]=0;
}

/* Mark register r as holding a pointer cache entry (PTRC_REG).
   Only supported on Apple builds; otherwise reports error(-1). */
extern void
use_ptr_cache(int r)
{
#ifdef __APPLE__
    regs[r]=PTRC_REG;
#else
    error(-1);
#endif
}

/*
 * Apple builds only: load the address of symbol nptr into register r
 * using %ebx-relative addressing.  A static symbol that is neither a
 * code segment nor a function is addressed directly with leal; anything
 * else is loaded from its L_<name>$non_lazy_ptr slot.
 * Other builds report error(-1).
 */
extern void
code_ptr_cache_def(int r,NMTBL *nptr)
{
#ifdef __APPLE__
    char *rrn = register_name(r,0);
    if (nptr->sc==STATIC && !(is_code(nptr)||is_function(nptr))) {
        printf("\tleal _%s-_%d(%%ebx),%s\n",nptr->nm,
            goffset_label,rrn);
    } else {
        printf("\tmovl L_%s$non_lazy_ptr-_%d(%%ebx),%s\n",
	    nptr->nm,
            goffset_label,rrn);
    }
#else
    error(-1);
#endif
}

/*
    ESI,EDI are used as register variable
    (both in integer and long long mode)
 */

/*
 * Assign the i-th scalar input of a code segment to a register.
 * Returns a REGISTER node for REG_ESI+i (marked INPUT_REG), or 0 when
 * the callee is an ordinary function or i is at or beyond
 * MAX_CODE_INPUT_REGISTER_VAR.
 */
int 
get_input_register_var(int i,NMTBL *nptr,int is_code)
{
    if (!is_code)
	return 0;
    if (i>=MAX_CODE_INPUT_REGISTER_VAR)
	return 0;

    i += REG_ESI;
    regs[i]=INPUT_REG;
    return list3(REGISTER,i,(int)nptr);
}

/* Floating point inputs are never passed in registers here: always
   returns 0 and ignores every argument. */
int 
get_input_dregister_var(int i,NMTBL *nptr,int is_code,int d)
{
    (void)i;
    (void)nptr;
    (void)is_code;
    (void)d;
    return 0;
}

/*
 * Assign a long long input of a code segment to the %esi/%edi pair.
 * Returns an LREGISTER node for REG_L (both halves marked INPUT_REG),
 * or 0 for ordinary functions or when i+1 is at or beyond
 * MAX_CODE_INPUT_REGISTER_VAR.  nptr is not used.
 */
int 
get_input_lregister_var(int i,NMTBL *nptr,int is_code)
{
    if (!is_code)
	return 0;
    if (i+1>=MAX_CODE_INPUT_REGISTER_VAR)
	return 0;

    regs[REG_EDI]=INPUT_REG;
    regs[REG_ESI]=INPUT_REG;
    return list2(LREGISTER,REG_L);
}

/* No allocatable floating point registers: always returns -1,
   regardless of d. */
int 
get_dregister(int d)
{
    (void)d;
    return -1;
}

/*
 * Allocate storage for a long long register variable.
 * When %esi and %edi are both free they are claimed (together with
 * REG_L) as register variables and an LREGISTER node is returned;
 * otherwise a new long long sized local variable is returned.
 * The argument n is not used.
 */
int 
get_lregister_var(NMTBL *n)
{
    if (regs[REG_ESI]!=0 || regs[REG_EDI]!=0) {
	return list3(LVAR,new_lvar(SIZE_OF_LONGLONG),0);
    }
    regs[REG_EDI]=REG_VAR;
    regs[REG_ESI]=REG_VAR;
    regs[REG_L]=REG_VAR;
    reg_var=2; 
    return list2(LREGISTER,REG_L);
}

/* No temporary long long registers can be allocated: always -1. */
int 
get_lregister()
{
    int none = -1;
    return none;
}


/* Return 1 when every register 1..MAX_REGISTER is in use, 0 as soon as
   a free one is found. */
int
register_full(void)
{
    int r = 1;

    while (r<=MAX_REGISTER) {
	if (regs[r]==0)
	    return 0;
	r++;
    }
    return 1;
}

/* Number of free registers among 1..MAX_REGISTER; a non-zero d always
   yields 0. */
int
free_register_count(int d)
{
    int r;
    int unused = 0;

    if (d)
	return 0;
    for(r=MAX_REGISTER;r>=1;r--) {
	if (regs[r]==0)
	    unused++;
    }
    return unused;
}

void
free_all_register(void)
{
    int i;
    for(i=1;i<MAX_REGISTER+REAL_MAX_LREGISTER+1;i++) {
	regs[i]=0;
    }
    lreg = creg = ireg = 0;
    reg_var = 0;
    return;
}

/*
 * Return non-zero when the register nodes s and t share a machine
 * register.  Two REGISTER nodes overlap when their numbers are equal,
 * two LREGISTER nodes always overlap, and a REGISTER overlaps an
 * LREGISTER when it is %esi or %edi.  Any other node kind gives 0.
 */
extern int
code_register_overlap(int s,int t)
{
    int s_kind = car(s);
    int t_kind;

    if (s_kind!=REGISTER && s_kind!=LREGISTER)
	return 0;

    t_kind = car(t);
    if (s_kind==REGISTER) {
	if (t_kind==REGISTER)
	    return cadr(s)==cadr(t);
	if (t_kind==LREGISTER)
	    return cadr(s)==REG_ESI|| cadr(s)==REG_EDI;
	return 0;
    }

    /* s is an LREGISTER */
    if (t_kind==LREGISTER)
	return 1;
    if (t_kind==REGISTER)
	return cadr(t)==REG_ESI|| cadr(t)==REG_EDI;
    return 0;
}

/*
 * Debug aid: print one "##" line with the source line number, the tag
 * s, the current register (if any), the regs[] values of registers
 * 1..MAX_REGISTER and the floating point stack pointer.
 * NOTE(review): creg is passed to register_name() unchecked; a long
 * long pseudo register (REG_LCREG/REG_L) would index past reg_name[]
 * -- confirm callers never reach here in long long mode.
 */
void
register_usage(char *s)
{
    int i;
    printf("## %d: %s:",lineno,s);
    if (creg) printf(" creg=%s ",register_name(creg,0));
    for(i=1;i<MAX_REGISTER+1;i++) {
	printf("%d",regs[i]);
    }
/* disabled dump of the register stack (would not compile as written) */
#if 0
    printf(" regs_stack",register_name(creg,0);
    for(i=reg_sp;i>=0;i--) {
	if(reg_stack[i]>=0)
	    printf(" %s",register_name(reg_stack[i],0));
    }
#endif
    printf("## f:%d",freg_sp);
    printf("\n");
}

/*
 * Walk the argument list of fnptr and move arguments into input
 * registers where get_input_register_var() / get_input_dregister_var()
 * grant one.  A granted argument has its storage class and dsp
 * rewritten to the register and its size slot in the list updated.
 *
 * The local reg_var shadows the file-scope variable of the same name;
 * here it only counts integer inputs assigned so far.  `offset` is
 * accumulated but never read (its only use is commented out).
 */
void
code_arg_register(NMTBL *fnptr)
{
    int args = fnptr->dsp;
    NMTBL *n;
    int reg_var = 0;
    int freg_var = 0;
    int type;
    int reg;
    int offset = 0;
    int is_code0 = is_code(fnptr);

    while (args) {
        /* process in reverse order */
        n = (NMTBL*)caddr(args);
        type = n->ty;
	// n->dsp = offset;
// printf("###  %s %d %d\n",n->nm,n->dsp,n->ty);
        if (scalar(type)) {
            if ((reg = get_input_register_var(reg_var,n,is_code0))) {
                n->sc = REGISTER;
                n->dsp = cadr(reg);
                regs[n->dsp]= INPUT_REG;
                reg_var++;
                cadddr(args)=SIZE_OF_INT; /* why we need this? */
            }
	    offset+=SIZE_OF_INT;
        } else if (type==FLOAT||type==DOUBLE) {
            if ((reg = get_input_dregister_var(freg_var,n,is_code0,1))) {
                n->sc = DREGISTER;
                n->dsp = cadr(reg);
                fregs[n->dsp]= INPUT_REG;
                freg_var++;
                cadddr(args)=size(type); /* why we need this? */
            }
	    offset+=size(type);
        } else
	    offset+=size(type);
        args = cadr(args);
    }
}

/*
 * Prepare for generating a new expression: switch to text mode, report
 * error(-1) if either register stack is still non-empty, reset both
 * stacks and the stack depth, select the default current register and
 * print the register usage line.
 */
void 
gexpr_init(void)
{
    text_mode(0);
    if (reg_sp>0) error(-1);
    if (freg_sp>0) error(-1);
    reg_sp = 0;
    freg_sp = 0;
    stack_depth = 0;
    gexpr_code_init();
    regs[creg]=1;
    register_usage("gexpr_init");
}


/* Reset the register allocator: clear the usage of the
   REAL_MAX_REGISTER machine registers, run free_all_register(), and
   empty both register stacks. */
void 
emit_init(void)
{
    int i;
    for(i=1;i<REAL_MAX_REGISTER+1;i++) regs[i]=0; 
    free_all_register();
    reg_sp = 0;
    freg_sp = 0;
}

/* Pop the top entry of the register stack and return it.  No underflow
   check is made. */
int 
pop_register(void)
{
    reg_sp -= 1;
    return reg_stack[reg_sp];
}

/* Release a register obtained from emit_pop(), unless it is negative,
   is the current register, or is a register variable. */
void
emit_pop_free(int xreg)
{
    if (xreg<0)
	return;
    if (xreg==creg)
	return;
    if (regs[xreg]==REG_VAR)
	return;
    free_register(xreg);
}


/*
 * Allocate storage for an int sized register variable.
 * The first free register in REG_ESI..REG_EDI is marked REG_VAR and
 * returned as a REGISTER node carrying nptr; if none is free a new
 * local variable is returned instead.
 */
int
get_register_var(NMTBL *nptr)
{
    int r = REG_ESI;

    while (r<REG_EBP) {
	if (regs[r]==0) {
	    regs[r]=REG_VAR;
	    return list3(REGISTER,r,(int)nptr);
	}
	r++;
    }
    return list3(LVAR,new_lvar(SIZE_OF_INT),0);
}

/* Floating point register variables always live in memory: return a
   new local variable of double size when d is set, float size
   otherwise.  nptr is not used. */
int
get_dregister_var(NMTBL *nptr,int d)
{
    int sz = SIZE_OF_FLOAT;

    if (d)
	sz = SIZE_OF_DOUBLE;
    return list3(LVAR,new_lvar(sz),0);
}


/*
 * "Push" the current register: record it on the register stack and
 * continue with a freshly allocated register as creg/ireg.
 * Returns the register that was pushed.
 */
int 
emit_push()
{
    int new_reg,old;
    new_reg = get_register();       /* expected to always succeed */
    // who free new_reg?
    if (new_reg==creg) error(-1);
    old = creg;
    reg_stack[reg_sp++] = creg;     /* use a register instead of pushing */
    ireg = creg = new_reg;
    if (!regs[creg]) regs[creg]=USING_REG;
    return old;
}

/*
 * Pop the register stack and return a register holding the value.
 * An entry at or below -REG_LVAR_OFFSET denotes a value that
 * get_register()/get_data_register() saved into a local variable; it is
 * reloaded into a new register and the local variable is freed.
 * The `type` argument is not used.
 */
int
emit_pop(int type)
{
    int xreg,reg;
    xreg=pop_register();
    if (xreg<= -REG_LVAR_OFFSET) {
        reg = get_register();
        code_rlvar(REG_LVAR_OFFSET+xreg,reg);
        free_lvar(REG_LVAR_OFFSET+xreg);
        xreg = reg;
    }
    return xreg;
}

/* Emit the definition of local label "_<labelno>:".  Apple builds clear
   the pointer cache first. */
void 
code_label(int labelno)
{
#ifdef __APPLE__
    clear_ptr_cache();
#endif
    printf("_%d:\n",labelno);
}

/*
 * Load the address of a global variable node e1 (symbol in caddr(e1),
 * byte offset in cadr(e1)) into creg (-1 selects the current integer
 * register).  Apple builds take the symbol address from the pointer
 * cache register; other builds use an immediate symbol reference.
 */
void
code_gvar(int e1,int creg) {
    use_int(creg);
#ifdef __APPLE__
    int r = get_ptr_cache((NMTBL*)caddr(e1));
    if (cadr(e1)) {
	printf("\tleal %d(%s),%s\n", cadr(e1),register_name(r,0),
		register_name(creg,0));
    } else {
	printf("\tmovl %s,%s\n", register_name(r,0), register_name(creg,0));
    }
#else
    if (cadr(e1)) {
	printf("\tmovl $%s+%d,%s\n",((NMTBL*)caddr(e1))->nm,cadr(e1),
		register_name(creg,0));
    } else {
	printf("\tmovl $%s,%s\n",((NMTBL*)caddr(e1))->nm,register_name(creg,0));
    }
#endif

}

/*
 * Load the 32-bit value of a global variable node e1 (symbol in
 * caddr(e1), byte offset in cadr(e1)) into creg (-1 selects the current
 * integer register).  Apple builds go through the pointer cache
 * register; other builds address the symbol directly.
 */
void
code_rgvar(int e1,int creg) {
    use_int(creg);
#ifdef __APPLE__
    int r = get_ptr_cache((NMTBL*)caddr(e1));
    if (cadr(e1)) {
        printf("\tmovl %d(%s),%s\n", cadr(e1),register_name(r,0),
                register_name(creg,0));
    } else {
        printf("\tmovl (%s),%s\n", register_name(r,0), register_name(creg,0));
    }
#else
    if (cadr(e1)) {
	printf("\tmovl %s+%d,%s\n",((NMTBL*)caddr(e1))->nm,cadr(e1),
		register_name(creg,0));
    } else
	printf("\tmovl %s,%s\n",((NMTBL*)caddr(e1))->nm,register_name(creg,0));
#endif

}

/* Pick the load mnemonic for an operand of sz bytes: sign- or
   zero-extending byte/word loads for sizes 1 and SIZE_OF_SHORT, plain
   movl for everything else. */
static char *
cload(int sign,int sz) {
    if (sz==1)
	return sign?"movsbl":"movzbl";
    if (sz==SIZE_OF_SHORT)
	return sign?"movswl":"movzwl";
    return "movl";
}

/*
 * Load a global variable of sz bytes (node e1: symbol in caddr(e1),
 * byte offset in cadr(e1)) into creg, using the extending load chosen
 * by cload(sign,sz).  -1 selects the current integer register.
 */
void
code_crgvar(int e1,int creg,int sign,int sz){
    use_int(creg);
#ifdef __APPLE__
    int r = get_ptr_cache((NMTBL*)caddr(e1));
    if (cadr(e1)) {
        printf("\t%s %d(%s),%s\n", cload(sign,sz),cadr(e1),register_name(r,0),
                register_name(creg,0));
    } else {
        printf("\t%s (%s),%s\n", cload(sign,sz),
		register_name(r,0), register_name(creg,0));
    }
#else
    if (cadr(e1)) {
	printf("\t%s %s+%d,%s\n",cload(sign,sz),
		((NMTBL*)caddr(e1))->nm,cadr(e1),register_name(creg,0));
    } else
	printf("\t%s %s,%s\n",cload(sign,sz),
		((NMTBL*)caddr(e1))->nm,register_name(creg,0));
#endif

}


/* Load the address of local variable e2 into creg with lea
   (-1 selects the current integer register). */
void
code_lvar(int e2,int creg) {
    use_int(creg);
    printf("\tlea "); lvar(e2);
    printf(",%s\n",register_name(creg,0));
}


/* Copy register e2 into creg (-1 selects the current integer
   register); nothing is emitted when both are the same register. */
void
code_register(int e2,int creg) {
    use_int(creg);
    if (e2==creg)
	return;
    printf("\tmovl %s,%s\n",register_name(e2,0),register_name(creg,0));
}


/* Load the 32-bit value of local variable e2 into reg
   (-1 selects the current integer register). */
void
code_rlvar(int e2,int reg) {
    use_int(reg);
    printf("\tmovl "); lvar(e2);
    printf(",%s\n",register_name(reg,0));
}

/* Sign-extend the low byte of reg into the full register.  reg is
   first moved to a data register if necessary, since only those have
   byte names. */
extern void
code_i2c(int reg)
{
    char *op;

    use_data_reg(reg,1);
    op = cload(1,1);
    printf("\t%s %s,%s\n",op,
	register_name(reg,1),register_name(reg,0));
}

/* Sign-extend the low word of reg into the full register.  reg is
   first moved to a data register if necessary. */
extern void
code_i2s(int reg)
{
    char *op;

    use_data_reg(reg,1);
    op = cload(1,SIZE_OF_SHORT);
    printf("\t%s %s,%s\n",op,
	register_name(reg,2),register_name(reg,0));
}

/* Zero-extend the low byte of reg into the full register.  reg is
   first moved to a data register if necessary. */
extern void
code_u2uc(int reg)
{
    char *op;

    use_data_reg(reg,1);
    op = cload(0,1);
    printf("\t%s %s,%s\n",op,
	register_name(reg,1),register_name(reg,0));
}

/* Zero-extend the low word of reg into the full register.  reg is
   first moved to a data register if necessary. */
extern void
code_u2us(int reg)
{
    char *op;

    use_data_reg(reg,1);
    op = cload(0,SIZE_OF_SHORT);
    printf("\t%s %s,%s\n",op,
	register_name(reg,2),register_name(reg,0));
}

/* Load local variable e2 of sz bytes into reg with the extending load
   chosen by cload(sign,sz); -1 selects the current integer register. */
void
code_crlvar(int e2,int reg,int sign,int sz) {
    use_int(reg);
    printf("\t%s ",cload(sign,sz)); lvar(e2);
    printf(",%s\n",register_name(reg,0));

}


/*
 * Load the address of the symbol n into creg (-1 selects the current
 * integer register).  On Apple builds a static symbol is addressed
 * relative to %ebx and any other symbol comes from the pointer cache;
 * other builds use an immediate symbol reference.
 */
void
code_fname(NMTBL *n,int creg) {
    use_int(creg);
#ifdef __APPLE__
    if (n->sc==STATIC) {
	printf("\tleal _%s-_%d(%%ebx),%s\n", n->nm, goffset_label,
	    register_name(creg,0));
	return;
    }
    int r = get_ptr_cache(n);
    printf("\tmovl %s,%s\n", register_name(r,0), register_name(creg,0));
#else
    printf("\tmovl $%s,%s\n",n->nm,register_name(creg,0));
#endif
}

/* Load the address of local label "_<label>" into reg (-1 selects the
   current integer register); Apple builds address it relative to
   %ebx. */
void
code_label_value(int label,int reg) {
    use_int(reg);
#ifdef __APPLE__
    printf("\tleal _%d-_%d(%%ebx),%s\n",
	label,goffset_label,register_name(reg,0));
#else
    printf("\tleal _%d,%s\n",label,register_name(reg,0));
#endif
}

/* Load the integer constant e2 into creg (-1 selects the current
   integer register). */
void
code_const(int e2,int creg) {
    char *dst;

    use_int(creg);
    dst = register_name(creg,0);
    printf("\tmovl $%d,%s\n",e2,dst);
}

/* Arithmetically negate creg in place (-1 selects the current integer
   register). */
void
code_neg(int creg) {
    char *rn;

    use_int(creg);
    rn = register_name(creg,0);
    printf("\tnegl %s\n", rn);
}


/* Bitwise complement creg in place (-1 selects the current integer
   register). */
void
code_not(int creg) {
    char *rn;

    use_int(creg);
    rn = register_name(creg,0);
    printf("\tnotl %s\n", rn);
}


/*
 * Logical not: replace creg by 1 if it is zero and by 0 otherwise.
 * A data register is required because sete writes the byte register;
 * the result is then zero-extended to 32 bits.
 */
void
code_lnot(int creg) {
    char *xrn;

    use_data_reg(creg,1);
    xrn = register_name(creg,1);
    printf("\tcmpl $0,%s\n", register_name(creg,0));
    printf("\tsete %s\n", xrn);
    printf("\tmovzbl %s,%s\n", xrn,register_name(creg,0));
}

/*
 * Pre-increment/decrement: add dir to the operand e2 and, when the
 * global `use` is set, leave the updated value in reg.
 * A REGISTER operand is updated in place.  Otherwise g_expr(e2) is
 * evaluated first (presumably yielding the operand's address in creg
 * -- confirm), the add is done on memory with the width given by sz,
 * and the value is reloaded with cload(sign,sz).  e1 is not used.
 */
void
code_preinc(int e1,int e2,int dir,int sign,int sz,int reg) {
    char *xrn;
    if (car(e2)==REGISTER) {
	use_int(reg);
	printf("\taddl $%d,%s\n",dir,register_name(cadr(e2),0));
	if (use)
	    printf("\tmovl %s,%s\n",register_name(cadr(e2),0),register_name(reg,0));
	return;
    } 
    g_expr(e2);
    /* name of the register holding the result of g_expr(e2) */
    xrn = register_name(creg,0);
    use_int(reg);
    printf("\t%s $%d,(%s)\n",(sz==1)?"addb":(sz==SIZE_OF_SHORT)?"addw":"addl",dir,xrn);
    if (use)
	printf("\t%s (%s),%s\n",cload(sign,sz),xrn,register_name(reg,0));
}


/*
 * Post-increment/decrement: when the global `use` is set, leave the
 * old value of operand e2 in reg, then add dir to the operand.
 * A REGISTER operand is handled in place.  Otherwise the result of
 * g_expr(e2) (presumably the operand's address -- confirm) is moved to
 * a separate register via emit_push()/emit_pop() so that reg can
 * receive the old value; that register is released at the end.
 * e1 is not used.
 */
void
code_postinc(int e1,int e2,int dir,int sign,int sz,int reg) {
    char *xrn;
    if (car(e2)==REGISTER) {
	use_int(reg);
	if (use)
	    printf("\tmovl %s,%s\n",register_name(cadr(e2),0),register_name(reg,0));
	printf("\taddl $%d,%s\n",dir,register_name(cadr(e2),0));

	return;
    } 
    g_expr(e2);
    emit_push();  
    /* e2 is reused to remember the popped register */
    xrn = register_name((e2=emit_pop(0)),0);
    use_int(reg);
    if (use)
	printf("\t%s (%s),%s\n",cload(sign,sz),xrn,register_name(reg,0));
    printf("\t%s $%d,(%s)\n",(sz==1)?"addb":(sz==SIZE_OF_SHORT)?"addw":"addl",dir,xrn);
    emit_pop_free(e2);
}



/* Load the address of the label numbered retcont into creg
   (-1 selects the current integer register). */
void
code_return(int creg) {
    char *dst;

    use_int(creg);
    dst = register_name(creg,0);
    printf("\tleal _%d,%s\n",retcont,dst);
}


/* Copy the frame pointer %ebp into creg (-1 selects the current
   integer register). */
void
code_environment(int creg) {
    char *dst;

    use_int(creg);
    dst = register_name(creg,0);
    printf("\tmovl %%ebp,%s\n",dst);
}

static int rexpr_bool(int e1,int reg);
#if FLOAT_CODE
static int drexpr_bool(int e1,int reg);
#endif

/*
    Produce the 0/1 value of the boolean expression e1 in reg.
    Comparisons are first offered to rexpr_bool (and drexpr_bool when
    FLOAT_CODE is on); anything else is compiled as a branch with b_expr
    and the value is materialised only when `use' is set.
 */
void
code_bool(int e1,int reg) {
    char *rn;
    int true_label,end_label;

    if (rexpr_bool(e1,reg)) return;
#if FLOAT_CODE
    if (drexpr_bool(e1,reg)) return;
#endif
    true_label = fwdlabel();
    b_expr(e1,1,true_label,1);  /* including > < ... */
    if (!use) {
	fwddef(true_label);
	return;
    }
    use_int(reg);
    rn = register_name(reg,0);
    printf("\txorl %s,%s\n",rn,rn);
    end_label = fwdlabel();
    jmp(end_label);
    fwddef(true_label);
    printf("\tmovl $1,%s\n",rn);
    fwddef(end_label);
}

/* Condition-code suffix for signed "greater than"; "le" when cond is 0. */
static char *
code_gt(int cond)
{
    if (cond)
	return "g";
    return "le";
}

/* Condition-code suffix for unsigned "above"; "be" when cond is 0. */
static char *
code_ugt(int cond)
{
    if (cond)
	return "a";
    return "be";
}

/* Condition-code suffix for signed "greater or equal"; "l" when cond is 0. */
static char *
code_ge(int cond)
{
    if (cond)
	return "ge";
    return "l";
}

/* Condition-code suffix for unsigned "above or equal"; "b" when cond is 0. */
static char *
code_uge(int cond)
{
    if (cond)
	return "ae";
    return "b";
}

/* Condition-code suffix for "equal"; "ne" when cond is 0. */
static char *
code_eq(int cond)
{
    if (cond)
	return "e";
    return "ne";
}

/*
    Compare a byte- or short-sized global variable (e1) with 0 and
    branch to label through jcond.  No compare is emitted for any other
    sz; the conditional jump is emitted regardless.
 */
void
code_cmp_crgvar(int e1,int reg,int sz,int label,int cond) {
    char *cmpop = 0;
#ifdef __APPLE__
    int r;
#endif

    use_int(reg);
    if (sz==1) {
	cmpop = "cmpb";
    } else if (sz==SIZE_OF_SHORT) {
	cmpop = "cmpw";
    }
#ifdef __APPLE__
    /* the variable is addressed through a cached pointer register */
    r = get_ptr_cache((NMTBL*)caddr(e1));
    if (cmpop) {
	if (cadr(e1))
	    printf("\t%s $0,%d(%s)\n",cmpop,cadr(e1),register_name(r,0));
	else
	    printf("\t%s $0,(%s)\n",cmpop,register_name(r,0));
    }
#else
    if (cmpop) {
	if (cadr(e1))
	    printf("\t%s $0,%s+%d\n",cmpop,((NMTBL*)caddr(e1))->nm,cadr(e1));
	else
	    printf("\t%s $0,%s\n",cmpop,((NMTBL*)caddr(e1))->nm);
    }
#endif
    jcond(label,cond);
}


/*
    Compare a byte- or short-sized local variable (e1) with 0 and branch
    to label through jcond.  No compare is emitted for any other sz.
 */
void
code_cmp_crlvar(int e1,int reg,int sz,int label,int cond) {
    char *cmpop = 0;

    use_int(reg);
    if (sz==1) {
	cmpop = "cmpb";
    } else if (sz==SIZE_OF_SHORT) {
	cmpop = "cmpw";
    }
    if (cmpop) {
	printf("\t%s $0,",cmpop);
	lvar(e1);
	printf("\n");
    }
    jcond(label,cond);
}


/*
    Compare a long-sized global variable (e1) with 0 and branch to label
    through jcond.
 */
void
code_cmp_rgvar(int e1,int reg,int label,int cond) {
#ifdef __APPLE__
    int r;
    char *base;
#else
    char *sym;
#endif

    use_int(reg);
#ifdef __APPLE__
    /* the variable is addressed through a cached pointer register */
    r = get_ptr_cache((NMTBL*)caddr(e1));
    base = register_name(r,0);
    if (!cadr(e1))
	printf("\tcmpl $0,(%s)\n",base);
    else
	printf("\tcmpl $0,%d(%s)\n",cadr(e1),base);
#else
    sym = ((NMTBL*)caddr(e1))->nm;
    if (!cadr(e1))
	printf("\tcmpl $0,%s\n",sym);
    else
	printf("\tcmpl $0,%s+%d\n",sym,cadr(e1));
#endif
    jcond(label,cond);
}


/*
    Compare a long-sized local variable (e1) with 0 and branch to label
    through jcond.
 */
void
code_cmp_rlvar(int e1,int reg,int label,int cond)
{
    use_int(reg);
    printf("\tcmpl $0,");
    lvar(e1);
    printf("\n");
    jcond(label,cond);
}


/* Compare register e2 with 0 and branch to label through jcond. */
void
code_cmp_register(int e2,int label,int cond)
{
    char *rn;

    use_int(e2);
    rn = register_name(e2,0);
    printf("\tcmpl $0,%s\n",rn);
    jcond(label,cond);
}


/*
    Load the address of a string literal into creg.

    e1   - string node; cadr(e1) is the NMTBL entry holding the text
    creg - destination integer register

    The first time a given literal is seen its text is emitted under a
    fresh label and the label is remembered as the LABEL attribute of
    the entry; later references reuse that label through
    code_label_value().
 */
void
code_string(int e1,int creg)
{
    char *s;
    int lb;
    NMTBL *n = (NMTBL *)cadr(e1);
    if ((lb=attr_value(n,LABEL))) {
        // already defined
        // (a void function must not `return expr;', so call then return)
        code_label_value(lb,creg);
        return;
    }

    use_int(creg);
    s=n->nm;
    lb = emit_string_label();
    ascii(s);
    /* switch back to the text section after emitting the string data */
    if (output_mode==TEXT_EMIT_MODE) {
	printf(".text\n");
    } else {
	text_mode(0);
    }
#ifdef __APPLE__
    /* address is formed relative to goffset_label through %ebx */
    printf("\tleal _%d-_%d(%%ebx),%s\n",lb,
	    goffset_label,
	    register_name(creg,0));
#else
    printf("\tlea _%d,%s\n",lb,register_name(creg,0));
#endif
    set_attr(n,LABEL,lb);
}

#define MAX_COPY_LEN 20

/*  When ARG_ORDER==1, emit_copy must not use a library call.
*/

/*
    Emit code that copies `length' bytes from the address held in
    register `from' to the address held in register `to'.

    from, to - register numbers holding the source/destination address
    length   - byte count; a negative value selects the other copy
               direction (see the comment below)
    offset   - displacement applied to both addresses by the small
               fixed-size cases
    value    - when non-zero, creg is made to be `to' on exit
    det      - only passed through to the recursive calls here

    Sizes 1, 2 and 4 go through a temporary register.  Other sizes up
    to MAX_COPY_LEN are split into recursive 4/2/1 byte copies; larger
    ones use an inline `rep movsl' with %esi/%edi/%ecx saved around it.
 */
void 
emit_copy(int from,int  to,int length,int offset,int value,int det)
{
    int dreg;
    char *drn,*frn;
    char *trn;
    use_int(from);
    use_int(to);
    frn = register_name(from,0);
    trn = register_name(to,0);

    /* length <0 means upward direction copy */
    switch (length) {
    case 0:     break;
    case 1: case -1:
	/* byte move needs a register that has a byte part */
	drn = register_name(dreg = get_data_register(),1);
        printf("\tmovb %d(%s),%s\n",offset,frn,drn);
        printf("\tmovb %s,%d(%s)\n",drn,offset,trn);
	free_register(dreg);
        break;
    case 2: case -2:
	drn = register_name(dreg = get_data_register(),2);
        printf("\tmovw %d(%s),%s\n",offset,frn,drn);
        printf("\tmovw %s,%d(%s)\n",drn,offset,trn);
	free_register(dreg);
        break;
    case 4: case -4:
	drn = register_name(dreg = get_register(),0);
        printf("\tmovl %d(%s),%s\n",offset,frn,drn);
        printf("\tmovl %s,%d(%s)\n",drn,offset,trn);
	free_register(dreg);
        break;
    default:
        /* short copies: unroll into 4-, 2- and 1-byte moves */
        if (length <0) {
            if (length > -MAX_COPY_LEN) {
                for(;length<=-4;length+=4,offset-=4)
                    emit_copy(from,to,-4,offset-4,0,det);
                for(;length<=-2;length+=2,offset-=2)
                    emit_copy(from,to,-2,offset-2,0,det);
                if(length<0)
                    emit_copy(from,to,length,offset-1,0,det);
                break;
            }
        } else if (length <=MAX_COPY_LEN) {
            for(;length>=4;length-=4,offset+=4)
                emit_copy(from,to,4,offset,0,det);
            for(;length>=2;length-=2,offset+=2)
                emit_copy(from,to,2,offset,0,det);
            if(length>0)
                emit_copy(from,to,length,offset,0,det);
            break;
        }
	// should be parallel_rassign....
	// clear_ptr_cache();
	// code_save_stacks();

	/* long copies: save %esi/%edi/%ecx, then move from->%esi and
	   to->%edi through the stack */
	printf("\tpushl %%esi\n");
	printf("\tpushl %%edi\n");
	printf("\tpushl %%ecx\n");
	printf("\tpushl %s\n",register_name(from,0));
	printf("\tpushl %s\n",register_name(to,0));
	printf("\tpopl %%edi\n");
	printf("\tpopl %%esi\n");
	if (length<0) {
	    printf("\tmovl $%d,%%ecx\n",-length/4);
	    printf("\taddl $%d,%%esi\n",-length-4);
	    /* four words were pushed before `to' was saved; presumably the
	       4*4 adjustment undoes that when `to' is the stack pointer */
	    printf("\taddl $%d,%%edi\n",-length-4
		+(to==REG_ESP?4*4:0)
		);
	    printf("\tstd\n\trep\n\tmovsl\n");
	    printf("\tpopl %%ecx\n");
	    printf("\tpopl %%edi\n");
	    printf("\tpopl %%esi\n");
	    if(length%4) {
		/* NOTE(review): length/SIZE_OF_INT is a word count but is
		   added to a byte offset, and the rep movsl above ignores
		   `offset' -- confirm the tail lands where intended */
		offset = offset+length/SIZE_OF_INT;
		length=length%4;
		emit_copy(from,to,length,offset,0,det);
	    }
	} else {
	    printf("\tmovl $%d,%%ecx\n",length/4);
	    if (to==REG_ESP)
		printf("\taddl $%d,%%edi\n",4*4);
	    printf("\tcld\n\trep\n\tmovsl\n");
	    printf("\tpopl %%ecx\n");
	    printf("\tpopl %%edi\n");
	    printf("\tpopl %%esi\n");
	    if(length%4) {
		/* NOTE(review): same word-count-as-byte-offset concern as in
		   the negative-length branch above */
		offset = offset+length/SIZE_OF_INT;
		length=length%4;
		emit_copy(from,to,length,offset,0,det);
	    }
	}
    }
    if (value) {
    /* creg must point top of the destination data */
    /* this code is necessary for the value of assignment or function call */
    /* otherwise we don't need this */
	if(creg!=to) {
	    if (regs[creg]!=REG_VAR) free_register(creg); creg=to;
	}
    }
}

static int
push_struct(int e4,int t, int arg) 
{
    int length,dreg;
    g_expr(e4);
    length=size(t); 
    if(length%SIZE_OF_INT) {
	length += SIZE_OF_INT - (length%SIZE_OF_INT);
    }
    emit_push();
    code_lvar(cadr(arg),USE_CREG);
    dreg = emit_pop(0);

    // copy dreg to creg with length
    // try small amount copy
    /* downward direction copy */
    emit_copy(dreg,creg,length,0,0,1);
    emit_pop_free(dreg);
    /* we have value in creg, it may be changed */
    return length/SIZE_OF_INT;
}

/* Return 1 when contains_p finds no not_simple_p node in e3, else 0. */
static int
simple_arg(int e3)
{
    if (contains_p(e3,not_simple_p))
	return 0;
    return 1;
}

#define caller_arg_offset_v(arg) (ARG_LVAR_OFFSET+(arg)*SIZE_OF_INT)

/*
    Early implementations used pushl to pass function-call arguments.
    gcc uses the same argument evaluation order. Of course, the
    order is unspecified in the C language, but it is better to
    use the same argument evaluation order, especially for test
    programs.
 */
#define ARG_ORDER 1
#if (ARG_ORDER==1)

static int delayed_arg;

#endif

/*
    Arrange for the complex argument e3 to be evaluated into `arg'.
    arg is recorded on reg_arg_list, and the argument expression in e3
    is replaced by arg.  With ARG_ORDER==1 the assignment is queued on
    delayed_arg instead of being generated immediately.
    Returns the updated reg_arg_list.
 */
static int
compute_complex_arg(int e3,int reg_arg_list,int arg) {
    int type=caddr(e3);
    int expr = car(e3);
    int assign;

    reg_arg_list = list2(arg,reg_arg_list);
    assign = assign_expr0(arg,expr,type,type);
#if ARG_ORDER==1
    delayed_arg = list2(assign,delayed_arg);
#else
    g_expr_u(assign);
#endif
    car(e3) = arg;
    return reg_arg_list;
}


/*
    Advance the argument counters past the argument e3, according to
    its type caddr(e3).  *pnargs counts stack words; *preg_arg and
    *pfreg_arg are advanced as shown per type below.  An unknown type
    reports TYERR and is counted as one word.
 */
static void
increment_function_arg(int e3,int *pnargs,int *preg_arg,int *pfreg_arg) {
    int type=caddr(e3);

    if(scalar(type)) {
	*pnargs += 1;
	*preg_arg += 1;
	*pfreg_arg += 1;
	return;
    }
    if (type==LONGLONG||type==ULONGLONG||type==DOUBLE) {
	/* two words */
	*pnargs += 2;
	*preg_arg += 2;
	return;
    }
    if (type==FLOAT) {
	*pnargs += size(type)/SIZE_OF_INT;
	*preg_arg += 1;
	*pfreg_arg += 1;
	return;
    }
    if (type>=0&&(car(type)==STRUCT||car(type)==UNION)) {
	*pnargs += round4(size(type))/SIZE_OF_INT;
	return;
    }
    error(TYERR);
    *pnargs += 1;
}

#define AS_SAVE 1
#define AS_ARG  0

/*
    Build the LVAR node in which an argument of type t is placed.

    mode==AS_SAVE : a fresh local variable of size(t)
    otherwise     : the caller argument slot for word index nargs

    A type that is not scalar, long long, float, double, struct or
    union reports error(-1) and gets a fresh local variable.
    reg_arg and freg_arg are not used here.
 */
static int
get_input_arg(int t,int mode,int nargs,int reg_arg,int freg_arg) 
{
    int known;

    known = scalar(t)
	|| t==LONGLONG || t==ULONGLONG
	|| t==FLOAT
	|| t==DOUBLE
	|| (t>=0&&(car(t)==STRUCT||car(t)==UNION));
    if (!known) {
	error(-1);
	return list3(LVAR,new_lvar(size(t)),0);
    }
    if (mode==AS_SAVE)
	return list3(LVAR,new_lvar(size(t)),0);
    return list3(LVAR,caller_arg_offset_v(nargs),0);
}

/*
    Emit the call instruction: a direct call to fn when e2 is an FNAME
    node, otherwise an indirect call through REG_EAX.  jmp is not used.
 */
static void
code_call(int e2,NMTBL *fn,int jmp)
{
    if (car(e2) != FNAME) {
        printf("\tcall\t*%s\n",register_name(REG_EAX,0));
        return;
    }
#ifdef __APPLE__
    printf("\tcall\tL_%s$stub\n",fn->nm);
#else
    printf("\tcall\t%s\n",fn->nm);
#endif
}

/*
    Generate code for the function call node e1.

    cadr(e1) is the callee, caddr(e1) the argument list and cadddr(e1)
    the function type.  Arguments are written into caller argument
    slots (LVAR nodes from get_input_arg); the callee is then called
    directly by name or indirectly through REG_EAX.

    Steps, in order:
      1. complex (non-simple) arguments are arranged to be computed
         first; with ARG_ORDER==1 their assignments are queued on
         delayed_arg and generated after the scan;
      2. struct/union arguments are copied with push_struct;
      3. the remaining arguments are assigned to their slots;
      4. the call is emitted and temporaries on reg_arg_list are freed.

    Returns the function's return type.
 */
int
function(int e1)
{

    int e2,e3,e4,e5,nargs,t;
    int arg,reg_arg,freg_arg,arg_assign;
    int dots;
    int reg_arg_list=0,ret_type,special_lvar;
    NMTBL *fn = 0;
    int jmp = 0;
    int complex_;
    int pnargs,preg_arg,pfreg_arg;
    int stargs;
    int half_register = 0;
#if (ARG_ORDER==1)
    /* delayed_arg is file-global; save it so nested calls do not mix */
    int save_delayed_arg = delayed_arg;
    int as_save = AS_ARG;  // 1st pushed argment will evaluate at the last
    delayed_arg = 0;
#else
    const int as_save = AS_SAVE;
#endif

    special_lvar = -1;
    ret_type = function_type(cadddr(e1),&dots);
    if (caddr(cadddr(e1))==0) dots=1;

    arg_assign = 0;
    e2 = cadr(e1);
    if (car(e2) == FNAME) {     
        fn=(NMTBL *)cadr(e2);
    } else {    
        /* indirect call: the callee address is loaded into REG_EAX
           just before the call (see arg_assign below) */
        if (car(e2)==INDIRECT) e2=cadr(e2); // (*func)(i) case
        jmp = list3(REGISTER,REG_EAX,0);

        if (!simple_arg(e2)) {
            e3=get_register_var(0);
            reg_arg_list = list2(e3,reg_arg_list);
            g_expr_u(assign_expr0(e3,e2,INT,INT));
            e2=e3;
        }
        arg_assign = list2(assign_expr0(jmp,e2,INT,INT),arg_assign);
    }
    /* First we execute complex argument to avoid interaction with
       input variables. Remain the last complex argument in complex_. */
    stargs = 0;
    complex_ = 0;
    nargs = reg_arg = freg_arg = 0;
    pnargs = preg_arg = pfreg_arg = 0;
    for (e3 = e1 = reverse0(caddr(e1)); e3; e3 = cadr(e3)) {    
        t=caddr(e3);
        /* half_register is only set here; it is not read in this function */
        if (reg_arg==3 && (t==DOUBLE||t==LONGLONG||t==ULONGLONG)) {
            half_register=1;
        }
        if ((e5= !simple_arg(car(e3)))) {
            if (complex_) {
                arg = get_input_arg(caddr(complex_),as_save,
                                        pnargs,preg_arg,pfreg_arg);
#if ARG_ORDER==1
		as_save = AS_SAVE;
#endif
                reg_arg_list = compute_complex_arg(complex_,reg_arg_list,arg);
            }
            // memorise last complex arg parameter
            pnargs=nargs;preg_arg=reg_arg;pfreg_arg=freg_arg;
            complex_ = e3;
        }
        if (t>=0&&(car(t)==STRUCT||car(t)==UNION)) {
            // The struct should be pushed after complex arguments.
            if (e5) { // compute address only, complex_ is me now. Clear it.
                complex_ = 0;
                e4 = car(e3);
                if (car(e4)==RSTRUCT) e4 = cadr(e4);
                if (!simple_arg(e4)) {
                    // Calculate complex struct address here.
                    // If simple, leave it.
                    arg = get_register_var(0);
#if ARG_ORDER==1
		    delayed_arg = list2(
			assign_expr0(arg,e4,INT,INT),
			delayed_arg);
#else
                    g_expr_u(assign_expr0(arg,e4,INT,INT));
#endif
                    car(e3)=arg;
                    reg_arg_list = list2(arg,reg_arg_list);

                    /* overrides the assignment to car(e3) made just above */
                    car(e3) = rvalue_t(arg,INT);
                }
            }
            /* remember the struct argument with its slot position */
            stargs = list4(e3,stargs,nargs,reg_arg);
        }
        increment_function_arg(e3,&nargs,&reg_arg,&freg_arg);
    }
#if (ARG_ORDER==1)
    if (complex_) {
	arg = get_input_arg(caddr(complex_),as_save,
				pnargs,preg_arg,pfreg_arg);
	reg_arg_list = compute_complex_arg(complex_,reg_arg_list,arg);
    }
    /* now generate the queued assignments */
    for(;delayed_arg;delayed_arg = cadr(delayed_arg)) {
	g_expr_u(car(delayed_arg));
    }
#endif

    /* now all input register vars are free */
    code_save_stacks();

    // set_lreg(LREG_LREGISTER,0);
    set_freg(FREG_FREGISTER,0);
    set_ireg(CREG_REGISTER,0);

    //  Struct arguments need emit_copy. it destructs 3 input registers.
    //  But it returns no value on a register. So calculate it here.
    //  We cannot do this in the previous loop, because the copied struct may be
    //  override by other complex arguments. But before this we have to check
    //  complex_.

    //  ARG_ORDER==1 case put the last value on the top of stack.
    //  emit_copy/push_struct must preserve argument stack, i.e.
    //  no library call is allowed.

    if (stargs) {
#if (ARG_ORDER!=1)
        if (complex_) {
            arg = get_input_arg(caddr(complex_),AS_SAVE,
                                    pnargs,preg_arg,pfreg_arg);
            reg_arg_list = compute_complex_arg(complex_,reg_arg_list,arg);
        }
#endif
        for(stargs=reverse0(stargs);stargs;stargs = cadr(stargs)) {
            e3 = car(stargs);
            e4 = car(e3);
            t  = caddr(e3);
            arg = get_input_arg(t,AS_ARG,caddr(stargs),cadddr(stargs),0);
            push_struct(e4,t,arg);
            car(e3)=0;  // done


        }
#if (ARG_ORDER!=1)
    } else {
        //  last complex argument can use input register
        if (complex_) {
            arg = get_input_arg(caddr(complex_),AS_ARG,pnargs,preg_arg,pfreg_arg)
;
            reg_arg_list = compute_complex_arg(complex_,reg_arg_list,arg);

            car(complex_) = 0; // done.


        }
#endif
    }

    nargs = reg_arg = freg_arg = 0;
    // calc stack arguments first, it may requires extra registers,
    // and we can still use input registers now.
    for (e3 = e1; e3; 
                increment_function_arg(e3,&nargs,&reg_arg,&freg_arg),
                e3 = cadr(e3)) {        
        /* arguments already handled were marked by car(e3)=0 */
        if (!(e4=car(e3))) continue;
        t=caddr(e3);
        arg = get_input_arg(t,AS_ARG,nargs,reg_arg,freg_arg);
        if (car(arg)!=LVAR) continue;
        g_expr_u(assign_expr0(arg,e4,t,t));
        car(e3)=0;  // done
    }
    /* track the largest argument area used by any call */
    if (max_func_args<nargs) max_func_args=nargs;
    /* assignments queued for just before the call (indirect callee) */
    for(;arg_assign;arg_assign=cadr(arg_assign)) {
        g_expr_u(car(arg_assign));
    }
    clear_ptr_cache();
    code_call(e2,fn,jmp);
    /* release the temporaries allocated for argument evaluation */
    for(;reg_arg_list;reg_arg_list=cadr(reg_arg_list)) {
        arg = car(reg_arg_list);
        if (car(arg)==REGISTER||car(arg)==DREGISTER||car(arg)==FREGISTER
                ||car(arg)==LREGISTER) 
            free_register(cadr(arg));
        else if (car(arg)==LVAR&&cadr(arg)<0) free_lvar(cadr(arg));
    }
    /* select the current register according to the return type */
    if (ret_type==DOUBLE||ret_type==FLOAT) {
    } else if (ret_type==LONGLONG||ret_type==ULONGLONG) {
        use_longlong0(USE_CREG);
    } else if (ret_type==VOID) {
    } else {
        if (use)
            set_ireg(RET_REGISTER,0);
        else
            set_ireg(CREG_REGISTER,0);
    }
#if (ARG_ORDER==1)
    delayed_arg = save_delayed_arg;
#endif
    return ret_type;
}

/*
    alloca: subtract the size e1, rounded up to a multiple of 16, from
    %esp, then set reg to %esp plus the value of the label
    max_func_arg_label (created on first use).
 */
void
code_alloca(int e1,int reg)
{
    char *rn;
  
    if (is_const(e1)) {
	use_int(reg);
	code_const(round16(cadr(e1)),reg);
    } else {
	/* (e1 + 15) & ~15 computed at run time */
	g_expr(list3(BAND,list3(ADD,e1,list2(CONST,15)),list2(CONST,~15))); 
	use_int(reg);
    }
    rn = register_name(reg,0);
    printf("\tsubl\t%s, %%esp\n",rn);
    if (max_func_arg_label==0) {
	max_func_arg_label = fwdlabel();
    }
    printf("\tmovl $%s%d,%s\n",lpfx,max_func_arg_label ,rn);
    printf("\tadd\t%%esp, %s\n",rn);
}

/* Copy register e3 into the frame pointer %ebp. */
void
code_frame_pointer(int e3)
{
    char *src;

    use_int(e3);
    src = register_name(e3,0);
    printf("\tmovl %s,%%ebp\n",src);
}

/* Return a REGISTER node that names REG_fp. */
int
code_frame_pointer_register()
{
    int node;

    node = list2(REGISTER,REG_fp);
    return node;
}

/* Nothing is generated for ia32; this function must stay empty. */
void
code_fix_frame_pointer(int disp_offset)
{
    (void)disp_offset;
}

/* Emit an unconditional jump to the symbol s (a stub on __APPLE__). */
void
code_jmp(char *s)
{
#ifndef __APPLE__
    printf("\tjmp %s\n",s);
#else
    printf("\tjmp\tL_%s$stub\n",s);
#endif
}


/* Emit an indirect jump through register e2. */
void
code_indirect_jmp(int e2)
{
    char *target;

    use_int(e2);
    target = register_name(e2,0);
    printf("\tjmp *%s\n",target);
}

/*
    Load through the pointer computed by e1: evaluate e1 into creg, then
    load from offset(creg) into reg with the instruction that cload
    selects for sign and byte.
 */
void
code_rindirect(int e1, int reg,int offset, int sign,int byte)
{
    char *base;
    char *load;

    g_expr(e1);
    load = cload(sign,byte);
    /* creg is captured before use_int(reg) runs */
    base = register_name(creg,0);
    use_int(reg);
    printf("\t%s %d(%s),%s\n",load,offset,base,register_name(reg,0));
}

#if FLOAT_CODE
/*
    Floating load through the pointer computed by e1, using the
    instruction that fload selects for d.  Always returns DOUBLE; reg is
    not used.
 */
int
code_drindirect(int e1, int reg,int offset, int d)
{
    char *base;

    g_expr(e1);
    base = register_name(creg,0);
    printf("\t%s %d(%s)\n",fload(d),offset,base);
    return DOUBLE;
}
#endif

#if LONGLONG_CODE

/*
    Load the two words at offset(creg) into the long long register
    pair reg.  The word at offset goes to l_eax(reg), the next word to
    l_edx(reg).  The second word is loaded first when creg is REG_EAX,
    or when reg is REG_L and creg is REG_ESI.
 */
static void
lload(int creg,int offset,int reg)
{
    char *base = register_name(creg,0);
    int high_first;

    use_longlong(reg);
    high_first = (reg==REG_L&&creg==REG_ESI)||(creg==REG_EAX);
    if (!high_first) {
	printf("\tmovl %d(%s),%s\n",offset,base,l_eax(reg));
	printf("\tmovl %d(%s),%s\n",offset+SIZE_OF_INT,base,l_edx(reg));
	return;
    }
    printf("\tmovl %d(%s),%s\n",offset+SIZE_OF_INT,base,l_edx(reg));
    printf("\tmovl %d(%s),%s\n",offset,base,l_eax(reg));
}

/*
    Long long load through the pointer computed by e1, at the given
    offset, into reg.  Always returns LONGLONG; us is not used.
 */
int
code_lrindirect(int e1, int reg, int offset, int us)
{
    int base;

    g_expr(e1);
    /* remember the address register before use_longlong(reg) runs */
    base = creg;
    use_longlong(reg);
    lload(base,offset,reg);
    return LONGLONG;
}
#endif

/* Return the mov mnemonic for an operand of `byte' bytes (movl by default). */
char *
move(int byte)
{
    if (byte==1)
	return "movb";
    if (byte==SIZE_OF_SHORT)
	return "movw";
    return "movl";
}

/*
    Store creg into the global variable e2; byte selects the operand
    size (0 means a long store).
 */
void
code_assign_gvar(int e2,int creg,int byte) {
    char *mv;
    char *src;
#ifdef __APPLE__
    int r;
#endif

    if (byte) {
	use_data_reg(creg,1);
    } else {
	use_int(creg);
    }
#ifdef __APPLE__
    /* the variable is addressed through a cached pointer register */
    r = get_ptr_cache((NMTBL*)caddr(e2));
    mv = move(byte);
    src = register_name(creg,byte);
    if (!cadr(e2))
	printf("\t%s %s,(%s)\n",mv,src,register_name(r,0));
    else
	printf("\t%s %s,%d(%s)\n",mv,src,cadr(e2),register_name(r,0));
#else
    mv = move(byte);
    src = register_name(creg,byte);
    if (!cadr(e2))
	printf("\t%s %s,%s\n",mv,src,((NMTBL*)caddr(e2))->nm);
    else
	printf("\t%s %s,%s+%d\n",mv,src,((NMTBL*)caddr(e2))->nm,cadr(e2));
#endif
}

/*
    Store creg into the local variable e2; byte selects the operand
    size (0 means a long store).
 */
void
code_assign_lvar(int e2,int creg,int byte) {
    char *mv;
    char *src;

    if (byte) {
	use_data_reg(creg,1);
    } else {
	use_int(creg);
    }
    mv = move(byte);
    src = register_name(creg,byte);
    printf("\t%s %s,",mv,src);
    lvar(e2);
    printf("\n");
}

/* Copy creg into register e2; nothing is emitted when they are the same. */
void
code_assign_register(int e2,int byte,int creg) {
    use_int(creg);
    if (creg==e2)
	return;
    printf("\tmovl %s,%s\n",register_name(creg,0),register_name(e2,0));
}

/*
    Store creg to the address held in register e2; byte selects the
    operand size (0 means a long store).
 */
void
code_assign(int e2,int byte,int creg) {
    char *mv;
    char *src;
    char *addr;

    use_int(e2);
    if (byte) {
	use_data_reg(creg,1);
    } else {
	use_int(creg);
    }
    mv = move(byte);
    src = register_name(creg,byte);
    addr = register_name(e2,0);
    printf("\t%s %s,(%s)\n",mv,src,addr);
}

/*
    Assignment operator on a register operand: reg <= reg(e2) op= reg.
    The work is done by tosop; byte is not used.
 */
void
code_register_assop(int e2,int reg, int op,int byte)
{
    use_int(reg);
    tosop(op,e2,reg);
}

/*
    Assignment operator through a pointer:  (*pop()) op= creg.

    op   - operator, applied by tosop
    creg - register used as the address for the load done by ld_indexx
    byte - operand size handed to ld_indexx and move (0 means long)
    sign - signedness handed to ld_indexx

    The first value popped is the operand; the address is kept on the
    emit stack across the operation and popped again for the store.
 */
void
code_assop(int op,int creg,int byte,int sign) {
    int xreg;
    //  (*pop()) op = creg
    //     creg should be ecx

    use_int(creg);
    xreg = emit_pop(0);       /* pop e3 value */
    emit_push();
    /* load the current value into ireg, then combine it with xreg */
    ld_indexx(byte,0,creg,ireg,sign);
    tosop(op,ireg,xreg);
    emit_pop_free(xreg);
    if (byte) {
	/* a sub-word store needs a register with a byte part */
	use_data_reg(ireg,1);
    }
    xreg = emit_pop(0);       /* pop e3 value */
    /* store the result back through the popped address */
    printf("\t%s %s,(%s)\n",move(byte),register_name(ireg,byte),register_name(xreg,0));
    emit_pop_free(xreg);
}

/*
    Return 1 for ADD, SUB, CMP, BAND, EOR, BOR, MUL and UMUL, and 0 for
    every other operator.
 */
int
tosop_operand_safe_p(int op)
{
    if (op==ADD || op==SUB || op==CMP)
	return 1;
    if (op==BAND || op==EOR || op==BOR)
	return 1;
    if (op==MUL || op==UMUL)
	return 1;
    return 0;
}

/*
    Binary operation on registers:  reg = reg op oreg.

    op   - operator (shifts, ADD, SUB, CMP, BAND, EOR, BOR, MUL/UMUL,
           DIV/UDIV, MOD/UMOD)
    reg  - left operand and destination register
    oreg - right operand register; a value <= -REG_LVAR_OFFSET names a
           local variable, which is loaded into a temporary first;
           -1 is reported as an error

    Shifts are delegated to shift().  Division and modulo are done with
    idivl/divl, whose result is taken from REG_EAX or REG_EDX.
 */
void
tosop(int op,int reg,int oreg)
{
    int ox=0;
    char *orn,*crn;
    // creg = creg op oreg

    use_int(reg);

    if(oreg==-1) {
        error(-1);
    } else if (oreg<= -REG_LVAR_OFFSET) {
        /* operand lives in a local variable: reload it into a register */
        ox = get_register(); if (ox<0) error(-1);
        code_rlvar(oreg+REG_LVAR_OFFSET,ox);
        free_lvar(oreg+REG_LVAR_OFFSET);
        oreg = ox;
    }

    switch(op) {
    case LSHIFT:
    case ULSHIFT:
	shift("sall",oreg,reg);
        if(ox) free_register(ox);
	return;
    case RSHIFT:
	shift("sarl",oreg,reg);
        if(ox) free_register(ox);
	return;
    case URSHIFT:
	shift("shrl",oreg,reg);
        if(ox) free_register(ox);
	return;
    }
    // regs[oreg]=1;
    orn = register_name(oreg,0);
    crn = register_name(reg,0);
    switch(op) {
    case ADD:
	printf("\taddl %s,%s\n",orn,crn);
	break;
    case SUB: 
	printf("\tsubl %s,%s\n",orn,crn);
	break;
    case CMP:
	printf("\tcmpl %s,%s\n",orn,crn);
	break;
    case BAND: 
	printf("\tandl %s,%s\n",orn,crn);
	break;
    case EOR: 
	printf("\txorl %s,%s\n",orn,crn);
	break;
    case BOR:
	printf("\torl %s,%s\n",orn,crn);
	break;
    case MUL:
    case UMUL:
	/* imull is used for both signed and unsigned multiply */
	printf("\t%s %s,%s\n","imull",orn,crn);
	break;
    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
#ifdef __APPLE__
	/* %edx is written below; drop any pointer cached in it */
	if (regs[REG_EDX]==PTRC_REG)
	    clear_ptr_cache_reg(REG_EDX);
#endif
	/* the dividend is moved to REG_EAX; the divisor must not be in
	   REG_EAX or REG_EDX */
	use_register(reg,REG_EAX,1);
	if (oreg==REG_EAX) oreg=reg;
	if (oreg==REG_EDX) {
	    use_register(oreg,REG_ECX,1);
	    oreg = REG_ECX;
	}
	orn = register_name(oreg,0);
	/* signed: sign-extend with cltd; unsigned: clear %edx */
	printf((op==DIV||op==MOD)?
	    "\tcltd\n\tidivl %s\n":
	    "\txor %%edx,%%edx\n\tdivl %s\n",orn);
	/* modulo result is taken from REG_EDX, quotient from REG_EAX */
	set_ireg((op==MOD||op==UMOD)?REG_EDX:REG_EAX,0);
	set_ireg(reg,1);
	break;
    }
    /* release the temporary unless it has become the current ireg */
    if(ox && ox!=ireg) free_register(ox);
}

/*
    Decide whether `op' with the operand e can use the constant-operand
    form.  e must be a CONST node.  DIV/UDIV return ilog() of the
    constant, MOD/UMOD return 0 and every other operator returns 1.
 */
int
code_const_op_p(int op,int e)
{
    if (car(e)!=CONST) return 0;
    switch(op) {
    case DIV:
    case UDIV:
	return ilog(cadr(e));
    case MOD:
    case UMOD:
	return 0;
    default:
	return 1;
    }
}

/*
    Binary operation with a constant right operand:  reg = reg op const.

    op  - operator
    reg - left operand and destination register
    orn - CONST node; its value cadr(orn) is the immediate operand

    DIV/UDIV are emitted as shifts by ilog(constant).  BAND and BOR use
    the byte or word part of a data register when the constant allows
    it.  CMP emits cmpl, so it only sets the flags and leaves reg
    intact, as the CMP case of tosop() does.  Unsupported operators
    report error(-1).
 */
void
oprtc(int op,int reg,int orn)
{
    char *crn;
    int datareg;
    use_int(reg);
    crn = register_name(reg,0);
    orn = cadr(orn);	/* from here on orn is the constant value */
    datareg=is_data_reg(reg);

    switch(op) {
    case LSHIFT:
    case ULSHIFT:
	printf("\tsall $%d,%s\n",orn,crn);
	return;
    case DIV:
	/* NOTE(review): sarl rounds toward minus infinity while C
	   division truncates toward zero, so a negative dividend can
	   give a different quotient -- confirm callers exclude that */
	orn = ilog(orn);
	/* fallthrough */
    case RSHIFT:
	printf("\tsarl $%d,%s\n",orn,crn);
	return;
    case UDIV:
	orn = ilog(orn);
	/* fallthrough */
    case URSHIFT:
	printf("\tshrl $%d,%s\n",orn,crn);
	return;
    case ADD:
	printf("\taddl $%d,%s\n",orn,crn);
	break;
    case SUB:
	printf("\tsubl $%d,%s\n",orn,crn);
	break;
    case CMP:
	/* same flags as subl, but the left operand is not overwritten */
	printf("\tcmpl $%d,%s\n",orn,crn);
	break;
    case BAND: 
	/* all upper bits set in the mask: only the low part can change */
	if (datareg&&(orn & ~255)==~255)
	    printf("\tandb $%d,%s\n",orn,register_name(reg,1));
	else if (datareg&&(orn & ~65535)==~65535)
	    printf("\tandw $%d,%s\n",orn,register_name(reg,2));
	else
	    printf("\tandl $%d,%s\n",orn,crn);
	break;
    case EOR: 
	printf("\txorl $%d,%s\n",orn,crn);
	break;
    case BOR:
	/* no upper bits set in the mask: only the low part can change */
	if (datareg&&(orn & ~255)==0)
	    printf("\tor $%d,%s\n",orn,register_name(reg,1));
	else if (datareg&&(orn & ~65535)==0)
	    printf("\tor $%d,%s\n",orn,register_name(reg,2));
	else
	    printf("\torl $%d,%s\n",orn,crn);
	break;
    case MUL:
    case UMUL:
	/* a non-zero ilog() is used as the shift count instead of imull */
	if (ilog(orn)) {
	    printf("\tsall $%d,%s\n",ilog(orn),crn);
	} else
	    printf("\t%s $%d,%s\n","imull",orn,crn);
	break;
    default:
	error(-1);
    }
}


/*
    Emit a variable shift:  reg = reg <op> oreg.

    op   - shift mnemonic ("sall", "sarl" or "shrl" in the callers here)
    oreg - register holding the shift count
    reg  - register holding the value to be shifted

    The count is requested in REG_ECX because the instruction takes it
    from %cl.
 */
void
shift(char *op, int oreg,int reg)
{
    int dreg;
    use_register(oreg,REG_ECX,1);
    /* NOTE(review): when reg is REG_ECX the instruction operates on oreg
       instead -- presumably use_register() exchanged the two; confirm */
    dreg = (reg==REG_ECX)?oreg:reg;
    printf("\t%s %%cl,%s\n",op,register_name(dreg,0));
    set_ireg(dreg,0);
    set_ireg(reg,1);
}

/*
    Load from n(xreg) into reg using the instruction that cload selects
    for sign and byte.  The displacement is omitted when n is 0.
 */
void
ld_indexx(int byte, int n, int xreg,int reg,int sign)
{	
    char *load;
    char *base;
    char *dst;

    if (byte) {
	use_data_reg(reg,1);
    } else {
	use_int(reg);
    }
    load = cload(sign,byte);
    base = register_name(xreg,0);
    dst = register_name(reg,0);
    if (!n) 
	printf("\t%s (%s),%s\n",load,base,dst);
    else
	printf("\t%s %d(%s),%s\n",load,n,base,dst);
}

/* Return a REGISTER node (built with glist2) for creg, the switch value. */
int
code_csvalue()
{
    int node;

    node = glist2(REGISTER,creg); /* for switch value */
    return node;
}

/*
    Compare the switch value register csreg with the constant e and
    branch to label through jcond.  Used by switch statements.
 */
void
code_cmpdimm(int e, int csreg,int label,int cond)
{
    char *rn;

    set_ireg(csreg,0);
    rn = register_name(creg,0);
    printf("\tcmpl $%d,%s\n",e,rn);
    jcond(label,cond);
}

/* Emit the assembler file header: .file and .version directives. */
void
code_opening(char *filename)
{
    printf("\t.file \"%s\"\n",filename);
    fputs("\t.version\t\"01.01\"\n",stdout);
}

/* Finish the assembler file: call global_table(), then emit .ident. */
void
code_closing()
{
    global_table();
    fputs("\t.ident \"Micro-C compiled\"\n",stdout);
}

/*
    Map a comparison operator and a truth value to a condition-code
    suffix.  LT, ULT, LE, ULE and NEQ are expressed as the negation of
    GE, UGE, GT, UGT and EQ.  Returns 0 when op is not a comparison.
 */
static char *
code_cond(int op,int cond)
{
    switch(op) {
    case LE:  cond = !cond; /* fallthrough */
    case GT:  return code_gt(cond);
    case ULE: cond = !cond; /* fallthrough */
    case UGT: return code_ugt(cond);
    case LT:  cond = !cond; /* fallthrough */
    case GE:  return code_ge(cond);
    case ULT: cond = !cond; /* fallthrough */
    case UGE: return code_uge(cond);
    case NEQ: cond = !cond; /* fallthrough */
    case EQ:  return code_eq(cond);
    default:  break;
    }
    return 0;
}

/*
    Compute the 0/1 value of the comparison e1 into reg with
    set<cc>/movzbl.  Returns 0 without generating anything when e1 is
    not a comparison known to code_cond, 1 otherwise.
 */
static int
rexpr_bool(int e1,int reg)
{
    char *cc;
    char *byte_rn;

    cc = code_cond(car(e1),1);
    if (!cc) return 0;
    g_expr(list3(CMP,cadr(e1),caddr(e1)));
    use_data_reg(reg,1);
    byte_rn = register_name(reg,1);
    printf("\tset%s\t%s\n",cc,byte_rn);
    printf("\tmovzbl %s,%s\n",byte_rn,register_name(reg,0));
    return 1;
}

/*
    Compile the comparison e1 as compare-and-branch: jump to the label
    l1 when the comparison has the truth value cond.  Returns l1; t is
    not used.
 */
int
rexpr(int e1, int l1, int cond,int t)
{
    char *cc;

    g_expr(list3(CMP,cadr(e1),caddr(e1)));
    cc = code_cond(car(e1),cond);
    printf("\tj%s\t_%d\n",cc,l1);
    return l1;
}


/*
    Emit a conditional jump to label l after a compare with 0:
    cond==LT gives the code_ge(0) suffix, any other non-zero cond gives
    "ne" and 0 gives "e".
 */
static void
jcond(int l, char cond)
{       
    char *cc;

    if (cond==LT)
	cc = code_ge(0);
    else if (cond)
	cc = "ne";
    else
	cc = "e";
    printf("\tj%s\t_%d\n",cc,l);
}

/*
    Emit an unconditional jump to label l.
    creg is deliberately left alone here because of the ?: operator.
 */
void
jmp(int l)
{       
    int target = l;

    printf("\tjmp\t_%d\n",target);
}

/* Write s to the output as an assembler comment (prefix "## "). */
void
code_comment(char *s)
{
    fputs("## ",stdout);
    fputs(s,stdout);
}


/*
    Emit the entry label of the code segment `name': alignment, .globl
    unless stmode is STATIC, and the label itself.
 */
void
code_enter(char *name)
{
    int is_global = (stmode!=STATIC);

    text_mode(0);
    printf("\t.align 4\n");
#ifdef __APPLE__
    if (is_global)
	printf(".globl _%s\n",name);
    printf("_%s:\n",name);
    clear_ptr_cache();
#else
    if (is_global)
	printf(".globl %s\n",name);
    printf("\t.type\t%s,@function\n",name);
    printf("%s:\n",name);
#endif
}


void
code_enter1(int args)
{
    code_disp_label=fwdlabel();
    printf("\tlea -_%d(%%ebp),%%esp\n",code_disp_label);

    // printf("## args %d disp %d  code_disp_offset=%d\n",args,disp,code_disp_offset); 
#ifdef __APPLE__
    printf("\tcall\t___i686.get_pc_thunk.bx\n");
    printf("_%d:\n",labelno);
    goffset_label = labelno;
    labelno++;
    regs[REG_EBX] = 1;
#endif
}

/*
    Close the code segment `name': set the offsets for fnptr, emit the
    end label and .size (non-Apple only), dump the local table and free
    all registers.
 */
void
code_leave(char *name)
{
    code_offset_set(fnptr);
#ifdef __APPLE__
    local_table();
#else
    printf("_%d:\n",labelno);
    printf("\t.size\t%s,_%d-%s\n",name,labelno,name);
    local_table();
    labelno++;
#endif
    free_all_register();
}

/*
    Emit the prologue of the C function `name'.

    Prints the alignment, the symbol (with .globl unless stmode is
    STATIC), saves %ebp, %ebx, %esi and %edi, and sets %esp below %ebp
    by the value of the forward label r1_offset_label.  Also resets
    max_func_args and sets control to 1.
 */
void
enter(char *name)
{
    text_mode(0);
    printf("\t.align 2\n");
#ifndef __APPLE__
    if (stmode!=STATIC)
	printf(".globl %s\n",name);
    printf("%s:\n",name);
    printf("\t.type\t%s,@function\n",name);
#else
    if (stmode!=STATIC)
	printf(".globl _%s\n",name);
    printf("_%s:\n",name);
#endif

//    lvar_offset_label = fwdlabel();
    /* the frame size is not known yet; it is referenced through a label */
    r1_offset_label = fwdlabel();
    max_func_args = 0;

    printf("\tpushl %%ebp\n");
    printf("\tmovl %%esp,%%ebp\n");
    printf("\tpushl %%ebx\n");
    printf("\tpushl %%esi\n");
    printf("\tpushl %%edi\n");
    printf("\tlea -%s%d(%%ebp),%%esp\n",lpfx,r1_offset_label); 
#ifdef __APPLE__
    /* get the PC into %ebx; the return point becomes goffset_label */
    printf("\tcall\t___i686.get_pc_thunk.bx\n");
    printf("_%d:\n",labelno);
    goffset_label = labelno;
    labelno++;
    regs[REG_EBX] = 1;
    clear_ptr_cache();
#endif

    control=1;
}

/* Second part of function entry: only switches to text mode. */
void
enter1()
{
    text_mode(0);
    return;
}

/* Emit a call to the label L_<l>. */
void
code_label_call(int l)
{
    int target = l;

    printf("\tcall\tL_%d\n",target);
}

/* Emit a ret instruction and clear the control flag. */
void
code_ret()
{
    fputs("\tret\n",stdout);
    control=0;
}

/*
    Emit the epilogue of the C function `name'.

    control - non-zero when execution can reach the end of the body; the
              return register is then set up first
    name    - function symbol, used for the .size directive

    When retcont is set, the code at the retcont label is emitted as
    well; it reloads the return value according to the function's
    return type before joining retlabel.  Afterwards the saved
    registers are restored, the local table is dumped and all registers
    are freed.
 */
void
leave(int control, char *name)
{
    int sz;

#ifdef __APPLE__
    disp &= -(SIZE_OF_INT*4); // 16byte alignment
    disp -= 12;
#else
    disp &= -SIZE_OF_INT;
#endif
    if (control)
        code_set_return_register(1);
    if (retcont) {
	/* normal flow must jump over the retcont entry code */
	if (control)
	    jmp(retlabel);
	fwddef(retcont);
        if (cadr(fnptr->ty)==FLOAT||cadr(fnptr->ty)==DOUBLE) {
	    printf("\tfldl %d(%%ebp)\n",-SIZE_OF_DOUBLE);
        } else if (cadr(fnptr->ty)>0&&(
            car(cadr(fnptr->ty))==STRUCT ||
            car(cadr(fnptr->ty))==UNION)) {
            /* sz is computed but not used below (see the disabled emit_copy) */
            sz = size(cadr(fnptr->ty));
	    set_ireg(RET_REGISTER,0);
            printf("\tmovl %d(%%ebp),%s\n",disp-SIZE_OF_INT,
		register_name(creg,0));
            // emit_copy(dreg,creg,sz,0,1,1);
        } else if (cadr(fnptr->ty)!=VOID) {
	    set_ireg(RET_REGISTER,0);
	    printf("\tmovl %s,%s\n",reg_name[REG_ESI],register_name(creg,0));
        }
    }
    fwddef(retlabel);
    code_offset_set(fnptr);

    /* -12: %ebx, %esi and %edi were pushed right below %ebp by enter() */
    printf("\tlea %d(%%ebp),%%esp\n",-12);
    printf("\tpopl %%edi\n");
    printf("\tpopl %%esi\n");
    printf("\tpopl %%ebx\n");
    printf("\tleave\n");
    printf("\tret\n");
#ifndef __APPLE__
    printf("_%d:\n",labelno);
    printf("\t.size\t%s,_%d-%s\n",name,labelno,name);
#endif
    local_table();
    labelno++;
    free_all_register();
}

/*
    Choose the register that will hold a value of the given type.

    Returns 0 for FLOAT/DOUBLE.  For long long types reg is passed to
    use_longlong and returned.  For any other type, when reg is
    USE_CREG and CREG_REGISTER is unused (regs[] entry is 0), that
    register is selected with set_ireg and returned; otherwise reg is
    passed to use_int and returned.
 */
int
code_get_fixed_creg(int reg,int type) {
    if (type==FLOAT||type==DOUBLE) {
	return 0;
    } else if (type==LONGLONG||type==ULONGLONG) {
	use_longlong(reg);
	return reg;
    } else {
        if (reg==USE_CREG) {
            if (regs[CREG_REGISTER]==0) {
                /* second argument is non-zero only when the current creg is
                   an integer register marked USING_REG */
                set_ireg(CREG_REGISTER,is_int_reg(creg)&&regs[creg]==USING_REG);
                return CREG_REGISTER;
            }
        }
	use_int(reg);
	return reg;
    }
}

/*
    Make reg the current integer register with set_ireg(reg,mode).
    Nothing is done for floating point and long long types.
 */
void
code_set_fixed_creg(int reg,int mode,int type) {
    if (type==FLOAT||type==DOUBLE)
	return;
    if (type==LONGLONG||type==ULONGLONG)
	return;
    set_ireg(reg,mode);
}

/*
    Select the return register before jumping to the leave code,
    according to the return type of the current function (fnptr).
    Returns ireg for integer and long long results, and 0 for float,
    double and void.
 */
int
code_set_return_register(int mode) {
    int rtype = cadr(fnptr->ty);

    /* floating results and void need no integer return register */
    if (rtype==FLOAT || rtype==DOUBLE)
	return 0;
    if (rtype==LONGLONG||rtype==ULONGLONG) {
        set_lreg(RET_LREGISTER,mode);
	return ireg;
    }
    if (rtype==VOID)
	return 0;
    set_ireg(RET_REGISTER,mode);
    return ireg;
}

/* Nothing is generated for ia32; this function must stay empty. */
void
gen_gdecl(char *n, int gpc)
{
    (void)n;
    (void)gpc;
}

/*
    Emit the string s as an assembler string directive
    (.ascii with an explicit \0 on __APPLE__, .string elsewhere).

    Newline is written as \n, backslash and double quote are escaped,
    and control characters are written as 3-digit octal escapes.
    Each character is handled as unsigned char so that bytes with the
    high bit set (for example multi-byte text) also become a valid
    3-digit octal escape instead of a sign-extended number.
 */
extern void
ascii(char *s)
{
    unsigned char c;

#ifdef __APPLE__
    printf("\t.ascii \"");
#else
    printf("\t.string \"");
#endif
    for (; (c = (unsigned char)*s) != 0; s++) {
	if (c=='\n')
	    printf("%cn",92);
	else if (c<' ' || c>=0x80)
	    printf("%c%03o",92,(unsigned int)c);
	else if (c=='\\')
	    printf("\\\\");
	else if (c==34)
	    printf("%c%c",92,34);
	else 
	    printf("%c",c);
    }
#ifdef __APPLE__
    printf("\\0%c\n",34);
#else
    printf("%c\n",34);
#endif
}

/*
    Switch to constant-string mode, define a new label there and return
    its number.
 */
extern int
emit_string_label()
{
    int label;

    cstring_mode();
    label = fwdlabel();
    printf("_%d:\n",label);
    return label;
}

/*
    Emit an initialiser given as the string s for an object of type t.
    For a char/unsigned char array the text is emitted in place;
    otherwise the text is emitted under a new label and that label is
    emitted with emit_label.
 */
extern void 
emit_string(char *s,int t)
{
    int char_array;
    int label;

    t = type_value(t);
    char_array = car(t)==ARRAY &&  
            (type_value(cadr(t))==CHAR||type_value(cadr(t))==UCHAR);
    if (char_array) {
        ascii(s);
        return;
    }
    label = emit_string_label();
    ascii(s);
    emit_label(label);
}

/*
    Emit an alignment directive before data of type t when
    data_alignment shows it is needed: never for char types, on odd
    data_alignment for short types, and on a non-multiple of 4 for the
    rest.  data_alignment is reset to 0 after the directive.
 */
void
align(int t)
{
    int misaligned;

    if (t==CHAR || t==UCHAR)
	return;
    if (t==SHORT || t==USHORT)
	misaligned = data_alignment & 1;
    else
	misaligned = data_alignment & 3;
    if (!misaligned)
	return;
    printf("\t.align 2\n");
    data_alignment = 0;
}

/*
    Start the definition of the global n: select constant-string mode
    for a char array initialised by a STRING node (e), data mode
    otherwise; align for a; then emit .globl (unless n is STATIC) and
    the label.
 */
extern void
emit_global(NMTBL *n,int a,int e)
{
    int t = type_value(n->ty);
    int string_init;

    string_init = e>0 && car(e)==STRING && t>0 && car(t)==ARRAY &&  
            (type_value(cadr(t))==CHAR||type_value(cadr(t))==UCHAR);
    if (string_init) {
        cstring_mode();
    } else {
	data_mode(n->nm);
    }
    align(a);
#ifndef __APPLE__
    if (n && n->sc!=STATIC)
	printf(".globl\t%s\n",n->nm);
    printf("%s:\n",n->nm); 
#else
    if (n && n->sc!=STATIC)
	printf(".globl\t_%s\n",n->nm);
    printf("_%s:\n",n->nm); 
#endif
}

extern void
emit_space(int sp)
{
    data_mode(0);
    printf("\t.space\t%d\n",sp);
}

extern void
emit_char(int d)
{
    data_mode(0);
    printf("\t.byte %d\n",d);
}

extern void
emit_short(int d)
{
    data_mode(0);
    printf("\t.short %d\n",d);
}

extern void
emit_int(int d)
{
    data_mode(0);
    printf("\t.long %d\n",d);
}

/*
 * Emit the long long constant held in expression e (read with lcadr) as
 * two ".long" words in the data section.  The two halves come from
 * code_l1() and code_l2(); ENDIAN_L selects which half is printed first.
 * Compiled to an empty function when LONGLONG_CODE is off.
 */
extern void
emit_longlong(int e)
{
#if LONGLONG_CODE
    long long ll = lcadr(e);
    data_mode(0);
#if (ENDIAN_L==0)
        printf("\t.long\t0x%x,0x%x\n",code_l1(ll),code_l2(ll));
#else
        printf("\t.long\t0x%x,0x%x\n",code_l2(ll),code_l1(ll));
#endif
#endif
}

/*
 * Emit the double constant held in expression e (read with dcadr) as two
 * ".long" words in the data section.  The two halves come from code_d1()
 * and code_d2(); ENDIAN_D selects which half is printed first.
 * Compiled to an empty function when FLOAT_CODE is off.
 */
extern void
emit_double(int e)
{
#if FLOAT_CODE
    double d = dcadr(e);
    data_mode(0);
#if (ENDIAN_D==0)
        printf("\t.long\t0x%x,0x%x\n",code_d1(d),code_d2(d));
#else
        printf("\t.long\t0x%x,0x%x\n",code_d2(d),code_d1(d));
#endif
#endif
}

/*
 * Emit the constant held in expression e (read with dcadr and narrowed
 * to float) as one ".long" word holding the float's bit pattern.
 * Compiled to an empty function when FLOAT_CODE is off.
 *
 * The bit pattern is obtained through a union rather than by casting a
 * float pointer to an int pointer, which violates the effective-type
 * (aliasing) rules.
 */
extern void
emit_float(int e)
{
#if FLOAT_CODE
    union { float f; unsigned int bits; } u;

    u.f = dcadr(e);
    data_mode(0);
    printf("\t.long\t0x%x\n",u.bits);
#endif
}

/*
 * Emit a ".long" data word holding the address of symbol s, plus offset
 * when offset is non-zero.  On Apple the symbol name is printed with a
 * leading underscore.
 */
extern void
emit_address(char *s,int offset)
{
    data_mode(0);
#ifdef __APPLE__
    if (!offset) {
        printf("\t.long _%s\n",s);
        return;
    }
    printf("\t.long _%s+%d\n",s,offset);
#else
    if (!offset) {
        printf("\t.long %s\n",s);
        return;
    }
    printf("\t.long %s+%d\n",s,offset);
#endif
}

/*
 * Switch to the data section and emit a ".long" word that refers to the
 * compiler-generated label _labelno.
 */
extern void
emit_label(int labelno)
{
    data_mode(0);
    printf("\t.long _%d\n",labelno);
}

/*
 * Finish the data emitted for global n.  Nothing happens unless the
 * current mode is GDECL.  In that case the data section is selected and,
 * when DOT_SIZE is defined, an end label and a ".size" directive giving
 * the distance from n's symbol to that label are printed.
 */
extern void
emit_data_closing(NMTBL *n)
{
#ifdef DOT_SIZE
    int end_label;
#endif
    if (mode!=GDECL)
        return;
    data_mode(0);
#ifdef DOT_SIZE
    end_label=fwdlabel();
    printf("_%d:\n",end_label);
    printf("\t.size\t%s,_%d-%s\n",n->nm,end_label,n->nm);
#endif
}

#if LONGLONG_CODE
static long long ll0 = 1LL;

/*
 * Return the least significant 32 bits of d.
 *
 * Computed arithmetically, so it does not depend on the host byte order
 * and does not read a long long object through an int pointer.
 */
static int 
code_l1(long long d)
{
    return (int)(unsigned int)(unsigned long long)d;
}

/*
 * Return the most significant 32 bits of d.
 *
 * Computed arithmetically, so it does not depend on the host byte order
 * and does not read a long long object through an int pointer.
 */
static int 
code_l2(long long d)
{
    return (int)(unsigned int)((unsigned long long)d >> 32);
}
#endif

#ifndef __APPLE__
/*
 * Non-Apple version: print a ".comm name,size" line for every entry of
 * global_list that has storage class GVAR or STATIC, is not marked as an
 * initialised global (dsp == -1), and is neither code nor a function.
 * The data section is selected once, before the first such line.
 */
void
global_table(void)
{
    NMTBL *sym;
    int in_data = 0;

    for (sym = global_list; sym; sym = sym->next) {
        if (sym->sc != GVAR && sym->sc != STATIC)
            continue;
        if (sym->dsp == -1)         /* initialized global */
            continue;
        if (is_code(sym) || is_function(sym))
            continue;
        if (!in_data) {
            data_mode(0);
            in_data = 1;
        }
        printf(".comm %s,%d\n",sym->nm,size(sym->ty));
        // .lcomm?
    }
}

#else

/*
 * Apple (Mach-O) version: emit everything that has to follow the
 * translation unit's code.
 *
 *  1. ".comm" for uninitialised GVAR objects and ".lcomm" for
 *     uninitialised STATIC objects; a STATIC code/function entry gets a
 *     ".set" line that aliases its L_name$stub symbol to the function.
 *  2. a non-lazy symbol pointer for every GVAR entry and for every
 *     code/function entry that has the FNAME attribute.
 *  3. for every EXTRN1 entry a jump-table stub, preceded by a non-lazy
 *     pointer when the entry is not code/function.
 *  4. the fixed trailer that defines ___i686.get_pc_thunk.bx.
 *
 * NOTE(review): the first loop stops on a NULL next pointer while the
 * other two stop on &null_nptr; confirm which terminator global_list
 * really uses.
 */
void
global_table(void)
{
    NMTBL *n;
    int init;
    init=0;
    for(n=global_list;n;n = n->next) {
	if ((n->sc == GVAR) && n->dsp != -1) {
	    if (is_code(n)||is_function(n)) continue;
	    /* n->dsp = -1 means initialized global */
	    if (init==0) {
		data_mode(0);
		init=1;
	    }
	    printf(".comm _%s,%d\n",n->nm,size(n->ty));
        } else if ((n->sc==STATIC) && n->dsp != -1) {
            /* n->dsp = -1 means initialized global */
            if (is_code(n)||is_function(n)) {
		/* static function: its stub name is just an alias */
		printf("\t.set L_%s$stub,_%s\n",n->nm,n->nm);
		continue;
	    }
            if (init==0) {
                data_mode(0);
                init=1;
            }
            printf(".lcomm _%s,%d\n",n->nm,size(n->ty));
	}
    }
    /* init is reused: the section header below is printed only once */
    init=0;
    for(n = global_list;n!=&null_nptr;n = n->next) {
        if (n->sc == GVAR || 
		((is_code(n) || is_function(n)) &&has_attr(n,FNAME)) ) {
            if (init==0) {
	printf("\t.section __IMPORT,__pointers,non_lazy_symbol_pointers\n");
                init=1;
            }
	printf("L_%s$non_lazy_ptr:\n\t.indirect_symbol _%s\n\t.long\t0\n",
				n->nm,n->nm);
	}
    }
    for(n = global_list;n!=&null_nptr;n = n->next) {
        if (n->sc==EXTRN1) {
	    if (!(is_code(n) || is_function(n))) {
	/* here the section directive is repeated for every entry */
	printf("\t.section __IMPORT,__pointers,non_lazy_symbol_pointers\n");
	printf("L_%s$non_lazy_ptr:\n\t.indirect_symbol _%s\n\t.long\t0\n",
				n->nm,n->nm);
	    }
	    printf("\t.section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5\n");
	    printf("L_%s$stub:\n",n->nm);
	    printf("\t.indirect_symbol _%s\n",n->nm);
	    printf("\thlt ; hlt ; hlt ; hlt ; hlt\n");
        }
    }
    /* helper that loads its own return address into %ebx */
    printf("	    .subsections_via_symbols\n");
    printf("	    .section __TEXT,__textcoal_nt,coalesced,pure_instructions\n");
    printf(".weak_definition        ___i686.get_pc_thunk.bx\n");
    printf(".private_extern ___i686.get_pc_thunk.bx\n");
    printf("___i686.get_pc_thunk.bx:\n");
    printf("	    movl    (%%esp), %%ebx\n");
    printf("	    ret\n");
}

#endif

/*
 * Print ".lcomm" lines for the function-local static variables on
 * local_static_list.  Only STATIC entries are considered; the data
 * section is selected when the first one is met, and entries whose dsp
 * is -1 (already initialised) get no ".lcomm".  On Apple the symbol name
 * is printed with a leading underscore.
 */
void
local_table(void)
{
    NMTBL *sym;
    int in_data = 0;

    for (sym = local_static_list; sym; sym = sym->next) {
        if (sym->sc != STATIC)
            continue;
        if (!in_data) {
            data_mode(0);
            in_data = 1;
        }
        if (sym->dsp == -1)     /* -1 means initialized global */
            continue;
#ifdef __APPLE__
        printf(".lcomm _%s,%d\n",sym->nm,size(sym->ty));
#else
        printf(".lcomm %s,%d\n",sym->nm,size(sym->ty));
#endif
    }
}

/*
 * Make the read-only string section the current output section.
 * Nothing is printed when output_mode is already RODATA_EMIT_MODE.
 * The align parameter is not used.
 */
void
cstring_mode(int align)
{
    if (output_mode==RODATA_EMIT_MODE)
        return;
#ifdef __APPLE__
    printf("\t.cstring\n");
#else
    printf(".section\t.rodata\n\t.align 2\n");
#endif
    output_mode = RODATA_EMIT_MODE;
}

/*
 * Make .text the current output section.  Nothing is printed when
 * output_mode is already TEXT_EMIT_MODE.  No alignment directive is
 * emitted and the align parameter is not used.
 */
void
text_mode(int align)
{
    if (output_mode==TEXT_EMIT_MODE)
        return;
    printf(".text\n");
    output_mode = TEXT_EMIT_MODE;
}

/*
 * Make .data the current output section (printing ".data" only when
 * output_mode is not DATA_EMIT_MODE yet).  When name is non-NULL and the
 * target is not Apple, a ".type name,@object" line is printed as well,
 * whether or not the section changed.
 */
void
data_mode(char *name)
{
    int already_data = (output_mode==DATA_EMIT_MODE);

    if (!already_data) {
        printf(".data\n");
        output_mode = DATA_EMIT_MODE;
    }
#ifndef __APPLE__
    if (name!=0) {
        printf("\t.type\t%s,@object\n",name);
    }
#endif
}

#if FLOAT_CODE

/* floating point */


/*
 * Return the x87 store mnemonic for a double (d != 0) or a float
 * (d == 0).  When the global use is set the non-popping form
 * (fstl/fsts) is chosen, otherwise the popping form (fstpl/fstps).
 */
char *
fstore(int d)
{
    if (use)
        return d ? "fstl" : "fsts";
    return d ? "fstpl" : "fstps";
}

/*
 * Return the popping x87 store mnemonic: "fstpl" for a double (d != 0),
 * "fstps" for a float (d == 0).
 */
char *
fstore_u(int d)
{
    if (d)
        return "fstpl";
    return "fstps";
}

/*
 * Return the x87 load mnemonic: "fldl" for a double (d != 0), "flds"
 * for a float (d == 0).
 */
char *
fload(int d)
{
    if (d)
        return "fldl";
    return "flds";
}


/*
 * Emit a store of the x87 stack top to the global described by e2
 * (cadr(e2) is the byte offset, caddr(e2) the name table entry).
 * d selects double or float, see fstore().  On Apple the global is
 * addressed through the register returned by get_ptr_cache(), elsewhere
 * directly by symbol name.  freg is not used.
 */
void code_dassign_gvar(int e2,int freg,int d)
{ 
    int off = cadr(e2);
#ifdef __APPLE__
    char *base = register_name(get_ptr_cache((NMTBL*)caddr(e2)),0);

    if (!off) {
        printf("\t%s (%s)\n",fstore(d),base);
        return;
    }
    printf("\t%s %d(%s)\n",fstore(d),off,base);
#else
    char *sym = ((NMTBL*)caddr(e2))->nm;

    if (!off) {
        printf("\t%s %s\n",fstore(d),sym);
        return;
    }
    printf("\t%s %s+%d\n",fstore(d),sym,off);
#endif
}

/*
 * Emit a store of the x87 stack top to local variable e2: the mnemonic
 * from fstore(d), then the operand printed by lvar(e2).  freg is not
 * used.
 */
void code_dassign_lvar(int e2,int freg,int d)
{ 
    printf("\t%s ",fstore(d)); lvar(e2); printf("\n");
}

/* No implementation in this back end: any call reports error(-1). */
void code_dassign_dregister(int e,int d,int freg)
{
    error(-1);
}

/*
 * Emit a store of the x87 stack top through the address held in
 * register e2.  d selects double or float, see fstore().  freg is not
 * used.
 */
void code_dassign(int e2,int freg,int d)
{ 
    printf("\t%s (%s)\n",fstore(d),register_name(e2,0));
}

static double d0 = 1.0;

/*
 * Return the 32-bit word of d's in-memory representation that is the
 * less significant one on a host where word 1 of 1.0 is 0x3ff00000
 * (word 0 there, word 1 on any other host).
 *
 * The words are read through a union instead of casting a double
 * pointer to an int pointer, which violates the aliasing rules.
 */
static int
code_d1(double d)
{
    union { double d; int w[2]; } one, v;

    one.d = 1.0;
    v.d = d;
    return (one.w[1] == 0x3ff00000) ? v.w[0] : v.w[1];
}

/*
 * Return the 32-bit word of d's in-memory representation that is the
 * more significant one on a host where word 1 of 1.0 is 0x3ff00000
 * (word 1 there, word 0 on any other host).
 *
 * The words are read through a union instead of casting a double
 * pointer to an int pointer, which violates the aliasing rules.
 */
static int
code_d2(double d)
{
    union { double d; int w[2]; } one, v;

    one.d = 1.0;
    v.d = d;
    return (one.w[1] == 0x3ff00000) ? v.w[1] : v.w[0];
}

/*
 * Emit code that loads the floating constant of expression e2 (read with
 * dcadr) onto the x87 stack.
 *
 * +0.0 and 1.0 use fldz / fld1.  Every other value is placed in a
 * literal section under a fresh label and loaded with fldl; afterwards
 * the output is switched back to .text.  freg and d are not used.
 *
 * Negative zero compares equal to 0.0 but fldz loads +0.0, so the sign
 * word is checked as well and -0.0 goes through the literal path.
 */
void code_dconst(int e2,int freg,int d)
{ 
    int lb;
    double value = dcadr(e2);

    if (value==0.0 && code_d2(value)==0) {      /* +0.0 only */
        printf("\tfldz\n"); return;
    }
    if (value==1.0) {
        printf("\tfld1\n"); return;
    }
#ifdef __APPLE__
    printf(" \t.literal8\n\t.align 3\n");
#else
    printf(" \t.section\t.rodata\n\t.align 8\n");
#endif
    lb=fwdlabel();
    printf("_%d:\n",lb);
#if ENDIAN_D==0
    printf("\t.long\t0x%x,0x%x\n",code_d1(value),code_d2(value));
#endif
    /* the section directive above bypassed output_mode, so ".text" has
       to be printed even when output_mode still says text */
    if (output_mode==TEXT_EMIT_MODE) {
        printf(".text\n");
    } else {
        text_mode(0);
    }
#ifdef __APPLE__
    printf("\tfldl _%d-_%d(%%ebx)\n",lb,goffset_label);
#else
    printf("\tfldl _%d\n",lb);
#endif
}

/*
 * Builtin hooks for fabsf, fabs, inff and inf.  All four bodies are
 * empty here, so no code is emitted for them.
 */
void
code_builtin_fabsf(int e)
{
}
void
code_builtin_fabs(int e)
{
}
void
code_builtin_inff()
{
}
void
code_builtin_inf()
{
}

void code_dneg(int freg,int d)
{ 
    printf("\tfchs\n");
}

/*
 * Emit code that converts the x87 stack top to a 32-bit integer in the
 * current integer register (creg after use_int(reg)).
 *
 * Emitted sequence: reserve two ints on the stack, save the FPU control
 * word there and keep a copy in the register, overwrite byte 1 of the
 * stored control word with 12 (presumably to select truncation - confirm
 * against the x87 control word layout), reload it, store the integer
 * with fistpl into the second slot, restore the saved control word, then
 * pop twice so the register ends up holding the converted value.
 */
void code_d2i(int reg)
{ 
    use_int(reg);
    printf("\tlea -%d(%%esp),%%esp\n",SIZE_OF_INT*2);
    printf("\tfnstcw  (%%esp)\n");
    printf("\tmovl    (%%esp), %s\n",register_name(creg,0));
    printf("\tmovb    $12, 1(%%esp)\n");
    printf("\tfldcw   (%%esp)\n");
    printf("\tfistpl  %d(%%esp)\n",SIZE_OF_INT);
    printf("\tmovl    %s, (%%esp)\n",register_name(creg,0));
    printf("\tfldcw   (%%esp)\n");
    /* first pop discards the control word, second fetches the result */
    printf("\tpopl    %s\n",register_name(creg,0));
    printf("\tpopl    %s\n",register_name(creg,0));
}

/*
 * Emit code that converts the 32-bit integer in creg to a floating value
 * on the x87 stack: push the register, fildl from the stack slot, then
 * release the slot.  The reg parameter is not used.
 */
void code_i2d(int reg)
{ 
    printf("\tpushl %s\n",register_name(creg,0));
    printf("\tfildl (%%esp)\n");
    printf("\tlea %d(%%esp),%%esp\n",SIZE_OF_INT);
}

/*
 * Emit code that converts the x87 stack top to an unsigned 32-bit
 * integer in register reg.
 *
 * Same control-word handling as code_d2i, but three ints are reserved
 * and the value is stored as a 64-bit integer (fistpll); only the word
 * at offset SIZE_OF_INT, the first word of that 64-bit store, is loaded
 * into the register.
 */
void code_d2u(int reg)
{ 
    use_int(reg);
    printf("\tlea -%d(%%esp),%%esp\n",SIZE_OF_INT*3);
    printf("\tfnstcw  (%%esp)\n");
    printf("\tmovl    (%%esp), %s\n",register_name(reg,0));
    printf("\tmovb    $12, 1(%%esp)\n");
    printf("\tfldcw   (%%esp)\n");
    /* put the original control word back in memory for the later fldcw */
    printf("\tmovl    %s, (%%esp)\n",register_name(reg,0));
    printf("\tfistpll %d(%%esp)\n",SIZE_OF_INT);
    printf("\tfldcw   (%%esp)\n");
    printf("\tmovl    %d(%%esp),%s\n",SIZE_OF_INT,register_name(reg,0));
    printf("\tlea %d(%%esp),%%esp\n",SIZE_OF_INT*3);
}

/*
 * Emit code that converts the unsigned 32-bit integer in creg to a
 * floating value on the x87 stack.  The register is pushed twice, the
 * upper of the two slots is cleared to 0, and the resulting 64-bit
 * quantity is loaded with fildll.  The reg parameter is not used.
 */
void code_u2d(int reg)
{ 
    printf("\tpushl  %s\n",register_name(creg,0));
    printf("\tpushl  %s\n",register_name(creg,0));
    printf("\tmovl   $0, %d(%%esp)\n",SIZE_OF_INT);
    printf("\tfildll (%%esp)\n");
    printf("\tlea %d(%%esp),%%esp\n",SIZE_OF_INT*2);
}

/*
 * float/double conversions.  d2f and f2d emit nothing; the float to
 * integer and integer to float conversions reuse the double versions.
 */
void code_d2f(int reg) { }
void code_f2d(int reg) { }
void code_f2i(int reg) { code_d2i(reg); }
void code_f2u(int reg) { code_d2u(reg); }
void code_i2f(int reg) { code_i2d(reg); }
void code_u2f(int reg) { code_u2d(reg); }

/*
 * Emit a load of the global described by e2 (cadr(e2) is the byte
 * offset, caddr(e2) the name table entry) onto the x87 stack.
 * d selects double or float, see fload().  On Apple the global is
 * addressed through the register returned by get_ptr_cache(), elsewhere
 * directly by symbol name.  freg is not used.
 */
void code_drgvar(int e2,int d,int freg)
{ 
    int off = cadr(e2);
#ifdef __APPLE__
    char *base = register_name(get_ptr_cache((NMTBL*)caddr(e2)),0);

    if (!off) {
        printf("\t%s (%s)\n",fload(d),base);
        return;
    }
    printf("\t%s %d(%s)\n",fload(d),off,base);
#else
    char *sym = ((NMTBL*)caddr(e2))->nm;

    if (!off) {
        printf("\t%s %s\n",fload(d),sym);
        return;
    }
    printf("\t%s %s+%d\n",fload(d),sym,off);
#endif
}


/*
 * Emit a load of local variable e2 onto the x87 stack: the mnemonic from
 * fload(d), then the operand printed by lvar(e2).  freg is not used.
 */
void code_drlvar(int e2,int d,int freg)
{ 
    printf("\t%s ",fload(d)); lvar(e2); printf("\n");
}

/*
 * Emit "fcomp" of the x87 stack top against the global described by e2
 * (cadr(e2) is the byte offset, caddr(e2) the name table entry), then
 * the conditional jump produced by jcond(label,cond).  reg and d are not
 * used.
 */
void code_cmp_drgvar(int e2,int reg,int d,int label,int cond)
{ 
    int off = cadr(e2);
#ifdef __APPLE__
    char *base = register_name(get_ptr_cache((NMTBL*)caddr(e2)),0);

    if (off)
        printf("\tfcomp %d(%s)\n",off,base);
    else
        printf("\tfcomp (%s)\n",base);
#else
    char *sym = ((NMTBL*)caddr(e2))->nm;

    if (off)
        printf("\tfcomp %s+%d\n",sym,off);
    else
        printf("\tfcomp %s\n",sym);
#endif
    jcond(label,cond);
}

/*
 * Emit "fcomp" of the x87 stack top against local variable e2, then the
 * conditional jump produced by jcond(label,cond).  reg and d are not
 * used.
 */
void code_cmp_drlvar(int e2,int reg,int d,int label,int cond)
{ 
    printf("\tfcomp "); lvar(e2); printf("\n");
    jcond(label,cond);
}

/*
 * Emit the x87 instruction for a binary floating operation on the two
 * topmost x87 stack entries.  Float and double opcodes share the same
 * instruction.  reg and e1 are not used.
 *
 * For FCMP/DCMP the two operands are compared and popped (fucompp) and
 * the status word is copied to %ax; on Apple a pointer cache held in
 * %eax is dropped first because %ax is overwritten.
 * Opcodes not listed emit nothing.
 */
void dtosop(int op,int reg,int e1)
{
    switch(op) {
    case FADD:
    case DADD: printf("\tfaddp %%st,%%st(1)\n"); break;
    case FSUB:
    case DSUB: printf("\tfsubp %%st,%%st(1)\n"); break;
    case FDIV:
    case DDIV: printf("\tfdivp %%st,%%st(1)\n"); break;
    case FMUL:
    case DMUL: printf("\tfmulp %%st,%%st(1)\n"); break;
    case FCMP:
    case DCMP: 
	printf("\tfucompp\n");
	printf("\tfnstsw\t%%ax\n");
#ifdef __APPLE__
	if (regs[REG_EAX]==PTRC_REG)
	    clear_ptr_cache_reg(REG_EAX);
#endif
	break;
    }
}

/*
 * Emit a floating compound assignment (op=) through the address in creg:
 * load the current value of the target, combine it with the value
 * already on the x87 stack via dtosop(op), and store the result back.
 * d selects double or float for the load and the store.
 */
void
code_dassop(int op,int reg,int d) {
    /* we have lvalue in creg, applied floating value is in %st(0) */
    emit_dpop(d);                            /* do nothing for 387 */
    printf("\t%s (%s)\n",fload(d),register_name(creg,0));
    dtosop(op,reg,0);
    printf("\t%s (%s)\n",fstore(d),register_name(creg,0));
}

/* No implementation in this back end: any call reports error(-1). */
void
code_register_dassop(int reg,int op,int d) {
    error(-1);
}

/*
 * Emit a floating pre-increment or pre-decrement.
 *
 * g_expr(e2) is evaluated first and the target is then addressed through
 * creg.  The value is loaded, 1.0 is pushed with fld1, and depending on
 * the sign of caddr(e1) either faddp or fsubrp combines them; the result
 * is stored back with fstore(d).  freg is not used.
 */
void
code_dpreinc(int e1,int e2,int d,int freg) {
    g_expr(e2);
    printf("\t%s (%s)\n",fload(d),register_name(creg,0));
    printf("\tfld1\n");
    if (caddr(e1)>0)
	printf("\tfaddp %%st,%%st(1)\n");
    else
	/* NOTE(review): relies on the assembler's AT&T reading of
	   fsubrp with these operands - confirm it yields value-1 */
	printf("\tfsubrp %%st,%%st(1)\n");
    printf("\t%s (%s)\n",fstore(d),register_name(creg,0));
}

/*
 * Emit a floating post-increment or post-decrement.
 *
 * Like code_dpreinc, but when the global use is set the old value is
 * loaded a second time so that a copy remains on the x87 stack after the
 * updated value has been stored; in that case the store always uses the
 * popping form (fstore_u).  freg is not used.
 */
void
code_dpostinc(int e1,int e2,int d,int freg) {
    g_expr(e2);
    printf("\t%s (%s)\n",fload(d),register_name(creg,0));
    if (use)
	printf("\t%s (%s)\n",fload(d),register_name(creg,0));
    printf("\tfld1\n");
    if (caddr(e1)>0)
	printf("\tfaddp %%st,%%st(1)\n");
    else
	printf("\tfsubrp %%st,%%st(1)\n");
    printf("\t%s (%s)\n",(use?fstore_u(d):fstore(d)),register_name(creg,0));
}

#define COND_BRANCH 1
#define COND_VALUE  2

/* return 1 if boolean expression */

/*
 * Emit a floating-point relational expression.
 *
 *   e1, e2  operand expressions
 *   l1      jump target (used when mode is COND_BRANCH)
 *   op      FOP+ or DOP+ combined with GT, GE, EQ or NEQ
 *   cond    non-zero: act when the relation holds; zero: when it fails
 *   reg     destination register (used when mode is not COND_BRANCH)
 *   mode    COND_BRANCH emits a conditional jump to l1; any other value
 *           materialises 0/1 in reg with setCC + movzbl
 *
 * Returns 1 when code was emitted, 0 when op is not a handled relation.
 *
 * A false-sense test is rewritten first: GT and GE swap the operands and
 * turn into each other, EQ and NEQ are exchanged.  The comparison itself
 * is generated by evaluating a DCMP node (float operators use DCMP too)
 * and then testing bits of %ah with the masks 5, 69 and 64.
 */
int
drexpr0(int e1, int e2,int l1, int op,int cond,int reg,int mode)
{       
    char *s;
    if (!cond) {
	switch(op) {
	case FOP+GT:
	    return drexpr0(e2,e1,l1,FOP+GE,1,reg,mode);
	case FOP+GE:
	    return drexpr0(e2,e1,l1,FOP+GT,1,reg,mode);
	case FOP+EQ:
	    op=FOP+NEQ; break;
	case FOP+NEQ:
	    op=FOP+EQ; break;
	case DOP+GT:
	    return drexpr0(e2,e1,l1,DOP+GE,1,reg,mode);
	case DOP+GE:
	    return drexpr0(e2,e1,l1,DOP+GT,1,reg,mode);
	case DOP+EQ:
	    op=DOP+NEQ; break;
	case DOP+NEQ:
	    op=DOP+EQ; break;
	default: return 0;
	}
    }
    /* condition suffix for jCC / setCC; "e" acts on a zero result */
    s = "e";
    switch(op) {
	case DOP+GE:
	case FOP+GE:
	    g_expr(list3(DCMP,e1,e2));
	    printf("\ttestb\t$5,%%ah\n");
	    break;
	case DOP+GT:
	case FOP+GT:
	    g_expr(list3(DCMP,e1,e2));
	    printf("\ttestb\t$69,%%ah\n");
	    break;
	case DOP+EQ:
	case FOP+EQ:
	    g_expr(list3(DCMP,e1,e2));
	    printf("\tandb\t$69,%%ah\n");
	    printf("\txorb\t$64,%%ah\n");
	    break;
	case DOP+NEQ:
	case FOP+NEQ:
	    g_expr(list3(DCMP,e1,e2));
	    printf("\tandb\t$69,%%ah\n");
	    printf("\txorb\t$64,%%ah\n");
	    s = "ne";
	    break;
	default:
	    return 0;
    }
    if (mode==COND_BRANCH) {
	printf("\tj%s\t_%d\n",s,l1);
    } else {
	use_data_reg(reg,0);
	printf("\tset%s\t%s\n",s,register_name(reg,1));
	printf("\tmovzbl\t%s,%s\n",
	    register_name(reg,1),register_name(reg,0));
    }
    return 1;
}

/*
 * Branching form of a floating relational expression: delegates to
 * drexpr0 in COND_BRANCH mode with USE_CREG and returns l1.  The result
 * of drexpr0 is not examined.
 */
int
drexpr(int e1, int e2,int l1, int op,int cond)
{
    drexpr0(e1, e2,l1, op,cond,USE_CREG,COND_BRANCH);
    return l1;
}

/*
 * Value form of a floating relational expression: e1 is a node whose
 * car is the operator and whose cadr/caddr are the operands.  The 0/1
 * result is produced in reg; returns what drexpr0 returns.
 */
static int
drexpr_bool(int e1, int reg)
{
    return drexpr0(cadr(e1), caddr(e1),0, car(e1),1,reg,COND_VALUE);
}


/* No implementation in this back end: any call reports error(-1). */
void 
code_dregister(int e2,int freg,int d)
{
    error(-1);
}

/*
 * Emit a test of the x87 stack top against zero followed by the
 * conditional jump from jcond(label,cond).
 *
 * e2 must be USE_CREG, otherwise error(-1) is reported (emission still
 * continues).  On Apple a pointer cache held in %eax is dropped, since
 * the status word is read into %ax.  d is not used.
 */
void
code_cmp_dregister(int e2,int d,int label,int cond)
{
    if (e2!=USE_CREG) {
        error(-1);
    }
#ifdef __APPLE__
    if (regs[REG_EAX]==PTRC_REG) {
        clear_ptr_cache_reg(REG_EAX);
    }
#endif
    printf("\tfldz\n"); 
    printf("\tfucompp\n");
    printf("\tfnstsw\t%%ax\n");
    printf("\tandb\t$69,%%ah\n");
    printf("\txorb\t$64,%%ah\n");
    jcond(label,cond);
}

int pop_fregister()
{ 
    if (freg_sp<0) { error(-1); return -1;}
    // printf("## fpop: %d\n",freg_sp-1);
    return freg_stack[--freg_sp];
}

int
emit_dpop(int d)
{
    int xreg;
    if ((xreg=pop_fregister())==-1) {
    } else if (xreg<= -REG_LVAR_OFFSET) {
	code_drlvar(REG_LVAR_OFFSET+xreg,1,freg);
	free_lvar(xreg+REG_LVAR_OFFSET);
	/* pushed order is reversed.   We don't need this for commutable 
	    operator, but it is ok to do this.  */
        printf("\tfxch\t%%st(1)\n");
    } 
    return xreg;
}


/* Counterpart of emit_dpop; the body is empty in this back end. */
void emit_dpop_free(int e1,int d)
{ 
}

/*
 * Record on the compile-time floating stack that one more value lives on
 * the x87 stack (entry -1).  No instruction is printed by this function
 * itself.
 *
 * When MAX_FPU_STACK entries are already tracked, the values currently
 * on the x87 stack are first saved to local variables via
 * code_save_fstacks().  The type parameter is not used.
 *
 * NOTE(review): the overflow test is freg_sp > MAX_MAX; if freg_stack
 * has exactly MAX_MAX elements this lets index MAX_MAX through - confirm
 * against the array declaration.
 */
void emit_dpush(int type)
{ 
    if (freg_sp>=MAX_FPU_STACK) code_save_fstacks();
    if (freg_sp>MAX_MAX) error(-1);
    freg_stack[freg_sp++]=-1;
    // printf("## fpush:%d\n",freg_sp);
}

#endif

void
code_save_stacks()
{
    /* registers stacks are saved in local variable */
    int i,reg;
    for(i=0;i<reg_sp;i++) {
        if ((reg=reg_stack[i])>=0) {
            code_assign_lvar(
                (reg_stack[i]=new_lvar(SIZE_OF_INT)),reg,0); 
            reg_stack[i]= reg_stack[i]-REG_LVAR_OFFSET;
            if (regs[reg]!=REG_VAR) free_register(reg);
        }
    }
#if FLOAT_CODE
    code_save_fstacks();
#endif
}

static void
code_clear_stack_reg(int reg1)
{
    /* specified registers stacks are saved in local variable */
    /* temporal registers are saved in local variable */
    int i,reg;
    if (regs[reg1]==PTRC_REG)
        clear_ptr_cache_reg(reg1);

    for(i=0;i<reg_sp;i++) {
        if ((reg=reg_stack[i])>=0 && reg==reg1) {
            code_assign_lvar(
                (reg_stack[i]=new_lvar(SIZE_OF_INT)),reg,0); 
            reg_stack[i]= reg_stack[i]-REG_LVAR_OFFSET;
            if (regs[reg]!=REG_VAR) free_register(reg);
        }
    }
}

#if FLOAT_CODE
void
code_save_fstacks()
{
    /* stacks in fpu are saved in local variable */
    int xreg,sp,uses;
    uses = use; use = 0;
    sp=freg_sp;
    while(sp-->0) {
	if ((xreg=freg_stack[sp])==-1) {
	    code_dassign_lvar(
		(freg_stack[sp]=new_lvar(SIZE_OF_DOUBLE)),freg,1); 
	    freg_stack[sp]= freg_stack[sp]-REG_LVAR_OFFSET;
	}
    }
    use = uses;
}
#endif



#if LONGLONG_CODE


/* 64bit int part */

static void
pcond(char *s,int l1)
{
    printf("\tj%s\t_%d\n",s,l1);
}

/*
 * Emit a 64-bit integer comparison "e1 op e2" and a conditional jump to
 * label l1.
 *
 *   op    LOP+GT, LOP+GE, LOP+UGT, LOP+UGE, LOP+EQ or LOP+NEQ
 *   cond  non-zero: jump to l1 when the relation holds;
 *         zero: jump to l1 when it does not hold
 *
 * e1 is evaluated and pushed, e2 is evaluated into %edx:%eax.  The high
 * words are compared first; only when they are equal do the low words
 * decide.  A local label l2 is the fall-through exit.  Returns l1.
 *
 * The low words carry no sign of their own, so they are always compared
 * with the unsigned conditions, also for the signed LOP+GT / LOP+GE.
 * Using the signed conditions there gives the wrong answer whenever the
 * high words are equal and the low words differ in bit 31.
 */
int
lrexpr(int e1, int e2,int l1, int op,int cond)
{
    int l2;
    code_save_stacks();
    g_expr(e1);
    emit_lpush();
    g_expr(e2);
    // we are sure %ecx is free
    // %ebx is used in Intel Mac
    stack_depth -= SIZE_OF_INT * 2;
    printf("\tpopl %%ecx\n");   // LSW of e1
    printf("\tcmpl %%edx,(%%esp)\n");  // MSW of e1 against MSW of e2
    printf("\tpopl %%edx\n"); 
    l2 = fwdlabel();
    // cond==0 jump on false condition   ( if(x) => rexpr(..  cond=0 ...) )
    switch(op) {
    case LOP+GT:
    case LOP+GE:
        pcond(code_gt(1),cond?l1:l2);   // high word greater: relation holds
        pcond(code_eq(0),cond?l2:l1);   // high word smaller: relation fails
        break;
    case LOP+UGT:
    case LOP+UGE:
        pcond(code_ugt(1),cond?l1:l2);
        pcond(code_eq(0), cond?l2:l1);
        break;
    case LOP+EQ:
        pcond(code_eq(0),(cond?l2:l1));
        break;
    case LOP+NEQ:
        pcond(code_eq(0),(cond?l1:l2));
        break;
    default:
        error(-1);
    }
    // high words are equal here; the low words decide, always unsigned
    printf("\tsubl %%eax,%%ecx\n");
    switch(op) {
    case LOP+GT:
    case LOP+UGT: pcond(code_ugt(cond), l1); break;
    case LOP+GE:
    case LOP+UGE: pcond(code_uge(cond), l1); break;
    case LOP+EQ:  pcond(code_eq(cond),l1); break;
    case LOP+NEQ: pcond(code_eq(!cond),l1); break;
    }  
    fwddef(l2); 
    return l1;
}

/* Always returns 0 and emits nothing. */
int emit_lpop()
{
    return 0;
}

/*
 * Emit a copy of the 64-bit register variable held in %edi:%esi into the
 * long long register reg.  Nothing is emitted when reg is REG_L itself.
 * e2 is not used.
 */
void code_lregister(int e2,int reg)
{
    use_longlong(reg);
    if (reg!=REG_L) {
	printf("\tmovl %%esi,%s\n",l_eax(reg));
	printf("\tmovl %%edi,%s\n",l_edx(reg));
    }
}

/*
 * Emit a zero test of the 64-bit register variable in %edi:%esi: both
 * halves are OR-ed together in the int register selected by
 * use_int(reg), the result is tested, and jcond(label,cond) emits the
 * jump.
 */
void code_cmp_lregister(int reg,int label,int cond)
{
    char *crn;
    use_int(reg);
    crn = register_name(reg,0);
    printf("\tmovl %%esi,%s\n",crn);
    printf("\torl %%edi,%s\n",crn);
    printf("\ttestl %s,%s\n",crn,crn);
    jcond(label,cond);
}

/*
 * Emit a zero test of the 64-bit global described by e1 (cadr(e1) is the
 * byte offset, caddr(e1) the name table entry).  The word at the offset
 * and the word 4 bytes above it are OR-ed together in the int register
 * e2, the result is tested, and jcond(label,cond) emits the jump.
 * On Apple the global is addressed through get_ptr_cache().
 */
void code_cmp_lrgvar(int e1,int e2,int label,int cond)
{
    char *n,*crn;
    use_int(e2);
    crn = register_name(e2,0);
#ifdef __APPLE__
    int r = get_ptr_cache((NMTBL*)caddr(e1));
    n = register_name(r,0);
    if (cadr(e1)) {
	printf("\tmovl %d(%s),%s\n",cadr(e1),n,crn);
	printf("\torl  %d(%s),%s\n",cadr(e1)+4,n,crn);
    } else {
	printf("\tmovl (%s),%s\n",n,crn);
	printf("\torl  4(%s),%s\n",n,crn);
    }
#else
    n = ((NMTBL*)caddr(e1))->nm;
    if (cadr(e1)) {
	printf("\tmovl %s+%d,%s\n",n,cadr(e1),crn);
	printf("\torl  %s+%d,%s\n",n,cadr(e1)+4,crn);
    } else {
	printf("\tmovl %s,%s\n",n,crn);
	printf("\torl  %s+4,%s\n",n,crn);
    }
#endif
    printf("\ttestl %s,%s\n",crn,crn);
    jcond(label,cond);
}

/*
 * Emit a zero test of the 64-bit local variable e1: the words at e1 and
 * e1+4 are OR-ed together in the int register e2, the result is tested,
 * and jcond(label,cond) emits the jump.
 */
void code_cmp_lrlvar(int e1,int e2,int label,int cond)
{
    char *crn;
    use_int(e2);
    crn = register_name(e2,0);
    printf("\tmovl "); lvar(e1); printf(",%s\n",crn);
    printf("\torl  "); lvar(e1+4); printf(",%s\n",crn);
    printf("\ttestl %s,%s\n",crn,crn);
    jcond(label,cond);
}

/*
 * Emit a 64-bit store of the long long register e2 through the address
 * held in int register e1: the l_eax half goes to offset 0 and the l_edx
 * half to offset 4.  Code is only generated when ENDIAN_L is 0.
 */
void code_lassign(int e1,int e2)
{
    char *rn;
    // e1 = e2
    use_longlong(e2);
    rn = register_name(e1,0);
#if ENDIAN_L==0
    printf("\tmovl %s,(%s)\n",l_eax(e2),rn);
    printf("\tmovl %s,4(%s)\n",l_edx(e2),rn);
#endif
}

/*
 * Emit a 64-bit store of the long long register e2 to the global
 * described by e1 (cadr(e1) is the byte offset, caddr(e1) the name table
 * entry).  The l_eax half goes to the offset, the l_edx half 4 bytes
 * above it.  On Apple the global is addressed through get_ptr_cache().
 * Code is only generated when ENDIAN_L is 0.
 */
void code_lassign_gvar(int e1,int e2)
{
    char *n;
    use_longlong(e2);
#if ENDIAN_L==0
#ifdef __APPLE__
    int r = get_ptr_cache((NMTBL*)caddr(e1));
    n = register_name(r,0);
    if (cadr(e1)) {
	printf("\tmovl %s,%d(%s)\n",l_eax(e2),cadr(e1),n);
	printf("\tmovl %s,%d(%s)\n",l_edx(e2),cadr(e1)+4,n);
    } else {
	printf("\tmovl %s,(%s)\n",l_eax(e2),n);
	printf("\tmovl %s,4(%s)\n",l_edx(e2),n);
    }
#else
    n = ((NMTBL*)caddr(e1))->nm;
    if (cadr(e1)) {
	printf("\tmovl %s,%s+%d\n",l_eax(e2),n,cadr(e1));
	printf("\tmovl %s,%s+%d\n",l_edx(e2),n,cadr(e1)+4);
    } else {
	printf("\tmovl %s,%s\n",l_eax(e2),n);
	printf("\tmovl %s,%s+4\n",l_edx(e2),n);
    }
#endif
#endif
}

/*
 * Emit a 64-bit store of the long long register e2 to local variable e1:
 * the l_eax half goes to e1, the l_edx half to e1+4.  Code is only
 * generated when ENDIAN_L is 0.
 */
void code_lassign_lvar(int e1,int e2)
{
    use_longlong(e2);
#if ENDIAN_L==0
    printf("\tmovl %s,",l_eax(e2)); lvar(e1); printf("\n");
    printf("\tmovl %s,",l_edx(e2)); lvar(e1+4); printf("\n");
#endif
}

/*
 * Emit a copy of the long long register reg into the long long register
 * e2 (e2 = reg), one movl per half.  Nothing is emitted when both name
 * the same register.
 */
void code_lassign_lregister(int e2,int reg)
{
    use_longlong(reg);
    if (e2==reg)
        return;
    printf("\tmovl %s,%s\n",l_eax(reg),l_eax(e2));
    printf("\tmovl %s,%s\n",l_edx(reg),l_edx(e2));
}

/*
 * Emit a load of the long long constant of expression e1 (read with
 * lcadr) into a long long register: code_l1() supplies the immediate for
 * the l_eax half and code_l2() the one for the l_edx half.  Code is only
 * generated when ENDIAN_L is 0.
 *
 * Note that the parameter named creg hides the file-level creg inside
 * this function.
 */
void
code_lconst(int e1,int creg)
{
    use_longlong(creg);
#if ENDIAN_L==0
    printf("\tmovl $%d,%s\n",code_l1(lcadr(e1)),l_eax(creg));
    printf("\tmovl $%d,%s\n",code_l2(lcadr(e1)),l_edx(creg));
#endif
}

/*
 * Emit a 64-bit negation of the long long register e1: negate the low
 * half, add the resulting carry into the high half, then negate the high
 * half.
 */
void code_lneg(int e1)
{
    use_longlong(e1);
    printf("\tnegl %s\n",l_eax(e1));
    printf("\tadcl $0,%s\n",l_edx(e1));
    printf("\tnegl %s\n",l_edx(e1));
}

/*
 * Emit a 64-bit load of the global described by e1 (cadr(e1) is the byte
 * offset, caddr(e1) the name table entry) into the long long register
 * e2.  The word at the offset goes to the l_eax half, the word 4 bytes
 * above it to the l_edx half.  On Apple the global is addressed through
 * get_ptr_cache().  Code is only generated when ENDIAN_L is 0.
 */
void code_lrgvar(int e1,int e2)
{
    char *n;
    use_longlong(e2);
#if ENDIAN_L==0
#ifdef __APPLE__
    int r = get_ptr_cache((NMTBL*)caddr(e1));
    n = register_name(r,0);
    if (cadr(e1)) {
	printf("\tmovl %d(%s),%s\n",cadr(e1),n,l_eax(e2));
	printf("\tmovl %d(%s),%s\n",cadr(e1)+4,n,l_edx(e2));
    } else {
	printf("\tmovl (%s),%s\n",n,l_eax(e2));
	printf("\tmovl 4(%s),%s\n",n,l_edx(e2));
    }
#else
    n = ((NMTBL*)caddr(e1))->nm;
    if (cadr(e1)) {
	printf("\tmovl %s+%d,%s\n",n,cadr(e1),l_eax(e2));
	printf("\tmovl %s+%d,%s\n",n,cadr(e1)+4,l_edx(e2));
    } else {
	printf("\tmovl %s,%s\n",n,l_eax(e2));
	printf("\tmovl %s+4,%s\n",n,l_edx(e2));
    }
#endif
#endif
}

/*
 * Emit a 64-bit load of local variable e1 into the long long register
 * e2: the word at e1 goes to the l_eax half, the word at e1+4 to the
 * l_edx half.  Code is only generated when ENDIAN_L is 0.
 */
void code_lrlvar(int e1,int e2)
{
    use_longlong(e2);
#if ENDIAN_L==0
    printf("\tmovl "); lvar(e1); printf(",%s\n",l_eax(e2));
    printf("\tmovl "); lvar(e1+4); printf(",%s\n",l_edx(e2));
#endif
}

/* When the requested destination is REG_L, copy the result out of REG_LCREG.
   Wrapped in do/while(0) so it is one statement and cannot capture a following else. */
#define check_lreg(reg) do { if ((reg)==REG_L) code_lassign_lregister((reg),REG_LCREG); } while (0)

/*
 * Emit a 64-bit binary operation.
 *
 * The left operand is taken from %edx:%eax and the right operand is the
 * two words on top of the machine stack (low word first); the result is
 * produced in %edx:%eax and the operand is removed from the stack.
 * When the requested destination reg is REG_L the result is then copied
 * there by check_lreg().  e2 is not used.
 *
 *   shifts            the low word of the stacked operand is the count;
 *                     %ecx is saved in the operand's high-word slot and
 *                     restored afterwards; counts with bit 5 set are
 *                     fixed up after the double shift
 *   add/sub/and/or/xor  two 32-bit instructions on the stacked words
 *   mul               three 32-bit multiplies on a small stack frame
 *   div/mod           call to the __divdi3 family of helpers
 *
 * Every path, the multiply path included, ends with check_lreg(reg) so
 * that a REG_L destination receives the result.
 */
void
ltosop(int op,int reg,int e2)
{
    char *opl,*oph,*call;
    int lb;

    // e2 (operand) is on the top of the stack
    use_longlong(reg);
    opl = 0; oph = 0; call=0;
    stack_depth -= SIZE_OF_INT * 2;

    switch(op) {
    case LLSHIFT:
    case LULSHIFT:
        printf("\tmovl %%ecx,4(%%esp)\n");
        printf("\tpopl %%ecx\n");
        printf("\tshldl %%eax,%%edx\n");
        printf("\tsall %%cl,%%eax\n");
        printf("\ttestb $32,%%cl\n");
        printf("\tje\t_%d\n",(lb=fwdlabel()));
        printf("\tmovl %%eax,%%edx\n");
        printf("\txorl %%eax,%%eax\n");
        fwddef(lb);
        printf("\tpopl %%ecx\n");
        check_lreg(reg);
        return;
    case LRSHIFT:
        printf("\tmovl %%ecx,4(%%esp)\n");
        printf("\tpopl %%ecx\n");
        printf("\tshrdl %%edx,%%eax\n");
        printf("\tsarl %%cl,%%edx\n");
        printf("\ttestb $32,%%cl\n");
        printf("\tje\t_%d\n",(lb=fwdlabel()));
        printf("\tmovl %%edx,%%eax\n");
        printf("\tsarl $31,%%edx\n");
        fwddef(lb);
        printf("\tpopl %%ecx\n");
        check_lreg(reg);
        return;
    case LURSHIFT:
        printf("\tmovl %%ecx,4(%%esp)\n");
        printf("\tpopl %%ecx\n");
        printf("\tshrdl %%edx,%%eax\n");
        printf("\tshrl %%cl,%%edx\n");
        printf("\ttestb $32,%%cl\n");
        printf("\tje\t_%d\n",(lb=fwdlabel()));
        printf("\tmovl %%edx,%%eax\n");
        printf("\txorl %%edx,%%edx\n");
        fwddef(lb);
        printf("\tpopl %%ecx\n");
        check_lreg(reg);
        return;
    }
    switch(op) {
    case LADD:  opl="addl";oph="adcl"; break;
    case LSUB:  opl="subl";oph="sbbl"; break;
    case LBAND: opl=oph="andl"; break;
    case LEOR:  opl=oph="xorl"; break;
    case LBOR:  opl=oph="orl"; break;
    case LMUL:
    case LUMUL:
        printf("\tpushl %%edx\n");
        printf("\tpushl %%eax\n");
        printf("\tpushl %%ecx\n");
        //       0   saved ecx
        //       4   c_l
        //       8   c_h
        //      12   o_l
        //      16   o_h
        printf("\tmull 12(%%esp)\n");          //  c_l*o_l -> %edx,%eax
        printf("\tmovl 4(%%esp),%%ecx\n");     //  c_l->%ecx
        printf("\timull 16(%%esp),%%ecx\n");   //  c_l*o_h->%ecx
        printf("\taddl %%ecx,%%edx\n");        //  %edx+%ecx->%edx
        printf("\tmovl 8(%%esp),%%ecx\n");     //  c_h->%ecx
        printf("\timull 12(%%esp),%%ecx\n");   //  c_h*o_l->%ecx
        printf("\taddl %%ecx,%%edx\n");        //  %edx+%ecx->%edx
        printf("\tpopl %%ecx\n");
        printf("\tlea 16(%%esp),%%esp\n");
        check_lreg(reg);
        return;
#ifdef __APPLE__
    case LDIV:  call="L___divdi3$stub"; 
         extern_define("__divdi3",0,FUNCTION,1); break;
    case LUDIV: call="L___udivdi3$stub";
         extern_define("__udivdi3",0,FUNCTION,1); break;
    case LMOD:  call="L___moddi3$stub";
         extern_define("__moddi3",0,FUNCTION,1); break;
    case LUMOD: call="L___umoddi3$stub";
         extern_define("__umoddi3",0,FUNCTION,1); break;
#else
    case LDIV:  call="__divdi3"; break;
    case LUDIV: call="__udivdi3"; break;
    case LMOD:  call="__moddi3"; break;
    case LUMOD: call="__umoddi3"; break;
#endif
    default: error(-1);
    }
    if (opl) {
        printf("\t%s (%%esp),%%eax\n\t%s 4(%%esp),%%edx\n",opl,oph);
        printf("\tlea 8(%%esp),%%esp\n");
        check_lreg(reg);
    } else if (call) {
#ifdef __APPLE__
        clear_ptr_cache();
#endif
        /* helper arguments: left operand on top, right operand below */
        printf("\tpushl %%edx\n");
        printf("\tpushl %%eax\n");
        printf("\tcall %s\n",call);
        printf("\tlea 16(%%esp),%%esp\n");
        check_lreg(reg);
    } else {
        error(-1);
    }
}

/*
 * Tell whether the 64-bit operation op can take the constant expression
 * e as an immediate operand (handled by loprtc).
 *
 * e must be a CONST or LCONST node, otherwise 0 is returned.
 *   shifts           the constant must lie in 0..63
 *   LMUL LUMUL LUDIV the constant must lie strictly between -0x10000000
 *                    and 0x10000000 and ilog() of it must be non-zero
 *                    (LDIV is commented out of this list)
 *   LADD LSUB LBAND LEOR LBOR   always accepted
 *   anything else    rejected
 */
int code_lconst_op_p(int op,int e) {
    long long v;

    if (car(e)==CONST)
        v = cadr(e);
    else if (car(e)==LCONST)
        v = lcadr(e);
    else
        return 0;

    if (op==LLSHIFT || op==LULSHIFT || op==LRSHIFT || op==LURSHIFT)
        return (0<=v && v<=63);
    if (op==LMUL || op==LUMUL || op==LUDIV)
        return -0x10000000LL<v && v<0x10000000LL && ilog(v);
    if (op==LADD || op==LSUB || op==LBAND || op==LEOR || op==LBOR)
        return 1;
    return 0;
}

/*
 * Emit a 64-bit operation between the long long register reg and the
 * constant expression e (a CONST or LCONST node; anything else reports
 * error(-1)).
 *
 *   LMUL/LUMUL  treated as a left shift by ilog(constant)
 *   LUDIV       treated as a logical right shift by ilog(constant)
 *   shifts      counts of 0, exactly 32 and above 32 are special-cased
 *               with register moves; other counts use shld/shrd
 *   LADD LSUB LEOR LBOR LBAND   one instruction per half, using the low
 *               and high word of the constant as immediates
 *
 * The logical right shift uses shrl in every branch.  Using the
 * arithmetic sarl for counts above 32 would copy the sign bit of the
 * old high word into the result of an unsigned shift or division.
 */
void loprtc(int op,int reg,int e) {
    char *opl,*oph=0;
    int vl,il;
    int vh;
    long long l=0;

    if (car(e)==CONST) l = cadr(e);
    else if (car(e)==LCONST) l = lcadr(e);
    else error(-1);

    vl = code_l1(l);
    vh = code_l2(l);
    il = l;

    use_longlong(reg);
    opl = 0;

    switch(op) {
    case LMUL: case LUMUL:
        vl=il=ilog(il);
        /* fall through: multiply by a power of two is a left shift */
    case LLSHIFT:
    case LULSHIFT:
        if (il==0) return;
        else if (il==32) {
            code_register(regv_l(reg),regv_h(reg));
            code_const(0,regv_l(reg));
            return;
        } else if (il>32) {
            code_register(regv_l(reg),regv_h(reg));
            printf("\tsall $%d,%s\n",(int)il-32,l_edx(reg));
            code_const(0,regv_l(reg));
            return;
        }
        printf("\tshldl $%d,%s,%s\n",vl,l_eax(reg),l_edx(reg));
        printf("\tsall $%d,%s\n",(int)il,l_eax(reg));
        return;
    case LRSHIFT:
        if (il==0) return;
        else if (il==32) {
            code_register(regv_h(reg),regv_l(reg));
            creg = ireg = REG_EAX;
            code_i2ll(reg);
            return;
        } else if (il>32) {
            code_register(regv_h(reg),regv_l(reg));
            printf("\tsarl $%d,%s\n",(int)il-32,l_eax(reg));
            creg = ireg = REG_EAX;
            code_i2ll(reg);
            return;
        }
        printf("\tshrdl $%d,%s,%s\n",(int)il,l_edx(reg),l_eax(reg));
        printf("\tsarl $%d,%s\n",(int)il,l_edx(reg));
        return;
    case LUDIV:
        il=ilog(il);
        /* fall through: unsigned divide by a power of two is a shift */
    case LURSHIFT:
        if (il==0) return;
        else if (il==32) {
            code_register(regv_h(reg),regv_l(reg));
            code_const(0,regv_h(reg));
            return;
        } else if (il>32) {
            if (il>64) error(-1);
            code_register(regv_h(reg),regv_l(reg));
            /* logical shift: zeros, not the sign bit, must come in */
            printf("\tshrl $%d,%s\n",(int)il-32,l_eax(reg));
            code_const(0,regv_h(reg));
            return;
        }
        printf("\tshrdl $%d,%s,%s\n",(int)il,l_edx(reg),l_eax(reg));
        printf("\tshrl $%d,%s\n",(int)il,l_edx(reg));
        return;
    }
    switch(op) {
    case LADD: opl="addl";oph="adcl"; break;
    case LSUB: opl="subl";oph="sbbl"; break;
    case LEOR:  opl=oph="xorl"; break;
    case LBOR:  opl=oph="orl"; break;
    case LBAND: opl=oph="andl"; break;
    default: error(-1);
    }
    printf("\t%s $%d,%s\n\t%s $%d,%s\n",opl,vl,l_eax(reg),oph,vh,l_edx(reg));
}

/* Emits nothing; the only statement in the body is commented out. */
void emit_lpop_free(int e1)
{
//     printf("\taddl $8,%%esp\n");
}

/*
 * Emit a push of the 64-bit value in %edx:%eax (high word first, so the
 * low word ends up on top) and account for the two words in stack_depth.
 */
void emit_lpush()
{
    stack_depth += SIZE_OF_INT * 2;
    printf("\tpushl %%edx\n\tpushl %%eax\n");
}

/*
 * Emit a signed int to long long conversion: the current int register is
 * brought into %eax, "cltd" sign-extends it into %edx, and check_lreg()
 * copies the pair on when the destination reg is REG_L.  Afterwards lreg
 * and creg are set to the long long register obtained for USE_CREG.
 */
void code_i2ll(int reg)
{
    int reg0 = USE_CREG;
    int creg0 = creg;

    use_longlong(reg0);
    use_register(creg0,REG_EAX,1);

    printf("\tcltd\n");
    check_lreg(reg);
    lreg = creg = reg0;
}

/* int to unsigned long long: identical to code_i2ll (sign extension). */
void code_i2ull(int reg)
{
    code_i2ll(reg);
}

/*
 * Emit an unsigned int to long long conversion: the current int register
 * is brought into %eax, %edx is cleared, and check_lreg() copies the
 * pair on when the destination reg is REG_L.  Afterwards lreg and creg
 * are set to the long long register obtained for USE_CREG.
 *
 * NOTE(review): use_longlong(reg0) appears twice; the second call looks
 * redundant - confirm before removing.
 */
void code_u2ll(int reg)
{
    int reg0 = USE_CREG;
    int creg0 = creg;

    use_longlong(reg0);
    use_register(creg0,REG_EAX,1);

    use_longlong(reg0);
    printf("\txorl %%edx,%%edx\n");
    check_lreg(reg);
    lreg = creg = reg0;
}

/* unsigned int to unsigned long long: identical to code_u2ll. */
void code_u2ull(int reg)
{
    code_u2ll(reg);
}

/*
 * Emit a long long to int conversion: the low word is taken from %eax
 * and copied to the current int register unless reg already is REG_EAX.
 */
void code_ll2i(int reg)
{
    use_int(reg);
    if (REG_EAX!=reg)
	printf("\tmovl %%eax,%s\n",register_name(creg,0));
}

/*
 * The remaining 64-bit to 32-bit conversions all keep the low word and
 * therefore share code_ll2i.
 */
void code_ll2u(int reg)
{
    code_ll2i(reg);
}

void code_ull2i(int reg)
{
    code_ll2i(reg);
}

void code_ull2u(int reg)
{
    code_ll2i(reg);
}

#if FLOAT_CODE
/*
 * Emit a conversion of the x87 stack top to a signed 64-bit integer in
 * %edx:%eax.
 *
 * A 64 byte scratch area is reserved on the stack.  The FPU control
 * word is saved there, a copy whose high byte is set to 12 is loaded,
 * the value is stored with fistpll, the saved control word is restored,
 * and the two result words are fetched.  check_lreg() then copies the
 * pair on when the destination reg is REG_L.
 */
void code_d2ll(int reg)
{
    use_longlong(reg);
    printf("\tsubl    $64, %%esp\n");
    printf("\tfnstcw  34(%%esp)\n");
    printf("\tmovzwl  34(%%esp), %%eax\n");
    printf("\tmovb    $12, %%ah\n");
    printf("\tmovw    %%ax, 32(%%esp)\n");
    printf("\tfldcw   32(%%esp)\n");
    printf("\tfistpll 52(%%esp)\n");
    printf("\tfldcw   34(%%esp)\n");
    printf("\tmovl    52(%%esp), %%eax\n");
    printf("\tmovl    56(%%esp), %%edx\n");
    printf("\taddl    $64, %%esp\n");
    check_lreg(reg);
}

/*
 * Emit a conversion of the x87 stack top (double) to an unsigned 64-bit
 * integer by calling the runtime helper __fixunsdfdi: the value is
 * stored into a 16 byte argument area, the helper is called, and the
 * area is released.  On Apple the pointer cache is dropped first and the
 * helper is reached through its stub.
 *
 * The helper's result is taken from %edx:%eax, so check_lreg() is
 * applied afterwards, as in code_d2ll, to serve a REG_L destination.
 */
void code_d2ull(int reg)
{
    use_longlong(reg);
#ifdef __APPLE__
    clear_ptr_cache();
#endif
    printf("\tsubl $16,%%esp\n");
    printf("\tfstpl (%%esp)\n");
#ifdef __APPLE__
    printf("\tcall L___fixunsdfdi$stub\n");
    extern_define("__fixunsdfdi",0,FUNCTION,1); 
#else
    printf("\tcall __fixunsdfdi\n");
#endif
    printf("\taddl $16,%%esp\n");
    check_lreg(reg);
}

/* float to long long: shares the double conversion code_d2ll. */
void code_f2ll(int reg)
{
    code_d2ll(reg);
}

/*
 * Emit a conversion of the x87 stack top (float) to an unsigned 64-bit
 * integer by calling the runtime helper __fixunssfdi: the value is
 * stored as a single into a 16 byte argument area, the helper is called,
 * and the area is released.  On Apple the pointer cache is dropped first
 * and the helper is reached through its stub.
 *
 * The helper's result is taken from %edx:%eax, so check_lreg() is
 * applied afterwards, as in code_d2ll, to serve a REG_L destination.
 */
void code_f2ull(int reg)
{
    use_longlong(reg);
#ifdef __APPLE__
    clear_ptr_cache();
#endif
    printf("\tsubl $16,%%esp\n");
    printf("\tfstps (%%esp)\n");
#ifdef __APPLE__
    printf("\tcall L___fixunssfdi$stub\n");
    extern_define("__fixunssfdi",0,FUNCTION,1); 
#else
    printf("\tcall __fixunssfdi\n");
#endif
    printf("\taddl $16,%%esp\n");
    check_lreg(reg);
}

/*
 * Emit a conversion of the signed 64-bit integer in %edx:%eax to a
 * floating value on the x87 stack: both words are stored into an 8 byte
 * stack slot, loaded with fildll, and the slot is released.  The reg
 * parameter is not used.
 */
void code_ll2d(int reg)
{
        printf("\tsubl $8,%%esp\n");
        printf("\tmovl %%eax,(%%esp)\n");
        printf("\tmovl %%edx,4(%%esp)\n");
        printf("\tfildll (%%esp)\n");
        printf("\taddl $8,%%esp\n");
}

/* long long to float: shares the double conversion code_ll2d. */
void code_ll2f(int reg)
{
    code_ll2d(reg);
}

/*
 * Emit a conversion of the unsigned 64-bit integer in %edx:%eax to a
 * floating value on the x87 stack.
 *
 * The value is first loaded with the signed conversion code_ll2d().
 * fildll treats its operand as signed, so when bit 63 is set the loaded
 * value is 2^64 too small.  In that case (sign bit of %edx set) 2^64 is
 * added back; the constant is pushed as an IEEE single (0x5f800000) so
 * that no data section entry is needed.
 */
void code_ull2d(int reg)
{
    int lb;

    code_ll2d(reg);
    lb = fwdlabel();
    printf("\ttestl %%edx,%%edx\n");
    printf("\tjns\t_%d\n",lb);
    printf("\tpushl $0x5f800000\n");       /* 2^64 as a float */
    printf("\tfadds (%%esp)\n");
    printf("\tlea 4(%%esp),%%esp\n");
    fwddef(lb);
}

/* unsigned long long to float: same code as the double conversion. */
void code_ull2f(int reg)
{
    code_ull2d(reg);
}

#endif


/*
 * Emit a 64-bit pre-increment or pre-decrement; caddr(e1) is the step.
 *
 * When e2 is an LREGISTER the register variable in %edi:%esi is updated
 * in place and, if the value is used and reg is not REG_L, copied to
 * reg.  Otherwise g_expr(e2) is evaluated, the 64-bit object addressed
 * through creg is updated in memory and then loaded into reg with
 * lload().
 *
 * The high word receives the carry plus 0 for a positive step and plus
 * -1 otherwise, i.e. the sign extension of the step.
 */
void code_lpreinc(int e1,int e2,int reg)
{
    int dir = caddr(e1);
    int creg0;
    char *crn;
    if (car(e2)==LREGISTER) {
        use_longlong(reg);
        printf("\taddl $%d,%%esi\n",dir);
	printf("\tadcl $%d,%%edi\n",dir>0?0:-1);
	if (use && reg!=REG_L) {
	    code_lregister(REG_L,reg);
	}
        return;
    } 
    g_expr(e2);
    crn = register_name(creg0=creg,0);
    printf("\taddl $%d,(%s)\n",dir,crn);
    printf("\tadcl $%d,4(%s)\n",dir>0?0:-1,crn);
    use_longlong(reg);
    lload(creg0,0,reg);
}

void code_lpostinc(int e1,int e2,int reg)
{
    int dir = caddr(e1);
    int creg0;
    char *crn;
    if (car(e2)==LREGISTER) {
        use_longlong(reg);
	if (use && reg!=REG_L) {
	    code_lregister(REG_L,reg);
	}
        printf("\taddl $%d,%%esi\n",dir);
	printf("\tadcl $%d,%%edi\n",dir>0?0:-1);
        return;
    } 
    g_expr(e2);
    crn = register_name(creg0=creg,0);
    printf("\taddl $%d,(%s)\n",dir,crn);
    printf("\tadcl $%d,4(%s)\n",dir>0?0:-1,crn);
    if (use) {
	use_longlong(reg);
	lload(creg0,0,reg);
	printf("\taddl $%d,%s\n",-dir,l_eax(reg));
	printf("\tadcl $%d,%s\n",-dir>0?0:-1,l_edx(reg));
    }
}

/*
 * Stub: emits nothing and unconditionally reports error(-1).
 * Neither op nor reg is looked at.
 */
void
code_lassop(int op,int reg)
{
    error(-1);
}

/*
 * Stub: emits nothing and unconditionally reports error(-1).
 * Neither reg nor op is looked at.
 */
void
code_register_lassop(int reg,int op)
{
    error(-1);
}


#endif





#if CASE_CODE

/*
 * Predicate paired with code_table_jump(): answers 1 for every delta,
 * the argument itself is ignored.
 */
int
code_table_jump_p(int delta)
{
    (void)delta;
    return 1;
}

/*
 * Emit an indirect jump through the table at label _l.
 *
 *   l        label number of the jump table
 *   csvalue  passed to set_ireg(); the switch value is then taken from creg
 *   delta    spacing between consecutive case values
 *   max,min  min is subtracted from the value and the result is compared
 *            (unsigned, via ja) against max-min
 *   dlabel   label jumped to when the value is out of range or does not
 *            fall on a multiple of delta
 *
 * delta==1 indexes the table directly.  For delta 2 and 4 the low bits
 * are tested and the value is scaled so that each step of delta advances
 * one 4-byte entry.  Any other delta divides by delta with divl and
 * requires a zero remainder.
 *
 * With __APPLE__ defined the entry is loaded relative to goffset_label
 * through %ebx and %ebx is added before the jump; otherwise the table
 * entry is used directly as the jump target.
 */
void
code_table_jump(int l,int csvalue,int delta,int max,int min,int dlabel)
{
    char *crn;
    // use_register(creg,csvalue,0);
    set_ireg(csvalue,0);
    crn = register_name(creg,0);
    /* normalise to 0..max-min and range check */
    printf("\tsubl\t$%d,%s\n",min,crn);
    printf("\tcmpl\t$%d,%s\n",max-min,crn);
    printf("\tja\t_%d\n",dlabel);
    if (delta==1)  {
#ifdef __APPLE__
	printf("\tmovl\t_%d-_%d(%%ebx,%s,4),%s\n",l,goffset_label,crn,crn);
	printf("\taddl\t%%ebx,%s\n",crn);
	printf("\tjmp\t*%s\n",crn);
#else
	printf("\tjmp\t*_%d(,%s,4)\n",l,crn);
#endif
	return;
    }
    /* the remaining cases work on %eax (and use %edx / %ecx as scratch) */
#ifdef __APPLE__
	if (regs[REG_EAX]==PTRC_REG)
	    clear_ptr_cache_reg(REG_EAX);
#endif
    use_register(creg,REG_EAX,1);
    crn = "%eax";
    
    switch(delta) {
    case 2:
	/* odd offsets are not in the table */
	printf("\tmovl\t$1,%%edx\n");
	printf("\tandl\t%%eax,%%edx\n");
	printf("\tjne\t_%d\n",dlabel);
#ifdef __APPLE__
	printf("\tmovl\t_%d-_%d(%%ebx,%s,2),%s\n",l,goffset_label,crn,crn);
	printf("\taddl\t%%ebx,%s\n",crn);
	printf("\tjmp\t*%s\n",crn);
#else
	printf("\tjmp\t*_%d(,%%eax,2)\n",l); 
#endif
	break;
    case 4:
	/* offsets that are not a multiple of 4 are not in the table */
	printf("\tmovl\t$3,%%edx\n");
	printf("\tandl\t%%eax,%%edx\n");
	printf("\tjne\t_%d\n",dlabel);
#ifdef __APPLE__
	printf("\tmovl\t_%d-_%d(%%ebx,%s),%s\n",l,goffset_label,crn,crn);
	printf("\taddl\t%%ebx,%s\n",crn);
	printf("\tjmp\t*%s\n",crn);
#else
	printf("\tjmp\t*_%d(%%eax)\n",l); 
#endif
	break;
    default:
	/* general case: quotient in %eax is the index, remainder must be 0 */
	printf("\tmovl $%d,%%ecx\n",delta);
	printf("\txor %%edx,%%edx\n\tdivl %%ecx\n");
	printf("\tandl\t%%edx,%%edx\n");
	printf("\tjne\t_%d\n",dlabel);
#ifdef __APPLE__
	printf("\tmovl\t_%d-_%d(%%ebx,%s,4),%s\n",l,goffset_label,crn,crn);
	printf("\taddl\t%%ebx,%s\n",crn);
	printf("\tjmp\t*%s\n",crn);
#else
	printf("\tjmp\t*_%d(,%%eax,4)\n",l); 
#endif
	break;
    }
    
}

/*
 * Start a jump table: output_mode is switched to DATA_EMIT_MODE, the
 * alignment directive is printed (preceded by a .rodata section
 * directive when __APPLE__ is not defined) and label l is defined
 * through fwddef().
 */
void
code_table_open(int l)
{
    output_mode = DATA_EMIT_MODE;
#ifndef __APPLE__
    printf(" \t.section\t.rodata\n\t.align 4\n");
#else
    printf(" \t.align 2\n");
#endif
    fwddef(l);
}

/*
 * Emit one jump table entry for `label` as a .long.  With __APPLE__
 * defined the entry is the difference between the label and
 * goffset_label; otherwise it is the label itself.
 * table_top is not used by this back end.
 */
void
code_table_value(int label,int table_top)
{
    (void)table_top;
#ifndef __APPLE__
    printf("\t.long _%d\n",label);
#else
    printf("\t.long _%d-_%d\n",label,goffset_label);
#endif
}

/*
 * Finish a jump table by calling text_mode(0).
 */
void
code_table_close()
{
    text_mode(0);
}

#endif


#if ASM_CODE

/*
    Print one substituted asm operand.

    rstr is a replacement entry; its car selects what is printed:
      REGISTER   the name of register cadr(rstr)
      CONST      cadr(rstr) as a decimal number
      FNAME      the string stored in cadr(rstr)
      STRING     the label number cadr(rstr), written as _N
    With __APPLE__ defined, FNAME gets a leading underscore and both
    FNAME and STRING are written as a difference from goffset_label.
    Any other kind is reported with error(-1).
 */

static void
emit_asm_operand(int rstr)
{
    switch (car(rstr)) {
    case REGISTER:
        printf("%s",register_name(cadr(rstr),0));
        break;
    case CONST:
        printf("%d",cadr(rstr));
        break;
    case FNAME:
#ifdef __APPLE__
        printf("_%s-_%d",(char*)cadr(rstr),goffset_label);
#else
        printf("%s",(char*)cadr(rstr));
#endif
        break;
    case STRING:
#ifdef __APPLE__
        printf("_%d-_%d",cadr(rstr),goffset_label);
#else
        printf("_%d",cadr(rstr));
#endif
        break;
    default:
        error(-1);
        break;
    }
}

/*
     prepare asm operands

     char *constraints string
     int  operand expr
     int  mode          (ASM_INPUT,ASM_OUTPUT)
     int  replacement list
     int  output operands count
     int  output operands replacement list

     retrun replacement list
        list3( operands, next, clobber )
                               0    can be shared in input/output
                               1    can't be used in input
 */

int
code_asm_operand(char *p,int e1,int mode,int repl,int n,int repl0)
{
    int r;
    int c;
    int val;
    int clobber = 0;

    printf("## constraint %s\n",p);
    if (*p=='=') {
	// output register
	p++;
    }
    if (*p=='&') {
	// earlyclobber
	p++;
	clobber = 1;
    }
    c = *p;
    if (c=='r') {
	if (mode==ASM_INPUT) {
	    for(;repl0;repl0 = cadr(repl0)) {
		if (car(car(repl0))==REGISTER && caddr(repl0)==0) {
		    r = cadr(car(repl0));
		    caddr(repl0) = ASM_USED;
		    break;
		}
            }  
	    r = get_register();
	} else {
	    r = get_register();
	}
	repl = list3(list2(REGISTER,r),repl,clobber);
    } else if (c=='m') {
	repl = list3(list2(0,0),repl,clobber);
    } else if (c=='i') {
	if (car(e1)==GVAR) {
	    e1=list3(FNAME,(int)(((NMTBL *)caddr(e1))->nm),0);
	} else if (car(e1)==FNAME) {
	    e1=list3(FNAME,(int)(((NMTBL *)cadr(e1))->nm),0);
	} else if (car(e1)==STRING) {
	    val = emit_string_label();
	    ascii((char*)cadr(e1));
	    e1=list3(STRING,val,0);
	} else if (car(e1)==CONST) {
	} else error(-1);
	repl = list3(e1,repl,clobber);
    } else if (digit(c)) {
	val = 0;
	do { val = val*10 + c-'0'; } while (digit(c=*p++));
	if (val>MAX_ASM_REG) error(-1); // too large register
	if (n-val<0) error(-1);
	repl = list3(car(nth(n-val-1,repl0)),repl,clobber);
    } else error(-1);
    return repl;
}

void
code_free_asm_operand(int repl)
{
    int reg;
    for(;repl;repl=cadr(repl)) {
	if (car(car(repl))==REGISTER) {
	    reg = cadr(car(repl));
	    if (regs[reg]!=REG_VAR) free_register(reg);
	}
    }
}


extern void
code_asm(char *asm_str,int repl)
{
    int c,i,rstr,val;
    char *p;
    int reg[MAX_ASM_REG];

    text_mode(0);
    c = *asm_str;
    if (c!='\t'&&c!=' ') printf("\t");
    for(i=0;repl && i<MAX_ASM_REG;i++) {
	reg[i] = car(repl);
	repl = cadr(repl);
    }
    p = asm_str;
    while((c = *p++)) {
	if (c=='%') {
	    c = *p++;
	    if (!c) { break;
	    } else if (c=='%') {
		printf("%%"); continue;
	    } else if (!digit(c)) {
		printf("%%%c",c); continue;
	    }
	    val = 0;
	    do { val = val*10 + c-'0'; } while (digit(c=*p++)) ;
	    p--;
	    if (val>MAX_ASM_REG) error(-1); // too large register
	    rstr = reg[val];
	    emit_asm_operand(rstr);
	} else {
	    printf("%c",c);
	}
    }
    printf("\n");
}

#endif


#if BIT_FIELD_CODE

/*
 * Decode a bit-field type descriptor.
 *
 *   type       bit-field type; cadr(type) is the base type and
 *              caddr(type) carries the position and width
 *   *pbitpos   bit position, cadr(caddr(type))
 *   *pbitsize  field width in bits, caddr(caddr(type))
 *   *psign     1 for a signed base type, 0 for an unsigned one
 *   *pbitsz    width in bits of the base type (8, 16, 32 or 64)
 *   *palign    alignment in bytes used for the base type
 *   *pl        1 when the base type is (unsigned) long long
 *
 * An unknown base type is reported with error(-1); the outputs then
 * keep the defaults sign=0, bitsz=1, align=4, l=0.
 */

static void
set_bitsz(int type,int *pbitpos, int *pbitsize,
        int *psign,int *pbitsz,int *palign,int *pl)
{
    int sign=0,bitsz=1;
    int align=4,l=0;
    *pbitpos = cadr(caddr(type));
    *pbitsize = caddr(caddr(type));

    switch(cadr(type)) {
    case INT:           sign=1; bitsz=32; align=4; break;
    case UNSIGNED:              bitsz=32; align=4; break;
    case CHAR:          sign=1; bitsz= 8; align=1; break;
    case UCHAR:                 bitsz= 8; align=1; break;
    case SHORT:         sign=1; bitsz=16; align=2; break;
    /* unsigned like UCHAR/UNSIGNED: must not be sign-extended on load */
    case USHORT:                bitsz=16; align=2; break;
    case LONGLONG:      sign=1; bitsz=64; align=4; l=1; break;
    case ULONGLONG:             bitsz=64; align=4; l=1; break;
    default: error(-1);
    }
    *psign = sign;
    *pbitsz = bitsz;
    *palign = align;
    *pl = l;
}

/*
      bit field alignment calcuration
        this is architecture depenedent
 */

extern int
code_bit_field_disp(int type,int *poffset,int *bfd,int *sz)
{
    int sign,bitsz,align;
    int i;
    int bitpos = *bfd;
    int bitpos0;
    int bitsize;
    int offset = *poffset;
    int l;
    set_bitsz(type,&bitpos0,&bitsize,&sign,&bitsz,&align,&l);

    if (bitsize>bitsz) { error(BTERR); bitsize = bitsz; }

    /* bfd means previous bit field bit offset */
    if (bitpos) {
	/* previous field is bit field and spaces may remain */
	/* calc previsous offset */

	i= offset-(bitpos+7)/8;

	for(l = bitpos;l>0;l -= 8,i++) {
	    if ((i & (align-1))==0 && l+bitsize <= bitsz) {
		/* alignment is correct and space remains */
		*poffset=offset=i;
		i = l+bitsize;
                *bfd = (i==bitsz)?0:i;
		*sz = (i+7)/8;
// printf("## bitpos=%d bitsize=%d bitsz=%d offset=%d\n",l,bitsize,bitsz,*poffset);
		return l;
	    } 
	}
    }

    /* first bit-field */

    if ((i=(offset & (align-1)))) {
	*poffset = (offset += (align-i));
    }
    bitpos = 0;
    *bfd = (bitsize==bitsz)?0:bitsize;
    *sz = (bitsize+7)/8;

// printf("## bitpos=%d bitsize=%d bitsz=%d offset=%d\n",bitpos,bitsize,bitsz,*poffset);
    return bitpos;
}

/*
 * bit field value
 *
 * Emit code that reads the bit field described by `type` from the
 * address held in register `adr` and leaves the extracted value in
 * `reg`.
 *
 * The storage unit is loaded, shifted left so that the field's highest
 * bit becomes the top bit of the word, and then shifted right by
 * (word width - field width); the right shift is arithmetic for signed
 * base types and logical otherwise.  long long fields use the 64-bit
 * helpers and are only handled when LONGLONG_CODE is set; all other
 * base types are handled as a 32-bit word after the load.
 */

extern void
code_bit_field(int type,int adr,int reg)
{
    int sign,bitsz,l,align;
    int bitsize,bitpos;
    int i,size;
    set_bitsz(type,&bitpos,&bitsize,&sign,&bitsz,&align,&l);
    /* size in bytes of the base type, selects the load instruction */
    size=bitsz/8;
// printf("## %d: bitpos=%d bitsize=%d bitsz=%d\n",lineno,bitpos,bitsize,bitsz);
    /* this implementation returns -1 for int i:1; */
    if (l==1) {
#if LONGLONG_CODE
	use_int(adr);
	use_longlong(reg);
	lload(adr,0,reg);
	/* shift left */
	if ((i=bitsz-bitsize-bitpos)) 
	    loprtc(LLSHIFT,reg,list2(CONST,i));
	/* shift right */
	if ((i=bitsz-bitsize)) 
	    loprtc(sign?LRSHIFT:LURSHIFT,reg,list2(CONST,i));
#endif
    } else {
	use_int(adr);
	use_int(reg);
	printf("\t%s %d(%s),%s\n",cload(sign,size),0,register_name(adr,0),
		    register_name(reg,0));
	/* shift left */
	if ((i=32-bitsize-bitpos)) 
	    oprtc(LSHIFT,reg,list2(CONST,i));
	/* shift right */
	if ((i=32-bitsize)) 
	    oprtc(sign?RSHIFT:URSHIFT,reg,list2(CONST,i));
    }
}

/* bit field replacement */

/*
 * Emit code that merges the bits of `reg` selected by `mask` into a
 * destination word, leaving the destination's other bits untouched.
 *
 *   mask  bits that belong to the field
 *   reg   register holding the new bits (already shifted into place)
 *   lreg  destination: a register number, or -1 / -2 for a word on the
 *         machine stack (the offsets used below account for the
 *         temporary push of reg)
 *
 * reg is saved on the stack, turned into (reg | ~mask) and and-ed into
 * the destination; it is then restored, reduced to (reg & mask) and
 * or-ed into the destination.  For a register destination the two
 * registers are finally exchanged, so the merged word ends up in reg.
 *
 * The first printf writes the mask as a "##" line into the output.
 */
static void
make_mask_and_or(int mask,int reg,int lreg)
{
printf("## mask 0x%08x ~0x%08x\n",mask,~mask);
	printf("\tpushl %s\n",register_name(reg,0));
	/* make and-mask  */
	oprtc(BOR,reg,list2(CONST,~mask));
	/* do conjunction  */
	if (lreg==-1) {
	    printf("\tandl %s,4(%%esp)\n",register_name(reg,0));
	} else if (lreg==-2) {
	    printf("\tandl %s,8(%%esp)\n",register_name(reg,0));
	} else {
	    printf("\tandl %s,%s\n",register_name(reg,0),register_name(lreg,0));
	}
	/* make or-mask  */
	printf("\tpopl %s\n",register_name(reg,0));
	oprtc(BAND,reg,list2(CONST,mask));
	/* do disjunction  */
	if (lreg==-1) {
	    printf("\torl %s,0(%%esp)\n",register_name(reg,0));
	} else if (lreg==-2) {
	    printf("\torl %s,4(%%esp)\n",register_name(reg,0));
	} else {
	    printf("\torl %s,%s\n",register_name(reg,0),register_name(lreg,0));
	    printf("\txchg %s,%s\n",register_name(reg,0),register_name(lreg,0));
	}
}

/*
 * Emit code that stores the value in register `value` into the bit
 * field described by `type` at the address held in register `adr`.
 *
 * The address register doubles as scratch for the old storage word, so
 * it is pushed first and restored from the stack before the merged word
 * is written back.  The value is shifted to the field position, merged
 * with the old contents by make_mask_and_or() and stored.
 *
 * long long fields (only with LONGLONG_CODE) are handled one 32-bit
 * half at a time: the upper half when the field reaches bit 32 or
 * beyond, the lower half when it starts below bit 32.
 *
 * When `use` is set the field is read back with code_bit_field() so
 * that the assignment yields a value.
 */
extern void
code_bit_replace(int adr,int value,int type)
{
    int sign,bitsz,l,align;
    int bitsize,bitpos;
    int mask = 0;
    int size;
    set_bitsz(type,&bitpos,&bitsize,&sign,&bitsz,&align,&l);
    size = bitsz/8;
    if (l) {
#if LONGLONG_CODE
        int push=0;             /* 1 while the saved adr is on the stack */
        use_int(adr);
        use_longlong(value);
        /* shift left */
        if (bitpos)
            loprtc(LLSHIFT,value,list2(CONST,bitpos));
        if (bitpos+bitsize>=32) {
            /* make and-mask upper */
            printf("\tpushl %s\n",register_name(adr,0)); push=1;
            printf("\t%s %d(%s),%s\n",cload(sign,size),4,register_name(adr,0),
                        register_name(adr,0));
            mask = make_mask(64-bitpos-bitsize,bitpos>=32?63-bitpos:31);
            make_mask_and_or(mask,regv_h(value),adr);
            /* reload the address without popping; the lower half may need it */
            printf("\tmovl 0(%%esp),%s\n",register_name(adr,0));
            printf("\t%s %s,4(%s)\n",move(0),
                    l_edx(value), register_name(adr,0));
        }
        if (bitpos<32) {
            if (!push) { printf("\tpushl %s\n",register_name(adr,0)); push=1;}
            /* make and-mask lower */
            printf("\t%s %d(%s),%s\n",cload(sign,size),0,register_name(adr,0),
                        register_name(adr,0));
            mask = make_mask(bitpos+bitsize>=32?0:32-bitpos-bitsize,31-bitpos);
            make_mask_and_or(mask,regv_l(value),adr);
            printf("\tpopl %s\n",register_name(adr,0)); push=0;
            printf("\t%s %s,(%s)\n",move(0),
                    l_eax(value), register_name(adr,0));
        }
        /*
         * Field entirely in the upper word: the saved address is still on
         * the stack, so drop it.  (AT&T syntax: source first, and the
         * 32-bit stack pointer is %esp.)
         */
        if (push) printf("\taddl $4,%%esp\n");
#endif
    } else {
        use_int(adr);
        use_int(value);
        printf("\tpushl %s\n",register_name(adr,0));
        printf("\t%s %d(%s),%s\n",cload(sign,size),0,register_name(adr,0),
                    register_name(adr,0));
        /* shift left */
        if (bitpos)
            oprtc(LSHIFT,value,list2(CONST,bitpos));
        /* make and-mask */
        mask = make_mask(32-bitpos-bitsize,31-bitpos);
        make_mask_and_or(mask,value,adr);
        printf("\tpopl %s\n",register_name(adr,0));
        code_assign(adr,size==4?0:size,value);
    }
    if (use) {
        code_bit_field(type,adr,USE_CREG);
    }
}


/*
 * Emit code that merges the constant `c` into register `reg` under
 * `mask`: bits outside the mask are kept, bits inside it are replaced
 * by the corresponding bits of c.
 *
 * This is done with an and of (~mask | c) followed by an or of
 * (mask & c).  The and is skipped when its operand is all ones, the or
 * when its operand is zero.  For registers below MAX_DATA_REG the
 * byte or word form of the instruction is used when the operand only
 * affects the low 8 or 16 bits.
 */
static void
make_mask_and_or_const(int mask,int reg,int c)
{
    int and_bits = ~mask|c;
    int or_bits = mask&c;
    int narrow_ok = reg<MAX_DATA_REG;

    /* do conjunction  */
    if (and_bits!=-1) {
        if (!narrow_ok || (and_bits& ~0xffff)!=~0xffff) {
            printf("\tandl $%d,%s\n",and_bits,register_name(reg,0));
        } else if ((and_bits& ~0xff)!=~0xff) {
            printf("\tandw $%d,%s\n",and_bits&0xffff,register_name(reg,2));
        } else {
            printf("\tandb $%d,%s\n",and_bits&0xff,register_name(reg,1));
        }
    }

    /* do disjunction  */
    if (or_bits!=0) {
        if (!narrow_ok || (or_bits& ~0xffff)) {
            printf("\torl $%d,%s\n",or_bits,register_name(reg,0));
        } else if (or_bits& ~0xff) {
            printf("\torw $%d,%s\n",or_bits&0xffff,register_name(reg,2));
        } else {
            printf("\torb $%d,%s\n",or_bits&0xff,register_name(reg,1));
        }
    }
}

/*
 * Emit code that stores a compile-time constant into the bit field
 * described by `type` at the address held in register `adr`.
 *
 *   value  constant node; read with cadr(value), or lcadr(value) for
 *          long long fields
 *   adr    register holding the address of the storage unit
 *   type   bit-field type, decoded with set_bitsz()
 *
 * A scratch register is obtained with get_register(), the old storage
 * word is loaded into it, the constant (shifted to the field position
 * at compile time) is merged in by make_mask_and_or_const() and the
 * word is written back.  long long fields (only with LONGLONG_CODE)
 * are handled one 32-bit half at a time.
 *
 * When `use` is set the field is read back with code_bit_field().
 */
extern void
code_bit_replace_const(int value,int adr,int type)
{
    int sign,bitsz,l,align;
    int bitpos,bitsize,size;
    int mask = 0;
    int c,lvalue;
#if LONGLONG_CODE
    long long lc;
#endif
    set_bitsz(type,&bitpos,&bitsize,&sign,&bitsz,&align,&l);
    size = bitsz/8;
// printf("## %d: bitpos=%d bitsize=%d bitsz=%d\n",lineno,bitpos,bitsize,bitsz);
    use_int(adr);
    if (l) {
#if LONGLONG_CODE
	lvalue = get_register();
	/* shift left */
	lc = lcadr(value);
	lc <<= bitpos;
	
	/* upper half is touched when the field reaches bit 32 or beyond */
	if (bitpos+bitsize>=32) {
	    printf("\t%s %d(%s),%s\n",cload(sign,size),4,register_name(adr,0),
			register_name(lvalue,0));
	    /* make and-mask upper */
	    mask = make_mask(64-bitpos-bitsize,bitpos>=32?63-bitpos:31);
	    make_mask_and_or_const(mask,lvalue,(int)(lc>>32));
	    printf("\t%s %s,4(%s)\n",move(0),register_name(lvalue,0),
			register_name(adr,0));
	}
	/* lower half is touched when the field starts below bit 32 */
	if (bitpos<32) {
	    printf("\t%s %d(%s),%s\n",cload(sign,size),0,register_name(adr,0),
			register_name(lvalue,0));
	    /* make and-mask lower */
	    mask = make_mask(bitpos+bitsize>=32?0:32-bitpos-bitsize,31-bitpos);
	    make_mask_and_or_const(mask,lvalue,(int)(lc));
	    printf("\t%s %s,(%s)\n",move(0),
		    register_name(lvalue,0),register_name(adr,0));
	}
	free_register(lvalue);
#endif
    } else {
	lvalue = get_register();
	printf("\t%s %d(%s),%s\n",cload(sign,size),0,register_name(adr,0),
		    register_name(lvalue,0));
	/* shift left */
	c = cadr(value);
	c <<= bitpos;
	/* make and-mask */
	mask = make_mask(32-bitpos-bitsize,31-bitpos);
	make_mask_and_or_const(mask,lvalue,c);
        code_assign(adr,size==4?0:size,lvalue);
	free_register(lvalue);
    }
    if (use) 
        code_bit_field(type,adr,USE_CREG);
}

#endif

/*
 * Classify an operator code: returns 1 when e3 is one of the operators
 * listed below (calls, conversions, structure assignment, alloca,
 * 64-bit multiply/divide/shift, double arithmetic and comparison,
 * bit-field access and assignment, inline), and 0 for everything else.
 */
int
not_simple_p(int e3)
{
    int listed = 0;

    switch(e3) {
    case FUNCTION: case CONV: case STASS: case ALLOCA:
    case LDIV: case LUDIV: case LMOD: case LUMOD:
    case LMUL: case LUMUL:
    case LLSHIFT: case LULSHIFT: case LRSHIFT: case LURSHIFT:
    case DDIV: case DADD: case DSUB: case DMUL: case DMINUS:
    case DPOSTINC : case DPREINC : case DASSOP :
    case DOP+LT : case DOP+LE : case DOP+GT : case DOP+GE :
    case DOP+EQ : case DOP+NEQ:
    case RBIT_FIELD: case BASS: case BASSOP: case LCALL:
    case INLINE:
        listed = 1;
        break;
    default:
        break;
    }
    return listed;
}

/* end */