view mc-code-i64.c @ 796:80a59598df5f

ia64 stdarg
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Tue, 23 Nov 2010 17:45:40 +0900
parents a4fd2ab28e24
children e2f7680a574a
line wrap: on
line source

/* Micro Code Generation Part for intel64 (x86-64) */

/************************************************************************
** Copyright (C) 2006 Shinji Kono
** 連絡先: 琉球大学情報工学科 河野 真治  
** (E-Mail Address: kono@ie.u-ryukyu.ac.jp)
**
**    このソースのいかなる複写,改変,修正も許諾します。ただし、
**    その際には、誰が貢献したを示すこの部分を残すこと。
**    再配布や雑誌の付録などの問い合わせも必要ありません。
**    営利利用も上記に反しない範囲で許可します。
**    バイナリの配布の際にはversion messageを保存することを条件とします。
**    このプログラムについては特に何の保証もしない、悪しからず。
**
**    Everyone is permitted to do anything on this program 
**    including copying, modifying, improving,
**    as long as you don't try to pretend that you wrote it.
**    i.e., the above copyright notice has to appear in all copies.  
**    Binary distribution requires original version messages.
**    You don't have to ask before copying, redistribution or publishing.
**    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
***********************************************************************/



#include <stdio.h>
#include "mc.h"
#include "mc-parse.h"
#include "mc-codegen.h"
#include "mc-code.h"
#include "mc-inline.h"

/* When the compiler itself is built on an Apple host, define USE_SSE2 and
   USE_PIC.  (Their consumers are outside this part of the file; presumably
   they select SSE2 floating point and position independent code.) */
#ifdef __APPLE__
#define USE_SSE2
#define USE_PIC
#endif

#if defined(__GNUC__) && __GNUC__ >= 4

#include "mc-include.c"

/*
 * Source text that code_init() installs as init_src (variant used when the
 * compiler itself is built with GCC >= 4).
 *
 * First literal: the __builtin_va_list structure and the
 * __builtin_va_start / __builtin_va_arg emulation.  va_arg advances either
 * `float_first` (for double, while it is below stack_top) or `arg` by 8 and
 * switches to `stack_top` when the corresponding *_last limit is reached.
 *
 * Second literal: a fixed list of predefined macros.
 */
static
char *init_src0 = "\
typedef struct __builtin_va_list { \\\n\
    long long_last; \\\n\
    long float_first; \\\n\
    long float_last; \\\n\
    long stack_top; \\\n\
    long arg; \\\n\
} __builtin_va_list1, *__builtin_va_list; \\\n\
 \\\n\
#define __builtin_va_start(ap,v) \\\n\
{ \\\n\
    ap = &__my_va_list; \\\n\
    ap->float_first = ap->long_last-8; \\\n\
    long adr = (long)&v; \\\n\
    if (adr >= ap->stack_top) ap->arg = ap->float_first = adr; \\\n\
    if (__builtin_types_compatible_p(typeof(v),double))  \\\n\
	ap->float_first = adr; \\\n\
    ap->arg = adr; \\\n\
} \\\n\
\n\
#define __builtin_va_arg(ap,type) ({ \\\n\
    long arg; \\\n\
    if (__builtin_types_compatible_p(type,double)  \\\n\
            && ap->float_first < ap->stack_top) { \\\n\
	ap->float_first = ap->float_first+8; \\\n\
	if (ap->float_first==ap->float_last) \\\n\
	    ap->float_first = ap->stack_top;\\\n\
	arg = ap->float_first; \\\n\
    } else { \\\n\
	ap->arg = ap->arg+8; \\\n\
        if (ap->arg==ap->long_last) \\\n\
	    ap->arg = ap->stack_top; \\\n\
        arg = ap->arg; \\\n\
    } \\\n\
    *((type *)(arg)); \\\n\
}) \\\n\
\n"

/* Predefined macros made visible to every compiled program. */
"\
#define __builtin_va_end(v)\n\
#define __inline inline \n\
#define __inline__ inline \n\
#define __DARWIN_1050(x) \n\
#define __AVAILABILITY_INTERNAL1000_DEP1050\n\
#define __OSX_AVAILABLE_BUT_DEPRECATED(a,b,c,d) \n\
#define __DBL_MIN_EXP__ (-1021) \n\
#define __FLT_MIN__ 1.17549435e-38F \n\
#define __DEC64_DEN__ 0.000000000000001E-383DD \n\
#define __CHAR_BIT__ 8 \n\
#define __WCHAR_MAX__ 2147483647 \n\
#define __DBL_DENORM_MIN__ 4.9406564584124654e-324 \n\
#define __FLT_EVAL_METHOD__ 0 \n\
#define __DBL_MIN_10_EXP__ (-307) \n\
#define __FINITE_MATH_ONLY__ 0 \n\
#define __DEC64_MAX_EXP__ 384 \n\
#define __SHRT_MAX__ 32767 \n\
#define __LDBL_MAX__ 1.18973149535723176502e+4932L \n\
#define __APPLE_CC__ 5664 \n\
#define __UINTMAX_TYPE__ long unsigned int \n\
#define __DEC32_EPSILON__ 1E-6DF \n\
#define __SCHAR_MAX__ 127 \n\
#define __USER_LABEL_PREFIX__ _ \n\
#define __STDC_HOSTED__ 1 \n\
#define __DEC64_MIN_EXP__ (-383) \n\
#define __DBL_DIG__ 15 \n\
#define __FLT_EPSILON__ 1.19209290e-7F \n\
#define __LDBL_MIN__ 3.36210314311209350626e-4932L \n\
#define __DEC32_MAX__ 9.999999E96DF \n\
#define __strong  \n\
#define __APPLE__ 1 \n\
#define __DECIMAL_DIG__ 21 \n\
#define __LDBL_HAS_QUIET_NAN__ 1 \n\
#define __DYNAMIC__ 1 \n\
#define __GNUC__ 4 \n\
#define __MMX__ 1 \n\
#define __FLT_HAS_DENORM__ 1 \n\
#define __DBL_MAX__ 1.7976931348623157e+308 \n\
#define __DBL_HAS_INFINITY__ 1 \n\
#define __DEC32_MIN_EXP__ (-95) \n\
#define OBJC_NEW_PROPERTIES 1 \n\
#define __STRICT_ANSI__ 1 \n\
#define __LDBL_HAS_DENORM__ 1 \n\
#define __DEC32_MIN__ 1E-95DF \n\
#define __weak __attribute__((objc_gc(weak))) \n\
#define __DBL_MAX_EXP__ 1024 \n\
#define __DEC128_EPSILON__ 1E-33DL \n\
#define __SSE2_MATH__ 1 \n\
#define __amd64 1 \n\
#define __tune_core2__ 1 \n\
#define __LONG_LONG_MAX__ 9223372036854775807LL \n\
#define __GXX_ABI_VERSION 1002 \n\
#define __FLT_MIN_EXP__ (-125) \n\
#define __x86_64 1 \n\
#define __DBL_MIN__ 2.2250738585072014e-308 \n\
#define __LP64__ 1 \n\
#define __DBL_HAS_QUIET_NAN__ 1 \n\
#define __DEC128_MIN__ 1E-6143DL \n\
#define __REGISTER_PREFIX__  \n\
#define __DBL_HAS_DENORM__ 1 \n\
#define __NO_INLINE__ 1 \n\
#define __DEC_EVAL_METHOD__ 2 \n\
#define __DEC128_MAX__ 9.999999999999999999999999999999999E6144DL \n\
#define __FLT_MANT_DIG__ 24 \n\
#define __VERSION__ \"Micro C\" \n\
#define __DEC64_EPSILON__ 1E-15DD \n\
#define __DEC128_MIN_EXP__ (-6143) \n\
#define __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ 1064 \n\
#define __SIZE_TYPE__ long unsigned int \n\
#define __DEC32_DEN__ 0.000001E-95DF \n\
#define __FLT_RADIX__ 2 \n\
#define __LDBL_EPSILON__ 1.08420217248550443401e-19L \n\
#define __SSE_MATH__ 1 \n\
#define __k8 1 \n\
#define __LDBL_DIG__ 18 \n\
#define __x86_64__ 1 \n\
#define __FLT_HAS_QUIET_NAN__ 1 \n\
#define __FLT_MAX_10_EXP__ 38 \n\
#define __LONG_MAX__ 9223372036854775807L \n\
#define __FLT_HAS_INFINITY__ 1 \n\
#define __DEC64_MAX__ 9.999999999999999E384DD \n\
#define __DEC64_MANT_DIG__ 16 \n\
#define __STDC_VERSION__ 199901L \n\
#define __DEC32_MAX_EXP__ 96 \n\
#define __DEC128_DEN__ 0.000000000000000000000000000000001E-6143DL \n\
#define __LITTLE_ENDIAN__ 1 \n\
#define __LDBL_MANT_DIG__ 64 \n\
#define __CONSTANT_CFSTRINGS__ 1 \n\
#define __DEC32_MANT_DIG__ 7 \n\
#define __k8__ 1 \n\
#define __WCHAR_TYPE__ int \n\
#define __pic__ 2 \n\
#define __FLT_DIG__ 6 \n\
#define __INT_MAX__ 2147483647 \n\
#define __FLT_MAX_EXP__ 128 \n\
#define __DBL_MANT_DIG__ 53 \n\
#define __DEC64_MIN__ 1E-383DD \n\
#define __WINT_TYPE__ int \n\
#define __SSE__ 1 \n\
#define __LDBL_MIN_EXP__ (-16381) \n\
#define __MACH__ 1 \n\
#define __amd64__ 1 \n\
#define __LDBL_MAX_EXP__ 16384 \n\
#define __SSP__ 1 \n\
#define __LDBL_MAX_10_EXP__ 4932 \n\
#define __DBL_EPSILON__ 2.2204460492503131e-16 \n\
#define _LP64 1 \n\
#define __GNUC_PATCHLEVEL__ 1 \n\
#define __LDBL_HAS_INFINITY__ 1 \n\
#define __GNUC_STDC_INLINE__ 1 \n\
#define __INTMAX_MAX__ 9223372036854775807L \n\
#define __FLT_DENORM_MIN__ 1.40129846e-45F \n\
#define __PIC__ 2 \n\
#define __FLT_MAX__ 3.40282347e+38F \n\
#define __SSE2__ 1 \n\
#define __FLT_MIN_10_EXP__ (-37) \n\
#define __INTMAX_TYPE__ long int \n\
#define __DEC128_MAX_EXP__ 6144 \n\
#define __GNUC_MINOR__ 2 \n\
#define __DBL_MAX_10_EXP__ 308 \n\
#define __LDBL_DENORM_MIN__ 3.64519953188247460253e-4951L \n\
#define __STDC__ 1 \n\
#define __PTRDIFF_TYPE__ long int \n\
#define __DEC128_MANT_DIG__ 34 \n\
#define __LDBL_MIN_10_EXP__ (-4931) \n\
#define __SSE3__ 1 \n\
"
#ifdef __APPLE__
/* NOTE(review): __APPLE__ and __GNUC__ are already defined in the list
   above, and __BIG_ENDIAN__ is defined here although the same list defines
   __LITTLE_ENDIAN__ and this file sets ENDIAN to 0 -- confirm whether
   __BIG_ENDIAN__ is a leftover from another target. */
"#define __APPLE__ 1\n"
"#define __GNUC__ 4\n"
"#define __BIG_ENDIAN__ 1\n"
#endif
;

/*

  #define size_t int\n\

*/

#else

#include "mc-include.c"

/*
 * Fallback definitions of init_src0 used when the compiler is not built
 * with GCC >= 4.  Both variants implement varargs as a plain int cursor
 * stepping over the arguments; the first uses the __builtin_va_* names,
 * the second defines va_list / va_start / va_arg directly.
 * NOTE(review): "__THORW" in both strings looks like a typo for "__THROW";
 * it is program text, so it is left untouched here.
 * The terminating ';' after the #endif closes whichever definition was
 * selected.
 */
#if defined(__GNUC__) && __GNUC__ >= 3
static
char *init_src0 = "\
#define __builtin_va_start(ap,arg) ap=(((int)(&arg))+sizeof(arg))\n\
#define __builtin_va_arg(ap,type)  (*((type *)ap)++)\n\
#define __builtin_va_end\n\
#define __i386__ 1\n\
#define __LITTLE_ENDIAN__ 1\n\
#define __STDC__ 1\n\
#define __extension__\n\
// #define __restrict\n\
#define __flexarr\n\
#define __const const\n\
#define __THORW\n\
// #define __attribute__(a)\n\
#define __inline__ inline\n\
#define __inline inline\n\
#define __GNUC__ 3\n\
#define __builtin_va_list int\n\
typedef long unsigned int __SIZE_TYPE__ ;\n\
"

#else
static
char *init_src0 = "\
#define va_list int\n\
#define va_start(ap,arg) ap=(((int)(&arg))+sizeof(arg))\n\
#define va_arg(ap,type)  (*((type *)ap)++)\n\
#define va_end\n\
#define __i386__ 1\n\
#define __LITTLE_ENDIAN__ 1\n\
#define __STDC__ 1\n\
#define __extension__\n\
// #define __restrict\n\
#define __flexarr\n\
#define __const const\n\
#define __THORW\n\
// #define __attribute__(a)\n\
#define __inline__ inline\n\
#define __SIZE_TYPE__ long unsigned int\n\
#define __GNUC__ 3\n\
"

#endif


 ;

#endif

int data_alignment = 0;

/* Byte sizes of the basic types; code_init() copies them into the
   size_of_* globals.  ENDIAN is copied into `endian` there as well. */
#define    SIZE_OF_INT  4
#define    SIZE_OF_SHORT  2
#define    SIZE_OF_FLOAT  4
#define    SIZE_OF_DOUBLE  8
#define    SIZE_OF_LONGLONG  8
#define    ENDIAN  0
#define    ENDIAN_L  0
#define    ENDIAN_D  0

int eval_order = REVERSE;

/* Values for output_mode. */
#define TEXT_EMIT_MODE 0
#define DATA_EMIT_MODE 1
#define RODATA_EMIT_MODE 2

/* DOT_SIZE is defined only on non-Apple hosts. */
#ifdef __APPLE__
#else
#define DOT_SIZE 1
#endif

static int output_mode = TEXT_EMIT_MODE;

/* creg: current register; ireg: current integer register. */
static int creg;
static int ireg;

extern int lp64;

int code_lassop_p = 1;

#define MAX_REGISTER 15              /* use intel64 registers up to number 15 */
#define REAL_MAX_REGISTER (1+16)     /* intel64 has 16 general registers */
int MAX_REGISTER_VAR=5;    
#define REAL_MAX_DREGISTER (1+16)    /* intel64 has 16 xmm registers */

int MAX_DREGISTER_VAR=0;             /* no xmm register is preserved (callee saved) */

/* Bounds (xmm numbers) used by get_dregister() when picking a temporary. */
#define MIN_TMP_FREG 8
#define MAX_TMP_FREG 15

/* Number of arguments that may be passed in integer / xmm registers. */
int MAX_INPUT_REGISTER_VAR = 6;
int MAX_CODE_INPUT_REGISTER_VAR = 6;
int MAX_INPUT_DREGISTER_VAR = 8;
// static int MAX_INPUT_FREGISTER_VAR = 0;
int MAX_CODE_INPUT_DREGISTER_VAR = 8;

static int  reg_sp;   /* REGister Stack-Pointer */
static int reg_stack[MAX_MAX];  /* storage of the register stack itself */
static int stack_depth = 0;

/* floating point registers */

static int  freg_sp;  /* floating point REGister Stack-Pointer */
static int freg_stack[MAX_MAX]; /* storage of the floating point register stack itself */

static int max_reg_var, max_freg_var;

/*
   Stack frame sketch (offsets relative to the frame pointer):

         local2     -8                  
         local1 <-- -6 0 local variable    
                    -12  <- disp_offset          %ebp
                     -8
                     -4
        %ebp = %esp   0
        %eip          4   <- arg_offset
                 arg8 8
                 arg7 16
	   %r9d  arg6
	   %r8d  arg5
	   %ecx  arg4
	   %edx  arg3
	   %esi  arg2
	   %rdi  arg1
            see enter/enter1/leave           see code_enter
 */
static int arg_offset_v;

static int code_disp_label;

/* Flags for the float_one / float_one_f constants defined below
   (presumably set once the constant has been referenced -- the code that
   sets them is outside this part of the file). */
static int float_one_lib_used=0;
static int float_one_f_lib_used=0;

/* Name table entries for the two constants; the assembler text for them
   is in float_one_lib / float_one_f_lib. */
static
NMTBL float_one = {"_float_one",0,STATIC,FLOAT,0};
static 
NMTBL float_one_f = {"_float_one_f",0,STATIC,FLOAT,0};

/* Assembler lines defining the 8-byte literal labelled __float_one.
   The list is terminated by a null pointer. */
static char *float_one_lib[] = {
  "        .literal8",
"        .align 3",
"__float_one:",
"        .long   0",
"        .long   1072693248",
        0
};

/* Assembler lines defining the 4-byte literal labelled __float_one_f.
   The list is terminated by a null pointer. */
static char *float_one_f_lib[] = {
  "        .literal4",
  "        .align 2",
  "__float_one_f:",
  "        .long   1065353216",
        0
};


/*
    creg   current register
	ireg  current register for integer (int mode)
	lreg  current register for long long (long long mode)

    regs[]        register usage

    freg    current floating point register
	kept in FPU stack (no register)
 */

/* Usage state per register number, and the source line (glineno) at which
   each register was last allocated. */
static int ia32regs[1+REAL_MAX_REGISTER+REAL_MAX_DREGISTER];
static int regs_line[1+REAL_MAX_REGISTER+REAL_MAX_DREGISTER];

static int *regs  = ia32regs;

static int freg;

// register number should start 1
// regs[] value
//    0 for not ready
//   -1 use current register 
//    1 used
//    2 (REG_VAR) register variable
//    3 pointer cache (not used in ia32)


/* Internal register numbers; they index reg_name_l/_w/reg_name/reg_name_q. */
#define REG_ESP   0
#define REG_EBP   1
#define REG_EDI   2    // first argument register
#define REG_ESI   3
#define REG_EDX   4
#define REG_ECX   5    // for strange reason (code_assop)
#define REG_R8    6  
#define REG_R9    7    // input register
#define REG_EAX   8    // varargs count of fvar
#define REG_R10   9
#define REG_R11   10
#define REG_EBX   11   // register var
#define REG_R12   12
#define REG_R13   13
#define REG_R14   14
#define REG_R15   15
#define REG_XMM0  16
#define is_int_reg(reg) (1<=reg&&reg<=MAX_REGISTER)
/* NOTE(review): the upper bound admits reg == FREG_OFFSET+16, while
   reg_name_d has only 16 entries (offsets 0..15) -- confirm. */
#define is_float_reg(reg) (RET_FREGISTER<=reg&&reg<FREG_OFFSET+REAL_MAX_DREGISTER)

//  return value register
#define FREG_OFFSET   16
#define RET_FREGISTER   16
#define RET_DREGISTER   16
#define RET_LREGISTER   REG_EAX
#define RET_REGISTER    REG_EAX

//  default current register
#define CREG_REGISTER   REG_R10
#define FREG_FREGISTER  27

/* Byte-sized register names, indexed by register number.  Entries 0..3
   (REG_ESP, REG_EBP, REG_EDI, REG_ESI) are 0; is_data_reg() uses that to
   tell whether a register can be used as a byte register. */
static char *reg_name_l[] = {0,
    0,
    0,
    0,
    "%dl",
    "%cl",
    "%r8b",
    "%r9b",
    "%al",
    "%r10b",
    "%r11b",
    "%bl",
    "%r12b",
    "%r13b",
    "%r14b",
    "%r15b",
    };
/* 16-bit register names, indexed by register number. */
static char *reg_name_w[] = {
    "%sp",
    "%bp",
    "%di",
    "%si",
    "%dx",
    "%cx",
    "%r8w",
    "%r9w",
    "%ax",
    "%r10w",
    "%r11w",
    "%bx",
    "%r12w",
    "%r13w",
    "%r14w",
    "%r15w",
    };

/* 32-bit register names, indexed by register number. */
static char *reg_name[] = {
    "%esp",
    "%ebp",
    "%edi",
    "%esi",
    "%edx",
    "%ecx",
    "%r8d",
    "%r9d",
    "%eax",
    "%r10d",
    "%r11d",
    "%ebx",
    "%r12d",
    "%r13d",
    "%r14d",
    "%r15d",
    };

/* Register variables are numbered downwards from the BASE register;
   BASE-MIN equals the number of available register variables. */
#define REG_VAR_BASE    (REG_R15)
#define REG_VAR_MIN     (REG_R15-MAX_REGISTER_VAR)

#define FREG_VAR_BASE    (RET_FREGISTER+15)
#define FREG_VAR_MIN     (RET_FREGISTER+15-MAX_DREGISTER_VAR)

/* 64-bit register names, indexed by register number. */
static char *reg_name_q[] = {
    "%rsp",
    "%rbp",
    "%rdi",
    "%rsi",
    "%rdx",
    "%rcx",
    "%r8",
    "%r9",
    "%rax",
    "%r10",
    "%r11",
    "%rbx",
    "%r12",
    "%r13",
    "%r14",
    "%r15",
};

/* xmm register names, indexed by (register number - FREG_OFFSET);
   see fregister_name(). */
static char *reg_name_d[] = {    
    "%xmm0", // register number FREG_OFFSET (16)
    "%xmm1",
    "%xmm2",
    "%xmm3",
    "%xmm4",
    "%xmm5",
    "%xmm6",
    "%xmm7",
    "%xmm8",
    "%xmm9",
    "%xmm10",
    "%xmm11",
    "%xmm12",
    "%xmm13",
    "%xmm14",
    "%xmm15" };

/* Forward declarations of file-local helpers. */
static void ascii(char *s);

static int use_register(int virt, int real, int move);
static void shift(char *op, int reg,int creg, int sz);
static void ld_indexx(int byte, int n, int xreg,int reg,int sign);
//static void data_mode(char *name);
// static void text_mode(int align);
static int get_data_register(void);

static void local_table(void);
static int push_struct(int e4,int t, int arg) ;
static void code_clear_stack_reg(int reg1);
#if FLOAT_CODE
static char * fload(int d);
static int code_d1(double d);
static int code_d2(double d);

#endif
static void jcond(int l, char cond);
static char * code_cond(int op,int cond);





/* Rounding helpers built on align(). */
#define round16(i)   align(i,16)
#define round4(i)    align(i,4)
#define round8(i)    align(i,8)


/* Fixed displacements used when turning a variable offset into an
   address; see lvar(). */
#define func_disp_offset (16)
// #define code_disp_offset (16)

#define arg_offset  (16)
#define arg_offset1  (0)
/* Offsets at or above this value denote caller's (outgoing) arguments. */
#define ARG_LVAR_OFFSET 0x10000000

#define code_disp_offset0 (-func_disp_offset)   
int disp_offset = code_disp_offset0;

/*
     FUNC_LVAR == CODE_LVAR is necessary in jump from function
       func_disp_offset === code_disp_offset0
 */
#define CODE_LVAR(l) ((l)+code_disp_offset0)
#define CODE_CALLER_ARG(l) ((l)+arg_offset1)
#define FUNC_LVAR(l) ((l)-func_disp_offset)
#define CALLER_ARG(l) ((l)+arg_offset1)
#define CALLEE_ARG(l) ((l)+arg_offset)
static int r1_offset_label;
static const char lpfx[] = "_";     /* prefix printed before label numbers */
static int lvar_offset_label;
static int reg_in_arg;
static int max_func_args,max_func_arg_label;

/*
 * Print the assembler memory operand for the variable at offset `l`.
 *
 *   l >= ARG_LVAR_OFFSET : caller's (outgoing) argument, relative to %rsp
 *   l <  0               : local variable, relative to %rbp
 *   otherwise            : callee's (incoming) argument, relative to %rbp
 *                          and to the symbol _<lvar_offset_label>
 *
 * In a code segment everything below ARG_LVAR_OFFSET is addressed as a
 * local variable.
 */
static void
lvar(int l)
{
    int is_caller_arg = (l >= ARG_LVAR_OFFSET);

    if (is_code(fnptr)) {
        if (is_caller_arg)
            printf("%d(%%rsp)", CODE_CALLER_ARG(l-ARG_LVAR_OFFSET));
        else
            printf("%d(%%rbp)", CODE_LVAR(l));
        return;
    }
    if (l < 0) {
        /* local variable */
        printf("%d(%%rbp)", FUNC_LVAR(l));
        return;
    }
    if (is_caller_arg) {
        /* caller's arguments */
        printf("%d(%%rsp)", CALLER_ARG(l-ARG_LVAR_OFFSET));
        return;
    }
    /* callee's arguments */
    printf("%d-_%d(%%rbp)", CALLEE_ARG(l), lvar_offset_label);
}

/*
    function call stack frame
                     prev esp
                      <-------r1_offset------------------------------>
            <-----                   ebp-->                  <----- esp
 r+  +------------+---+---------------+----------+-------------------+    -
      callee arg   xx   register save   local      caller arg  
                          reg_save      disp       max_func_args*SIZE_OF_INT
        lvar>0                         lvar<0       lvar>0x1000 0000

code segment stack frame

                 * gotoを呼び出した関数のr1 ! r1(goto前のr1)
                                                disp_offset
   #             *                           ebp <---r1_offset---------> esp
r+ +----------+--+----------+----------------+-----------+----------+----+
    cousin arg xx  reg save !callee arg      !code local  caller arg  xx
                   r20-r29     lvar>0         lvar<0      lvar>0x1000 000
                   f20-f31  <-my_func_args--><--disp-----><-max_func_arg->
                              *SIZE_OF_INT                  *SIZE_OF_INT

  %esp should be alignment 16
 
 */

/*
 * Compute the frame size for `fnptr` and emit the pending size labels.
 *
 * The size is the 16-rounded local area (-disp) plus the outgoing argument
 * area and func_disp_offset, rounded to 16 again; 8 more bytes are added
 * when max_reg_var is non-negative and even.
 * For a code segment the value is emitted at code_disp_label; for a
 * function only a size sanity check is made.  If max_func_arg_label is
 * set, the rounded outgoing argument size is emitted there.  Both labels
 * are reset to 0 after use.
 *
 * Returns the computed frame size.
 */
static int
code_offset_set(NMTBL *fnptr)
{
    int in_code_segment = is_code(fnptr);
    int locals_size = round16(-disp);
    int frame_size =
        round16(locals_size + max_func_args*SIZE_OF_INT + func_disp_offset);

    /* pushq makes rsp%8==8, make it round16 */
    if (max_reg_var >= 0 && max_reg_var % 2 == 0)
        frame_size += 8;

    if (!in_code_segment) {
        /* too large function arguments? */
        if (frame_size - locals_size > 65000)
            error(-1);
    } else {
        data_mode(0);
        code_label(code_disp_label);
        emit_longlong(llist2(LCONST, round16(frame_size)));
        code_disp_label = 0;
    }

    if (max_func_arg_label) {
        data_mode(0);
        code_label(max_func_arg_label);
        emit_longlong(llist2(LCONST, round16(max_func_args*SIZE_OF_INT)));
        max_func_arg_label = 0;
    }

    return frame_size;
}



#define use_int(reg)   if (reg==-1) reg=use_int0()

/*
 * Select the register to use as the current integer register.
 *
 * creg is kept when it is a non-zero integer register and ireg is set;
 * otherwise ireg is used, allocating one with get_register() if needed.
 * The chosen register is marked USING_REG when it was free, becomes both
 * creg and ireg, and is returned.
 */
static int
use_int0() { 
    int r = creg;
    int creg_usable = (r && ireg && is_int_reg(r));

    if (!creg_usable) {
        if (!ireg)
            ireg = get_register();
        r = ireg;
    }
    if (!regs[r])
        regs[r] = USING_REG;
    ireg = r;
    creg = r;
    return r;
}

#define is_data_reg(reg) (reg_name_l[reg]!=0)
#define is_pointer_reg(reg) (reg> REG_EBP)

/*
 * Prepare real register reg1 on behalf of reg0.
 *
 * reg1 is first cleared from the register stack.  If reg0 and reg1 differ
 * and `move` is set, an instruction with operands (reg0, reg1) is emitted:
 * xchg when reg1 is in use, movq otherwise.  When the registers differ,
 * reg1 is released afterwards.  Always returns reg0.
 */
static int 
use_register(int reg0, int reg1, int move)
{
    code_clear_stack_reg(reg1);

    if (reg0 == reg1)
        return reg0;

    if (move) {
        if (regs[reg1]) {
            printf("\txchg %s,%s\n", reg_name_q[reg0], reg_name_q[reg1]);
        } else {
            printf("\tmovq %s,%s\n", reg_name_q[reg0], reg_name_q[reg1]);
            regs[reg1] = USING_REG;
        }
    }
    free_register(reg1);
    return reg0;
}

#define use_data_reg(reg,keep)   \
    if (reg==-1||!is_data_reg(reg)) reg=use_data_reg0(keep,reg)

/*
 * Make the current register a data register (one that has a byte-sized
 * name in reg_name_l) and return it.
 *
 *   keep : when non-zero and the previous current register had to be
 *          released, copy its value into the newly selected register.
 *   reg  : requested register, or -1 for "use the current register"
 *          (the same sentinel the use_data_reg() macro tests).
 *
 * The sentinel used to be compared against USING_REG, which is a regs[]
 * status value rather than a register number; with reg == -1 that left
 * i == -1 and indexed regs[] / reg_name_l[] out of bounds.
 */
int 
use_data_reg0(int keep,int reg)
{
    int ptreg =0;
    int i;
    if (is_pointer_reg(creg)) {
	/* remember the released register so its value can be kept */
	free_register(ptreg=creg); 
	ireg = creg = 0;
    }
    if (is_pointer_reg(ireg)) {
	free_register(ireg); 
	ireg = 0;
    }
    i = (reg==-1)?creg:reg;
#ifdef __APPLE__
    if (regs[i]==PTRC_REG) clear_ptr_cache_reg(i);
#endif
    if (!i||!ireg||!is_data_reg(i)) {
        if (!ireg) {
	    ireg = get_data_register();
	}
        i = ireg;
    }
    if (!regs[i]) regs[i]=USING_REG;
    creg = ireg = i;
    if (ptreg && keep) {
	printf("\tmovq %s,%s\n",reg_name_q[ptreg],reg_name_q[creg]);
    }
    return i;
}

char *
fregister_name(int reg)  
{
    if (!is_float_reg(reg)) error(-1);
    return reg_name_d[(reg)-FREG_OFFSET];
}

static void
set_freg(int reg,int mode)
{
    if (!is_float_reg(reg)) error(-1);
    if (reg!=creg) {
        if (freg && reg!=freg) {
            free_register(freg);
            if (mode) {
                printf("\tmovapd %s,%s\n",fregister_name(freg),fregister_name(reg));
            }
        }
        // if (creg!=ireg) free_register(creg);
        regs[reg]=USING_REG;
    }
    creg = freg = reg;
}

static void
set_ireg(int reg,int mode)
{
    if (!is_int_reg(reg)) error(-1);
    if (reg!=creg) {
#ifdef __APPLE__
	if (regs[reg]==PTRC_REG)
	    clear_ptr_cache_reg(reg);
#endif
        if (ireg && reg!=ireg ) {
            if (regs[ireg]!=REG_VAR) free_register(ireg);
            if (mode) {
                printf("\tmovq %s,%s\n",reg_name_q[ireg],reg_name_q[reg]);
            }
        }
        if (creg>0 && regs[creg]!=REG_VAR) free_register(creg);
    }
    creg = ireg = reg;
    if (!regs[reg]) regs[reg]=USING_REG;
}

/* long long values live in ordinary integer registers on this target */
#define is_long_reg(reg)   is_int_reg(reg)
#define use_longlong(reg)   use_int(reg)

/*
 * Make `reg` the current long long register; identical to set_ireg().
 */
static void
set_lreg(int reg,int mode)
{
    set_ireg(reg, mode);
}

#if FLOAT_CODE
#define  use_float(d,reg) if (reg==USE_CREG) reg=d?use_double0():use_float0()
/*
 * Select the register to use as the current float register.
 *
 * creg is kept when it is already a floating point register.  Otherwise
 * freg is used: it is allocated with get_dregister(0) when unset; when it
 * is set and differs from creg, creg is released unless it is a register
 * variable.  The chosen register is marked USING_REG if it was free,
 * becomes creg, and is returned.
 */
static
int use_float0() { 
    int r = creg;

    if (!is_float_reg(r)) {
        if (!freg) {
            freg = get_dregister(0);
        } else if (freg != r && regs[r] != REG_VAR) {
            free_register(r);
        }
        r = freg;
    }
    if (!regs[r])
        regs[r] = USING_REG;
    creg = r;
    return r;
}
/*
 * Same as use_float0(), but a missing freg is allocated with
 * get_dregister(1).
 */
static
int use_double0() { 
    int r = creg;

    if (!is_float_reg(r)) {
        if (!freg) {
            freg = get_dregister(1);
        } else if (freg != r && regs[r] != REG_VAR) {
            free_register(r);
        }
        r = freg;
    }
    if (!regs[r])
        regs[r] = USING_REG;
    creg = r;
    return r;
}
#endif


/*
 * One-time target initialisation (called only once): installs the
 * built-in source text and publishes the type sizes, endianness, LP64
 * flag and structure alignment of this target.
 */
extern void
code_init(void)
{
    /* built-in definitions parsed before user source */
    init_src = init_src0;

    /* type sizes */
    size_of_short    = SIZE_OF_SHORT;
    size_of_int      = SIZE_OF_INT;
    size_of_longlong = SIZE_OF_LONGLONG;
    size_of_pointer  = SIZE_OF_LONGLONG;
    size_of_float    = SIZE_OF_FLOAT;
    size_of_double   = SIZE_OF_DOUBLE;

    /* data model */
    endian = ENDIAN;
    lp64 = 1;
    struct_align = size_of_int;
}

/*
 * Per-file re-initialisation (called for each file): invalidates the
 * output mode and, on Apple builds, resets the pointer cache.
 */
extern void
emit_reinit()
{
#ifdef __APPLE__
    output_mode = -1;
    init_ptr_cache();
#else
    output_mode = -1;
#endif
}


/*
 * Return the assembler name of integer register `i` for an operand of
 * `byte` bytes.
 *
 * 1- and 2-byte names are returned only for data registers, and the 32-bit
 * name for byte == SIZE_OF_INT; every other combination yields the 64-bit
 * name.  An out-of-range register number raises error(REG_ERR) and yields
 * "%rax".
 */
char *
register_name(int i,int byte)
{
    if (i <= 0 || i >= FREG_OFFSET) {
        error(REG_ERR);
        return "%rax";
    }
    switch (byte) {
    case 1:
        if (is_data_reg(i))
            return reg_name_l[i];
        break;
    case SIZE_OF_SHORT:
        if (is_data_reg(i))
            return reg_name_w[i];
        break;
    case SIZE_OF_INT:
        return reg_name[i];
    default:
        break;
    }
    return reg_name_q[i];
}

/*
 * Select the default current register (CREG_REGISTER) without moving any
 * value into it.
 */
void
gexpr_code_init(void)
{
    set_ireg(CREG_REGISTER, 0);
}

/*
 * Forget the current register when it is an integer register holding a
 * register variable.  `e` is not used.
 */
void
code_gexpr(int e)
{
    if (!is_int_reg(creg))
        return;
    if (regs[creg] != REG_VAR)
        return;
    ireg = 0;
    creg = 0;
}

/*
 * Allocate an integer register and return its number.
 *
 * Search order:
 *   1. the first free register from REG_EDI to MAX_REGISTER (REG_EDX is
 *      never handed out);
 *   2. on Apple builds, the register of the last pointer cache;
 *   3. the first register found on the register stack, whose value is
 *      saved into a new local variable.
 * If everything fails error(RGERR) is raised and -1 returned.
 */
int 
get_register(void)
{
    int i, reg, lv;

    for (i = REG_EDI; i <= MAX_REGISTER; i++) {
        if (i == REG_EDX || regs[i])
            continue;               /* reserved or in use */
        regs_line[i] = glineno;
        regs[i] = 1;                /* claim it */
        return i;
    }
#ifdef __APPLE__
    /* give up a pointer cache */
    i = last_ptr_cache();
    if (i) {
        clear_ptr_cache_reg(i);
        regs[i] = USING_REG;
        regs_line[i] = glineno;
        return i;
    }
#endif
    /* search register stack */
    for (i = 0; i < reg_sp; i++) {
        reg = reg_stack[i];
        if (reg < 0)
            continue;
        lv = new_lvar(SIZE_OF_INT);
        code_assign_lvar(lv, reg, 0);
        reg_stack[i] = lv - REG_LVAR_OFFSET;
        regs_line[reg] = glineno;
        return reg;
    }
    error(RGERR);
    return -1;      /* nothing available */
}

/*
 * Allocate a data register (one that has a byte-sized name in reg_name_l)
 * and return its number.
 *
 * Search order:
 *   1. the first free data register from REG_EDI to MAX_REGISTER;
 *   2. on Apple builds, pointer caches are cleared one by one until one of
 *      them occupied a data register;
 *   3. the first data register found on the register stack, whose value is
 *      saved into a new local variable.
 * If everything fails error(-1) is raised and -1 returned.
 *
 * In step 3 the data-register test is applied to the register stored in
 * the stack slot.  It used to be applied to the slot index, which could
 * pick a non-data register and read reg_name_l[] with an unrelated index.
 */
static int 
get_data_register(void)
{
    int i,reg,j;
    for(i=REG_EDI; i<MAX_REGISTER+1;i++) {
	if (is_data_reg(i) && ! regs[i]) {    /* free data register */
	    regs[i]=1;      /* claim it */
	    regs_line[i]=glineno;
	    return i;
	}
    }
#ifdef __APPLE__
    /* give up pointer caches */
    while ((i=last_ptr_cache())) {
        clear_ptr_cache_reg(i);
	if (is_data_reg(i)) {
	    regs[i]=USING_REG;
	    regs_line[i]=glineno;
	    return i;
	}
    }
#endif
    /* search register stack */
    for(i=0;i<reg_sp;i++) {
        reg = reg_stack[i];
        if (reg>=0 && is_data_reg(reg)) {
            code_assign_lvar(
                (j=new_lvar(SIZE_OF_INT)),reg,0); 
            reg_stack[i]= j-REG_LVAR_OFFSET;
	    regs_line[reg]=glineno;
            return reg;
        }
    }
    error(-1);
    return -1;    /* nothing available */
}

/*
 * Release register `i`: clear its allocation line and its usage state.
 */
void 
free_register(int i)
{
    regs_line[i] = 0;
    regs[i] = 0;
}

/*
 * Mark register `r` as holding a pointer cache.  Supported on Apple
 * builds only; elsewhere error(-1) is raised.
 */
extern void
use_ptr_cache(int r)
{
#ifndef __APPLE__
    error(-1);
#else
    regs[r] = PTRC_REG;
#endif
}

/*
 * Emit the instruction that loads the address of `nptr` into pointer
 * cache register `r` (Apple builds only; elsewhere error(-1) is raised).
 *
 * A static object that is neither code nor a function is addressed
 * directly with leaq; everything else is loaded through @GOTPCREL.
 */
extern void
code_ptr_cache_def(int r,NMTBL *nptr)
{
#ifdef __APPLE__
    char *dst = register_name(r,0);
    int direct = (nptr->sc==STATIC && !is_code(nptr) && !is_function(nptr));

    if (direct)
        printf("\tleaq _%s(%%rip),%s\n", nptr->nm, dst);
    else
        printf("\tmovq _%s@GOTPCREL(%%rip),%s\n", nptr->nm, dst);
#else
    error(-1);
#endif
}

/*
 */

/*
 * Return an LREGISTER node for the i-th integer input argument, or 0 when
 * no register is available for that position.
 *
 * Functions: registers REG_EDI+i for i < MAX_INPUT_REGISTER_VAR; the
 * register is marked INPUT_REG.
 * Code segments: registers counted down from REG_VAR_BASE, limited to
 * REG_VAR_BASE-REG_VAR_MIN of them; regs[] is not touched.
 */
int 
get_input_register_var(int i,NMTBL *nptr,int is_code)
{
    int reg;

    if (!is_code) {
        if (i >= MAX_INPUT_REGISTER_VAR)
            return 0;
        reg = i + REG_EDI;
        regs[reg] = INPUT_REG;
        return list3n(LREGISTER, reg, nptr);
    }
    if (i >= REG_VAR_BASE-REG_VAR_MIN)
        return 0;
    reg = REG_VAR_BASE - i;
    return list3n(LREGISTER, reg, nptr);
}

/*
 * Return a DREGISTER (d != 0) or FREGISTER (d == 0) node for the i-th
 * floating point input argument of a function, or 0 when
 * i >= MAX_INPUT_DREGISTER_VAR.  Code segments never get one (returns 0).
 */
int 
get_input_dregister_var(int i,NMTBL *nptr,int is_code,int d)
{
    if (is_code)
        return 0;
    if (i >= MAX_INPUT_DREGISTER_VAR)
        return 0;
    return list3n(d ? DREGISTER : FREGISTER, FREG_OFFSET + i, nptr);
}

/*
 * long long input arguments use the same registers as integer ones;
 * simply forwards to get_input_register_var().
 */
int 
get_input_lregister_var(int i,NMTBL *nptr,int is_code)
{
    int node = get_input_register_var(i, nptr, is_code);
    return node;
}

#if FLOAT_CODE
int 
get_dregister(int d)
{    /* 使われていないレジスタを調べる */
    int i,reg;
    for(i=MAX_TMP_FREG+FREG_OFFSET;i>MIN_TMP_FREG+FREG_OFFSET;i--) {
        if (regs[i]) continue;    /* 使われている */
        regs[i]=USING_REG;      /* そのレジスタを使うことを宣言し */
        regs_line[i]=glineno;
        return i;   /* その場所を表す番号を返す */
    }
    /* search register stack */
    for(i=0;i<freg_sp;i++) {
        if ((reg=freg_stack[i])>=0) {
            code_dassign_lvar(
                (freg_stack[i]=new_lvar(SIZE_OF_DOUBLE)),reg,1); 
            freg_stack[i]= freg_stack[i]-REG_LVAR_OFFSET;
	    regs_line[reg]=glineno;
            return reg;
        }
    }
#if 0
    // float register と言うのはない
    for(i=0;i<FREG_VAR_BASE-FREG_VAR_MIN;i++) {
        reg =FREG_VAR_BASE-i+FREG_OFFSET;
        if (! regs[reg]) {       /* 使われていないなら */
            regs[reg]=USING_REG; /* そのレジスタを使うことを宣言し */
            if (i>max_freg_var) max_freg_var=i;
            return reg;   /* その場所を表す番号を返す */
        }
    }
#endif
    /* 空いている場所がないなら、エラー (いったい誰が使ってるの?) */
    error(REG_ERR); return freg;
}

/*
 * Pop the top entry of the floating point register stack and return it.
 */
int
pop_fregister(void)
{
    freg_sp -= 1;
    return freg_stack[freg_sp];
}
#endif

/*
 * long long register variables are ordinary register variables;
 * forwards to get_register_var().
 */
int
get_lregister_var(NMTBL *n)
{
    int var = get_register_var(n);
    return var;
}

#define get_lregister() get_register()

/*
 * Return 1 when every integer register 1..MAX_REGISTER is in use,
 * 0 as soon as a free one is found.
 */
int
register_full(void)
{
    int r = 1;

    while (r <= MAX_REGISTER) {
        if (regs[r] == 0)
            return 0;
        r++;
    }
    return 1;
}

/*
 * Return the number of free integer registers in 1..MAX_REGISTER.
 * When `d` is non-zero the answer is always 0.
 */
int
free_register_count(int d)
{
    int r;
    int free_regs = 0;

    if (d)
        return 0;
    for (r = 1; r <= MAX_REGISTER; r++) {
        if (regs[r] == 0)
            free_regs++;
    }
    return free_regs;
}

/*
 * Mark registers 1 .. REAL_MAX_REGISTER+REAL_MAX_DREGISTER-1 as free and
 * forget the current integer register (creg and ireg).
 */
void
free_all_register(void)
{
    int r = 1;

    while (r < REAL_MAX_REGISTER+REAL_MAX_DREGISTER) {
        regs[r] = 0;
        r++;
    }
    ireg = 0;
    creg = 0;
}

/*
 * Return non-zero when nodes `s` and `t` are both REGISTER/LREGISTER
 * nodes naming the same register; 0 otherwise.
 */
extern int
code_register_overlap(int s,int t)
{
    int s_is_reg = (car(s)==REGISTER || car(s)==LREGISTER);
    int t_is_reg;

    if (!s_is_reg)
        return 0;
    t_is_reg = (car(t)==REGISTER || car(t)==LREGISTER);
    if (!t_is_reg)
        return 0;
    return cadr(s)==cadr(t);
}

/*
 * Print a one-line "##" comment showing the register state: the line
 * number, the tag `s`, the current register, the regs[] values of the
 * integer registers 1..MAX_REGISTER, the current floating point register
 * and the regs[] values from RET_FREGISTER upwards.
 */
void
register_usage(char *s)
{
    int r;

    printf("## %d: %s:",lineno,s);
    if (creg)
        printf(" creg=%s ",register_name(creg,0));
    for (r = 1; r <= MAX_REGISTER; r++)
        printf("%d",regs[r]);

    if (freg)
        printf("freg=%s ", fregister_name(freg));
    for (r = RET_FREGISTER; r < REAL_MAX_DREGISTER+REAL_MAX_REGISTER; r++)
        printf("%d",regs[r]);

    printf("\n");
}

/*
    store input argument into stack
    we need this always because of one path compiler
 */
/*
 * Store the register-passed arguments of the current function (fnptr)
 * into the stack frame.
 *
 *   dots     : non-zero for a variadic function; the remaining integer and
 *              floating point input registers are then saved as well,
 *              directly after the named arguments.
 *   arg_size : size in bytes reserved for the save area; it is subtracted
 *              from `disp` and published through the assembler symbol
 *              <lpfx><lvar_offset_label>.
 *
 * Every argument whose storage class is REGISTER, FREGISTER, DREGISTER or
 * LREGISTER is converted to an LVAR at the next 8-byte slot, an assignment
 * from its register is generated, and the register is released.  Other
 * arguments are left alone.
 *
 * The argument entry is validated before it is dereferenced (the check
 * used to follow the first n->sc access); a NULL entry is skipped if
 * error() returns.
 */
static void
code_save_input_registers(int dots, int arg_size)
{
    int args;
    NMTBL *n;
    int reg;
    int tag;
    int t;
    int offset;
    int reg_var = 0;
    int freg_var = 0;

    disp -= arg_size;
    offset = disp;
    reg_in_arg = arg_size;
    printf("\t.set %s%d,%d\n",lpfx,lvar_offset_label,reg_in_arg);

    for(args = fnptr->dsp;args;args = cadr(args)) {
        n = ncadddr(args);
        if (!n||n==&null_nptr) error(REG_ERR);
        if (!n) continue;       /* nothing to dereference */
        tag = n->sc;
        reg = n->dsp;
        if (tag==REGISTER) {
            n->dsp = offset;
            offset+=SIZE_OF_LONGLONG;
            t = INT;
            reg_var++;
        } else if (tag==FREGISTER) {
            n->dsp = offset;
            t = n->ty;
            offset+=SIZE_OF_DOUBLE; 
            freg_var++;
        } else if (tag==DREGISTER) {
            n->dsp = offset;
            t = n->ty;
            offset+=SIZE_OF_DOUBLE; 
            freg_var++;
        } else if (tag==LREGISTER) {
            n->dsp = offset;
            t = n->ty;
            offset+=SIZE_OF_LONGLONG; 
            reg_var++;
        } else {
            /* not passed in a register */
            continue;
        }
        n->sc  = LVAR;
        g_expr_u(assign_expr0(list3n(LVAR,n->dsp,0),list3n(tag,reg,n),t,t));
        /* only register-class arguments reach this point */
        free_register(reg);
    }
    if (dots) {
        /* remaining integer input registers */
        while ((reg = get_input_register_var(reg_var,0,0))) {
            g_expr_u(assign_expr0(
                list3n(LVAR,offset,0),reg,LONGLONG,LONGLONG));
            offset+=SIZE_OF_LONGLONG;
            reg_var++;
        }
        //  Intel64 keeps number of double value in %al
        while ((reg = get_input_dregister_var(freg_var,0,0,0))) {
            g_expr_u(assign_expr0(
                list3n(LVAR,offset,0),reg,DOUBLE,DOUBLE));
            offset+=SIZE_OF_DOUBLE;
            freg_var++;
        }
    }
    // my_func_args = offset;
}

/*
 * Declare the local variable __my_va_list (of the typedef'ed type
 * __builtin_va_list1) and initialise three of its fields:
 *     offset  0  long_last   <- address of LVAR long_last
 *     offset 16  float_last  <- address of LVAR float_last
 *     offset 24  stack_top   <- REG_EBP + 16
 * The field offsets follow the struct declared in init_src0, where every
 * member is a long.
 *
 * Depending on parse_mode/inmode the assignments are either appended to the
 * parse tree (parse_mode && inmode) or emitted immediately
 * (!parse_mode && !inmode).  The globals type, mode and stmode are saved on
 * entry and restored on exit.
 *
 * NOTE(review): n is only assigned when (!parse_mode || inmode); both
 * branches that read n imply that condition, so it is never read
 * uninitialised.
 */
static void
def_va_list_truct(int long_last, int float_last)
{
    int stype = type;
    int smode = mode;
    int sstmode = stmode;
    NMTBL *n;

    if (!parse_mode || inmode) {
	/* look up the typedef and declare __my_va_list as a local of that type */
	NMTBL *nptr =name_space_search(get_name("__builtin_va_list1",0,NONDEF),0);
	if (!nptr) error(-1);
	type = nptr->ty;
	typedefed=glist3n(TYPEDEF,typedefed,nptr);
	mode = LDECL;
	stmode = 0;
	n = def(lsearch("__my_va_list",0),0);
    }

        // set up the va_list structure
        // __my_va_list is declared above; this function is called from code_arg_register
/*
__builtin_va_list1 __my_va_list;
    long long_last; \n\
    long float_first; \n\
    long float_last; \n\
    long stack_top; \n\
 */
    if (parse_mode && inmode) {
	/* record the declaration and the three assignments in the parse tree */
	parse = list5n(ST_DECL,parse,list3(mode,0,0),0,n);
        // long_last
	parse = list3(ST_COMP,parse,assign_expr0( list3n(IVAR,n->dsp,n),list2(ADDRESS,list3n(LVAR,long_last,0)),LONGLONG,LONGLONG));
        // float_last
	parse = list3(ST_COMP,parse,assign_expr0( list3(LADD,list3n(IVAR,n->dsp,n),llist2(LCONST,16)),list2(ADDRESS,list3n(LVAR,float_last,0)),LONGLONG,LONGLONG));
        // stack_top
	parse = list3(ST_COMP,parse,assign_expr0( list3(LADD,list3n(IVAR,n->dsp,n),llist2(LCONST,24)),list3(LADD,list2(LREGISTER,REG_EBP),llist2(LCONST,16)),LONGLONG,LONGLONG));
    } else if (!parse_mode && !inmode) {
	/* generate the three assignments right away */
        // long_last
	g_expr_u(assign_expr0( list3n(LVAR,n->dsp,n),list2(ADDRESS,list3n(LVAR,long_last,0)),LONGLONG,LONGLONG));
        // float_last
	g_expr_u(assign_expr0( list3n(LVAR,n->dsp+16,n),list2(ADDRESS,list3n(LVAR,float_last,0)),LONGLONG,LONGLONG));
        // stack_top
	g_expr_u(assign_expr0( list3n(LVAR,n->dsp+24,n),list3(LADD,list2(LREGISTER,REG_EBP),llist2(LCONST,16)),LONGLONG,LONGLONG));
    }
    /* restore the parser state saved on entry */
    mode = smode;
    type = stype;
    stmode = sstmode;
}

/*
 * Walk the argument list of fnptr and try to place each long long, scalar,
 * float and double argument in an input register.
 *
 *   fnptr - the function (or code segment) whose arguments are assigned.
 *   in    - when non-zero the name-table entries are left untouched and
 *           nothing is emitted; only the counters and arg_offset_v advance.
 *
 * For a function (is_function) the register arguments are afterwards saved
 * with code_save_input_registers(); a variadic function additionally gets
 * its va_list structure set up and a full-size register save area.
 */
void
code_arg_register(NMTBL *fnptr, int in)
{
    int args = fnptr->dsp;
    NMTBL *n;
    int n_int = 0;          /* integer input registers handed out so far */
    int n_flt = 0;          /* floating input registers handed out so far */
    int ty;
    int reg;
    int cls;                /* 'l' long long, 's' scalar, 'f' float, 'd' double */
    int is_code0 = is_code(fnptr);
    int dots;

    arg_offset_v = 0;
    function_type(fnptr->ty,&dots);

    /* we should use increment_arg */
    /* process in reverse order */
    for (; args; args = cadr(args)) {
        n = ncadddr(args);
        ty = n->ty;
        /* scalar includes LONGLONG, so check the long long case first */
        if (ty==LONGLONG||ty==ULONGLONG) {
            cls = 'l';
            reg = get_input_lregister_var(n_int,n,is_code0);
        } else if (scalar(ty)) {
            cls = 's';
            reg = get_input_register_var(n_int,n,is_code0);
        } else if (ty==FLOAT) {
            cls = 'f';
            reg = get_input_dregister_var(n_flt,n,is_code0,0);
        } else if (ty==DOUBLE) {
            cls = 'd';
            reg = get_input_dregister_var(n_flt,n,is_code0,1);
        } else {
            continue;
        }
        if (!reg)
            continue;

        if (!in) {
            switch (cls) {
            case 'l':
                n->sc = LREGISTER;
                break;
            case 's':
                n->sc = (car(ty)==POINTER && lp64) ? LREGISTER : REGISTER;
                break;
            case 'f':
                n->sc = FREGISTER;
                break;
            default:
                n->sc = DREGISTER;
                break;
            }
            n->dsp = cadr(reg);
            regs[n->dsp] = INPUT_REG;
        }
        if (cls=='f' || cls=='d')
            n_flt++;
        else
            n_int++;
        arg_offset_v += SIZE_OF_LONGLONG; // (caddr(args)=size(type));
    }

    if (!is_function(fnptr))
        return;

    if (dots) {
        /* floats have to be loaded appropriately depending on the value of %al  sigh... */
        arg_offset_v =
            MAX_INPUT_REGISTER_VAR*SIZE_OF_LONGLONG +
            MAX_INPUT_DREGISTER_VAR*SIZE_OF_DOUBLE;

        int long_last = -arg_offset_v + (MAX_INPUT_REGISTER_VAR)*SIZE_OF_LONGLONG;
        int float_last = long_last + (MAX_INPUT_DREGISTER_VAR)*SIZE_OF_DOUBLE;

        def_va_list_truct(long_last,float_last);
#ifndef __APPLE__
        if (!in) {
            printf(".set %s%d, %d\n",lpfx, arg_offset_label,
                arg_offset_v+ arg_offset);
        }
#endif
    }
    if (!in) {
        code_save_input_registers(dots, arg_offset_v);
    }
}

void 
gexpr_init(void)
{
    text_mode(0);
    if (reg_sp>0) error(-1);
    if (freg_sp>0) error(-1);
    reg_sp = 0;
    freg_sp = 0;
    stack_depth = 0;
    gexpr_code_init();
    regs[creg]=1;
    register_usage("gexpr_init");
}


/*
 * Release every register and reset the register stacks and the
 * "highest register variable used" counters.
 */
void
emit_init(void)
{
    free_all_register();

    reg_sp = 0;
    freg_sp = 0;

    max_reg_var = -1;
    max_freg_var = -1;
}

/* Pop the top entry of the register stack and return it. */
int
pop_register(void)
{
    reg_sp -= 1;
    return reg_stack[reg_sp];
}

/*
 * Release a register obtained from emit_pop(), unless it is negative,
 * is the current register, or is marked as a register variable.
 */
void
emit_pop_free(int xreg)
{
    if (xreg < 0)
        return;
    if (xreg == creg)
        return;
    if (regs[xreg] == REG_VAR)
        return;
    free_register(xreg);
}


/*
 * Allocate an integer register variable for nptr.
 * Scans downward from REG_VAR_BASE; the first unused register is marked
 * REG_VAR and returned as a REGISTER node.  When none is free, a new local
 * variable of SIZE_OF_INT bytes is returned as an LVAR node instead.
 */
int
get_register_var(NMTBL *nptr)
{
    int idx;
    int limit = REG_VAR_BASE-REG_VAR_MIN;

    for (idx = 0; idx < limit; idx++) {
        int candidate = REG_VAR_BASE-idx;

        if (regs[candidate])
            continue;               /* already in use */

        /* claim the register and remember where it was claimed */
        regs[candidate] = REG_VAR;
        regs_line[candidate] = glineno;
        if (max_reg_var < idx)
            max_reg_var = idx;
        /* return the node that names this location */
        return list3n(REGISTER,candidate,nptr);
    }
    return list3n(LVAR,new_lvar(SIZE_OF_INT),0);
}

/*
 * Allocate a floating-point register variable for nptr.
 * Scans downward from FREG_VAR_BASE; the first unused register is marked
 * REG_VAR and returned as a DREGISTER node.  When none is free, a new local
 * variable of SIZE_OF_DOUBLE bytes is returned as an LVAR node.
 * The parameter d is not used here.
 */
int
get_dregister_var(NMTBL *nptr,int d)
{
    int idx;
    int limit = FREG_VAR_BASE-FREG_VAR_MIN;

    for (idx = 0; idx < limit; idx++) {
        int candidate = FREG_VAR_BASE-idx;

        if (regs[candidate])
            continue;               /* already in use */

        /* claim the register and remember where it was claimed */
        regs[candidate] = REG_VAR;
        regs_line[candidate] = glineno;
        if (max_freg_var < idx)
            max_freg_var = idx;
        /* return the node that names this location */
        return list3n(DREGISTER,candidate,nptr);
    }
    return list3n(LVAR,new_lvar(SIZE_OF_DOUBLE),0);
}


int 
emit_push()
{
    int new_reg,old;
    new_reg = get_register();       /* 絶対に取れる */
    // who free new_reg?
    if (new_reg==creg) error(-1);
    old = creg;
    reg_stack[reg_sp++] = creg;     /* push するかわりにレジスタを使う */
    ireg = creg = new_reg;
    if (!regs[creg]) regs[creg]=USING_REG;
    return old;
}

/*
 * Pop the register stack.  An entry <= -REG_LVAR_OFFSET denotes a value
 * that was saved in a local variable; it is reloaded into a new register
 * with code_lrlvar() and the local is freed.  Returns the register that
 * holds the popped value.  The parameter type is not used.
 */
int
emit_pop(int type)
{
    int popped = pop_register();

    if (popped > -REG_LVAR_OFFSET) {
        return popped;
    }
    {
        int reloaded = get_register();

        code_lrlvar(REG_LVAR_OFFSET+popped,reloaded);
        free_lvar(REG_LVAR_OFFSET+popped);
        return reloaded;
    }
}

/*
 * Emit the definition of numeric label labelno ("_N:").
 * On Apple builds the pointer cache is cleared first.
 */
void
code_label(int labelno)
{
#ifdef __APPLE__
    clear_ptr_cache();
#endif
    printf("_%d:\n", labelno);
}

void
code_gvar(int e1,int creg) {
    use_int(creg);
#ifdef __APPLE__
    NMTBL *nptr = ncaddr(e1);
    if (nptr->sc==STATIC && !(is_code(nptr)||is_function(nptr))) {
	printf("\tleaq _%s+%d(%%rip),%s\n", nptr->nm,cadr(e1),register_name(creg,0));
	return;
    }
    int r = get_ptr_cache(nptr);
    if (cadr(e1)) {
	printf("\tleaq %d(%s),%s\n", cadr(e1),register_name(r,0),
		register_name(creg,0));
    } else {
	printf("\tmovq %s,%s\n", register_name(r,0), register_name(creg,0));
    }
#else
    if (cadr(e1)) {
	printf("\tmovq $%s+%d,%s\n",nptr->nm,cadr(e1),
		register_name(creg,0));
    } else {
	printf("\tmovq $%s,%s\n",nptr->nm,register_name(creg,0));
    }
#endif

}

void
code_rgvar(int e1,int creg) {
    use_int(creg);
    if (car(e1)==URGVAR) {
        code_crgvar(e1,creg,0,SIZE_OF_INT);
        return;
    }
#ifdef __APPLE__
    NMTBL *nptr = ncaddr(e1);
    if (nptr->sc==STATIC && !(is_code(nptr)||is_function(nptr))) {
	printf("\tmovl _%s+%d(%%rip),%s\n", nptr->nm,cadr(e1),register_name(creg,SIZE_OF_INT));
	return;
    }
    int r = get_ptr_cache(ncaddr(e1));
    if (cadr(e1)) {
        printf("\tmovl %d(%s),%s\n", cadr(e1),register_name(r,0),
                register_name(creg,SIZE_OF_INT));
    } else {
        printf("\tmovl (%s),%s\n", register_name(r,0), register_name(creg,SIZE_OF_INT));
    }
#else
    if (cadr(e1)) {
	printf("\tmovl %s+%d,%s\n",nptr->nm,cadr(e1),
		register_name(creg,SIZE_OF_INT));
    } else
	printf("\tmovl %s,%s\n",nptr->nm,register_name(creg,SIZE_OF_INT));
#endif

}

/*
 * Select the load mnemonic for a value of sz bytes.
 * sign selects sign extension (movs..q) over zero extension (movz..q);
 * an unsigned int uses plain movl and any other size uses movq.
 */
static char *
cload(int sign,int sz) {
    if (sz==1) {
        return sign ? "movsbq" : "movzbq";
    }
    if (sz==SIZE_OF_SHORT) {
        return sign ? "movswq" : "movzwq";
    }
    if (sz==SIZE_OF_INT) {
        return sign ? "movslq" : "movl";
    }
    return "movq";
}

/*
 * Register-width selector that goes with cload(): an unsigned load of
 * SIZE_OF_INT bytes yields SIZE_OF_INT, every other case yields u.
 */
static int
regu(int sign,int sz,int u) {
    if (sz==SIZE_OF_INT && !sign) {
        return SIZE_OF_INT;
    }
    return u;
}

void
code_crgvar(int e1,int creg,int sign,int sz){
    use_int(creg);
#ifdef __APPLE__
    NMTBL *nptr = ncaddr(e1);
    if (nptr->sc==STATIC && !(is_code(nptr)||is_function(nptr))) {
	printf("\t%s _%s+%d(%%rip),%s\n", cload(sign,sz),nptr->nm,cadr(e1),register_name(creg,regu(sign,sz,0)));
	return;
    }
    int r = get_ptr_cache(ncaddr(e1));
    if (cadr(e1)) {
        printf("\t%s %d(%s),%s\n", cload(sign,sz),cadr(e1),register_name(r,0),
                register_name(creg,regu(sign,sz,0)));
    } else {
        printf("\t%s (%s),%s\n", cload(sign,sz),
		register_name(r,0), register_name(creg,regu(sign,sz,0)));
    }
#else
    if (cadr(e1)) {
	printf("\t%s %s+%d,%s\n",cload(sign,sz),
		nptr->nm,cadr(e1),register_name(creg,regu(sign,sz,0)));
    } else
	printf("\t%s %s,%s\n",cload(sign,sz),
		nptr->nm,register_name(creg,regu(sign,sz,0)));
#endif

}


/* Load the address of local variable e2 into register creg (leaq). */
void
code_lvar(int e2,int creg) {
    use_int(creg);

    printf("\tleaq ");
    lvar(e2);                       /* prints the operand for the local */
    printf(",%s\n", register_name(creg,0));
}


/* Copy register e2 into register creg; nothing is emitted when they are the same. */
void
code_register(int e2,int creg) {
    use_int(creg);
    if (e2 == creg) {
        return;
    }
    printf("\tmovq %s,%s\n", register_name(e2,0), register_name(creg,0));
}


/*
 * Load the 32-bit value of local variable e2 into register reg.
 * URLVAR nodes are forwarded to code_crlvar() as an unsigned load of
 * SIZE_OF_INT bytes.
 */
void
code_rlvar(int e2,int reg) {
    use_int(reg);
    if (car(e2)==URLVAR) {
        /* forward the requested destination, not the global creg */
        code_crlvar(e2,reg,0,SIZE_OF_INT);
        return;
    }
    printf("\tmovl "); lvar(e2);
    printf(",%s\n",register_name(reg,SIZE_OF_INT));
}

/* Sign-extend the low byte of reg into the full register. */
extern void
code_i2c(int reg)
{
    char *op;

    use_data_reg(reg,1);
    op = cload(1,1);
    printf("\t%s %s,%s\n", op, register_name(reg,1), register_name(reg,0));
}

/* Sign-extend the low 16 bits of reg into the full register. */
extern void
code_i2s(int reg)
{
    char *op;

    use_data_reg(reg,1);
    op = cload(1,SIZE_OF_SHORT);
    printf("\t%s %s,%s\n", op, register_name(reg,2), register_name(reg,0));
}

/* Zero-extend the low byte of reg into the full register. */
extern void
code_u2uc(int reg)
{
    char *op;

    use_data_reg(reg,1);
    op = cload(0,1);
    printf("\t%s %s,%s\n", op, register_name(reg,1), register_name(reg,0));
}

/* Zero-extend the low 16 bits of reg into the full register. */
extern void
code_u2us(int reg)
{
    char *op;

    use_data_reg(reg,1);
    op = cload(0,SIZE_OF_SHORT);
    printf("\t%s %s,%s\n", op, register_name(reg,2), register_name(reg,0));
}

/*
 * Load local variable e2 of sz bytes into register reg, extending
 * according to sign (mnemonic from cload(), register width from regu()).
 */
void
code_crlvar(int e2,int reg,int sign,int sz) {
    use_int(reg);

    printf("\t%s ", cload(sign,sz));
    lvar(e2);                       /* prints the operand for the local */
    printf(",%s\n", register_name(reg,regu(sign,sz,0)));
}


/*
 * Load the address of function n into register creg.
 * Apple builds use a RIP-relative leaq for STATIC entries and the pointer
 * cache otherwise; other builds emit "movq $name".
 */
void
code_fname(NMTBL *n,int creg) {
    use_int(creg);
#ifdef __APPLE__
    if (n->sc != STATIC) {
        int r = get_ptr_cache(n);
        printf("\tmovq %s,%s\n", register_name(r,0), register_name(creg,0));
        return;
    }
    printf("\tleaq _%s(%%rip),%s\n", n->nm,
        register_name(creg,0));
#else
    printf("\tmovq $%s,%s\n", n->nm, register_name(creg,0));
#endif
}

/* Load the address of numeric label "_label" into register reg. */
void
code_label_value(int label,int reg) {
    char *dst;

    use_int(reg);
    dst = register_name(reg,0);
#ifdef __APPLE__
    printf("\tleaq _%d(%%rip),%s\n", label, dst);
#else
    printf("\tleaq _%d,%s\n", label, dst);
#endif
}

/* Load the integer constant e2 into the 32-bit form of register creg. */
void
code_const(int e2,int creg) {
    char *dst;

    use_int(creg);
    dst = register_name(creg,SIZE_OF_INT);
    printf("\tmovl $%d,%s\n", e2, dst);
}

/* Arithmetic negation of the 32-bit value in creg. */
void
code_neg(int creg) {
    char *dst;

    use_int(creg);
    dst = register_name(creg,SIZE_OF_INT);
    printf("\tnegl %s\n", dst);
}


/* Bitwise complement of the 32-bit value in creg. */
void
code_not(int creg) {
    char *dst;

    use_int(creg);
    dst = register_name(creg,SIZE_OF_INT);
    printf("\tnotl %s\n", dst);
}


/*
 * Logical not of the 32-bit value in creg: compare with zero, set the low
 * byte on equality and zero-extend it back to 32 bits.
 */
void
code_lnot(int creg) {
    char *low_byte;

    use_data_reg(creg,1);
    low_byte = register_name(creg,1);

    printf("\tcmpl $0,%s\n", register_name(creg,SIZE_OF_INT));
    printf("\tsete %s\n", low_byte);
    printf("\tmovzbl %s,%s\n", low_byte, register_name(creg,SIZE_OF_INT));
}

/*
 * Pre-increment/decrement: add dir to the object designated by e2 and,
 * when the value is used (global use), load the updated value into reg.
 * A REGISTER operand is updated in place; otherwise e2 is evaluated as an
 * address and the memory operand of sz bytes is updated.
 */
void
code_preinc(int e1,int e2,int dir,int sign,int sz,int reg) {
    char *addr;
    char *add_op;

    if (car(e2)==REGISTER) {
        use_int(reg);
        printf("\taddl $%d,%s\n", dir, register_name(cadr(e2),SIZE_OF_INT));
        if (use) {
            printf("\tmovl %s,%s\n", register_name(cadr(e2),SIZE_OF_INT),
                register_name(reg,SIZE_OF_INT));
        }
        return;
    }

    g_expr(e2);
    addr = register_name(creg,0);   /* address register, fetched before use_int */
    use_int(reg);

    if (sz==1) {
        add_op = "addb";
    } else if (sz==SIZE_OF_SHORT) {
        add_op = "addw";
    } else {
        add_op = "addl";
    }
    printf("\t%s $%d,(%s)\n", add_op, dir, addr);
    if (use) {
        printf("\t%s (%s),%s\n", cload(sign,sz), addr,
            register_name(reg,regu(sign,sz,0)));
    }
}


/*
 * Post-increment/decrement: when the value is used (global use), load the
 * old value of the object designated by e2 into reg, then add dir to the
 * object.  A REGISTER operand is updated in place; otherwise e2 is
 * evaluated as an address, which is kept in a separate register while the
 * old value is loaded.
 */
void
code_postinc(int e1,int e2,int dir,int sign,int sz,int reg) {
    char *addr;
    char *add_op;

    if (car(e2)==REGISTER) {
        use_int(reg);
        if (use) {
            printf("\tmovl %s,%s\n", register_name(cadr(e2),SIZE_OF_INT),
                register_name(reg,SIZE_OF_INT));
        }
        printf("\taddl $%d,%s\n", dir, register_name(cadr(e2),SIZE_OF_INT));
        return;
    }

    g_expr(e2);
    emit_push();
    e2 = emit_pop(0);               /* e2 now names the address register */
    addr = register_name(e2,0);
    use_int(reg);

    if (use) {
        printf("\t%s (%s),%s\n", cload(sign,sz), addr,
            register_name(reg,regu(sign,sz,0)));
    }
    if (sz==1) {
        add_op = "addb";
    } else if (sz==SIZE_OF_SHORT) {
        add_op = "addw";
    } else {
        add_op = "addl";
    }
    printf("\t%s $%d,(%s)\n", add_op, dir, addr);
    emit_pop_free(e2);
}



/* Load the address of the label numbered retcont into register creg. */
void
code_return(int creg) {
    char *dst;

    use_int(creg);
    dst = register_name(creg,0);
#ifdef __APPLE__
    printf("\tleaq _%d(%%rip),%s\n", retcont, dst);
#else
    printf("\tleaq _%d,%s\n", retcont, dst);
#endif
}


/* Copy the frame pointer (%rbp) into register creg. */
void
code_environment(int creg) {
    char *dst;

    use_int(creg);
    dst = register_name(creg,0);
    printf("\tmovq %%rbp,%s\n", dst);
}

/* Forward declarations: code_bool() below calls these before they are defined. */
static int rexpr_bool(int e1,int reg);
#if FLOAT_CODE
static int drexpr_bool(int e1,int reg);
#endif

/*
 * Materialise the truth value of expression e1 as 0 or 1 in reg.
 * Tried in order: rexpr_bool(), drexpr_bool() (FLOAT_CODE only), a
 * setcc sequence when code_cond() knows the operator, and finally a
 * branch-based sequence built with b_expr().  When the value is not used
 * (global use is zero) only the evaluation side effects are generated.
 */
void
code_bool(int e1,int reg) {
    char *cc;
    char *dst;
    int l_true;
    int l_end;

    if (rexpr_bool(e1,reg)) {
        return;
    }
#if FLOAT_CODE
    if (drexpr_bool(e1,reg)) {
        return;
    }
#endif

    cc = code_cond(OP(car(e1)),1);
    if (cc) {
        g_expr(list3(LCMP,cadr(e1),caddr(e1)));
        if (!use) {
            return;
        }
        use_data_reg(reg,1);
        printf("\tset%s\t%s\n", cc, register_name(reg,1));
        printf("\tmovzbq %s,%s\n", register_name(reg,1), register_name(reg,0));
        return;
    }

    /* generic case, including > < ... : branch to l_true when e1 holds */
    l_true = fwdlabel();
    b_expr(e1,1,l_true,1);
    if (!use) {
        fwddef(l_true);
        return;
    }

    use_int(reg);
    dst = register_name(reg,0);
    printf("\txorq %s,%s\n", dst, dst);
    l_end = fwdlabel();
    jmp(l_end);
    fwddef(l_true);
    printf("\tmovq $1,%s\n", dst);
    fwddef(l_end);
}

/* Condition suffix for signed "greater than" (cond != 0) or its negation. */
static char *
code_gt(int cond) {
    if (cond) {
        return "g";
    }
    return "le";
}

/* Condition suffix for unsigned "greater than" (cond != 0) or its negation. */
static char *
code_ugt(int cond) {
    if (cond) {
        return "a";
    }
    return "be";
}

/* Condition suffix for signed "greater or equal" (cond != 0) or its negation. */
static char *
code_ge(int cond) {
    if (cond) {
        return "ge";
    }
    return "l";
}

/* Condition suffix for unsigned "greater or equal" (cond != 0) or its negation. */
static char *
code_uge(int cond) {
    if (cond) {
        return "ae";
    }
    return "b";
}

/* Condition suffix for "equal" (cond != 0) or "not equal" (cond == 0). */
static char *
code_eq(int cond) {
    if (cond) {
        return "e";
    }
    return "ne";
}

/*
 * Compare a 1- or 2-byte global variable (e1) with zero and branch to
 * label through jcond().  For any other sz no compare is emitted and only
 * the conditional jump is generated.
 */
void
code_cmp_crgvar(int e1,int reg,int sz,int label,int cond) {
    char *cmp_op = 0;

    use_int(reg);
    if (sz==1) {
        cmp_op = "cmpb";
    } else if (sz==SIZE_OF_SHORT) {
        cmp_op = "cmpw";
    }
#ifdef __APPLE__
    int r = get_ptr_cache(ncaddr(e1));
    if (cmp_op) {
        if (cadr(e1)) {
            printf("\t%s $0,%d(%s)\n", cmp_op, cadr(e1), register_name(r,0));
        } else {
            printf("\t%s $0,(%s)\n", cmp_op, register_name(r,0));
        }
    }
#else
    if (cmp_op) {
        if (cadr(e1)) {
            printf("\t%s $0,%s+%d\n", cmp_op, (ncaddr(e1))->nm, cadr(e1));
        } else {
            printf("\t%s $0,%s\n", cmp_op, (ncaddr(e1))->nm);
        }
    }
#endif
    jcond(label,cond);
}


/*
 * Compare a 1- or 2-byte local variable (e1) with zero and branch to
 * label through jcond().  For any other sz only the jump is generated.
 */
void
code_cmp_crlvar(int e1,int reg,int sz,int label,int cond) {
    char *cmp_op = 0;

    use_int(reg);
    if (sz==1) {
        cmp_op = "cmpb";
    } else if (sz==SIZE_OF_SHORT) {
        cmp_op = "cmpw";
    }
    if (cmp_op) {
        printf("\t%s $0,", cmp_op);
        lvar(e1);                   /* prints the operand for the local */
        printf("\n");
    }
    jcond(label,cond);
}


/*
 * Compare a 32-bit global variable (e1) with zero and branch to label
 * through jcond().
 */
void
code_cmp_rgvar(int e1,int reg,int label,int cond) {
    use_int(reg);
#ifdef __APPLE__
    int r = get_ptr_cache(ncaddr(e1));
    if (!cadr(e1)) {
        printf("\tcmpl $0,(%s)\n", register_name(r,0));
    } else {
        printf("\tcmpl $0,%d(%s)\n", cadr(e1), register_name(r,0));
    }
#else
    if (!cadr(e1)) {
        printf("\tcmpl $0,%s\n", (ncaddr(e1))->nm);
    } else {
        printf("\tcmpl $0,%s+%d\n", (ncaddr(e1))->nm, cadr(e1));
    }
#endif
    jcond(label,cond);
}


/* Compare a 32-bit local variable (e1) with zero and branch to label through jcond(). */
void
code_cmp_rlvar(int e1,int reg,int label,int cond) {
    use_int(reg);

    printf("\tcmpl $0,");
    lvar(e1);                       /* prints the operand for the local */
    printf("\n");

    jcond(label,cond);
}


/* Compare the 32-bit value in register e2 with zero and branch to label through jcond(). */
void
code_cmp_register(int e2,int label,int cond) {
    char *src;

    use_int(e2);
    src = register_name(e2,SIZE_OF_INT);
    printf("\tcmpl $0,%s\n", src);
    jcond(label,cond);
}


/*
 * Load the address of the string literal e1 into register creg.
 * The literal's name-table entry remembers its label in the LABEL
 * attribute, so each literal is emitted only once; later uses just load
 * the address of the existing label.
 */
void
code_string(int e1,int creg)
{
    char *s;
    int lb;
    NMTBL *n = ncaddr(e1);
    if ((lb=attr_value(n,LABEL))) {
        // already defined
        // (a void function must not return an expression, so call and return separately)
        code_label_value(lb,creg);
        return;
    }

    use_int(creg);
    s=n->nm;
    lb = emit_string_label();
    ascii(s);
    /* switch back to the text section after the string data */
    if (output_mode==TEXT_EMIT_MODE) {
	printf(".text\n");
    } else {
	text_mode(0);
    }
#ifdef __APPLE__
    printf("\tleaq _%d(%%rip),%s\n",lb,
	    register_name(creg,0));
#else
    printf("\tleaq _%d,%s\n",lb,register_name(creg,0));
#endif
    set_attr(n,LABEL,lb);
}

/*
 * Emit the list of string fragments hanging off n->dsp after a fresh
 * string label, then call emit_label() with that label.
 */
void
emit_strings(NMTBL *n)
{
    int label = emit_string_label();
    int cell = n->dsp;

    while (cell) {
        ascii(scaddr(cell));
        cell = cadr(cell);
    }
    emit_label(label);
}

/*
 * Emit the list of string fragments e2 after a fresh string label, return
 * to the text section and load the label's address into reg.
 */
void
code_strings(int e2,int reg)
{
    int label = emit_string_label();
    int cell = e2;

    while (cell) {
        ascii(scaddr(cell));
        cell = cadr(cell);
    }
    if (output_mode != TEXT_EMIT_MODE) {
        text_mode(0);
    } else {
        printf(".text\n");
    }
    code_label_value(label,reg);
}

/* Copies of at most this many bytes are unrolled into individual moves. */
#define MAX_COPY_LEN 64

/*
 * emit_copy: generate code that copies |length| bytes from the address in
 * register `from` to the address in register `to`, starting at `offset`.
 *
 *   length - byte count; a negative length means the copy proceeds from
 *            high addresses to low addresses.
 *   value  - when non-zero, creg is switched to `to` afterwards so the
 *            destination address is the value of the expression.
 *   det    - passed through to recursive calls, not used otherwise.
 *
 * Lengths 1, 2, 4 and 8 use a single load/store pair; other lengths up to
 * MAX_COPY_LEN are split into such pairs; longer copies use rep movsl.
 *
 * With ARG_ORDER==1 no library call is allowed in emit_copy.
 */
void 
emit_copy(int from,int  to,int length,int offset,int value,int det)
{
    int dreg;
    char *drn,*frn;
    char *trn;
    use_int(from);
    use_int(to);
    frn = register_name(from,0);
    trn = register_name(to,0);

    /* length <0 means upward direction copy */
    switch (length) {
    case 0:     break;
    case 1: case -1:
	drn = register_name(dreg = get_data_register(),1);
        printf("\tmovb %d(%s),%s\n",offset,frn,drn);
        printf("\tmovb %s,%d(%s)\n",drn,offset,trn);
	free_register(dreg);
        break;
    case 2: case -2:
	drn = register_name(dreg = get_data_register(),2);
        printf("\tmovw %d(%s),%s\n",offset,frn,drn);
        printf("\tmovw %s,%d(%s)\n",drn,offset,trn);
	free_register(dreg);
        break;
    case 4: case -4:
	drn = register_name(dreg = get_register(),SIZE_OF_INT);
        printf("\tmovl %d(%s),%s\n",offset,frn,drn);
        printf("\tmovl %s,%d(%s)\n",drn,offset,trn);
	free_register(dreg);
        break;
    case 8: case -8:
	drn = register_name(dreg = get_register(),0);
        printf("\tmovq %d(%s),%s\n",offset,frn,drn);
        printf("\tmovq %s,%d(%s)\n",drn,offset,trn);
	free_register(dreg);
        break;
    default:
        if (length <0) {
            if (length > -MAX_COPY_LEN) {
                for(;length<=-8;length+=8,offset-=8)
                    emit_copy(from,to,-8,offset-8,0,det);
                for(;length<=-4;length+=4,offset-=4)
                    emit_copy(from,to,-4,offset-4,0,det);
                for(;length<=-2;length+=2,offset-=2)
                    emit_copy(from,to,-2,offset-2,0,det);
                if(length<0)
                    emit_copy(from,to,length,offset-1,0,det);
                break;
            }
        } else if (length <=MAX_COPY_LEN) {
            for(;length>=8;length-=8,offset+=8)
                emit_copy(from,to,8,offset,0,det);
            for(;length>=4;length-=4,offset+=4)
                emit_copy(from,to,4,offset,0,det);
            for(;length>=2;length-=2,offset+=2)
                emit_copy(from,to,2,offset,0,det);
            if(length>0)
                emit_copy(from,to,length,offset,0,det);
            break;
        }
	// should be parallel_rassign....
	// clear_ptr_cache();
	// code_save_stacks();

	/* Long copy: save rsi/rdi/rcx, then move from/to into rsi/rdi
	   through the stack so the two registers may overlap them. */
	printf("\tpushq %%rsi\n");
	printf("\tpushq %%rdi\n");
	printf("\tpushq %%rcx\n");
	printf("\tpushq %s\n",register_name(from,0));
	printf("\tpushq %s\n",register_name(to,0));
	printf("\tpopq %%rdi\n");
	printf("\tpopq %%rsi\n");
	if (length<0) {
	    printf("\tmovq $%d,%%rcx\n",-length/4);
	    printf("\taddq $%d,%%rsi\n",-length-4);
	    /* NOTE(review): the stack-pointer correction here is 8*8 while
	       the forward branch below uses 8*4 - confirm which is intended. */
	    printf("\taddq $%d,%%rdi\n",-length-4
		+(to==REG_ESP?8*8:0)
		);
	    printf("\tstd\n\trep\n\tmovsl\n");
	    /* The ABI requires the direction flag to be clear outside of
	       string operations, so restore it after the backward copy. */
	    printf("\tcld\n");
	    printf("\tpopq %%rcx\n");
	    printf("\tpopq %%rdi\n");
	    printf("\tpopq %%rsi\n");
	    if(length%4) {
		offset = offset+length/SIZE_OF_INT;
		length=length%4;
		emit_copy(from,to,length,offset,0,det);
	    }
	} else {
	    printf("\tmovq $%d,%%rcx\n",length/4);
	    /* %rdi is a 64-bit register: the add must be addq, an "l"
	       suffix is rejected by the assembler. */
	    if (to==REG_ESP)
		printf("\taddq $%d,%%rdi\n",8*4);
	    printf("\tcld\n\trep\n\tmovsl\n");
	    printf("\tpopq %%rcx\n");
	    printf("\tpopq %%rdi\n");
	    printf("\tpopq %%rsi\n");
	    if(length%4) {
		offset = offset+length/SIZE_OF_INT;
		length=length%4;
		emit_copy(from,to,length,offset,0,det);
	    }
	}
    }
    if (value) {
    /* creg must point top of the destination data */
    /* this code is necessary for the value of assignment or function call */
    /* otherwise we don't need this */
	if(creg!=to) {
	    if (regs[creg]!=REG_VAR) {
		free_register(creg);
	    }
	    /* always switch the current register to the destination */
	    creg=to;
	}
    }
}

static int
push_struct(int e4,int t, int arg) 
{
    int length,dreg;
    g_expr(e4);
    length=size(t); 
    if(length%SIZE_OF_INT) {
	length += SIZE_OF_INT - (length%SIZE_OF_INT);
    }
    emit_push();
    code_lvar(cadr(arg),USE_CREG);
    dreg = emit_pop(0);

    // copy dreg to creg with length
    // try small amount copy
    /* downward direction copy */
    emit_copy(dreg,creg,length,0,0,1);
    emit_pop_free(dreg);
    /* we have value in creg, it may be changed */
    return length/SIZE_OF_INT;
}

/* True when expression e3 contains nothing that not_simple_p flags. */
static int
simple_arg(int e3)
{
    if (contains_p(e3,not_simple_p)) {
        return 0;
    }
    return 1;
}

/* LVAR offset of outgoing argument word `arg`: ARG_LVAR_OFFSET plus `arg` units of SIZE_OF_INT bytes. */
#define caller_arg_offset_v(arg) (ARG_LVAR_OFFSET+(arg)*SIZE_OF_INT)

/*
     use input register as current register
 */

/*
 * Detach input register reg from the "current register" bookkeeping
 * (ireg/freg/creg) so it can carry an argument.  With a non-zero mode
 * the register is also dropped from the pointer cache and marked
 * USING_REG.
 */
static void
use_input_reg(int reg,int mode)
{
    if (is_int_reg(reg)) {
        if (ireg && ireg == reg) {
            if (creg == ireg) {
                creg = 0;
            }
            ireg = 0;
        }
    } else if (is_float_reg(reg)) {
        if (freg && freg == reg) {
            if (creg == freg) {
                creg = ireg;
            }
            freg = 0;
        }
    }

    if (!mode) {
        return;
    }
    clear_ptr_cache_reg(reg);
    regs[reg] = USING_REG;
}


/*
    Early implementations used pushl for each function call argument,
    and gcc uses the same argument evaluation order. Of course, the
    order is unspecified in the C language, but it is better to
    use the same argument evaluation order, especially for test
    programs.
 */
/* ARG_ORDER==1 selects the code paths that collect argument assignments
   in delayed_arg instead of generating them immediately. */
#define ARG_ORDER 1
#if (ARG_ORDER==1)

/* List of pending argument assignments; filled by compute_complex_arg()
   and function(), generated and restored inside function(). */
static int delayed_arg;

#endif

/*
 * Arrange for the complex argument e3 to be evaluated into `arg`.
 * `arg` is added to reg_arg_list, the assignment is queued on delayed_arg
 * (ARG_ORDER==1) or generated at once, and the argument expression in e3
 * is replaced by `arg`.  Returns the extended reg_arg_list.
 */
static int
compute_complex_arg(int e3,int reg_arg_list,int arg) {
    int arg_type = caddr(e3);
    int arg_expr = car(e3);
    int assignment;

    reg_arg_list = list2(arg,reg_arg_list);
    assignment = assign_expr0(arg,arg_expr,arg_type,arg_type);
#if ARG_ORDER==1
    delayed_arg = list2(assignment,delayed_arg);
#else
    g_expr_u(assignment);
#endif

    car(e3) = arg;
    return reg_arg_list;
}

/*
 * Advance the argument counters past argument e3.
 *   *pnargs    - argument size so far, in units of SIZE_OF_INT
 *   *preg_arg  - number of integer-class arguments so far
 *   *pfreg_arg - number of floating-class arguments so far
 * A bit-field argument is counted as its underlying type.  An unknown
 * type reports TYERR and is counted as one word.
 */
static void
increment_function_arg(int e3,int *pnargs,int *preg_arg,int *pfreg_arg) {
    int words = 0;
    int ints = 0;
    int floats = 0;
    int t = type_value(caddr(e3));

    if (t>=0 && car(t)==BIT_FIELD) {
        t = type_value(cadr(t));
    }

    if (scalar(t)) {
        words += 2;
        ints += 1;
    } else if (t==LONGLONG||t==ULONGLONG) {
        words += 2;
        ints += 1;
    } else if (t==FLOAT) {
        floats += 1;
        words += size(t)/SIZE_OF_INT;
        words += 1;
    } else if (t==DOUBLE) {
        floats += 1;
        words += size(t)/SIZE_OF_INT;
    } else if (t>=0 && (car(t)==STRUCT||car(t)==UNION)) {
        words += round8(size(t))/SIZE_OF_INT;
    } else {
        error(TYERR);
        words += 1;
    }

    *pnargs += words;
    *preg_arg += ints;
    *pfreg_arg += floats;
}



/* Modes for get_input_arg(): AS_SAVE asks for a register variable,
   AS_ARG asks for the real argument location. */
#define AS_SAVE 1
#define AS_ARG  0

/*
     set the storage type of the caller's arguments:
         register or stack
     this must cooperate with code_arg_register();
     if AS_SAVE is set, a register variable (or a temporary local variable)
     is used.
 */

/*
 * Choose the location for one outgoing argument of type t.
 *   mode     - AS_SAVE: hand out a register variable of the matching class;
 *              AS_ARG: hand out the input register, or the stack slot
 *              once the input registers of that class are used up.
 *   nargs    - argument size so far, in units of SIZE_OF_INT
 *   reg_arg  - integer-class arguments before this one
 *   freg_arg - floating-class arguments before this one
 * Struct and union arguments in AS_ARG mode always go to the stack slot.
 * An unknown type reports error(-1) and returns a register variable.
 */
static int
get_input_arg(int t,int mode,int nargs,int reg_arg,int freg_arg)
{
    int in_int_regs = reg_arg;
    int in_flt_regs = freg_arg;
    int slot;

    /* words that travel in registers do not occupy stack slots */
    if (in_int_regs > MAX_INPUT_REGISTER_VAR) {
        in_int_regs = MAX_INPUT_REGISTER_VAR;
    }
    if (in_flt_regs > MAX_INPUT_DREGISTER_VAR) {
        in_flt_regs = MAX_INPUT_DREGISTER_VAR;
    }
    slot = nargs - in_int_regs*2 - in_flt_regs*2;

    t = type_value(t);
    if (t>=0 && car(t)==BIT_FIELD) {
        t = type_value(cadr(t));
    }

    if (scalar(t)) {
        if (mode==AS_SAVE) {
            return get_register_var(0);
        }
        if (reg_arg < MAX_INPUT_REGISTER_VAR) {
            return get_input_register_var(reg_arg,0,0);
        }
        return list3n(LVAR,caller_arg_offset_v(slot),0);
    }
    if (t==LONGLONG||t==ULONGLONG) {
        if (mode==AS_SAVE) {
            return get_lregister_var(0);
        }
        if (reg_arg < MAX_INPUT_REGISTER_VAR) {
            return get_input_lregister_var(reg_arg,0,0);
        }
        return list3n(LVAR,caller_arg_offset_v(slot),0);
    }
    if (t==FLOAT) {
        if (mode==AS_SAVE) {
            return get_dregister_var(0,0);
        }
        if (freg_arg < MAX_INPUT_DREGISTER_VAR) {
            return get_input_dregister_var(freg_arg,0,0,0);
        }
        return list3n(LVAR,caller_arg_offset_v(slot),0);
    }
    if (t==DOUBLE) {
        if (mode==AS_SAVE) {
            return get_dregister_var(0,1);
        }
        if (freg_arg < MAX_INPUT_DREGISTER_VAR) {
            return get_input_dregister_var(freg_arg,0,0,1);
        }
        return list3n(LVAR,caller_arg_offset_v(slot),0);
    }
    if (t>=0 && (car(t)==STRUCT||car(t)==UNION)) {
        if (mode==AS_SAVE) {
            return get_register_var(0);
        }
        return list3n(LVAR,caller_arg_offset_v(slot),0);
    }

    error(-1);
    return get_register_var(0);
}

/*
 * Emit the call instruction: a direct call for an FNAME node, otherwise
 * an indirect call through REG_R10.  The parameter jmp is not used.
 */
static void
code_call(int e2,NMTBL *fn,int jmp)
{
    if (car(e2) != FNAME) {
        printf("\tcall\t*%s\n", register_name(REG_R10,0));
        return;
    }
    printf("\tcall\t_%s\n", fn->nm);
}

/*
 * Generate code for the function call expression e1 and return the
 * callee's return type as reported by function_type().
 *
 * Outline:
 *   1. Determine the callee: a direct FNAME, or an expression whose value
 *      is moved into REG_R10 just before the call.
 *   2. First pass over the (reversed) argument list: arguments that are
 *      not simple are scheduled for evaluation into saved locations,
 *      struct arguments are collected in stargs.
 *   3. Generate the delayed assignments, save the stacks, reset the
 *      current registers.
 *   4. Copy struct arguments with push_struct().
 *   5. Second pass: assign the arguments that live in LVAR slots.
 *   6. Third pass: assign the arguments that live in input registers.
 *   7. For a variadic callee load the number of floating register
 *      arguments into %eax, emit the call, release the temporary
 *      locations and select the result register.
 *
 * An argument whose car(e3) has been set to 0 is already handled and is
 * skipped by the later passes.
 */
int
function(int e1)
{

    int e2,e3,e4,e5,nargs,t;
    int arg,reg_arg,freg_arg,arg_assign;
    int dots;
    int reg_arg_list=0,ret_type;
    NMTBL *fn = 0;
    int jmp = 0;
    int complex_;
    int pnargs,preg_arg,pfreg_arg;
    int stargs;
#if (ARG_ORDER==1)
    /* delayed_arg is file-static; save it so nested calls do not clobber it */
    int save_delayed_arg = delayed_arg;
    delayed_arg = 0;
#endif
    const int as_save = AS_SAVE;

    ret_type = function_type(cadddr(e1),&dots);
    /* NOTE(review): presumably an empty parameter list in the function
       type; such a callee is treated as variadic - confirm */
    if (caddr(cadddr(e1))==0) dots=1;

    arg_assign = 0;
    e2 = cadr(e1);
    if (car(e2) == FNAME) {     
        fn=ncaddr(e2);
    } else {    
        if (car(e2)==INDIRECT) e2=cadr(e2); // (*func)(i) case
        jmp = list3(REGISTER,REG_R10,0);

        if (!simple_arg(e2)) {
            /* evaluate a complex callee expression into a register variable first */
            e3=get_register_var(0);
            reg_arg_list = list2(e3,reg_arg_list);
            g_expr_u(assign_expr0(e3,e2,INT,INT));
            e2=e3;
        }
        /* the move into REG_R10 is done after all arguments are set */
        arg_assign = list2(assign_expr0(jmp,e2,INT,INT),arg_assign);
    }
    /* First we execute complex argument to avoid interaction with
       input variables. Remain the last complex argument in complex_. */
    stargs = 0;
    complex_ = 0;
    nargs = reg_arg = freg_arg = 0;
    pnargs = preg_arg = pfreg_arg = 0;
    for (e3 = e1 = reverse0(caddr(e1)); e3; e3 = cadr(e3)) {    
        t=caddr(e3);
        if ((e5= !simple_arg(car(e3)))) {
            if (complex_) {
                arg = get_input_arg(caddr(complex_),as_save,
                                        pnargs,preg_arg,pfreg_arg);
                reg_arg_list = compute_complex_arg(complex_,reg_arg_list,arg);
            }
            // memorise last complex arg parameter
            pnargs=nargs;preg_arg=reg_arg;pfreg_arg=freg_arg;
            complex_ = e3;
        }
        if (t>=0&&(car(t)==STRUCT||car(t)==UNION)) {
            // The struct should be pushed after complex arguments.
            if (e5) { // compute address only, complex_ is me now. Clear it.
                complex_ = 0;
                e4 = car(e3);
                if (car(e4)==RSTRUCT) e4 = cadr(e4);
                else if (car(e4)==INDIRECT) e4 = cadr(e4);
                if (!simple_arg(e4)) {
                    // Calculate complex struct address here.
                    // If simple, leave it.
                    arg = get_register_var(0);
#if ARG_ORDER==1
		    delayed_arg = list2(
			assign_expr0(arg,e4,LONGLONG,LONGLONG),
			delayed_arg);
#else
                    g_expr_u(assign_expr0(arg,e4,LONGLONG,LONGLONG));
#endif
                    car(e3)=arg;
                    reg_arg_list = list2(arg,reg_arg_list);

                    car(e3) = rvalue_t(arg,LONGLONG);
                }
            }
            /* remember the struct together with its position counters */
            stargs = list4(e3,stargs,nargs,reg_arg);
        }
        increment_function_arg(e3,&nargs,&reg_arg,&freg_arg);
    }
#if (ARG_ORDER==1)
    if (complex_) {
	arg = get_input_arg(caddr(complex_),as_save,
				pnargs,preg_arg,pfreg_arg);
	reg_arg_list = compute_complex_arg(complex_,reg_arg_list,arg);
    }
    /* generate the queued assignments */
    for(;delayed_arg;delayed_arg = cadr(delayed_arg)) {
	g_expr_u(car(delayed_arg));
    }
#endif

    /* now all input register vars are free */
    code_save_stacks();

    // set_lreg(LREG_LREGISTER,0);
    set_freg(FREG_FREGISTER,0);
    set_ireg(CREG_REGISTER,0);

    //  Struct arguments need emit_copy. it destructs 3 input registers.
    //  But it returns no value on a register. So calculate it here.
    //  We cannot do this in the previous loop, because the copied struct may be
    //  override by other complex arguments. But before this we have to check
    //  complex_.

    //  ARG_ORDER==1 case put the last value on the top of stack.
    //  emit_copy/push_struct must preserve argument stack, i.e.
    //  no library call is allowed.

    if (stargs) {
#if (ARG_ORDER!=1)
        if (complex_) {
            arg = get_input_arg(caddr(complex_),AS_SAVE,
                                    pnargs,preg_arg,pfreg_arg);
            reg_arg_list = compute_complex_arg(complex_,reg_arg_list,arg);
        }
#endif
        for(stargs=reverse0(stargs);stargs;stargs = cadr(stargs)) {
            e3 = car(stargs);
            e4 = car(e3);
            t  = caddr(e3);
            arg = get_input_arg(t,AS_ARG,caddr(stargs),cadddr(stargs),0);
            push_struct(e4,t,arg);
            car(e3)=0;  // done


        }
#if (ARG_ORDER!=1)
    } else {
        //  last complex argument can use input register
        if (complex_) {
            arg = get_input_arg(caddr(complex_),AS_ARG,pnargs,preg_arg,pfreg_arg)
;
            reg_arg_list = compute_complex_arg(complex_,reg_arg_list,arg);

            car(complex_) = 0; // done.


        }
#endif
    }
    nargs = reg_arg = freg_arg = 0;
    // calc stack arguments first, it may requires extra registers,
    // and we can still use input registers now.
    for (e3 = e1; e3; 
                increment_function_arg(e3,&nargs,&reg_arg,&freg_arg),
                e3 = cadr(e3)) {        
        if (!(e4=car(e3))) continue;
        t=caddr(e3);
        arg = get_input_arg(t,AS_ARG,nargs,reg_arg,freg_arg);
        if (car(arg)!=LVAR) continue;
        g_expr_u(assign_expr0(arg,e4,t,t));
        car(e3)=0;  // done
    }
    /* remaining arguments go to input registers */
    nargs = reg_arg = freg_arg = 0;
    int max_freg = 0;
    for (e3 = e1; e3;
                increment_function_arg(e3,&nargs,&reg_arg,&freg_arg),
                e3 = cadr(e3)) {
        if (!(e4=car(e3))) continue;
        t=type_value(caddr(e3));
        arg = get_input_arg(t,AS_ARG,nargs,reg_arg,freg_arg);
        if(scalar(t)||t==LONGLONG||t==ULONGLONG) {
            reg_arg_list = list2(arg,reg_arg_list);
            /* protect from input register free */
            if (car(arg)==LREGISTER) {
                use_input_reg(cadr(arg),1);
		t = LONGLONG;
	    }
            g_expr_u(assign_expr0(arg,e4,t,t));
        } else if (t==DOUBLE||t==FLOAT) {
            reg_arg_list = list2(arg,reg_arg_list);
            if (car(arg)==DREGISTER||car(arg)==FREGISTER) {
                use_input_reg(cadr(arg),1); /* protect from input register free */
		max_freg ++;
	    }
            g_expr_u(assign_expr0(arg,e4,t,t)); /* XXX */
        }
        // structs are finished
    }

    if (max_func_args<nargs) max_func_args=nargs;
    /* load the indirect call target, if any */
    for(;arg_assign;arg_assign=cadr(arg_assign)) {
        g_expr_u(car(arg_assign));
    }
    clear_ptr_cache();
    if (dots) {
	// needs number of fregister in double
	printf("\tmovl $%d,%%eax\n", max_freg);
    }
    code_call(e2,fn,jmp);
    free_register_var(reg_arg_list);
    /* select the register that holds the result */
    if (ret_type==DOUBLE||ret_type==FLOAT) {
        if (use)
            set_freg(RET_DREGISTER,0);
        else
            set_freg(FREG_FREGISTER,0);
    } else if (ret_type==VOID) {
    } else {
        if (use)
            set_ireg(RET_REGISTER,0);
        else
            set_ireg(CREG_REGISTER,0);
    }
#if (ARG_ORDER==1)
    delayed_arg = save_delayed_arg;
#endif
    return ret_type;
}

void
code_alloca(int e1,int reg)
{
    /*
     * Emit code for alloca(e1).  The requested size ends up in `reg`
     * (rounded with round16() when constant, or computed as
     * (e1 + 15) & ~15 at run time), %rsp is lowered by it, and `reg` is
     * finally loaded with the value stored at the max_func_arg_label
     * location plus the new %rsp.
     */
    if (is_const(e1)) {
        use_int(reg);
        code_const(round16(cadr(e1)),reg);
    } else {
        int padded = list3(ADD,e1,list2(CONST,15));
        int aligned = list3(BAND,padded,list2(CONST,~15));
        g_expr(aligned);
        use_int(reg);
    }

    char *rn = register_name(reg,0);
    printf("\tsubq\t%s, %%rsp\n",rn);
    if (!max_func_arg_label) {
        /* allocate the label lazily, on the first alloca */
        max_func_arg_label = fwdlabel();
    }
    printf("\tmovq %s%d(%%rip),%s\n",lpfx,max_func_arg_label,rn);
    printf("\taddq\t%%rsp, %s\n",rn);
}

void
code_frame_pointer(int e3)
{
    /* Emit a move of register e3 into %rbp. */
    use_int(e3);
    char *src = register_name(e3,0);
    printf("\tmovq %s,%%rbp\n",src);
}

int
code_frame_pointer_register()
{
    /* Build a REGISTER node that names REG_EBP. */
    int fp_node = list2(REGISTER,REG_EBP);
    return fp_node;
}

void
code_fix_frame_pointer(int disp_offset)
{
    /* Deliberately does nothing on this target; keep the body empty. */
}

void
code_jmp(char *s)
{
    /*
     * Set both register-variable counters to the size of their ranges
     * (BASE - MIN), then emit an unconditional jump to symbol _<s>.
     */
    max_reg_var = REG_VAR_BASE-REG_VAR_MIN;
    max_freg_var = FREG_VAR_BASE-FREG_VAR_MIN;
    printf("\tjmp _%s\n",s);
}


void
code_indirect_jmp(int e2)
{
    /* Emit an indirect jump through the address held in register e2. */
    use_int(e2);
    char *target = register_name(e2,0);
    printf("\tjmp *%s\n",target);
}

void
code_rindirect(int e1, int reg,int offset, int sign,int byte)
{
    /*
     * Evaluate e1 (its result is taken from creg as the base address) and
     * emit a load of offset(base) into reg.  The load mnemonic and the
     * destination register width are chosen from sign/byte.
     */
    char *load_op,*base,*dst;

    g_expr(e1);
    load_op = cload(sign,byte);
    base = register_name(creg,0);     /* captured before use_int(reg) */
    use_int(reg);
    dst = register_name(reg,regu(sign,byte,0));
    printf("\t%s %d(%s),%s\n",load_op,offset,base,dst);
}

#if FLOAT_CODE
int
code_drindirect(int e1, int reg,int offset, int d)
{
    /*
     * Evaluate e1, remember the register holding the address, then emit a
     * float/double load (per d) of offset(address) into float register reg.
     * Always returns DOUBLE.
     */
    g_expr(e1);
    int base = creg;                  /* captured before use_float() */
    use_float(d,reg);
    char *load_op = fload(d);
    char *addr = register_name(base,0);
    char *dst = fregister_name(reg);
    printf("\t%s %d(%s),%s\n",load_op,offset,addr,dst);
    return DOUBLE;
}
#endif

#if LONGLONG_CODE

static void
lload(int creg,int offset,int reg)
{
    /* Emit a 64-bit load of offset(creg) into reg. */
    char *base = register_name(creg,0);
    use_longlong(reg);
    char *dst = register_name(reg,0);
    printf("\tmovq %d(%s),%s\n",offset,base,dst);
}

int
code_lrindirect(int e1, int reg, int offset, int us)
{
    /*
     * Evaluate e1 as an address and load the 64-bit value at
     * offset(address) into reg.  Always returns LONGLONG; `us` is unused.
     */
    g_expr(e1);
    int base = creg;                  /* captured before use_longlong() */
    use_longlong(reg);
    lload(base,offset,reg);
    return LONGLONG;
}
#endif

char *
move(int byte)
{
    /* Pick the mov mnemonic for an operand of `byte` bytes; anything that
       is not 1, SIZE_OF_SHORT or SIZE_OF_INT gets the 64-bit form. */
    if (byte==1) {
        return "movb";
    }
    if (byte==SIZE_OF_SHORT) {
        return "movw";
    }
    if (byte==SIZE_OF_INT) {
        return "movl";
    }
    return "movq";
}

void
code_assign_gvar(int e2,int creg,int byte)
{
    /*
     * Emit a store of register creg into the global named by e2
     * (symbol from ncaddr(e2), displacement from cadr(e2)).
     * Sub-int sizes go through use_data_reg(), others through use_int().
     */
    if (byte && byte <SIZE_OF_INT) {
        use_data_reg(creg,1);
    } else {
        use_int(creg);
    }
#ifdef __APPLE__
    int base = get_ptr_cache(ncaddr(e2));
    int off = cadr(e2);
    char *mv = move(byte);
    char *src = register_name(creg,byte);
    char *brn = register_name(base,0);
    if (off) {
        printf("\t%s %s,%d(%s)\n",mv,src,off,brn);
    } else {
        printf("\t%s %s,(%s)\n",mv,src,brn);
    }
#else
    int off = cadr(e2);
    char *mv = move(byte);
    char *src = register_name(creg,byte);
    char *sym = (ncaddr(e2))->nm;
    if (off) {
        printf("\t%s %s,%s+%d\n",mv,src,sym,off);
    } else {
        printf("\t%s %s,%s\n",mv,src,sym);
    }
#endif
}

void
code_assign_lvar(int e2,int creg,int byte)
{
    /* Emit a store of register creg into local variable e2; the operand
       text for the local is produced by lvar(). */
    if (byte && byte <SIZE_OF_INT) {
        use_data_reg(creg,1);
    } else {
        use_int(creg);
    }
    char *mv = move(byte);
    char *src = register_name(creg,byte);
    printf("\t%s %s,",mv,src);
    lvar(e2);
    printf("\n");
}

void
code_assign_register(int e2,int byte,int creg)
{
    /* Emit a 64-bit register-to-register move creg -> e2, skipped when
       both are the same register. */
    use_int(creg);
    if (creg==e2) {
        return;
    }
    printf("\tmovq %s,%s\n",register_name(creg,0),register_name(e2,0));
}

void
code_assign(int e2,int byte,int creg)
{
    /* Emit a store of creg through the pointer held in register e2.
       Nothing is emitted when creg and e2 are the same register. */
    use_int(e2);
    if (byte && byte <SIZE_OF_INT) {
        use_data_reg(creg,1);
    } else {
        use_int(creg);
    }
    if (creg==e2) {
        return;
    }
    printf("\t%s %s,(%s)\n",move(byte),register_name(creg,byte),register_name(e2,0));
}

void
code_register_assop(int e2,int reg, int op,int byte)
{
    /* Compound assignment on a register variable: e2 = e2 op reg,
       delegated to tosop().  `byte` is not used here. */
    use_int(reg);
    tosop(op,e2,reg);
}

/*
 * Compound assignment through a pointer:  (*address) op= value.
 * On entry creg holds the address; the right-hand value is taken from the
 * expression stack with emit_pop().  The address is pushed, the current
 * contents are loaded, combined with the value by tosop(), and stored back
 * through the address popped again at the end.
 */
void
code_assop(int op,int creg,int byte,int sign) {
    int xreg;
    //  (*pop()) op = creg
    //     creg should be ecx

    use_int(creg);
    xreg = emit_pop(0);       /* pop the right-hand value */
    emit_push();              /* push again; popped below as the store address */
    ld_indexx(byte,0,creg,ireg,sign);   /* load current *address into ireg */
    tosop(op,ireg,xreg);      /* ireg = ireg op value */
    emit_pop_free(xreg);
    if (byte && byte <SIZE_OF_INT) {
	use_data_reg(ireg,1);
    }
    xreg = emit_pop(0);       /* pop the entry pushed by emit_push() above */
    printf("\t%s %s,(%s)\n",move(byte),register_name(ireg,byte),register_name(xreg,0));
    emit_pop_free(xreg);
}

int
tosop_operand_safe_p(int op)
{
    /* 1 for ADD, SUB, CMP, BAND, EOR, BOR, MUL and UMUL; 0 for every
       other operator. */
    int safe =
        op==ADD  || op==SUB || op==CMP ||
        op==BAND || op==EOR || op==BOR ||
        op==MUL  || op==UMUL;

    return safe ? 1 : 0;
}

/*
 * Emit  reg = reg op oreg  for integer operands.
 *   op   : operator; OP(op) selects the instruction family
 *   reg  : destination / left operand register
 *   oreg : right operand register, or a value <= -REG_LVAR_OFFSET that
 *          denotes a local-variable slot which is first reloaded into a
 *          freshly allocated register
 *   sz   : operand size; sz <= SIZE_OF_INT selects the 32-bit ("l")
 *          forms, anything larger the 64-bit ("q") forms
 */
void
tosop1(int op,int reg,int oreg,int sz)
{
    int ox=0;
    char *orn,*crn;
    // creg = creg op oreg

    use_int(reg);

    if(oreg==-1) {
        error(-1);
    } else if (oreg<= -REG_LVAR_OFFSET) {
        /* right operand lives in a local slot: reload it into ox */
        ox = get_register(); if (ox<0) error(-1);
	if (sz<=SIZE_OF_INT)
	    code_rlvar(oreg+REG_LVAR_OFFSET,ox);
	else
	    code_lrlvar(oreg+REG_LVAR_OFFSET,ox);
        free_lvar(oreg+REG_LVAR_OFFSET);
        oreg = ox;
    }

    /* shifts are handled by shift() and return early */
    switch(OP(op)) {
    case LSHIFT:
    case ULSHIFT:
	shift(sz<=SIZE_OF_INT?"sall":"salq",oreg,reg,sz);
        if(ox) free_register(ox);
	return;
    case RSHIFT:
	shift(sz<=SIZE_OF_INT?"sarl":"sarq",oreg,reg,sz);
        if(ox) free_register(ox);
	return;
    case URSHIFT:
	shift(sz<=SIZE_OF_INT?"shrl":"shrq",oreg,reg,sz);
        if(ox) free_register(ox);
	return;
    }
    // regs[oreg]=1;
    orn = register_name(oreg,sz<=SIZE_OF_INT?SIZE_OF_INT:0);
    crn = register_name(reg,sz<=SIZE_OF_INT?SIZE_OF_INT:0);
    char *q = sz<=SIZE_OF_INT?"l":"q";      /* instruction size suffix */
    switch(OP(op)) {
    case ADD:
	printf("\tadd%s %s,%s\n",q,orn,crn);
	break;
    case SUB: 
	printf("\tsub%s %s,%s\n",q,orn,crn);
	break;
    case CMP:
	printf("\tcmp%s %s,%s\n",q,orn,crn);
	break;
    case BAND: 
	printf("\tand%s %s,%s\n",q,orn,crn);
	break;
    case EOR: 
	printf("\txor%s %s,%s\n",q,orn,crn);
	break;
    case BOR:
	printf("\tor%s %s,%s\n",q,orn,crn);
	break;
    case MUL:
    case UMUL:
	printf("\timul%s %s,%s\n",q,orn,crn);
	break;
    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
	/* division: dividend is forced into REG_EAX, REG_EDX is cleared
	   for use by the divide, and a divisor found in EAX/EDX is moved */
#ifdef __APPLE__
	if (regs[REG_EDX]==PTRC_REG)
	    clear_ptr_cache_reg(REG_EDX);
#endif
	use_register(reg,REG_EAX,1);
	if (oreg==REG_EAX) oreg=reg;
	if (oreg==REG_EDX) {
	    use_register(oreg,REG_ECX,1);
	    oreg = REG_ECX;
	}
	code_clear_stack_reg(REG_EDX);
        if (sz<=SIZE_OF_INT) {
	    /* signed: cltd + idivl; otherwise: zero %edx + divl */
	    orn = register_name(oreg,SIZE_OF_INT);
	    printf((op==DIV||op==MOD)?
		"\tcltd\n\tidivl %s\n":
		"\txorl %%edx,%%edx\n\tdivl %s\n",orn);
	    /* result is taken from REG_EDX for MOD/UMOD, else REG_EAX */
	    set_ireg((op==MOD||op==UMOD)?REG_EDX:REG_EAX,0);
	    set_ireg(reg,1);
        } else {
	    orn = register_name(oreg,0);
	    /* NOTE(review): this path tests the raw op against LDIV/LMOD
	       while the switch above dispatches on OP(op) -- confirm that
	       callers pass the L* opcodes here. */
	    if (op==LDIV||op==LMOD) {
		code_lassign_lregister(REG_EDX,REG_EAX);
		printf("\tsarq $63,%%rdx\n");
		printf("\tidivq %s\n",orn);
            } else {
		printf("\txorl %%edx,%%edx\n\tdivq %s\n",orn);
            }
	    set_lreg((op==LMOD||op==LUMOD)?REG_EDX:REG_EAX,0);
	    set_lreg(reg,1);
        }
	break;
    }
    /* release the reload register unless it became the current ireg */
    if(ox && ox!=ireg) free_register(ox);
}

void
tosop(int op,int reg,int oreg)
{
    /* int-sized front end of tosop1(). */
    const int sz = SIZE_OF_INT;

    tosop1(op,reg,oreg,sz);
}

int
code_const_op_p(int op,int e)
{
    /*
     * Can `op` with right operand e use the constant-operand form?
     * Non-CONST nodes never can.  DIV/UDIV return ilog() of the constant,
     * MOD/UMOD return 0, everything else returns 1.
     */
    if (car(e)!=CONST) {
        return 0;
    }
    switch(op) {
    case DIV:
    case UDIV:
        return ilog(cadr(e));
    case MOD:
    case UMOD:
        return 0;
    default:
        return 1;
    }
}

/*
 * Emit  reg = reg op constant.
 *   op   : operator; OP(op) selects the instruction
 *   reg  : destination / left operand register
 *   orn1 : CONST or LCONST node holding the constant (anything else
 *          reports error(-1))
 *   sz   : operand size; sz <= SIZE_OF_INT selects the 32-bit forms
 */
void
oprtc1(int op,int reg,int orn1,int sz)
{
    char *crn;
    int datareg;
    use_int(reg);
    char *q = sz<=SIZE_OF_INT?"l":"q";      /* instruction size suffix */
    crn = register_name(reg,sz<=SIZE_OF_INT?SIZE_OF_INT:0);
    long orn ;
    if (car(orn1)==CONST) orn = cadr(orn1);
    else if (car(orn1)==LCONST) orn = lcadr(orn1);
    else { orn =0; error(-1); }

    datareg=is_data_reg(reg);

    switch(OP(op)) {
    case LSHIFT:
    case ULSHIFT:
	printf("\tsal%s $%ld,%s\n",q,orn,crn);
	return;
    case DIV:
	/* division by a constant is emitted as a shift by ilog(orn).
	   NOTE(review): an arithmetic right shift rounds toward negative
	   infinity, unlike C division -- confirm negative dividends. */
	orn = ilog(orn);
	/* fall through */
    case RSHIFT:
	printf("\tsar%s $%ld,%s\n",q,orn,crn);
	return;
    case UDIV:
	orn = ilog(orn);
	/* fall through */
    case URSHIFT:
	printf("\tshr%s $%ld,%s\n",q,orn,crn);
	return;
    case ADD:
	printf("\tadd%s $%ld,%s\n",q,orn,crn);
	break;
    case SUB: case CMP:
	/* CMP shares the sub instruction here, so reg is overwritten */
	printf("\tsub%s $%ld,%s\n",q,orn,crn);
	break;
    case BAND: 
	/* use the narrowest form whose untouched upper bits are all ones */
	if (datareg&&(orn & ~255)==~255)
	    printf("\tandb $%ld,%s\n",orn,register_name(reg,1));
	else if (datareg&&(orn & ~65535)==~65535)
	    printf("\tandw $%ld,%s\n",orn,register_name(reg,2));
	else if (sz<=SIZE_OF_INT||(datareg&&(orn & ~0xffffffffL)==~0xffffffffL))
	    printf("\tandl $%ld,%s\n",orn,register_name(reg,4));
	else {
	    /* 64-bit constant: load it into a scratch register first */
	    int t = get_register(); char *trn = register_name(t,0);
	    printf("\tmovq $%ld,%s\n",orn,trn);
	    printf("\tandq %s,%s\n",trn,crn);
	    free_register(t);
	}
	break;
    case EOR: 
	printf("\txor%s $%ld,%s\n",q,orn,crn);
	break;
    case BOR:
	/* use the narrowest form whose untouched upper bits are all zero */
	if (datareg&&(orn & ~255)==0)
	    printf("\tor $%ld,%s\n",orn,register_name(reg,1));
	else if (datareg&&(orn & ~65535)==0)
	    printf("\tor $%ld,%s\n",orn,register_name(reg,2));
	else if (sz<=SIZE_OF_INT||(datareg&&(orn & ~0xffffffffL)==0))
	    printf("\torl $%ld,%s\n",orn,register_name(reg,4));
	else {
	    /* 64-bit constant: load it into a scratch register first */
	    int t = get_register(); char *trn = register_name(t,0);
	    printf("\tmovq $%ld,%s\n",orn,trn);
	    printf("\torq %s,%s\n",trn,crn);
	    free_register(t);
	}
	break;
    case MUL:
    case UMUL:
	/* a non-zero ilog(orn) turns the multiply into a left shift */
	if (ilog(orn)) {
	    printf("\tsal%s $%d,%s\n",q,ilog(orn),crn);
	} else
	    printf("\t%s%s $%ld,%s\n","imul",q,orn,crn);
	break;
    default:
	error(-1);
    }
}

void
oprtc(int op,int reg,int orn)
{
    /* int-sized front end of oprtc1(). */
    const int sz = SIZE_OF_INT;

    oprtc1(op,reg,orn,sz);
}


static void
shift(char *op, int oreg,int reg,int sz)
{
    /*
     * Emit a shift of reg by the count in oreg using mnemonic `op`.
     * The count is placed in REG_ECX (the instruction uses %cl); when
     * reg itself is REG_ECX the value to shift is taken from oreg.
     */
    use_register(oreg,REG_ECX,1);

    int target = reg;
    if (reg==REG_ECX) {
        target = oreg;
    }
    int width = sz<=SIZE_OF_INT?SIZE_OF_INT:0;
    printf("\t%s %%cl,%s\n",op,register_name(target,width));
    set_ireg(target,0);
    set_ireg(reg,1);
}

void
ld_indexx(int byte, int n, int xreg,int reg,int sign)
{
    /*
     * Emit a load of n(xreg) into reg; the displacement is omitted from
     * the operand when n is 0.  Mnemonic and destination width are chosen
     * from sign/byte.
     */
    if (byte && byte <SIZE_OF_INT) {
        use_data_reg(reg,1);
    } else {
        use_int(reg);
    }

    char *load_op = cload(sign,byte);
    char *base = register_name(xreg,0);
    char *dst = register_name(reg,regu(sign,byte,0));
    if (!n) {
        printf("\t%s (%s),%s\n",load_op,base,dst);
    } else {
        printf("\t%s %d(%s),%s\n",load_op,n,base,dst);
    }
}

int
code_csvalue()
{
    /* REGISTER node (built with glist2) for the current creg; used as
       the switch value. */
    int node = glist2(REGISTER,creg);
    return node;
}

void
code_cmpdimm(int e, int csreg,int label,int cond)
{
    /* Used for switch statements: compare the switch value register with
       the immediate e and emit a conditional jump to `label`. */
    set_ireg(csreg,0);
    char *value = register_name(creg,SIZE_OF_INT);
    printf("\tcmpl $%d,%s\n",e,value);
    jcond(label,cond);
}

void
code_opening(char *filename)
{
    /* Emit the assembler file header: .file and .version directives. */
    printf("\t.file \"%s\"\n",filename);
    printf("\t.version\t\"01.01\"\n");
}

static void
emit_lib(char *p[])
{
    /* Print every string of the NULL-terminated array p on its own line. */
    int i;

    for (i = 0; p[i]; i++) {
        printf("%s\n",p[i]);
    }
}

void
code_closing()
{
    /* End of output: emit the float helper libraries that were marked as
       used, then the global table and the .ident line. */
    if (float_one_lib_used) {
        emit_lib(float_one_lib);
    }
    if (float_one_f_lib_used) {
        emit_lib(float_one_f_lib);
    }
    global_table();
    printf("\t.ident \"Micro-C compiled\"\n");
}

static char *
code_cond(int op,int cond)
{
    /*
     * Map a comparison operator to the condition string produced by the
     * code_gt/code_ugt/code_ge/code_uge/code_eq helpers.  LT, ULT, LE,
     * ULE and NEQ are expressed as the opposite operator with the
     * condition inverted.  Returns 0 for any other operator.
     */
    int c = cond;

    switch(op) {
    case LT:  op = GE;  c = !cond; break;
    case ULT: op = UGE; c = !cond; break;
    case LE:  op = GT;  c = !cond; break;
    case ULE: op = UGT; c = !cond; break;
    case NEQ: op = EQ;  c = !cond; break;
    default: break;
    }

    switch(op) {
    case GT:  return code_gt(c);
    case UGT: return code_ugt(c);
    case GE:  return code_ge(c);
    case UGE: return code_uge(c);
    case EQ:  return code_eq(c);
    default:  return 0;
    }
}

static int
rexpr_bool(int e1,int reg)
{
    /*
     * Materialize the comparison e1 as 0/1 in reg with setCC + movzbl.
     * Returns 0 without emitting anything when the operator has no
     * condition string; otherwise 1.  When `use` is false only the
     * compare itself is generated.
     */
    char *cc = code_cond(car(e1),1);

    if (!cc) {
        return 0;
    }
    g_expr(list3(CMP,cadr(e1),caddr(e1)));
    if (use) {
        use_data_reg(reg,1);
        printf("\tset%s\t%s\n",cc,register_name(reg,1));
        printf("\tmovzbl %s,%s\n",register_name(reg,1),register_name(reg,SIZE_OF_INT));
    }
    return 1;
}

int
rexpr(int e1, int l1, int cond,int t)
{
    /* Compare the two operands of e1 and emit a conditional jump to label
       l1 for the given cond.  Returns l1; `t` is unused. */
    g_expr(list3(CMP,cadr(e1),caddr(e1)));
    char *cc = code_cond(car(e1),cond);
    printf("\tj%s\t_%d\n",cc,l1);
    return l1;
}


static void
jcond(int l, char cond)
{
    /* Emit a conditional jump to label l: cond==LT uses code_ge(0),
       any other non-zero cond is "ne", zero is "e". */
    char *cc;

    if (cond==LT) {
        cc = code_ge(0);
    } else if (cond) {
        cc = "ne";
    } else {
        cc = "e";
    }
    printf("\tj%s\t_%d\n",cc,l);
}

void
jmp(int l)
{
    /*
     * Emit an unconditional jump to label l and clear `control`.
     * creg is deliberately not forced into REG_EAX here: that is not
     * allowed because of the ?: operator.
     */
    printf("\tjmp\t_%d\n",l);
    control = 0;
}

void
code_comment(char *s)
{
    /* Emit s as an assembler comment ("## " prefix, no newline added). */
    printf("## %s",s);
}

/*
static    int lfb;
static    int lfe;
static    int lcfi1;
static    int lcfi2;
static    int lcfi3;

static void
init_lcf()
{
    lfb = fwdlabel();
    lfe = fwdlabel();
    lcfi1 = 0;
    lcfi2 = 0;
    lcfi3 = 0;
}
 */

/* Label of the deferred prologue tail: enter() emits a jump to it and
   leave() defines it once the frame size is known. */
static int code_setup;
/* Label of the start of the function body: defined in enter(), jumped
   back to from the code that leave() emits at code_setup. */
static int code_base;

void
code_enter(char *name)
{
    /* Emit the entry label of a code segment: switch to text, align,
       export the symbol unless it is static, and define the label. */
    int exported = (stmode!=STATIC);

    // init_lcf();
    text_mode(0);
    printf("\t.align 4\n");
#ifndef __APPLE__
    if (exported) {
        printf(".globl %s\n",name);
    }
    printf("\t.type\t%s,@function\n",name);
    printf("%s:\n",name);
#else
    if (exported) {
        printf(".globl _%s\n",name);
    }
    printf("_%s:\n",name);
    clear_ptr_cache();
#endif
}


void
code_enter1(int args)
{
    /* Emit stack setup for a code segment: reset %rsp to %rbp and
       subtract the value stored at a freshly allocated label
       (code_disp_label).  `args` is unused. */
    int lb = fwdlabel();

    code_disp_label = lb;
    printf("\tmovq %%rbp,%%rsp\n");
    printf("\tsubq  _%d(%%rip),%%rsp\n",code_disp_label);
}

void
code_leave(char *name)
{
    /* Finish a code segment: fix offsets, emit the local static table and
       release all registers.  labelno is bumped on non-Apple builds only. */
    code_offset_set(fnptr);
    local_table();
#ifndef __APPLE__
    labelno++;
#endif
    free_all_register();
}

void
enter(char *name)
{
    /*
     * Emit a function entry: symbol definition, frame-pointer setup and a
     * jump to the code_setup label (defined later by leave()), followed
     * by the definition of the code_base label where the body starts.
     */
    int exported = (stmode!=STATIC);

    text_mode(0);
    printf("\t.align 2\n");
#ifndef __APPLE__
    if (exported) {
        printf(".globl %s\n",name);
    }
    printf("%s:\n",name);
#else
    if (exported) {
        printf(".globl _%s\n",name);
    }
    printf("_%s:\n",name);
#endif
    lvar_offset_label = fwdlabel();
    r1_offset_label = fwdlabel();
    max_func_args = 0;
    printf("\tpushq %%rbp\n");
    printf("\tmovq %%rsp,%%rbp\n");
    code_setup = fwdlabel();
    printf("\tjmp _%d\n", code_setup);
#ifdef __APPLE__
    clear_ptr_cache();
#endif
    code_base = fwdlabel();
    fwddef(code_base);
    control=1;
}

void
enter1()
{
    /* Only makes sure output is in the text section. */
    const int align = 0;

    text_mode(align);
}

void
code_label_call(int l)
{
    /* Emit a call to local label L_<l>. */
    printf("\tcall\tL_%d\n",l);
}

void
code_ret()
{
    /* Emit a ret instruction and clear `control`. */
    printf("\tret\n");
    control = 0;
}

/*
 * Emit the code at label retcont, the entry used when a function is
 * returned to through its return continuation.  Depending on the return
 * type of the current function (cadr(fnptr->ty)) it
 *   - FLOAT/DOUBLE: copies the value from XMM0 into the float return
 *     register and restores %rbp from R15;
 *   - struct/union: loads the address disp-SIZE_OF_LONGLONG(%rbp) into
 *     the return register and restores %rbp from R15;
 *   - other non-void: copies the value from R15 into the return register
 *     and restores %rbp from R14.
 * Nothing but the label is emitted for VOID.
 */
static void 
make_return_continuation()
{
    int ty = cadr(fnptr->ty);
    fwddef(retcont);
    if (ty==FLOAT||ty==DOUBLE) {
	set_freg(RET_DREGISTER,0);
	printf("\tmovs%s %s,%s\n",ty==FLOAT?"s":"d",fregister_name(REG_XMM0),fregister_name(creg));
	printf("\tmovq %s,%%rbp\n",register_name(REG_R15,0));
    } else if (ty>0&&( car(ty)==STRUCT || car(ty)==UNION)) {
	set_ireg(RET_REGISTER,0);
	printf("\tlea %d(%%rbp),%s\n",disp-SIZE_OF_LONGLONG, register_name(creg,0));
	printf("\tmovq %s,%%rbp\n",register_name(REG_R15,0));
    } else if (ty!=VOID) {
	set_ireg(RET_REGISTER,0);
	printf("\tmovq %s,%s\n",register_name(REG_R15,0),register_name(creg,0));
	printf("\tmovq %s,%%rbp\n",register_name(REG_R14,0));
    }
}

void
code_register_pop()
{
    /* Emit popq for register variables REG_VAR_BASE-max_reg_var up to
       REG_VAR_BASE, the reverse of code_register_push(). */
    int i;

    for (i = max_reg_var; i >= 0; i--) {
        char *rn = register_name(REG_VAR_BASE-i,0);
        printf("\tpopq %s\n", rn);
    }
}

void
code_register_push()
{
    /* Emit pushq for register variables REG_VAR_BASE down to
       REG_VAR_BASE-max_reg_var. */
    int i;

    for (i = 0; i <= max_reg_var; i++) {
        char *rn = register_name(REG_VAR_BASE-i,0);
        printf("\tpushq %s\n", rn);
    }
}

/*
 * Emit the function epilogue and the deferred part of the prologue.
 *   control : non-zero when execution can fall into the epilogue, in
 *             which case the return register is set up first
 *   name    : function name (only referenced by commented-out code)
 * After the "leave; ret" sequence the code at label code_setup is
 * emitted: it pushes the register variables, reserves the frame and jumps
 * back to code_base, the label defined by enter().
 */
void
leave(int control, char *name)
{

#ifdef __APPLE__
    disp &= -(SIZE_OF_INT*4); // 16byte alignment
    // disp -= 12;
#else
    disp &= -SIZE_OF_INT;
#endif
    if (control)
        code_set_return_register(1);
    if (retcont) {
	/* skip over the return-continuation code on the normal path */
	if (control)
	    jmp(retlabel);
	make_return_continuation();
    }
    fwddef(retlabel);

    /* point %rsp at the saved register variables before popping them */
    printf("\tleaq %d(%%rbp),%%rsp\n",max_reg_var<0?0:-(max_reg_var+1)*SIZE_OF_LONGLONG);
    code_register_pop();
    printf("\tleave\n");
    printf("\tret\n");
    /* NOTE(review): this assigns the parameter `control`, which shadows
       the `control` that jmp() and code_ret() clear -- confirm whether
       the non-parameter variable was meant. */
    control=0;
    int r1_offset = code_offset_set(fnptr);

    // we can do this in enter(), in parse tree mode
    text_mode(0);
    fwddef(code_setup);
    code_register_push();
    printf("\tsubq $%d,%%rsp\n",r1_offset); 
    printf("\tjmp _%d\n",code_base);

#ifndef __APPLE__
    // printf("_%d:\n",labelno);
    // printf("\t.size\t%s,_%d-%s\n",name,labelno,name);
#endif
    local_table();
    labelno++;
    free_all_register();
}

int
code_get_fixed_creg(int reg,int type)
{
    /*
     * Choose the register that will hold a value of `type`.  When the
     * caller passes USE_CREG and the fixed register (FREG_FREGISTER for
     * float/double, CREG_REGISTER otherwise) is free, that fixed register
     * is selected and returned; otherwise reg goes through
     * use_float()/use_int() and is returned.
     */
    int is_fp = (type==FLOAT||type==DOUBLE);

    if (is_fp) {
        if (reg==USE_CREG && regs[FREG_FREGISTER]==0) {
            set_freg(FREG_FREGISTER,(!is_int_reg(freg))&&regs[freg]==USING_REG);
            return FREG_FREGISTER;
        }
        use_float(type==DOUBLE, reg);
        return reg;
    }

    if (reg==USE_CREG && regs[CREG_REGISTER]==0) {
        set_ireg(CREG_REGISTER,is_int_reg(creg)&&regs[creg]==USING_REG);
        return CREG_REGISTER;
    }
    use_int(reg);
    return reg;
}

void
code_set_fixed_creg(int reg,int mode,int type)
{
    /* Make reg the current integer register; nothing is done for
       float, double and (unsigned) long long types. */
    if (type==FLOAT||type==DOUBLE) {
        return;
    }
    if (type==LONGLONG||type==ULONGLONG) {
        return;
    }
    set_ireg(reg,mode);
}

int
code_set_return_register(int mode)
{
    /*
     * Called before jumping to the leave code: select the return register
     * that matches the current function's return type.  Returns ireg for
     * integer and long long results, 0 for float, double and void.
     */
    int rt = cadr(fnptr->ty);

    if (rt==FLOAT) {
        set_freg(RET_FREGISTER,mode);
        return 0;
    }
    if (rt==DOUBLE) {
        set_freg(RET_DREGISTER,mode);
        return 0;
    }
    if (rt==LONGLONG||rt==ULONGLONG) {
        set_lreg(RET_LREGISTER,mode);
        return ireg;
    }
    if (rt==VOID) {
        return 0;
    }
    set_ireg(RET_REGISTER,mode);
    return ireg;
}

void
gen_gdecl(char *n, int gpc)
{
    /* Deliberately does nothing on this target; keep the body empty. */
}

/*
 * Emit the NUL-terminated string s as an assembler string directive
 * (.ascii with an explicit \0 on Apple, .string elsewhere).
 *
 * Newline, backslash and double quote get their usual escapes.  Control
 * characters and bytes >= 0x80 are written as three-digit octal escapes.
 * Each byte is handled as an unsigned value: with a signed plain char a
 * byte >= 0x80 would compare below ' ' and be printed by %03o as a
 * sign-extended number far longer than three digits.
 */
extern void
ascii(char *s)
{
#ifdef __APPLE__
    printf("\t.ascii \"");
#else
    printf("\t.string \"");
#endif
    for (; *s; s++) {
	unsigned int c = (unsigned char)*s;
	if (c=='\n')
	    printf("%cn",92);
	else if (c<' ' || c>=0x80)
	    printf("%c%03o",92,c);
	else if (c=='\\')
	    printf("\\\\");
	else if (c==34)
	    printf("%c%c",92,34);
	else 
	    printf("%c",(int)c);
    }
#ifdef __APPLE__
    printf("\\0%c\n",34);
#else
    printf("%c\n",34);
#endif
}

extern int
emit_string_label()
{
    /* Switch to the string section, define a fresh label there and
       return its number. */
    cstring_mode();
    int lb = fwdlabel();
    printf("_%d:\n",lb);
    return lb;
}

extern void 
emit_string(char *s,int t)
{
    /*
     * Emit string initializer s for an object of type t.  For a char or
     * unsigned char array the bytes are emitted in place; otherwise the
     * string is emitted under a new label in the string section and a
     * reference to that label is emitted as data.
     */
    int in_place = 0;

    t = type_value(t);
    if (car(t)==ARRAY) {
        if (type_value(cadr(t))==CHAR||type_value(cadr(t))==UCHAR) {
            in_place = 1;
        }
    }
    if (in_place) {
        ascii(s);
        return;
    }

    int l = emit_string_label();
    ascii(s);
    emit_label(l);
}

void
code_align(int t)
{
    /*
     * Emit ".align 2" and reset data_alignment when the current
     * data_alignment is misaligned for type t: the low bit is checked for
     * short types, the low two bits for everything except char types,
     * which need no alignment.
     */
    int misaligned;

    if (t==CHAR || t==UCHAR) {
        return;
    }
    if (t==SHORT || t==USHORT) {
        misaligned = data_alignment & 1;
    } else {
        misaligned = data_alignment & 3;
    }
    if (!misaligned) {
        return;
    }
    printf("\t.align 2\n");
    data_alignment = 0;
}

extern void
emit_global(NMTBL *n,int a,int e)
{
    /*
     * Start the definition of global n: select the section (the string
     * section for a char array initialized from a STRING node, the data
     * section otherwise), align for type a, export the symbol unless it
     * is static, and define its label.
     */
    int t = type_value(n->ty);
    int string_array = 0;

    if (e>0 && car(e)==STRING && t>0 && car(t)==ARRAY) {
        if (type_value(cadr(t))==CHAR||type_value(cadr(t))==UCHAR) {
            string_array = 1;
        }
    }
    if (string_array) {
        cstring_mode();
    } else {
        data_mode(n->nm);
    }
    code_align(a);
#ifdef __APPLE__
    if (n && n->sc!=STATIC) {
        printf(".globl\t_%s\n",n->nm);
    }
    printf("_%s:\n",n->nm); 
#else
    if (n && n->sc!=STATIC) {
        printf(".globl\t%s\n",n->nm);
    }
    printf("%s:\n",n->nm); 
#endif
}

extern void
emit_space(int sp)
{
    /* Reserve sp bytes in the data section. */
    char *name = 0;

    data_mode(name);
    printf("\t.space\t%d\n",sp);
}

extern void
emit_char(int d)
{
    /* Emit d as one data byte. */
    char *name = 0;

    data_mode(name);
    printf("\t.byte %d\n",d);
}

extern void
emit_short(int d)
{
    /* Emit d with the .short directive. */
    char *name = 0;

    data_mode(name);
    printf("\t.short %d\n",d);
}

extern void
emit_int(int d)
{
    /* Emit d with the .long directive. */
    char *name = 0;

    data_mode(name);
    printf("\t.long %d\n",d);
}

extern void
emit_longlong(int e)
{
    /* Emit the long long constant stored in node e as a .quad in hex.
       Compiles to nothing unless LONGLONG_CODE is enabled. */
#if LONGLONG_CODE
    long long value = lcadr(e);

    data_mode(0);
    printf("\t.quad\t0x%llx\n",value);
#endif
}

extern void
emit_double(int e)
{
    /* Emit the double constant stored in node e as two .long words; the
       word order depends on ENDIAN_D.  Compiles to nothing unless
       FLOAT_CODE is enabled. */
#if FLOAT_CODE
    double value = dcadr(e);

    data_mode(0);
#if (ENDIAN_D==0)
    printf("\t.long\t0x%x,0x%x\n",code_d1(value),code_d2(value));
#else
    printf("\t.long\t0x%x,0x%x\n",code_d2(value),code_d1(value));
#endif
#endif
}

/*
 * Emit the constant stored in node e, converted to float, as one .long
 * holding its bit pattern.  Compiles to nothing unless FLOAT_CODE is
 * enabled.
 *
 * The bits are read through a union rather than by casting the address of
 * the float to int *, which breaks the strict-aliasing rule.
 */
extern void
emit_float(int e)
{
#if FLOAT_CODE
    union { float f; unsigned int u; } bits;

    bits.f = dcadr(e);
    data_mode(0);
    printf("\t.long\t0x%x\n",bits.u);
#endif
}

extern void
emit_address(char *s,int offset)
{
    /* Emit the address of symbol s (plus offset when non-zero) as a
       .quad; Apple builds prefix the symbol with an underscore. */
    data_mode(0);
#ifdef __APPLE__
    if (!offset) {
        printf("\t.quad _%s\n",s);
    } else {
        printf("\t.quad _%s+%d\n",s,offset);
    }
#else
    if (!offset) {
        printf("\t.quad %s\n",s);
    } else {
        printf("\t.quad %s+%d\n",s,offset);
    }
#endif
}

extern void
emit_label(int labelno)
{
    /* Emit the address of local label _<labelno> as a .quad. */
    char *name = 0;

    data_mode(name);
    printf("\t.quad _%d\n",labelno);
}

extern void
emit_data_closing(NMTBL *n)
{
    /* Close a global definition.  Only acts in GDECL mode: switches to
       the data section and, when DOT_SIZE is defined, emits an end label
       and a .size directive for n. */
    if (mode!=GDECL) {
        return;
    }
    data_mode(0);
#ifdef DOT_SIZE
    {
        int lb = fwdlabel();
        printf("_%d:\n",lb);
        printf("\t.size\t%s,_%d-%s\n",n->nm,lb,n->nm);
    }
#endif
}



/*
 * Per-function hook called from global_table() for EXTRN1 code/function
 * symbols on Apple builds.  It emits nothing.
 *
 * The body used to carry a disabled (#if 0, "We don't need this") draft
 * that printed a Mach-O __eh_frame FDE entry for n.  That draft could not
 * compile if enabled (missing comma in the printf argument list,
 * undefined lfb1/lfb2/lfci2/numreg, stray ");"), so it has been removed
 * and the no-op is now explicit.
 */
void
exntern_entry(NMTBL *n) 
{
    (void)n;
}



/*
 * Emit storage for all uninitialized globals in global_list.
 * Entries with dsp == -1 are initialized globals and are skipped, as are
 * code segments and functions.  Non-Apple builds emit .comm for both GVAR
 * and STATIC; Apple builds emit .comm for GVAR, .lcomm for STATIC, a
 * "$stub" alias for static functions, call exntern_entry() for external
 * functions and finish with .subsections_via_symbols.
 */
void
global_table(void)
{
#ifndef __APPLE__
    NMTBL *n;
    int init;
    init=0;     /* set once the data section has been selected */
    for(n=global_list;n;n = n->next) {
	/* NOTE(review): empty statement body -- looks like a leftover */
	if (is_code(n) || is_function(n)) {
	}
	if ((n->sc == GVAR||n->sc == STATIC) && n->dsp != -1) {
	    if (is_code(n)||is_function(n)) continue;
	    /* n->dsp = -1 means initialized global */
	    if (init==0) {
		data_mode(0);
		init=1;
	    }
	    printf(".comm %s,%d\n",n->nm,size(n->ty));
	    // .lcomm?
	}
    }

#else

    NMTBL *n;
    int init;
    init=0;     /* set once the data section has been selected */
    for(n=global_list;n;n = n->next) {
	if ((n->sc == GVAR) && n->dsp != -1) {
	    if (is_code(n)||is_function(n)) continue;
	    /* n->dsp = -1 means initialized global */
	    if (init==0) {
		data_mode(0);
		init=1;
	    }
	    printf(".comm _%s,%d\n",n->nm,size(n->ty));
        } else if ((n->sc==STATIC) && n->dsp != -1) {
            /* n->dsp = -1 means initialized global */
            if (is_code(n)||is_function(n)) {
		/* static function: alias its stub name to the function */
		printf("\t.set L_%s$stub,_%s\n",n->nm,n->nm);
		continue;
	    }
            if (init==0) {
                data_mode(0);
                init=1;
            }
            printf(".lcomm _%s,%d\n",n->nm,size(n->ty));
	}
    }
#if 0
    init=0;
    for(n = global_list;n!=&null_nptr;n = n->next) {
        if (n->sc == GVAR || 
		((is_code(n) || is_function(n)) &&has_attr(n,FNAME)) ) {
            if (init==0) {
printf(".section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\nEH_frame1:\n");
printf(
"        .set L$set$0,LECIE1-LSCIE1\n"
"        .long L$set$0\n"
"LSCIE1:\n"
"        .long   0x0\n"
"        .byte   0x1\n"
"        .ascii \"zR\0\"\n"
"        .byte   0x1\n"
"        .byte   0x78\n"
"        .byte   0x10\n"
"        .byte   0x1\n"
"        .byte   0x10\n"
"        .byte   0xc\n"
"        .byte   0x7\n"
"        .byte   0x8\n"
"        .byte   0x90\n"
"        .byte   0x1\n"
"        .align 3\n"
"LECIE1:\n");
                init=1;
            }
	}
    }
#endif
    /* NOTE(review): this loop stops at &null_nptr while the loops above
       stop at a NULL next pointer -- confirm both terminate the list. */
    for(n = global_list;n!=&null_nptr;n = n->next) {
        if (n->sc==EXTRN1) {
	    if ((is_code(n) || is_function(n))) {
		exntern_entry(n) ;
            }
        }
    }
    printf("	    .subsections_via_symbols\n");
#endif
}


void
local_table(void)
{
    /*
     * Emit .lcomm storage for the static local variables in
     * local_static_list.  The data section is selected on the first
     * STATIC entry; entries with dsp == -1 (already initialized) get no
     * storage.
     */
    NMTBL *n;
    int in_data = 0;

    for (n = local_static_list; n; n = n->next) {
        if (n->sc != STATIC) {
            continue;
        }
        if (!in_data) {
            data_mode(0);
            in_data = 1;
        }
        if (n->dsp == -1) {
            continue;
        }
#ifdef __APPLE__
        printf(".lcomm _%s,%d\n",n->nm,size(n->ty));
#else
        printf(".lcomm %s,%d\n",n->nm,size(n->ty));
#endif
    }
}

void
cstring_mode(int align)
{
    /* Switch output to the read-only string section unless it is already
       selected.  `align` is not used. */
    if (output_mode==RODATA_EMIT_MODE) {
        return;
    }
#ifndef __APPLE__
    printf(".section\t.rodata\n\t.align 2\n");
#else
    printf("\t.cstring\n");
#endif
    output_mode = RODATA_EMIT_MODE;
}

void
text_mode(int align)
{
    /* Switch output to the text section unless it is already selected.
       `align` is not used. */
    if (output_mode==TEXT_EMIT_MODE) {
        return;
    }
    printf(".text\n");
    output_mode = TEXT_EMIT_MODE;
}

void
data_mode(char *name)
{
    /* Switch output to the data section if needed.  On non-Apple builds
       a non-NULL name additionally gets a ".type name,@object" line. */
    int already = (output_mode==DATA_EMIT_MODE);

    if (!already) {
        printf(".data\n");
        output_mode = DATA_EMIT_MODE;
    }
#ifndef __APPLE__
    if (name) {
        printf("\t.type\t%s,@object\n",name);
    }
#endif
}

#if FLOAT_CODE

/* floating point */


char *
fload(int d)
{
    /* SSE scalar move mnemonic: "movsd" when d is non-zero (double),
       "movss" otherwise (float). */
    if (d) {
        return "movsd";
    }
    return "movss";
}
/* stores use the same mnemonic as loads */
#define fstore(d) fload(d)

void code_dassign_gvar(int e2,int freg,int d)
{
    /* Emit a store of float register freg into the global named by e2
       (symbol from ncaddr(e2), displacement from cadr(e2)). */
    use_float(d,freg);
#ifdef __APPLE__
    int base = get_ptr_cache(ncaddr(e2));
    int off = cadr(e2);
    char *st = fstore(d);
    char *src = fregister_name(freg);
    char *brn = register_name(base,0);
    if (off) {
        printf("\t%s %s,%d(%s)\n",st,src,off,brn);
    } else {
        printf("\t%s %s,(%s)\n",st,src,brn);
    }
#else
    int off = cadr(e2);
    char *st = fstore(d);
    char *src = fregister_name(freg);
    char *sym = (ncaddr(e2))->nm;
    if (off) {
        printf("\t%s %s,%s+%d\n",st,src,sym,off);
    } else {
        printf("\t%s %s,%s\n",st,src,sym);
    }
#endif
}

void code_dassign_lvar(int e2,int freg,int d)
{
    /* Emit a store of float register freg into local variable e2; the
       operand text for the local is produced by lvar(). */
    use_float(d,freg);
    char *st = fstore(d);
    char *src = fregister_name(freg);
    printf("\t%s %s,",st,src);
    lvar(e2);
    printf("\n");
}

/*
 * Float register to float register copy:  e = f  (emitted as movapd).
 */
void code_dassign_dregister(int e,int d,int f)
{
    use_float(d,f);
    char *src = fregister_name(f);
    char *dst = fregister_name(e);
    printf("\tmovapd %s,%s\n",src,dst); 
}

void code_dassign(int e2,int freg,int d)
{
    /* Emit a store of float register freg through the pointer held in
       integer register e2. */
    use_float(d,freg);
    char *st = fstore(d);
    char *src = fregister_name(freg);
    char *ptr = register_name(e2,0);
    printf("\t%s %s,(%s)\n",st,src,ptr);
}

static int
code_dload_1(int d, int reg);

/* Probe value used to detect the host's word order inside a double:
   1.0 has 0x3ff00000 in its high 32-bit word and 0 in its low word. */
static double d0 = 1.0;

/*
 * code_d1(d) returns the low 32-bit word of d's representation and
 * code_d2(d) the high word, whatever the word order of the host.
 * Both assume a 64-bit double and a 32-bit int, as the code always did.
 *
 * The words are read through a union; the earlier (int *)&d cast broke
 * the strict-aliasing rule.
 */
static int
code_d1(double d)
{
    union { double d; int w[2]; } probe, val;

    probe.d = d0;
    val.d = d;
    /* probe.w[1] holding the high word means w[0] is the low word */
    return (probe.w[1] == 0x3ff00000) ? val.w[0] : val.w[1];
}

static int
code_d2(double d)
{
    union { double d; int w[2]; } probe, val;

    probe.d = d0;
    val.d = d;
    return (probe.w[1] == 0x3ff00000) ? val.w[1] : val.w[0];
}

static void emit_dconst0(ValuePtr value, int lb, void *arg)
{
    /*
     * Callback handed to get_data_label(): emit the constant `value`
     * under label _<lb> in a literal section, then go back to the text
     * section.  arg carries the double (non-zero) / float (zero) flag.
     */
    int is_double = ((long) arg) != 0;

#ifdef __APPLE__
    printf(" \t.literal8\n\t.align 3\n");
#else
    printf(" \t.section\t.rodata\n\t.align 8\n");
#endif
    printf("_%d:\n",lb);
#if ENDIAN_D==0
    if (is_double) {
        printf("\t.long\t0x%x,0x%x\n",code_d1(value->d),code_d2(value->d));
    } else {
        printf("\t.long\t0x%x\n",value->i);
    }
#endif
    /* output_mode was not updated above, so .text is printed directly
       when it still says text */
    if (output_mode!=TEXT_EMIT_MODE) {
        text_mode(0);
    } else {
        printf(".text\n");
    }
}

/*
 * Load a double / float constant into a float register.
 *   e2   : constant node, value read with dcadr()
 *   freg : destination float register
 *   d    : non-zero for double, zero for float
 * 1.0 is loaded through code_dload_1(), 0.0 by XORing the register with
 * itself; every other value is placed in memory via get_data_label() and
 * loaded from there.
 * TODO from the original author: remember which constants were already
 * created.
 */
void code_dconst(int e2,int freg,int d)
{ 
    int sz;
    Value value;
    value.d = dcadr(e2);
    if (d) {
       sz = sizeof(double);
    } else {
       value.f = (float) value.d;
       sz = sizeof(float);
    }

    use_float(d,freg);
    /* NOTE(review): for floats value.f was written just above; if Value
       is a union the value.d tests below see the overwritten bytes --
       confirm against the Value declaration. */
    if (value.d==1.0) {
        code_dload_1(d,freg);
        return;
    }
    if (value.d==0.0) {
        char *f = fregister_name(freg);
        if (d) {
            printf("\txorpd %s,%s\n",f,f);
        } else {
            printf("\txorps %s,%s\n",f,f);
        }
        return ;
    }
    long d_mode = d;    /* widened so it can travel through the void * argument */
    int lb = get_data_label(&value,sz,emit_dconst0, (void*) d_mode);
#ifdef __APPLE__
    printf("\tmovs%s _%d(%%rip),%s\n",d?"d":"s",lb,fregister_name(freg));
#else
    printf("\tmovs%s _%d,%s\n",d?"d":"s",lb,fregister_name(freg));
#endif
}

/*
 * Builtin hooks for fabsf, fabs, inff and inf.  All four bodies are
 * empty here, so this backend emits no code for them.
 * NOTE(review): presumably not yet implemented for this target -- confirm.
 */
void
code_builtin_fabsf(int e)
{
}
void
code_builtin_fabs(int e)
{
}
void
code_builtin_inff()
{
}
void
code_builtin_inf()
{
}

void code_dneg(int freg,int d)
{
    /* Negate freg by computing 0 - freg in a newly allocated register,
       which then becomes the current float register. */
    use_float(d,freg);
    int zero_reg = get_dregister(d);
    code_dconst(dlist2(DCONST,0),zero_reg,d);
    char *suffix = d?"d":"s";
    printf("\tsubs%s      %s,%s\n",suffix,fregister_name(freg),fregister_name(zero_reg));
    set_freg(zero_reg,0);
}

void code_d2i(int reg)
{
    /* double -> int: truncating convert from float register reg into a
       newly allocated integer register. */
    use_float(1,reg);
    char *src = fregister_name(reg);
    set_ireg(get_register(),0);
    char *dst = register_name(ireg,0);
    printf("\tcvttsd2si    %s,%s\n",src,dst);
}

void code_i2d(int reg)
{
    /* int -> double: sign-extend the low 32 bits of reg with a shift
       pair, then convert into a newly allocated float register. */
    use_int(reg);
    int src = reg;
    set_freg(get_dregister(1),0);
    char *srn = register_name(src,0);
    printf("\tsalq $32,%s\n",srn);
    printf("\tsarq $32,%s\n",srn);
    printf("\tcvtsi2sd     %s,%s\n",srn,fregister_name(freg));
}

void code_d2u(int reg)
{
    /* double -> unsigned: 64-bit truncating convert from float register
       reg into a newly allocated integer register. */
    use_float(1,reg);
    char *src = fregister_name(reg);
    set_ireg(get_register(),0);
    char *dst = register_name(ireg,0);
    printf("\tcvttsd2siq      %s,%s\n",src,dst);
}

/*
 * Disabled (#if 0) alternative unsigned -> float/double conversion.
 * For values with the top bit set it halves the value (keeping the low
 * bit), converts, and doubles the result; other values are converted
 * directly.  Not compiled; code_u2d() and code_u2f() are used instead.
 */
#if 0
static
void code_u2d1(int reg,int d)
{ 
    use_int(reg);
    int i = reg;
    char *u = register_name(i,0);
    int tmp = get_data_register();
    char *t = register_name(tmp,0);
    set_freg(get_dregister(d),0);
    char *dn = fregister_name(freg);
    int td = get_dregister(d);
    char *dbs = d?"d":"s";
    printf("        cmpq    $0, %s\n",u);
    printf("        js      1f\n");
    printf("        cvtsi2s%sq       %s,%s\n",dbs,u,dn);
    printf("        jmp     2f\n");
    printf("1:\n");
    printf("        movq    %s, %s\n",u,t);
    printf("        shrq    %s\n",t);
    printf("        andl    $1, %s\n",register_name(i,SIZE_OF_INT));
    printf("        orq     %s, %s\n",t,u);
    printf("        cvtsi2s%sq       %s, %s\n",dbs,u,dn); printf("        movap%s  %s, %s\n",dbs,dn,fregister_name(td));
    printf("        adds%s   %s, %s\n",dbs,dn,fregister_name(td));
    printf("2:\n");
    free_register(tmp);
    free_register(td);
}
#endif

void code_u2d(int reg)
{
    /* unsigned -> double: 64-bit integer convert of reg into a newly
       allocated float register. */
    use_int(reg);
    int src = reg;
    set_freg(get_dregister(1),0);
    char *srn = register_name(src,0);
    printf("\tcvtsi2sdq     %s,%s\n",srn,fregister_name(freg));
}

void code_d2f(int reg)
{
    /* double -> float: convert reg into a newly allocated float
       register. */
    use_float(0,reg);
    int src = reg;
    set_freg(get_dregister(0),0);
    char *srn = fregister_name(src);
    printf("\tcvtsd2ss    %s,%s\n",srn,fregister_name(freg));
}
void code_f2d(int reg)
{
    /* float -> double: convert reg into a newly allocated float
       register. */
    use_float(0,reg);
    int src = reg;
    set_freg(get_dregister(1),0);
    char *srn = fregister_name(src);
    printf("\tcvtss2sd    %s,%s\n",srn,fregister_name(freg));
}
void code_f2i(int reg)
{
    /* float -> int: truncating convert from float register reg into a
       newly allocated integer register (named through creg). */
    use_float(0,reg);
    int src = reg;
    set_ireg(get_register(),0);
    char *srn = fregister_name(src);
    printf("\tcvttss2si    %s,%s\n",srn,register_name(creg,0));
}

void code_f2u(int reg)
{
    /* float -> unsigned: 64-bit truncating convert from float register
       reg into a newly allocated integer register (named through creg). */
    use_float(0,reg);
    int src = reg;
    set_ireg(get_register(),0);
    char *srn = fregister_name(src);
    printf("\tcvttss2siq      %s,%s\n",srn,register_name(creg,0));
}

void code_i2f(int reg)
{
    /* int -> float: sign-extend the low 32 bits of reg with a shift
       pair, then convert into a newly allocated float register (named
       through creg). */
    use_int(reg);
    int src = reg;
    set_freg(get_dregister(0),0);
    char *srn = register_name(src,0);
    printf("\tsalq $32,%s\n",srn);
    printf("\tsarq $32,%s\n",srn);
    printf("\tcvtsi2ss     %s,%s\n",srn,fregister_name(creg));
}
void code_u2f(int reg)
{
    /* unsigned -> float: a 32-bit self-move clears the upper half of
       reg, then the full register is converted into a newly allocated
       float register (named through creg). */
    use_int(reg);
    int src = reg;
    set_freg(get_dregister(0),0);
    char *low = register_name(src,SIZE_OF_INT);
    printf("\tmovl %s,%s\n",low,register_name(src,SIZE_OF_INT));
    printf("\tcvtsi2ss     %s,%s\n",register_name(src,0),fregister_name(creg));
}

void code_drgvar(int e2,int d,int freg)
{
    /* Emit a load of the float/double global named by e2 (symbol from
       ncaddr(e2), displacement from cadr(e2)) into float register freg. */
    use_float(d,freg);
#ifdef __APPLE__
    int base = get_ptr_cache(ncaddr(e2));
    int off = cadr(e2);
    char *ld = fload(d);
    char *brn = register_name(base,0);
    char *dst = fregister_name(freg);
    if (off) {
        printf("\t%s %d(%s),%s\n",ld,off,brn,dst);
    } else {
        printf("\t%s (%s),%s\n",ld,brn,dst);
    }
#else
    int off = cadr(e2);
    char *ld = fload(d);
    char *sym = (ncaddr(e2))->nm;
    char *dst = fregister_name(freg);
    if (off) {
        printf("\t%s %s+%d,%s\n",ld,sym,off,dst);
    } else {
        printf("\t%s %s,%s\n",ld,sym,dst);
    }
#endif
}


/*
 * Load a local floating variable into freg.
 * The mnemonic comes from fload(d); lvar(e2) prints the memory operand
 * for the local.
 */
void code_drlvar(int e2,int d,int freg)
{
    use_float(d,freg);
    printf("\t%s ",fload(d));
    lvar(e2);
    printf(",%s\n", fregister_name(freg));
}

/*
 * Compare a global floating variable with zero and branch.
 *   e2    : node whose cadr is the byte offset and whose ncaddr is the symbol
 *   reg   : xmm register that receives the loaded value
 *   d     : non-zero for double ("d" suffix), zero for float ("s" suffix)
 *   label, cond : forwarded to jcond() after the comparison
 *
 * A scratch xmm register is cleared and then compared against the loaded
 * value with ucomis[sd], the same sequence code_cmp_dregister() emits.
 */
void code_cmp_drgvar(int e2,int reg,int d,int label,int cond)
{ 
    char *db = d?"d":"s";
    use_float(d,reg);
    char *f = fregister_name(reg);
    int t = get_dregister(d);
    char *z = fregister_name(t);
#ifdef __APPLE__
    int r = get_ptr_cache(ncaddr(e2));
    if (cadr(e2))
	printf("\tmovs%s %d(%s),%s\n",db,cadr(e2),register_name(r,0),f);
    else
	printf("\tmovs%s (%s),%s\n",db,register_name(r,0),f);
#else
    if (cadr(e2))
	printf("\tmovs%s %s+%d,%s\n",db,(ncaddr(e2))->nm,cadr(e2),f);
    else
	printf("\tmovs%s %s,%s\n",db,(ncaddr(e2))->nm,f);
#endif
    /* x ^ x == 0: zero the scratch register.  XORing the loaded value
       into it (as before) left it with unspecified contents. */
    printf("\txorp%s %s,%s\n",db,z,z);
    printf("\tucomis%s %s,%s\n",db,f,z);
    free_register(t);
    jcond(label,cond);
}

/*
 * Compare a local floating variable with zero and branch.
 *   e2    : local variable operand, printed by lvar()
 *   reg   : xmm register that receives the loaded value
 *   d     : non-zero for double, zero for float
 *   label, cond : forwarded to jcond() after the comparison
 *
 * The previous body emitted the x87 instruction "fcomp", which neither
 * uses the SSE registers this back end keeps values in nor sets the
 * EFLAGS bits that a conditional jump tests.  The sequence now mirrors
 * code_cmp_drgvar(): load, clear a scratch register, ucomis[sd].
 */
void code_cmp_drlvar(int e2,int reg,int d,int label,int cond)
{ 
    char *db = d?"d":"s";
    use_float(d,reg);
    char *f = fregister_name(reg);
    int t = get_dregister(d);
    char *z = fregister_name(t);

    /* same load sequence as code_drlvar() */
    printf("\t%s ",fload(d)); lvar(e2); printf(",%s\n",f);
    printf("\txorp%s %s,%s\n",db,z,z);
    printf("\tucomis%s %s,%s\n",db,f,z);
    free_register(t);
    jcond(label,cond);
}

/*
 * Emit a two-operand scalar SSE instruction "op oreg,reg".
 *   op   : FADD/DADD/FSUB/DSUB/FDIV/DDIV/FMUL/DMUL select an arithmetic
 *          mnemonic; FCMP/DCMP select ucomiss/ucomisd.  Anything else
 *          (including FCMPGE/DCMPGE) is reported with error(-1).
 *   reg  : destination xmm register
 *   oreg : source operand; a value <= -REG_LVAR_OFFSET denotes a local
 *          variable, which is first loaded into a temporary register via
 *          code_drlvar().  -1 is reported with error(-1).
 * The temporary, if one was taken, is released before returning.
 */
void
dtosop(int op,int reg,int oreg)
{ 
    char *mnemonic="";
    char *dst;
    char *src;
    int spill = -1;

    use_float(1,reg);
    if (oreg==-1) {
        error(-1);
    } else if (oreg<= -REG_LVAR_OFFSET) {
        spill = get_dregister(1);
        if (spill<0) error(-1);
        regs[spill] = USING_REG;
        code_drlvar(oreg+REG_LVAR_OFFSET,1,spill);
        oreg = spill;
    }
    src=fregister_name(oreg);
    dst=fregister_name(reg);

    switch(op) {
    case FADD: mnemonic="addss";   break;
    case DADD: mnemonic="addsd";   break;
    case FSUB: mnemonic="subss";   break;
    case DSUB: mnemonic="subsd";   break;
    case FDIV: mnemonic="divss";   break;
    case DDIV: mnemonic="divsd";   break;
    case FMUL: mnemonic="mulss";   break;
    case DMUL: mnemonic="mulsd";   break;
    case FCMP: mnemonic="ucomiss"; break;
    case DCMP: mnemonic="ucomisd"; break;
    default:
        /* DCMPGE, FCMPGE and unknown operators end up here */
        error(-1);
        return;
    }

    /* compare and arithmetic forms share one operand layout */
    if (oreg<= -REG_LVAR_OFFSET) {
	printf("\t%s ",mnemonic);
        lvar(oreg);
        printf(",%s\n",dst);
    } else {
	printf("\t%s %s,%s\n",mnemonic,src,dst);
    }
    if (spill!=-1) free_register(spill);
}


void
code_dassop(int op,int reg,int d) {
    /* we have lvalue in creg, applied floating value is in freg */
    //  (*creg) op = pop()
    int  xreg=emit_dpop(d);
    char *crn;
    char *frn;

    crn=register_name(ireg,0);
    use_float(d,reg);
    frn  =fregister_name(reg);

    printf("\t%s 0(%s),%s\n",fload(d),crn,frn);
    dtosop(op,reg,xreg);
    printf("\t%s %s,0(%s)\n",fstore(d),frn,crn);
    emit_dpop_free(xreg,d);
}

/*
 * Floating assignment operator on a register:  reg op= popped value.
 */
void
code_register_dassop(int reg,int op,int d)
{
    int operand = emit_dpop(d);

    dtosop(op,reg,operand);
    emit_dpop_free(operand,d);
}

/*
 * Load the constant held in float_one (d != 0) or float_one_f (d == 0)
 * into xmm register g, and mark the corresponding *_lib_used flag.
 * The constant's address is obtained with get_ptr_cache().
 * Returns g.
 */
static int
code_dload_1(int d, int g)
{
    NMTBL *one;
    int base;
    char *base_name,*dst_name;

    if (!d) {
        float_one_f_lib_used=1;
        one = &float_one_f;
    } else {
        float_one_lib_used=1;
        one = &float_one;
    }
    base = get_ptr_cache(one);
    base_name = register_name(base,0);
    dst_name = fregister_name(g);
    printf("\tmovs%s 0(%s),%s\n",d?"d":"s",base_name,dst_name);
    return g;
}

/*
 * Floating pre-increment / pre-decrement.
 *   e1  : node whose caddr gives the direction (>0 adds, otherwise subtracts)
 *   e2  : operand; either a DREGISTER/FREGISTER variable or an lvalue
 *         expression evaluated with g_expr() to an address in ireg
 *   d   : non-zero for double, zero for float
 *   reg : result register, or USE_CREG to allocate one
 * The constant is loaded by code_dload_1() into a temporary that is freed
 * at the end.
 */
void
code_dpreinc(int e1,int e2,int d,int reg) {
    char *frn,*crn,*grn;
    int  g;
    char *ops = (caddr(e1)>0)?(d?"addsd":"addss"):(d?"subsd":"subss");

    if (car(e2)==DREGISTER||car(e2)==FREGISTER) {
        crn=fregister_name(cadr(e2));
        grn = fregister_name(g = code_dload_1(d,get_dregister(d)));
        if (reg==USE_CREG) {
            reg=get_dregister(d); if (!reg) error(-1);
            set_freg(reg,0);
        }
        frn=fregister_name(reg);
        printf("\t%s %s,%s\n",ops,grn,crn);
        /* Copy the updated variable into the result register.  AT&T
           order is source,destination; the operands used to be swapped,
           which overwrote the variable with the unset result register. */
        if (use && reg!=cadr(e2))
            printf("\tmovap%s %s,%s\n",d?"d":"s",crn,frn);
    } else {
        g_expr(e2);
        if (!is_int_reg(creg)) error(-1);
        crn=register_name(ireg,0);
        if (reg==USE_CREG) {
            reg=get_dregister(d); if (!reg) error(-1);
            set_freg(reg,0);
        }
        frn=fregister_name(reg);
        grn = fregister_name(g = code_dload_1(d,get_dregister(d)));
        /* load, adjust, store back through the address in crn */
        printf("\t%s 0(%s),%s\n",fload(d),crn,frn);
        printf("\t%s %s,%s\n",ops,grn,frn);
        printf("\t%s %s,0(%s)\n",fstore(d),frn,crn);
    }
    free_register(g);
}

/*
 * Floating post-increment / post-decrement.
 *   e1  : node whose caddr gives the direction (>0 adds, otherwise subtracts)
 *   e2  : operand; either a DREGISTER/FREGISTER variable or an lvalue
 *         expression evaluated with g_expr() to an address in ireg
 *   d   : non-zero for double, zero for float
 *   reg : result register, or USE_CREG to allocate one
 * When the value is used, the old value is preserved before the update.
 */
void
code_dpostinc(int e1,int e2,int d,int reg) {
    char *frn,*crn,*grn;
    int  g,t = 0;
    char *ops = (caddr(e1)>0)?(d?"addsd":"addss"):(d?"subsd":"subss");

    if (car(e2)==DREGISTER||car(e2)==FREGISTER) {
        crn=fregister_name(cadr(e2));
        grn = fregister_name(g = code_dload_1(d,get_dregister(d)));
        if (reg==USE_CREG) {
            reg=get_dregister(d); if (!reg) error(-1);
            set_freg(reg,0);
        }
        frn=fregister_name(reg);
        /* Save the old value of the variable into the result register
           before updating it.  AT&T order is source,destination; the
           operands used to be swapped, which clobbered the variable. */
        if (use && reg!=cadr(e2))
            printf("\tmovap%s %s,%s\n",d?"d":"s",crn,frn);
        printf("\t%s %s,%s\n",ops,grn,crn);
    } else {
        g_expr(e2);
        if (!is_int_reg(creg)) error(-1);
        crn=register_name(ireg,0);
        if (reg==USE_CREG) {
            reg=get_dregister(d); if (!reg) error(-1);
            set_freg(reg,0);
        }
        frn=fregister_name(reg);
        grn = fregister_name(g = code_dload_1(d,get_dregister(d)));
        printf("\t%s 0(%s),%s\n",fload(d),crn,frn);
	if (use) {
	    /* keep the old value in a fresh register */
	    t = get_dregister(d);
	    printf("\tmovap%s %s,%s\n",d?"d":"s",frn,fregister_name(t));
	}
        printf("\t%s %s,%s\n",ops,grn,frn);
        printf("\t%s %s,0(%s)\n",fstore(d),frn,crn);
	if (use) {
	    /* the preserved old value becomes the expression result */
	    set_freg(t,0); 
	}
    }
    free_register(g);
}


#define COND_BRANCH 1
#define COND_VALUE  2

/* return 1 if boolean expression */

/*
 * Generate a floating comparison of e1 and e2.
 *   op   : FOP/DOP + GT/GE/EQ/NEQ
 *   cond : when zero the test is inverted first (GT <-> GE with swapped
 *          operands, EQ <-> NEQ)
 *   mode : COND_BRANCH emits "j<cc> _l1"; otherwise "set<cc>" into the
 *          byte form of reg followed by movzbq to widen it
 * Returns 1 when code was generated, 0 for an operator it does not handle.
 */
int
drexpr0(int e1, int e2,int l1, int op,int cond,int reg,int mode)
{       
    char *cc = "a";
    int cmp = FCMP;

    if (!cond) {
	/* negate the relation */
	switch(op) {
	case FOP+GT:  return drexpr0(e2,e1,l1,FOP+GE,1,reg,mode);
	case FOP+GE:  return drexpr0(e2,e1,l1,FOP+GT,1,reg,mode);
	case DOP+GT:  return drexpr0(e2,e1,l1,DOP+GE,1,reg,mode);
	case DOP+GE:  return drexpr0(e2,e1,l1,DOP+GT,1,reg,mode);
	case FOP+EQ:  op=FOP+NEQ; break;
	case FOP+NEQ: op=FOP+EQ;  break;
	case DOP+EQ:  op=DOP+NEQ; break;
	case DOP+NEQ: op=DOP+EQ;  break;
	default:
	    return 0;
	}
    }

    /* choose the compare node (float or double) and the condition code */
    switch(op) {
    case DOP+GE:
	cmp = DCMP;
	/* fallthrough */
    case FOP+GE:
	cc = "ae";
	break;
    case DOP+GT:
	cmp = DCMP;
	/* fallthrough */
    case FOP+GT:
	break;
    case DOP+EQ:
	cmp = DCMP;
	/* fallthrough */
    case FOP+EQ:
	cc = "e";
	break;
    case DOP+NEQ:
	cmp = DCMP;
	/* fallthrough */
    case FOP+NEQ:
	cc = "ne";
	break;
    default:
	return 0;
    }
    g_expr(list3(cmp,e1,e2));

    if (mode!=COND_BRANCH) {
	use_data_reg(reg,0);
	printf("\tset%s\t%s\n",cc,register_name(reg,1));
	printf("\tmovzbq\t%s,%s\n",
	    register_name(reg,1),register_name(reg,0));
    } else {
	printf("\tj%s\t_%d\n",cc,l1);
    }
    return 1;
}

/*
 * Branching form of a floating comparison: delegates to drexpr0() in
 * COND_BRANCH mode and returns the label l1 unchanged.
 */
int
drexpr(int e1, int e2,int l1, int op,int cond)
{
    (void)drexpr0(e1,e2,l1,op,cond,USE_CREG,COND_BRANCH);
    return l1;
}

/*
 * Value form of a floating comparison: unpacks the operator and both
 * operands from e1 and asks drexpr0() for a 0/1 result in reg.
 * Returns whatever drexpr0() returns.
 */
static int
drexpr_bool(int e1, int reg)
{
    int op  = car(e1);
    int lhs = cadr(e1);
    int rhs = caddr(e1);

    return drexpr0(lhs,rhs,0,op,1,reg,COND_VALUE);
}


/*
 * Bring the floating register variable e2 into freg.
 *   e2   : xmm register holding the variable
 *   freg : destination register
 *   d    : non-zero for double (movapd), zero for float (movaps)
 * Nothing is emitted when both are the same register.  An integer
 * register passed as e2 is reported with error(-1).
 */
void 
code_dregister(int e2,int freg,int d)
{
    use_float(d,freg);
    if (freg!=e2) {
        if (is_int_reg(e2)) error(-1);
        /* AT&T order is source,destination: copy e2 into freg.  The
           previous text spelled the mnemonic "tmovap" and had the two
           operands the other way round. */
        printf("\tmovap%s %s,%s\n",d?"d":"s",fregister_name(e2),fregister_name(freg));
    }
}

/*
 * Compare the current floating register (creg) with zero and branch.
 * A scratch xmm register is cleared with xorp[sd] and compared using
 * ucomis[sd]; jcond(label,cond) emits the jump, after which the scratch
 * register is released.
 */
void
code_cmp_dregister(int e2,int d,int label,int cond)
{
    int scratch;
    char *zero_name;
    char *value_name;
    char *suffix;

    use_float(d,e2);
#ifdef __APPLE__
    if (regs[REG_EAX]==PTRC_REG)
	clear_ptr_cache_reg(REG_EAX);
#endif
    scratch = get_dregister(d);
    zero_name = fregister_name(scratch);
    value_name = fregister_name(creg);
    suffix = d?"d":"s";

    printf("\txorp%s %s,%s\n",suffix,zero_name,zero_name); 
    printf("\tucomis%s %s,%s\n",suffix,value_name,zero_name);
    jcond(label,cond);
    free_register(scratch);
}

/*
 * Pop the top of the floating register stack.
 * If the popped entry denotes a saved local variable (value
 * <= -REG_LVAR_OFFSET), it is reloaded into a fresh register with
 * code_drlvar(), the local is released, and that register is returned.
 * Otherwise the popped register number is returned as is.
 */
int emit_dpop(int d)
{ 
    int popped = pop_fregister();

    if (popped > -REG_LVAR_OFFSET)
        return popped;

    int reload = get_dregister(d);
    code_drlvar(REG_LVAR_OFFSET+popped,d,reload);
    free_lvar(REG_LVAR_OFFSET+popped);
    return reload;
}

/*
 * Release a register previously returned by emit_dpop().
 * The d argument is accepted but not used.
 */
void emit_dpop_free(int e1,int d)
{
    (void)d;
    free_register(e1);
}

void
emit_dpush(int d)
{ 
    int new_reg;
    if (!is_float_reg(creg)) error(-1);
    if (freg_sp>MAX_MAX) error(-1);
    new_reg = get_dregister(d);       /* 絶対に取れる */
    freg_stack[freg_sp++] = freg;     /* push するかわりにレジスタを使う */
    creg = freg = new_reg;
}


#endif

void
code_save_stacks()
{
    /* registers stacks are saved in local variable */
    int i,reg;
    for(i=0;i<reg_sp;i++) {
        if ((reg=reg_stack[i])>=0) {
            code_lassign_lvar(
                (reg_stack[i]=new_lvar(SIZE_OF_LONGLONG)),reg); 
            reg_stack[i]= reg_stack[i]-REG_LVAR_OFFSET;
            if (regs[reg]!=REG_VAR) free_register(reg);
        }
    }
#if FLOAT_CODE
    for(i=0;i<freg_sp;i++) {
        if ((reg=freg_stack[i])>=0) {
            code_dassign_lvar(
                (freg_stack[i]=new_lvar(SIZE_OF_DOUBLE)),reg,1); 
            freg_stack[i]= freg_stack[i]-REG_LVAR_OFFSET;
            free_register(reg);
        }
    }
#endif
}

static void
code_clear_stack_reg(int reg1)
{
    /* specified registers stacks are saved in local variable */
    /* temporal registers are saved in local variable */
    int i,reg;
    if (regs[reg1]==PTRC_REG)
        clear_ptr_cache_reg(reg1);

    for(i=0;i<reg_sp;i++) {
        if ((reg=reg_stack[i])>=0 && reg==reg1) {
            code_lassign_lvar(
                (reg_stack[i]=new_lvar(SIZE_OF_LONGLONG)),reg); 
            reg_stack[i]= reg_stack[i]-REG_LVAR_OFFSET;
            if (regs[reg]!=REG_VAR) free_register(reg);
        }
    }
}


#if LONGLONG_CODE


/* 64-bit integer part.
    In Intel 64 mode these are basically the same as the 32-bit operations.
 */

/*
 * 64-bit integer comparison with branch: generates an LCMP node for
 * e1,e2 and then a conditional jump to label l1 using the condition
 * string from code_cond().  Returns l1.
 */
int
lrexpr(int e1, int e2,int l1, int op,int cond)
{
    int cmp_node = list3(LCMP,e1,e2);

    g_expr(cmp_node);
    printf("\tj%s\t_%d\n",code_cond(OP(op),cond),l1);
    return l1;
}

/* 64-bit pop: same as emit_pop(0). */
int emit_lpop()
{
    int popped = emit_pop(0);
    return popped;
}

/*
 * 64-bit register-variable load: identical to the generic
 * code_register().  (A void function must not "return" an expression,
 * so the call is made as a plain statement.)
 */
void code_lregister(int e2,int reg)
{
    code_register(e2,reg);
}

/*
 * Compare a 64-bit register with zero (cmpq $0) and emit the
 * conditional jump through jcond().
 */
void code_cmp_lregister(int reg,int label,int cond)
{
    use_longlong(reg);
    printf("\tcmpq $0,%s\n",
	   register_name(reg,0));
    jcond(label,cond);
}

/*
 * Compare a 64-bit global variable with zero and branch.
 *   e1 : node whose cadr is the byte offset and whose ncaddr is the symbol
 *   e2 : register passed to use_longlong()
 * On __APPLE__ the address comes from get_ptr_cache(); elsewhere the
 * symbol name is used directly.
 */
void code_cmp_lrgvar(int e1,int e2,int label,int cond)
{
    use_longlong(e2);
#ifdef __APPLE__
    int base = get_ptr_cache(ncaddr(e1));
    if (!cadr(e1)) {
        printf("\tcmpq $0,(%s)\n",register_name(base,0));
    } else {
        printf("\tcmpq $0,%d(%s)\n",cadr(e1),register_name(base,0));
    }
#else
    if (!cadr(e1)) {
        printf("\tcmpq $0,%s\n",(ncaddr(e1))->nm);
    } else {
        printf("\tcmpq $0,%s+%d\n",(ncaddr(e1))->nm,cadr(e1));
    }
#endif
    jcond(label,cond);
}

/*
 * Compare a 64-bit local variable with zero and branch.
 * lvar(e1) prints the memory operand of the local.
 */
void code_cmp_lrlvar(int e1,int e2,int label,int cond)
{
    use_longlong(e2);
    printf("\tcmpq $0,");
    lvar(e1);
    printf("\n");
    jcond(label,cond);
}

/*
 * Store the 64-bit register e2 to the address held in register e1.
 */
void code_lassign(int e1,int e2)
{
    use_longlong(e2);
    printf("\tmovq %s,(%s)\n",
	   register_name(e2,0),
	   register_name(e1,0));
}

/* 64-bit store to a global: code_assign_gvar() with SIZE_OF_LONGLONG. */
void code_lassign_gvar(int e1,int e2)
{
    int width = SIZE_OF_LONGLONG;
    code_assign_gvar(e1,e2,width);
}

/* 64-bit store to a local: code_assign_lvar() with SIZE_OF_LONGLONG. */
void code_lassign_lvar(int e1,int e2)
{
    int width = SIZE_OF_LONGLONG;
    code_assign_lvar(e1,e2,width);
}

/* 64-bit store to a register variable: code_assign_register() with SIZE_OF_LONGLONG. */
void code_lassign_lregister(int e2,int reg)
{
    int width = SIZE_OF_LONGLONG;
    code_assign_register(e2,width,reg);
}

/*
 * Load the 64-bit constant stored in node e1 (read with lcadr) into
 * register creg using movq with an immediate operand.
 */
void
code_lconst(int e1,int creg)
{
    use_longlong(creg);
    printf("\tmovq $%lld,%s\n",
	   lcadr(e1),
	   register_name(creg,0));
}

/* Negate the 64-bit register e1 in place (negq). */
void code_lneg(int e1)
{
    use_longlong(e1);
    printf("\tnegq %s\n",
	   register_name(e1,0));
}

/* 64-bit load of a global: code_crgvar() with sign 0 and SIZE_OF_LONGLONG. */
void code_lrgvar(int e1,int e2)
{
    int width = SIZE_OF_LONGLONG;
    code_crgvar(e1,e2,0,width);
}

/* 64-bit load of a local: code_crlvar() with sign 0 and SIZE_OF_LONGLONG. */
void code_lrlvar(int e1,int e2)
{
    int width = SIZE_OF_LONGLONG;
    code_crlvar(e1,e2,0,width);
}

/* 64-bit binary operation: tosop1() with SIZE_OF_LONGLONG. */
void
ltosop(int op,int reg,int e2)
{
    int width = SIZE_OF_LONGLONG;
    tosop1(op,reg,e2,width);
}

/*
 * Decide whether 64-bit operator op can take node e as a constant
 * operand.
 *   - e must be a CONST or LCONST node, otherwise 0.
 *   - shifts: the constant must lie in 0..63.
 *   - LMUL/LUMUL/LUDIV/LADD/LSUB: the constant must lie strictly between
 *     -0x10000000 and 0x10000000 and ilog() of it must be non-zero.
 *   - everything else (including LBAND/LEOR/LBOR and LDIV): 0.
 */
int code_lconst_op_p(int op,int e)
{
    long long v;
    int tag = car(e);

    if (tag==CONST) {
	v = cadr(e);
    } else if (tag==LCONST) {
	v = lcadr(e);
    } else {
	return 0;
    }

    switch(op) {
    case LLSHIFT: case LULSHIFT:
    case LRSHIFT: case LURSHIFT:
	return (0<=v&&v<=63);
    case LMUL: case LUMUL:
    case LUDIV:
    case LADD: case LSUB:
	return -0x10000000LL<v&&v<0x10000000LL && ilog(v);
    default:
	break;
    }
    return 0;
}

/* 64-bit operation with a constant operand: oprtc1() with SIZE_OF_LONGLONG. */
void loprtc(int op,int reg,int e)
{
    int width = SIZE_OF_LONGLONG;
    oprtc1(op,reg,e,width);
}

/* Release a register obtained from emit_lpop(); same as emit_pop_free(). */
void emit_lpop_free(int e1)
{
    int popped = e1;
    emit_pop_free(popped);
}

/* 64-bit push: same as emit_push(). */
void emit_lpush()
{
    (void)emit_push();
}

/*
 * Sign-extend a 32-bit int to 64 bits.
 * The current register is forced into REG_EAX with use_register() and
 * "cltq" is emitted; REG_EAX is then installed with set_ireg().
 * The reg argument is not consulted; USE_CREG is always used.
 */
void code_i2ll(int reg)
{
    int cur = USE_CREG;

    use_longlong(cur);
    use_register(cur,REG_EAX,1);

    /* cltq works on eax/rax only, hence the fixed register above */
    printf("\tcltq\n");
    set_ireg(REG_EAX,0);
}

/* int -> unsigned long long: same code as code_i2ll(). */
void code_i2ull(int reg)
{
    use_longlong(reg);

    code_i2ll(reg);
}

/*
 * Widen an unsigned 32-bit value in reg to 64 bits.
 *
 * An unsigned source must be zero-extended.  The previous "movslq"
 * sign-extended, turning e.g. 0x80000000u into 0xffffffff80000000.
 * A 32-bit self-move clears the upper half of the 64-bit register on
 * x86-64; code_u2f() uses the same idiom.
 */
void code_u2ll(int reg)
{
    use_longlong(reg);
    printf("\tmovl %s,%s\n",
	   register_name(reg,SIZE_OF_INT),
	   register_name(reg,SIZE_OF_INT));
}

/* unsigned -> unsigned long long: same code as code_u2ll(). */
void code_u2ull(int reg)
{
    use_longlong(reg);

    code_u2ll(reg);
}

/* long long -> int: no instruction is emitted, only use_longlong(). */
void code_ll2i(int reg) {
    use_longlong(reg);
}

/* long long -> unsigned: no instruction is emitted, only use_longlong(). */
void code_ll2u(int reg) {
    use_longlong(reg);
}

/* unsigned long long -> int: no instruction is emitted, only use_longlong(). */
void code_ull2i(int reg) {
    use_longlong(reg);
}

/* unsigned long long -> unsigned: no instruction is emitted, only use_longlong(). */
void code_ull2u(int reg) {
    use_longlong(reg);
}

#if FLOAT_CODE
/*
 * double -> long long with truncation (cvttsd2siq).
 * The destination is the register from get_register(), installed with
 * set_ireg().
 */
void code_d2ll(int reg)
{
    char *src;

    use_float(1,reg);
    src = fregister_name(reg);
    set_ireg(get_register(),0);
    printf("\tcvttsd2siq      %s,%s\n",src,register_name(ireg,0));
}

/*
 * double -> unsigned long long.  Emits the same signed truncating
 * conversion (cvttsd2siq) as code_d2ll().
 */
void code_d2ull(int reg)
{
    char *src;

    use_float(1,reg);
    src = fregister_name(reg);
    set_ireg(get_register(),0);
    printf("\tcvttsd2siq      %s,%s\n",src,register_name(ireg,0));
}

/*
 * float -> long long with truncation (cvttss2siq).
 * The destination is the register from get_register(), installed with
 * set_ireg().
 */
void code_f2ll(int reg)
{
    char *src;

    use_float(0,reg);
    src = fregister_name(reg);
    set_ireg(get_register(),0);
    printf("\tcvttss2siq      %s,%s\n",src,register_name(ireg,0));
}

/*
 * float -> unsigned long long.  Emits the same signed truncating
 * conversion (cvttss2siq) as code_f2ll().
 */
void code_f2ull(int reg)
{
    char *src;

    use_float(0,reg);
    src = fregister_name(reg);
    set_ireg(get_register(),0);
    printf("\tcvttss2siq      %s,%s\n",src,register_name(ireg,0));
}

/*
 * long long -> double (cvtsi2sdq).
 * The destination is the register from get_dregister(1), installed with
 * set_freg().
 */
void code_ll2d(int reg)
{
    char *src;

    use_longlong(reg);
    src = register_name(reg,0);
    set_freg(get_dregister(1),0);
    printf("\tcvtsi2sdq      %s,%s\n",src,fregister_name(freg));
}

/*
 * long long -> float (cvtsi2ssq).
 * The destination is the register from get_dregister(1), installed with
 * set_freg().
 */
void code_ll2f(int reg)
{
    char *src;

    use_longlong(reg);
    src = register_name(reg,0);
    set_freg(get_dregister(1),0);
    printf("\tcvtsi2ssq      %s,%s\n",src,fregister_name(freg));
}

/* unsigned long long -> double: delegates to the signed code_ll2d(). */
void code_ull2d(int reg) {
    code_ll2d(reg);
}

/* unsigned long long -> float: delegates to the signed code_ll2f(). */
void code_ull2f(int reg) {
    code_ll2f(reg);
}

#endif


/*
 * 64-bit pre-increment / pre-decrement.
 *   e1  : node whose caddr is the signed step added to the operand
 *   e2  : operand; a REGISTER variable is updated in place, anything else
 *         is evaluated with g_expr() to an address and updated in memory
 *   reg : receives the new value when the result is used
 */
void code_lpreinc(int e1,int e2,int reg)
{
    int step = caddr(e1);

    if (car(e2)!=REGISTER) {
        char *adr;

        g_expr(e2);
        adr = register_name(creg,0);
        use_int(reg);
        printf("\taddq $%d,(%s)\n",step,adr);
        if (use)
            printf("\t%s (%s),%s\n",cload(0,SIZE_OF_LONGLONG),adr,register_name(reg,0));
        return;
    }

    /* register variable: add first, then copy the new value out */
    use_int(reg);
    printf("\taddq $%d,%s\n",step,register_name(cadr(e2),0));
    if (use)
        printf("\tmovq %s,%s\n",register_name(cadr(e2),0),register_name(reg,0));
}

/*
 * 64-bit post-increment / post-decrement.
 *   e1  : node whose caddr is the signed step added to the operand
 *   e2  : operand; a REGISTER variable is updated in place, anything else
 *         is evaluated with g_expr() to an address and updated in memory
 *   reg : receives the old value when the result is used
 */
void code_lpostinc(int e1,int e2,int reg)
{
    int step = caddr(e1);

    if (car(e2)!=REGISTER) {
        int adr_reg;
        char *adr;

        g_expr(e2);
        /* move the address out of the current register via push/pop */
        emit_push();  
        adr_reg = emit_pop(0);
        adr = register_name(adr_reg,0);
        use_int(reg);
        if (use)
            printf("\t%s (%s),%s\n",cload(0,SIZE_OF_LONGLONG),adr,register_name(reg,0));
        printf("\taddq $%d,(%s)\n",step,adr);
        emit_pop_free(adr_reg);
        return;
    }

    /* register variable: copy the old value out, then add */
    use_int(reg);
    if (use)
        printf("\tmovq %s,%s\n",register_name(cadr(e2),0),register_name(reg,0));
    printf("\taddq $%d,%s\n",step,register_name(cadr(e2),0));
}

void code_lassop(int op,int reg)
{
    int xreg;
    //  (*pop()) op = creg
    //     creg should be ecx

    use_longlong(reg);
    xreg = emit_pop(0);       /* pop e3 value */
    emit_push();
    ld_indexx(SIZE_OF_LONGLONG,0,reg,ireg,0);
    ltosop(op,ireg,xreg);
    emit_pop_free(xreg);
    xreg = emit_pop(0);       /* pop e3 value */
    printf("\t%s %s,(%s)\n",move(SIZE_OF_LONGLONG),register_name(ireg,0),register_name(xreg,0))
;
    emit_pop_free(xreg);
}

/*
 * 64-bit assignment operator on a register:  reg op= popped value.
 */
void
code_register_lassop(int reg,int op)
{
    int operand;

    use_longlong(reg);
    operand = emit_lpop();
    ltosop(op,reg,operand);
    emit_lpop_free(operand);
}


#endif





#if CASE_CODE

/*
 * Tell the caller whether a jump table may be used for a switch whose
 * case values are spaced by delta.  This back end always answers yes.
 */
int
code_table_jump_p(int delta)
{
    (void)delta;
    return 1;
}

/*
 * Emit the dispatch code of a table-driven switch.
 *   l       : label number of the jump table
 *   csvalue : register holding the switch value
 *   delta   : spacing between consecutive case values
 *   max,min : largest and smallest case value
 *   dlabel  : label number of the default case
 *
 * The value is rebased by subtracting min and range-checked against
 * max-min with an unsigned "ja" to the default label.  For delta==1 the
 * rebased value indexes the table directly.  For other deltas the value
 * is moved to EAX, values that are not a multiple of delta branch to the
 * default label, and the table is then indexed.
 *
 * On __APPLE__ the table holds 32-bit offsets relative to the table
 * start (loaded with movslq and added to %rbx); %rbx is written without
 * going through the register allocator.
 *
 * NOTE(review): on the non-Apple path code_table_value() emits ".long"
 * entries while the jumps here use "jmp *_l(...)" with scale factors 8,
 * 2, 1 and 4 for the four cases, and %eax as an index in two of them.
 * These look inconsistent with each other and with 4-byte entries --
 * confirm against a non-Apple build before relying on this path.
 */
void
code_table_jump(int l,int csvalue,int delta,int max,int min,int dlabel)
{
    char *crn;
    // use_register(creg,csvalue,0);
    set_ireg(csvalue,0);
    crn = register_name(creg,0);
    char *crnl = register_name(creg,SIZE_OF_INT);
    /* rebase to 0 and reject anything above max-min (unsigned compare) */
    printf("\tsubl\t$%d,%s\n",min,crnl);
    printf("\tcmpl\t$%d,%s\n",max-min,crnl);
    printf("\tja\t_%d\n",dlabel);
    if (delta==1)  {
#ifdef __APPLE__
	printf("\tleaq\t_%d(%%rip),%%rbx\n",l);
	printf("\tmovslq\t(%%rbx,%s,4),%s\n",crn,crn);
	printf("\taddq\t%%rbx,%s\n",crn);
	printf("\tjmp\t*%s\n",crn);
#else
	printf("\tjmp\t*_%d(,%s,8)\n",l,crn);
#endif
	return;
    }
#ifdef __APPLE__
	if (regs[REG_EAX]==PTRC_REG)
	    clear_ptr_cache_reg(REG_EAX);
#endif
    /* the remaining cases work on a fixed register (EAX/RAX) */
    use_register(creg,REG_EAX,1);
    crn = "%rax";
    
    switch(delta) {
    case 2:
	/* odd rebased values are not in the table */
	printf("\tmovl\t$1,%%edx\n");
	printf("\tandl\t%%eax,%%edx\n");
	printf("\tjne\t_%d\n",dlabel);
#ifdef __APPLE__
	printf("\tleaq\t_%d(%%rip),%%rbx\n",l);
	printf("\tmovslq\t(%%rbx,%s,2),%s\n",crn,crn);
	printf("\taddq\t%%rbx,%s\n",crn);
	printf("\tjmp\t*%s\n",crn);
#else
	printf("\tjmp\t*_%d(,%%eax,2)\n",l); 
#endif
	break;
    case 4:
	/* rebased values with either low bit set are not in the table */
	printf("\tmovl\t$3,%%edx\n");
	printf("\tandl\t%%eax,%%edx\n");
	printf("\tjne\t_%d\n",dlabel);
#ifdef __APPLE__
	printf("\tleaq\t_%d(%%rip),%%rbx\n",l);
	printf("\tmovslq\t(%%rbx,%s),%s\n",crn,crn);
	printf("\taddq\t%%rbx,%s\n",crn);
	printf("\tjmp\t*%s\n",crn);
#else
	printf("\tjmp\t*_%d(%%eax)\n",l); 
#endif
	break;
    default:
	/* general delta: divide; divl leaves the quotient in eax and the
	   remainder in edx, and a non-zero remainder means "not a case" */
	printf("\tmovl $%d,%%ecx\n",delta);
	printf("\txor %%edx,%%edx\n\tdivl %%ecx\n");
	printf("\tandl\t%%edx,%%edx\n");
	printf("\tjne\t_%d\n",dlabel);
#ifdef __APPLE__
	printf("\tleaq\t_%d(%%rip),%%rbx\n",l);
	printf("\tmovslq\t(%%rbx,%s,4),%s\n",crn,crn);
	printf("\taddq\t%%rbx,%s\n",crn);
	printf("\tjmp\t*%s\n",crn);
#else
	printf("\tjmp\t*_%d(,%%rax,4)\n",l); 
#endif
	break;
    }
    
}

/*
 * Begin a jump table: switch to data emission, print the section and
 * alignment directives for the platform, and define label l at the
 * table start with fwddef().
 */
void
code_table_open(int l)
{
    output_mode = DATA_EMIT_MODE;
#ifdef __APPLE__
    printf(" \t.align 2\n");
#else
    printf(" \t.section\t.rodata\n\t.align 4\n");
#endif
    fwddef(l);
}

/*
 * Emit one jump-table entry for the given case label.
 * On __APPLE__ the entry is the label's offset from table_top;
 * elsewhere it is the label itself.
 */
void
code_table_value(int label,int table_top)
{
#ifdef __APPLE__
    printf("\t.long _%d-_%d\n",
	   label,table_top);
#else
    printf("\t.long _%d\n",
	   label);
#endif
}

/* End a jump table by calling text_mode(0). */
void
code_table_close()
{
    (void)text_mode(0);
}

#endif


#if ASM_CODE

/*
    print an operand  
 */

/*
 * Print one inline-asm operand described by node rstr.
 *   REGISTER : the 64-bit register name
 *   CONST    : the decimal value
 *   FNAME    : the symbol name (with "_" prefix and %rip-relative on __APPLE__)
 *   LABEL    : "_<number>" (also %rip-relative on __APPLE__)
 * Any other node kind is reported with error(-1).
 */
static void
emit_asm_operand(int rstr)
{
    int kind = car(rstr);

    if (kind==REGISTER) {
	printf("%s",register_name(cadr(rstr),0));
	return;
    }
    if (kind==CONST) {
	printf("%d",cadr(rstr));
	return;
    }
    if (kind==FNAME) {
#ifdef __APPLE__
	printf("_%s(%%rip)",ncaddr(rstr)->nm);
#else
	printf("%s",ncaddr(rstr)->nm);
#endif
	return;
    }
    if (kind==LABEL) {
#ifdef __APPLE__
	printf("_%d(%%rip)",cadr(rstr));
#else
	printf("_%d",cadr(rstr));
#endif
	return;
    }
    error(-1);
}

/*
     prepare asm operands

     char *constraints string
     int  operand expr
     int  mode          (ASM_INPUT,ASM_OUTPUT)
     int  replacement list
     int  output operands count
     int  output operands replacement list

     return replacement list
        list3( operands, next, clobber )
                               0    can be shared in input/output
                               1    can't be used in input
 */

/*
 * Prepare one inline-asm operand and prepend it to the replacement list.
 *   p     : constraint string ("=r", "&r", "m", "i", or a decimal number
 *           referring to an output operand)
 *   e1    : operand expression
 *   mode  : ASM_INPUT or ASM_OUTPUT
 *   repl  : replacement list built so far
 *   n     : number of output operands
 *   repl0 : replacement list of the output operands
 * Returns the new replacement list, list3(operand, next, clobber), where
 * clobber is 1 for an early-clobber ("&") operand.
 * Unsupported constraints and out-of-range operand numbers are reported
 * with error(-1).
 */
int
code_asm_operand(char *p,int e1,int mode,int repl,int n,int repl0)
{
    int r;
    int c;
    int val;
    int clobber = 0;

    printf("## constraint %s\n",p);
    if (*p=='=') {
	// output register
	p++;
    }
    if (*p=='&') {
	// earlyclobber
	p++;
	clobber = 1;
    }
    c = *p;
    if (c=='r') {
	if (mode==ASM_INPUT) {
	    for(;repl0;repl0 = cadr(repl0)) {
		if (car(car(repl0))==REGISTER && caddr(repl0)==0) {
		    r = cadr(car(repl0));
		    caddr(repl0) = ASM_USED;
		    break;
		}
            }  
	    /* NOTE(review): the register found above is immediately
	       replaced by a fresh one, so inputs never share an output
	       register -- confirm whether that is intended. */
	    r = get_register();
	} else {
	    r = get_register();
	}
	repl = list3(list2(REGISTER,r),repl,clobber);
    } else if (c=='m') {
	repl = list3(list2(0,0),repl,clobber);
    } else if (c=='i') {
	if (car(e1)==GVAR) {
	    e1=list3n(FNAME,0,ncaddr(e1));
	} else if (car(e1)==FNAME) {
	    e1=list3n(FNAME,0,ncaddr(e1));
	} else if (car(e1)==STRING) {
	    val = emit_string_label();
	    ascii(ncaddr(e1)->nm);
	    e1=list2(LABEL,val);
	} else if (car(e1)==CONST) {
	} else error(-1);
	repl = list3(e1,repl,clobber);
    } else if (digit(c)) {
	/* Matching constraint.  c already holds the first digit, so step
	   past it before the loop fetches the next character; without
	   this the first digit was read twice ("1" parsed as 11). */
	p++;
	val = 0;
	do { val = val*10 + c-'0'; } while (digit(c=*p++));
	if (val>MAX_ASM_REG) error(-1); // too large register
	/* val must name an existing output operand: 0 <= val < n */
	if (n-val-1<0) error(-1);
	repl = list3(car(nth(n-val-1,repl0)),repl,clobber);
    } else error(-1);
    return repl;
}

void
code_free_asm_operand(int repl)
{
    int reg;
    for(;repl;repl=cadr(repl)) {
	if (car(car(repl))==REGISTER) {
	    reg = cadr(car(repl));
	    if (regs[reg]!=REG_VAR) free_register(reg);
	}
    }
}


extern void
code_asm(char *asm_str,int repl)
{
    int c,i,rstr,val;
    char *p;
    int reg[MAX_ASM_REG];

    text_mode(0);
    c = *asm_str;
    if (c!='\t'&&c!=' ') printf("\t");
    for(i=0;repl && i<MAX_ASM_REG;i++) {
	reg[i] = car(repl);
	repl = cadr(repl);
    }
    p = asm_str;
    while((c = *p++)) {
	if (c=='%') {
	    c = *p++;
	    if (!c) { break;
	    } else if (c=='%') {
		printf("%%"); continue;
	    } else if (!digit(c)) {
		printf("%%%c",c); continue;
	    }
	    val = 0;
	    do { val = val*10 + c-'0'; } while (digit(c=*p++)) ;
	    p--;
	    if (val>MAX_ASM_REG) error(-1); // too large register
	    rstr = reg[val];
	    emit_asm_operand(rstr);
	} else {
	    printf("%c",c);
	}
    }
    printf("\n");
}

#endif


#if BIT_FIELD_CODE

/* bit field alignment calculation */

/*
 * Decode a bit-field type descriptor.
 *   type      : cadr(type) is the base type; caddr(type) is a node whose
 *               cadr is the bit position and whose caddr is the bit width
 * Outputs:
 *   *pbitpos  : bit position
 *   *pbitsize : width of the field in bits
 *   *psign    : 1 for signed base types, 0 for unsigned ones
 *   *pbitsz   : width of the base type in bits (8/16/32/64)
 *   *palign   : alignment in bytes; for 64-bit base types 8 when the
 *               field is wider than 32 bits, otherwise 4
 *   *pl       : 1 for (unsigned) long long base types, else 0
 * An unknown base type is reported with error(-1).
 */
static void
set_bitsz(int type,int *pbitpos, int *pbitsize,
	int *psign,int *pbitsz,int *palign,int *pl)
{ 
    int sign=0,bitsz=1; 
    int align=4,l=0;
    *pbitpos = cadr(caddr(type));
    int bitsize = *pbitsize = caddr(caddr(type));

    switch(cadr(type)) { 
    case INT:		sign=1; bitsz=32; align=4;break; 
    case UNSIGNED:		bitsz=32; align=4;break; 
    case CHAR:          sign=1; bitsz= 8; align=1;break; 
    case UCHAR: 		bitsz= 8; align=1;break; 
    case SHORT:         sign=1; bitsz=16; align=2;break; 
    /* unsigned short is unsigned: it was marked signed by mistake,
       unlike UCHAR/UNSIGNED/ULONGLONG */
    case USHORT:        	bitsz=16; align=2;break; 
    case LONGLONG:      sign=1; bitsz=64; align=bitsize>32?8:4;l=1; break; 
    case ULONGLONG:            	bitsz=64; align=bitsize>32?8:4;l=1; break; 
    default: error(-1);
    }
    *psign = sign;
    *pbitsz = bitsz;
    *palign = align;
    *pl = l;
}

/*
      bit field alignment calculation
        this is architecture dependent
 */

/*
 * Decide where a bit-field is placed inside a struct.
 *   type    : bit-field type descriptor (decoded with set_bitsz())
 *   poffset : in/out, current byte offset in the struct
 *   bfd     : in/out, bits already occupied by the preceding bit-field
 *             (0 when there is none); updated to the bits occupied after
 *             this field, or 0 when the storage unit is exactly full
 *   sz      : out, number of bytes covered up to the end of this field
 * Returns the bit position of the field within its storage unit.
 * A field wider than its base type is reported with error(BTERR) and
 * clamped to the base type width.
 */
extern int
code_bit_field_disp(int type,int *poffset,int *bfd,int *sz)
{
    int sign,bitsz,align;
    int i;
    int bitpos = *bfd;
    int bitpos0;
    int bitsize;
    int offset = *poffset;
    int l;
    set_bitsz(type,&bitpos0,&bitsize,&sign,&bitsz,&align,&l);

    if (bitsize>bitsz) { error(BTERR); bitsize = bitsz; }

    /* bfd means previous bit field bit offset */
    if (bitpos) {
	/* previous field is bit field and spaces may remain */
	/* calc previsous offset */

	/* step back by the bytes the previous bit-field occupies */
	i= offset-(bitpos+7)/8;

	/* walk forward one byte at a time (bit position shrinks by 8)
	   looking for a byte offset that satisfies the alignment and
	   still leaves room for bitsize bits in a bitsz-wide unit;
	   note that l (the long-long flag from set_bitsz) is reused
	   here as the candidate bit position */
	for(l = bitpos;l>0;l -= 8,i++) {
	    if ((i & (align-1))==0 && l+bitsize <= bitsz) {
		/* alignment is correct and space remains */
		*poffset=offset=i;
		i = l+bitsize;
                *bfd = (i==bitsz)?0:i;
		*sz = (i+7)/8;
// printf("## bitpos=%d bitsize=%d bitsz=%d offset=%d\n",l,bitsize,bitsz,*poffset);
		return l;
	    } 
	}
    }

    /* first bit-field */

    /* round the offset up to the required alignment */
    if ((i=(offset & (align-1)))) {
	*poffset = (offset += (align-i));
    }
    bitpos = 0;
    *bfd = (bitsize==bitsz)?0:bitsize;
    *sz = (bitsize+7)/8;

// printf("## bitpos=%d bitsize=%d bitsz=%d offset=%d\n",bitpos,bitsize,bitsz,*poffset);
    return bitpos;
}

/* bit field value */

/*
 * Read a bit-field.
 *   type : bit-field type descriptor (decoded with set_bitsz())
 *   adr  : register holding the address of the storage unit
 *   reg  : destination register
 * The storage unit is loaded, shifted left so the field's top bit is the
 * top bit of the working width, then shifted right (arithmetically for
 * signed fields, logically otherwise) to bring it down to bit 0.
 * Consequently a signed one-bit field reads as 0 or -1.
 * The working width is 64 bits for long long base types and 32 bits
 * otherwise.
 */
extern void
code_bit_field(int type,int adr,int reg)
{
    int sign,bitsz,l,align;
    int bitsize,bitpos;
    int shift,size;

    set_bitsz(type,&bitpos,&bitsize,&sign,&bitsz,&align,&l);
    size=bitsz/8;

    if (l!=1) {
	use_int(adr);
	use_int(reg);
	printf("\t%s %d(%s),%s\n",cload(sign,size),0,register_name(adr,regu(sign,size,0)),
		    register_name(reg,0));
	/* left-justify in 32 bits */
	shift = 32-bitsize-bitpos;
	if (shift) 
	    oprtc(LSHIFT,reg,list2(CONST,shift));
	/* right-justify, extending according to signedness */
	shift = 32-bitsize;
	if (shift) 
	    oprtc(sign?RSHIFT:URSHIFT,reg,list2(CONST,shift));
	return;
    }
#if LONGLONG_CODE
    use_int(adr);
    use_longlong(reg);
    lload(adr,0,reg);
    /* left-justify in the base type width */
    shift = bitsz-bitsize-bitpos;
    if (shift) 
	loprtc(LLSHIFT,reg,list2(CONST,shift));
    /* right-justify, extending according to signedness */
    shift = bitsz-bitsize;
    if (shift) 
	loprtc(sign?LRSHIFT:LURSHIFT,reg,list2(CONST,shift));
#endif
}

/* bit field replacement */

/*
 * Merge the field bits of reg into dest (32-bit version).
 *   mask : 1-bits mark the field
 *   reg  : register holding the new, already shifted value
 *   dest : register holding the current storage unit
 * reg is saved on the stack, turned into an and-mask (reg | ~mask) and
 * applied to dest; it is then restored, reduced to the field bits
 * (reg & mask) and or-ed into dest.  The final xchg swaps the 32-bit
 * contents of reg and dest.
 */
static void
make_mask_and_or(int mask,int reg, int dest)
{
    char *reg64 = register_name(reg,0);
    char *reg32 = register_name(reg,SIZE_OF_INT);
    char *dest32 = register_name(dest,SIZE_OF_INT);

    printf("## mask 0x%08x ~0x%08x\n",mask,~mask);
    printf("\tpushq %s\n",reg64);
    /* and-mask: keep everything outside the field, field bits from reg */
    oprtc(BOR,reg,list2(CONST,~mask));
    printf("\tandl %s,%s\n",reg32,dest32);
    /* or-mask: only the field bits of the original value */
    printf("\tpopq %s\n",reg64);
    oprtc(BAND,reg,list2(CONST,mask));
    printf("\torl %s,%s\n",reg32,dest32);
    printf("\txchg %s,%s\n",reg32,dest32);
}

/*
 * Merge the field bits of reg into dest (64-bit version).
 *   mask : 1-bits mark the field
 *   reg  : register holding the new, already shifted value
 *   dest : register holding the current storage unit
 * Same sequence as make_mask_and_or() using quad-word instructions and
 * LCONST operands; the final xchgq swaps reg and dest.
 */
static void
make_mask_and_or_l(long mask,int reg, int dest)
{
    char *reg64 = register_name(reg,0);
    char *dest64 = register_name(dest,0);

    printf("## mask 0x%08lx ~0x%08lx\n",mask,~mask);
    printf("\tpushq %s\n",reg64);
    /* and-mask: keep everything outside the field, field bits from reg */
    loprtc(BOR,reg,llist2(LCONST,~mask));
    printf("\tandq %s,%s\n",reg64,dest64);
    /* or-mask: only the field bits of the original value */
    printf("\tpopq %s\n",reg64);
    loprtc(BAND,reg,llist2(LCONST,mask));
    printf("\torq %s,%s\n",reg64,dest64);
    printf("\txchgq %s,%s\n",reg64,dest64);
}

/*
 * Store a register value into a bit-field.
 *   adr   : register holding the address of the storage unit
 *   value : register holding the new field value
 *   type  : bit-field type descriptor (decoded with set_bitsz())
 * The address register is saved on the stack and reused to hold the
 * current contents of the storage unit; the shifted value is merged in
 * with make_mask_and_or()/make_mask_and_or_l(); the address is then
 * restored and the merged unit written back.  When the result is used,
 * the field is re-read with code_bit_field().
 */
extern void
code_bit_replace(int adr,int value,int type)
{
    int sign,bitsz,l,align;
    int bitsize,bitpos;
    long mask = 0;
    int size;
    set_bitsz(type,&bitpos,&bitsize,&sign,&bitsz,&align,&l);
    size = bitsz/8;
// printf("## %d: bitpos=%d bitsize=%d bitsz=%d\n",lineno,bitpos,bitsize,bitsz);
    if (l) {
#if LONGLONG_CODE
	// use_int(adr);
	use_longlong(value);
	/* shift left */
	if (bitpos) 
	    loprtc(LLSHIFT,value,list2(CONST,bitpos));
	/* make and-mask */
	/* save the address, then load the current unit over it */
	printf("\tpushq %s\n",register_name(adr,0)); 
	printf("\t%s (%s),%s\n",cload(sign,size),register_name(adr,0),
		    register_name(adr,regu(sign,size,0)));
	mask = make_mask_64(64-bitpos-bitsize,63-bitpos);
	make_mask_and_or_l(mask,value, adr);
	set_lreg(value,0);
	/* restore the address and write the merged unit back */
	printf("\tpopq %s\n",register_name(adr,0));
	printf("\t%s %s,(%s)\n",move(0),
               register_name(value,0), register_name(adr,0));
#endif
    } else {
	// use_int(adr);
	use_int(value);
	/* save the address, then load the current unit over it */
	printf("\tpushq %s\n",register_name(adr,0));
	printf("\t%s %d(%s),%s\n",cload(sign,size),0,register_name(adr,0),
		    register_name(adr,regu(sign,size,0)));
	/* shift left */
	if (bitpos) 
	    oprtc(LSHIFT,value,list2(CONST,bitpos));
	/* make and-mask */
	mask = make_mask(32-bitpos-bitsize,31-bitpos);
	make_mask_and_or(mask,value, adr);
	set_ireg(value,0);
	/* restore the address and write the merged unit back */
	printf("\tpopq %s\n",register_name(adr,0));
        code_assign(adr,size==4?0:size,value);
    }
    if (use) {
	code_bit_field(type,adr,USE_CREG);
    }
}


/*
 * Merge a constant into the field bits of reg.
 *   mask : 1-bits mark the field
 *   reg  : register holding the current storage unit
 *   c    : constant, already shifted to the field position
 * First reg is and-ed with (~mask | c) to clear the field bits that must
 * become 0, then or-ed with (mask & c) to set the bits that must become
 * 1.  Byte or word immediates are used when reg is a data register and
 * the constant permits it; otherwise the constant is loaded into a
 * temporary register, which is released at the end.
 */
static void
make_mask_and_or_const(long mask,int reg,long c)
{
    long a;
    int t = 0;
    int t_holds_a = 0;	/* set once the temporary really contains a */
    char *trn = 0;
// printf("## mask 0x%08x ~0x%08x\n",mask,~mask);
    a = ~mask|c;
    if (a!=-1) {
	/* do conjunction  */
      if (is_data_reg(reg) && ((a& ~0xffff)==~0xffff)) {
	    if ((a& ~0xff)==~0xff)
		printf("\tandb $%ld,%s\n",a&0xff,register_name(reg,1));
	    else
		printf("\tandw $%ld,%s\n",a&0xffff,register_name(reg,2));
	} else {
	    t = get_register(); trn = register_name(t,0);
	    printf("\tmovq $%ld,%s\n",a,trn);
	    printf("\tandq %s,%s\n",trn,register_name(reg,0));
	    t_holds_a = 1;
	}
    }
    /* make or-mask  */
    c = mask&c;
    if (c!=0) {
	/* do disjunction  */
      if (is_data_reg(reg) && (!(c& ~0xffff))) {
	    if (!(c& ~0xff))
		printf("\torb $%ld,%s\n",c&0xff,register_name(reg,1));
	    else
		printf("\torw $%ld,%s\n",c&0xffff,register_name(reg,2));
	} else {
	    if (!t) {
		t = get_register(); trn = register_name(t,0);
	    }
	    /* The load may be skipped only when the temporary already
	       holds the same value from the and-step.  Comparing a and c
	       alone left the temporary unset when the and-step had not
	       loaded it (e.g. a field covering the whole unit). */
	    if (!t_holds_a || a!=c)
		printf("\tmovq $%ld,%s\n",c,trn);
	    printf("\torq %s,%s\n",trn,register_name(reg,0));
	}
    }
    free_register(t);
}

/*
 * Store a constant into a bit-field.
 *   value : constant node (read with lcadr for long long base types,
 *           cadr otherwise)
 *   adr   : register holding the address of the storage unit
 *   type  : bit-field type descriptor (decoded with set_bitsz())
 * The current storage unit is loaded into a temporary register, the
 * shifted constant is merged in with make_mask_and_or_const(), and the
 * result is written back.  When the result is used, the field is re-read
 * with code_bit_field().
 */
extern void
code_bit_replace_const(int value,int adr,int type)
{
    int sign,bitsz,l,align;
    int bitpos,bitsize,size;
    long mask = 0;
    long c,lvalue;	/* lvalue holds a register number */
#if LONGLONG_CODE
    long long lc;
#endif
    set_bitsz(type,&bitpos,&bitsize,&sign,&bitsz,&align,&l);
    size = bitsz/8;
// printf("## %d: bitpos=%d bitsize=%d bitsz=%d\n",lineno,bitpos,bitsize,bitsz);
    use_int(adr);
    if (l) {
#if LONGLONG_CODE
	lvalue = get_register();
	/* shift left */
	lc = lcadr(value);
	lc <<= bitpos;
	
	/* load the current storage unit */
	printf("\t%s (%s),%s\n",cload(sign,size),register_name(adr,regu(sign,size,0)),
		    register_name(lvalue,0));
	/* make and-mask upper */
	mask = make_mask_64(64-bitpos-bitsize,63-bitpos);
	make_mask_and_or_const(mask,lvalue,lc);
	set_lreg(lvalue,0);
	/* write the merged unit back */
	printf("\t%s %s,(%s)\n",move(0),register_name(lvalue,0),
		    register_name(adr,0));
	free_register(lvalue);
#endif
    } else {
	lvalue = get_register();
	/* load the current storage unit */
	printf("\t%s %d(%s),%s\n",cload(sign,size),0,register_name(adr,regu(sign,size,0)),
		    register_name(lvalue,0));
	/* shift left */
	c = cadr(value);
	c <<= bitpos;
	/* make and-mask */
	mask = make_mask(32-bitpos-bitsize,31-bitpos);
	make_mask_and_or_const(mask,lvalue,c);
	set_ireg(lvalue,0);
	/* write the merged unit back */
        code_assign(adr,size,lvalue);
	free_register(lvalue);
    }
    if (use) 
        code_bit_field(type,adr,USE_CREG);
}

#endif

/*
 * Return 1 when the node kind e3 is one of the operations listed below,
 * 0 otherwise.  Long-long multiply and shifts, double arithmetic and
 * double comparisons are deliberately not in the list.
 */
int
not_simple_p(int e3)
{
    int complex_op = 0;

    switch(e3) {
    case FUNCTION:
    case CONV:
    case STASS:
    case ALLOCA:
    case LDIV:
    case LUDIV:
    case LMOD:
    case LUMOD:
    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
    case DPOSTINC:
    case DPREINC:
    case DASSOP:
    case RBIT_FIELD:
    case BASS:
    case BASSOP:
    case LCALL:
    case INLINE:
        complex_op = 1;
        break;
    }
    return complex_op;
}

/*
 * Assign the stack displacement of one function argument.
 *   args    : running argument offset
 *   n       : name-table entry whose dsp field receives the displacement
 *   type0   : argument type
 *   sz      : argument size in bytes
 *   is_code : non-zero selects the negated layout (-args - slot size)
 * Returns the running offset advanced past this argument.
 *
 * char and short arguments occupy one size_of_longlong slot and their
 * dsp is only set when it is still 0; when endian is set it is moved to
 * the end of the slot.  Struct/union arguments occupy
 * align(sz,size_of_longlong) bytes.  Everything else gets a dsp based on
 * one slot and advances by align(sz,size_of_longlong).
 */
extern int 
code_arg_alignment(int args,NMTBL *n, int type0,int sz, int is_code)
{
    int is_byte  = (type0==CHAR||type0==UCHAR);
    int is_short = !is_byte && (type0==SHORT||type0==USHORT);

    if (is_byte || is_short) {
        if (n->dsp==0) {
            n->dsp = is_code?-args-size_of_longlong:args;
            if (endian) {
                if (is_byte)
                    n->dsp += size_of_longlong-1;
                else
                    n->dsp += size_of_longlong-size_of_short;
            }
        }
        return args+size_of_longlong;
    }

    if (type0>0&&(car(type0)==UNION||car(type0)==STRUCT)) {
        /* alignment of a struct passed as an argument; ideally this */
        /* would be the GCD of the member alignments, and */
        /* __attribute(alignment(16)) is ignored for arguments */
        int asz =  align(sz,size_of_longlong);
        n->dsp = is_code?-args-asz:args;
        return args+asz;
    }

    /* dsp is set unconditionally here: the argument list in ADECL is
       useless, the type list can be found in type */
    n->dsp = is_code?-args-size_of_longlong:args;
    return args+align(sz,size_of_longlong);
}


/* Local variable alignment: forwards to code_lvar_alignment0(). */
extern int 
code_lvar_alignment(int args,NMTBL *n, int type0,int sz)
{
    int next = code_lvar_alignment0(args,n,type0,sz);
    return next;
}


/* end */