view src/mast/compiler.c @ 64:da6d6597bd69 default tip

rollback
author anatofuz <anatofuz@cr.ie.u-ryukyu.ac.jp>
date Fri, 15 Feb 2019 20:51:54 +0900
parents 2cf249471370
children
line wrap: on
line source

#include "moar.h"
#include "nodes.h"

/* Some constants.
 * NOTE(review): the code that consumes most of these lies outside this part
 * of the file. The *_SIZE values look like byte sizes of fixed-size records
 * (the arithmetic forms read as counts of 4- and 2-byte fields), the
 * *_HEADER_OFFSET values like byte positions inside the HEADER_SIZE-byte
 * header, and EXTOP_BASE like the first opcode number given to extension
 * ops -- confirm against the writer/reader before relying on this. */
#define HEADER_SIZE                 92
#define BYTECODE_VERSION            5
#define FRAME_HEADER_SIZE           (11 * 4 + 3 * 2)
#define FRAME_HANDLER_SIZE          (4 * 4 + 2 * 2)
#define FRAME_SLV_SIZE              (2 * 2 + 2 * 4)
#define SC_DEP_SIZE                 4
#define EXTOP_SIZE                  (4 + 8)
#define SCDEP_HEADER_OFFSET         12
#define EXTOP_HEADER_OFFSET         20
#define FRAME_HEADER_OFFSET         28
#define CALLSITE_HEADER_OFFSET      36
#define STRING_HEADER_OFFSET        44
#define SCDATA_HEADER_OFFSET        52
#define BYTECODE_HEADER_OFFSET      60
#define ANNOTATION_HEADER_OFFSET    68
#define HLL_NAME_HEADER_OFFSET      76
#define SPECIAL_FRAME_HEADER_OFFSET 80
#define EXTOP_BASE                  1024

/* Frame flags. Every value is a distinct single bit, so flags can be OR-ed
 * together and tested with a bitwise AND against a frame's flags field
 * (get_frame_index tests FRAME_FLAG_HAS_INDEX that way). */
#define FRAME_FLAG_EXIT_HANDLER     1
#define FRAME_FLAG_IS_THUNK         2
#define FRAME_FLAG_HAS_CODE_OBJ     4
#define FRAME_FLAG_NO_INLINE        8
#define FRAME_FLAG_HAS_INDEX        32768
#define FRAME_FLAG_HAS_SLV          65536

/* One entry in the hash used by get_callsite_id to reuse callsites that have
 * already been emitted. */
typedef struct {
    /* callsite ID: the index this callsite was given when it was first
     * written to the callsite segment. */
    unsigned short callsite_id;

    /* the identifier for the callsite, to clean up later. This is the hash
     * key: a heap buffer holding the flag bytes followed by the string heap
     * indexes of the named arguments. It is freed in cleanup_all. */
    unsigned char *identifier;

    /* the uthash hash handle. */
    UT_hash_handle hash_handle;
} CallsiteReuseEntry;

/* Information about a handler. */
typedef struct {
    /* Offset of start of protected region from frame start. */
    unsigned int start_offset;

    /* Offset of end of protected region, exclusive, from frame start. */
    unsigned int end_offset;

    /* Exception category mask. */
    unsigned int category_mask;

    /* Handler action. */
    unsigned short action;

    /* Local holding block to invoke, if invokey handler. */
    unsigned short local;

    /* Label, which will need resolving. */
    MASTNode *label;

    /* Local holding a label in case we have a labeled loop. */
    unsigned short label_reg;
} FrameHandler;

/* Handler actions.
 * NOTE(review): presumably the values stored in FrameHandler.action; the
 * code that assigns them is outside this part of the file -- confirm. */
#define HANDLER_UNWIND_GOTO      0
#define HANDLER_UNWIND_GOTO_OBJ  1
#define HANDLER_INVOKE           2

/* Information about a label: where it is (once known) and which places in
 * the bytecode still need its offset written into them. */
typedef struct {
    /* The label node; entries are matched by comparing this pointer. */
    MAST_Label *label;
    /* Offset of the label relative to the frame's bytecode start. */
    MVMint32    offset;          /* Negative if unknown. */
    /* Number of entries in use in resolve. */
    MVMuint16   num_resolve;
    /* Number of entries allocated for resolve; zero once the list has been
     * released (cleanup_frame only frees resolve while this is non-zero). */
    MVMuint16   alloc_resolve;
    /* Positions in the bytecode segment where a placeholder zero was written
     * and the real offset must be patched in once the label is seen. */
    MVMuint32  *resolve;
} LabelInfo;

/* Describes the state for the frame we're currently compiling. Heap members
 * (local_types, lexical_types, handlers, labels and each label's resolve
 * list) are released by cleanup_frame, which also frees the struct itself. */
typedef struct {
    /* Position of start of bytecode. */
    unsigned int bytecode_start;

    /* Position of start of frame entry. */
    unsigned int frame_start;

    /* Types of locals and lexicals, with counts. */
    unsigned short *local_types;
    unsigned short *lexical_types;
    unsigned int num_locals;
    unsigned int num_lexicals;

    /* Number of annotations. */
    unsigned int num_annotations;

    /* Handlers count and list. */
    unsigned int num_handlers;
    FrameHandler *handlers;

    /* Labels we have so far (either through finding them or finding a need
     * to fix them up). num_labels entries are in use out of alloc_labels
     * allocated; add_label grows the array. */
    LabelInfo *labels;
    unsigned int num_labels;
    unsigned int alloc_labels;

    /* Number of unresolved labels. This is counted per pending fixup: it
     * goes up by one for every placeholder written and down by the number
     * of fixups patched when the label is finally seen. */
    unsigned int unresolved_labels;
} FrameState;

/* Describes the current writer state for the compilation unit as a whole.
 * The *_seg buffers, the in-progress frame, the callsite reuse hash and the
 * struct itself are all released by cleanup_all. For the growable segments,
 * *_pos is the next write position and *_alloc the allocated size, which is
 * the pair of values ensure_space works with. */
typedef struct {
    /* The set of node types. */
    MASTNodeTypes *types;

    /* The current frame and frame count. */
    FrameState   *cur_frame;
    unsigned int  num_frames;

    /* String heap and seen hash mapping known strings to indexes. */
    MASTNode *strings;
    MASTNode *seen_strings;

    /* The SC dependencies segment; we know the size up front. */
    char         *scdep_seg;
    unsigned int  scdep_bytes;

    /* The extension ops segment; we know the size ahead of time. */
    char         *extops_seg;
    unsigned int  extops_bytes;
    unsigned int  num_extops;

    /* The frame segment. */
    char         *frame_seg;
    unsigned int  frame_pos;
    unsigned int  frame_alloc;

    /* The callsite segment and number of callsites. */
    char         *callsite_seg;
    unsigned int  callsite_pos;
    unsigned int  callsite_alloc;
    unsigned int  num_callsites;

    /* The bytecode segment. */
    char         *bytecode_seg;
    unsigned int  bytecode_pos;
    unsigned int  bytecode_alloc;

    /* The annotation segment. */
    char         *annotation_seg;
    unsigned int  annotation_pos;
    unsigned int  annotation_alloc;

    /* Current instruction info; used for error messages. */
    const MVMOpInfo    *current_op_info;

    /* Zero-based index of current frame */
    unsigned int  current_frame_idx;

    /* Zero-based index of MAST instructions */
    unsigned int  current_ins_idx;

    /* Zero-based index of current operand */
    unsigned int  current_operand_idx;

    /* The compilation unit we're compiling. */
    MAST_CompUnit *cu;

    /* Hash for callsite descriptor strings to callsite IDs */
    CallsiteReuseEntry *callsite_reuse_head;

    /* Last Annotated node, for error reporting */
    MAST_Annotated *last_annotated;
} WriterState;

/* Forward declarations. Everything here is file-local (static) except
 * MVM_mast_compile, which has external linkage. */
static unsigned int umax(unsigned int a, unsigned int b);
static void memcpy_endian(char *dest, const void *src, size_t size);
static void write_int64(char *buffer, size_t offset, unsigned long long value);
static void write_int32(char *buffer, size_t offset, unsigned int value);
static void write_int16(char *buffer, size_t offset, unsigned short value);
static void write_int8(char *buffer, size_t offset, unsigned char value);
static void write_double(char *buffer, size_t offset, double value);
static void ensure_space(VM, char **buffer, unsigned int *alloc, unsigned int pos, unsigned int need);
static void cleanup_frame(VM, FrameState *fs);
static void cleanup_all(VM, WriterState *ws);
static unsigned int get_string_heap_index(VM, WriterState *ws, VMSTR *strval);
static unsigned short get_frame_index(VM, WriterState *ws, MASTNode *frame);
static unsigned short type_to_local_type(VM, WriterState *ws, MASTNode *type);
static void compile_operand(VM, WriterState *ws, unsigned char op_flags, MASTNode *operand);
static unsigned short get_callsite_id(VM, WriterState *ws, MASTNode *flags, MASTNode *args);
static void compile_instruction(VM, WriterState *ws, MASTNode *node);
static void compile_frame(VM, WriterState *ws, MASTNode *node, unsigned short idx);
static char * form_string_heap(VM, WriterState *ws, unsigned int *string_heap_size);
static char * form_bytecode_output(VM, WriterState *ws, unsigned int *bytecode_size);
char * MVM_mast_compile(VM, MASTNode *node, MASTNodeTypes *types, unsigned int *size);

/* Returns the larger of two unsigned values (b when they are equal). */
static unsigned int umax(unsigned int a, unsigned int b) {
    if (a > b)
        return a;
    return b;
}

/* Copies size bytes from src to dest. When MVM_BIGENDIAN is defined the
 * bytes are stored in reverse order; otherwise this is a plain memcpy. */
static void memcpy_endian(char *dest, const void *src, size_t size) {
#ifdef MVM_BIGENDIAN
    const char *in  = (const char *)src;
    char       *out = dest + size;
    /* Fill dest from its last byte backwards while reading src forwards. */
    while (out != dest)
        *--out = *in++;
#else
    memcpy(dest, src, size);
#endif
}

/* Writes a 64-bit integer: copies 8 bytes of value to buffer + offset
 * through memcpy_endian. */
static void write_int64(char *buffer, size_t offset, unsigned long long value) {
    char *target = buffer + offset;
    memcpy_endian(target, &value, 8);
}

/* Writes a 32-bit integer: copies 4 bytes of value to buffer + offset
 * through memcpy_endian. */
static void write_int32(char *buffer, size_t offset, unsigned int value) {
    char *target = buffer + offset;
    memcpy_endian(target, &value, 4);
}

/* Writes a 16-bit integer: copies 2 bytes of value to buffer + offset
 * through memcpy_endian. */
static void write_int16(char *buffer, size_t offset, unsigned short value) {
    char *target = buffer + offset;
    memcpy_endian(target, &value, 2);
}

/* Writes a single byte to buffer + offset. */
static void write_int8(char *buffer, size_t offset, unsigned char value) {
    unsigned char *target = (unsigned char *)buffer + offset;
    *target = value;
}

/* Writes a double: copies 8 bytes of value to buffer + offset through
 * memcpy_endian. */
static void write_double(char *buffer, size_t offset, double value) {
    char *target = buffer + offset;
    memcpy_endian(target, &value, 8);
}

/* Ensures the specified buffer has room for `need` more bytes starting at
 * `pos`, and expands it if not.
 *
 *   buffer - address of the caller's buffer pointer; updated on growth.
 *   alloc  - address of the buffer's current size in bytes; updated on growth.
 *   pos    - position at which the caller is about to write.
 *   need   - number of bytes the caller is about to write.
 *
 * The size is doubled until the request fits. Because the buffer may move,
 * callers must re-read *buffer after every call.
 *
 * NOTE(review): pos + need is computed in unsigned int and is not checked for
 * wrap-around here, matching the original behavior. */
static void ensure_space(VM, char **buffer, unsigned int *alloc, unsigned int pos, unsigned int need) {
    unsigned int required = pos + need;
    unsigned int capacity = *alloc;

    if (required <= capacity)
        return;

    while (capacity < required) {
        /* Doubling zero stays zero, and doubling a value above half the
         * unsigned range wraps; either way the loop could never finish, so
         * settle for exactly the required size instead. */
        if (capacity == 0 || capacity > (~0u >> 1)) {
            capacity = required;
            break;
        }
        capacity *= 2;
    }

    *alloc  = capacity;
    *buffer = (char *)MVM_realloc(*buffer, capacity);
}

/* Releases a FrameState and everything it owns: the local and lexical type
 * arrays, the handler list, each label's pending-fixup list, the label
 * array, and finally the FrameState itself. */
static void cleanup_frame(VM, FrameState *fs) {
    LabelInfo *labels = fs->labels;

    if (fs->local_types)
        MVM_free(fs->local_types);
    if (fs->lexical_types)
        MVM_free(fs->lexical_types);
    if (fs->handlers)
        MVM_free(fs->handlers);

    if (labels) {
        MVMuint32 remaining = fs->num_labels;
        LabelInfo *cur      = labels;
        /* A fixup list is only freed while its allocation count is
         * non-zero. */
        for (; remaining > 0; remaining--, cur++) {
            if (cur->alloc_resolve)
                MVM_free(cur->resolve);
        }
        MVM_free(labels);
    }

    MVM_free(fs);
}

/* Releases the whole writer state: the frame being compiled (if any), every
 * output segment that was allocated, the key buffers of the callsite reuse
 * hash followed by the hash itself, and lastly the WriterState. The ws
 * pointer must not be used after this returns. */
static void cleanup_all(VM, WriterState *ws) {
    CallsiteReuseEntry *entry, *next_entry;
    unsigned bucket;

    /* Frame still under construction. */
    if (ws->cur_frame) {
        cleanup_frame(vm, ws->cur_frame);
    }

    /* Output segments. */
    if (ws->scdep_seg) {
        MVM_free(ws->scdep_seg);
    }
    if (ws->extops_seg) {
        MVM_free(ws->extops_seg);
    }
    if (ws->frame_seg) {
        MVM_free(ws->frame_seg);
    }
    if (ws->callsite_seg) {
        MVM_free(ws->callsite_seg);
    }
    if (ws->bytecode_seg) {
        MVM_free(ws->bytecode_seg);
    }
    if (ws->annotation_seg) {
        MVM_free(ws->annotation_seg);
    }

    /* The hash does not own its key buffers, so free those first. */
    HASH_ITER(hash_handle, ws->callsite_reuse_head, entry, next_entry, bucket) {
        MVM_free(entry->identifier);
    }
    MVM_HASH_DESTROY(hash_handle, CallsiteReuseEntry, ws->callsite_reuse_head);

    MVM_free(ws);
}

/* Returns the string heap index of strval. A string seen before yields its
 * recorded index; a new string is appended to the heap, recorded in the
 * seen-strings hash, and its new index returned. Cleans up the writer state
 * and throws if the heap already holds 0x7FFFFFFF or more strings. */
static unsigned int get_string_heap_index(VM, WriterState *ws, VMSTR *strval) {
    unsigned int next_index;

    if (EXISTSKEY(vm, ws->seen_strings, strval))
        return (unsigned int)ATKEY_I(vm, ws->seen_strings, strval);

    /* New string: it goes at the end of the heap. */
    next_index = (unsigned int)ELEMS(vm, ws->strings);
    if (next_index >= 0x7FFFFFFF) {
        cleanup_all(vm, ws);
        DIE(vm, "Too many strings in compilation unit");
    }
    BINDPOS_S(vm, ws->strings, next_index, strval);
    BINDKEY_I(vm, ws->seen_strings, strval, next_index);
    return next_index;
}

/* Locates the index of a frame within the compilation unit.
 *
 * If the frame carries FRAME_FLAG_HAS_INDEX its stored index is returned
 * directly. Otherwise the compilation unit's frame list is searched for the
 * node by pointer identity.
 *
 * Cleans up the writer state and throws if the frame is not in the list, or
 * if it sits at a position that cannot be encoded in the 16-bit return
 * value. */
static unsigned short get_frame_index(VM, WriterState *ws, MASTNode *frame) {
    if (((MAST_Frame *)frame)->flags & FRAME_FLAG_HAS_INDEX) {
        return (unsigned short)((MAST_Frame *)frame)->index;
    }
    else {
        int num_frames = ELEMS(vm, ws->cu->frames);
        /* The counter must be as wide as num_frames: a 16-bit counter wraps
         * at 65536 and would never reach a larger frame count. */
        int i;
        for (i = 0; i < num_frames; i++) {
            if (ATPOS(vm, ws->cu->frames, i) == frame) {
                if (i > 0xFFFF) {
                    cleanup_all(vm, ws);
                    DIE(vm, "MAST::Frame index for code ref does not fit in 16 bits");
                }
                return (unsigned short)i;
            }
        }
        cleanup_all(vm, ws);
        DIE(vm, "MAST::Frame passed for code ref not found in compilation unit");
    }
}

/* Maps a 6model object type to the register kind used for a local or
 * lexical of that type.
 *
 * A null type, or one whose storage spec is not inlineable, maps to
 * MVM_reg_obj. Inlineable types map by boxed primitive: strings to
 * MVM_reg_str, nums to the 32/64-bit num kinds, and ints to the signed or
 * unsigned 8/16/32/64-bit kinds. Any other size or boxed primitive cleans up
 * the writer state and throws. */
static unsigned short type_to_local_type(VM, WriterState *ws, MASTNode *type) {
    const MVMStorageSpec *spec;

    if (VM_OBJ_IS_NULL(type))
        return MVM_reg_obj;

    spec = REPR(type)->get_storage_spec(vm, STABLE(type));
    if (!spec->inlineable)
        return MVM_reg_obj;

    if (spec->boxed_primitive == MVM_STORAGE_SPEC_BP_STR) {
        return MVM_reg_str;
    }
    else if (spec->boxed_primitive == MVM_STORAGE_SPEC_BP_NUM) {
        if (spec->bits == 32) {
            return MVM_reg_num32;
        }
        else if (spec->bits == 64) {
            return MVM_reg_num64;
        }
        else {
            cleanup_all(vm, ws);
            DIE(vm, "Invalid num size for local/lexical");
        }
    }
    else if (spec->boxed_primitive == MVM_STORAGE_SPEC_BP_INT) {
        /* Same set of widths either way; signedness picks the kind. */
        switch (spec->bits) {
            case 8:
                return spec->is_unsigned ? MVM_reg_uint8 : MVM_reg_int8;
            case 16:
                return spec->is_unsigned ? MVM_reg_uint16 : MVM_reg_int16;
            case 32:
                return spec->is_unsigned ? MVM_reg_uint32 : MVM_reg_int32;
            case 64:
                return spec->is_unsigned ? MVM_reg_uint64 : MVM_reg_int64;
            default:
                cleanup_all(vm, ws);
                DIE(vm, "Invalid int size for local/lexical");
        }
    }
    else {
        cleanup_all(vm, ws);
        DIE(vm, "Type used for local/lexical has invalid boxed primitive in storage spec");
    }
}

/* Appends a label entry to the frame's label list, growing the storage
 * first when it is full (8 entries initially, doubling thereafter). The new
 * entry records the given offset and starts with an empty fixup list. */
static void add_label(VM, FrameState *fs, MAST_Label *l, MVMint32 offset) {
    LabelInfo *slot;

    if (fs->num_labels == fs->alloc_labels) {
        fs->alloc_labels = fs->alloc_labels ? fs->alloc_labels * 2 : 8;
        fs->labels = MVM_realloc(fs->labels, fs->alloc_labels * sizeof(LabelInfo));
    }

    slot = &fs->labels[fs->num_labels];
    slot->label         = l;
    slot->offset        = offset;
    slot->resolve       = NULL;
    slot->num_resolve   = 0;
    slot->alloc_resolve = 0;
    fs->num_labels++;
}

/* Takes a label and either writes its offset if we already saw it, or writes
 * a zero and records that a fixup is needed.
 *
 * Four bytes are always appended to the bytecode segment. When the label's
 * offset is not yet known, the position of those four bytes is added to the
 * label's fixup list (creating the label entry if there was none) and the
 * frame's unresolved count is incremented.
 *
 * Cleans up the writer state and throws if a single label accumulates more
 * pending fixups than its 16-bit counters can describe. */
static void write_label_or_add_fixup(VM, WriterState *ws, MAST_Label *l) {
    FrameState *fs   = ws->cur_frame;
    LabelInfo  *info = NULL;
    MVMuint32   i;

    /* Ensure we've space to write an offset. */
    ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 4);

    /* Look for the label. */
    for (i = 0; i < fs->num_labels; i++) {
        if (fs->labels[i].label == l) {
            /* Found it. If we know its offset, write and we're done. */
            MVMint32 offset = fs->labels[i].offset;
            if (offset >= 0) {
                write_int32(ws->bytecode_seg, ws->bytecode_pos, offset);
                ws->bytecode_pos += 4;
                return;
            }

            /* Otherwise, note this label to add the resolve need to. */
            info = &(fs->labels[i]);
            break;
        }
    }

    /* If we don't have an entry for this label yet, add it. */
    if (!info) {
        add_label(vm, fs, l, -1);
        info = &(fs->labels[fs->num_labels - 1]);
    }
    if (info->num_resolve == info->alloc_resolve) {
        /* alloc_resolve is a 16-bit field: doubling anything above 0x7FFF
         * wraps to zero, which would shrink the list and send the store
         * below out of bounds. Refuse instead. */
        if (info->alloc_resolve > 0x7FFF) {
            cleanup_all(vm, ws);
            DIE(vm, "Too many forward references to a single label");
        }
        if (info->alloc_resolve)
            info->alloc_resolve *= 2;
        else
            info->alloc_resolve = 8;
        info->resolve = MVM_realloc(info->resolve, info->alloc_resolve * sizeof(MVMuint32));
    }
    info->resolve[info->num_resolve] = ws->bytecode_pos;
    info->num_resolve++;
    fs->unresolved_labels++;

    /* Write zero, to be fixed up later. */
    write_int32(ws->bytecode_seg, ws->bytecode_pos, 0);
    ws->bytecode_pos += 4;
}

/* Records that a label sits at the current bytecode position (expressed
 * relative to the frame's bytecode start).
 *
 * A label with no entry yet simply gets one. A label that was referenced
 * before being seen has every recorded placeholder patched with the offset,
 * its fixup list released, and the frame's unresolved count reduced by the
 * number of placeholders patched. Seeing a label whose offset is already
 * known cleans up the writer state and throws. */
static void add_label_and_resolve_fixups(VM, WriterState *ws, MAST_Label *l) {
    FrameState *fs     = ws->cur_frame;
    MVMuint32   offset = ws->bytecode_pos - ws->cur_frame->bytecode_start;
    LabelInfo  *known  = NULL;
    MVMuint32   idx;

    /* Find the existing entry, if there is one. */
    for (idx = 0; idx < fs->num_labels; idx++) {
        if (fs->labels[idx].label == l) {
            known = &fs->labels[idx];
            break;
        }
    }

    /* First mention of this label: just record it. */
    if (!known) {
        add_label(vm, fs, l, offset);
        return;
    }

    if (known->offset >= 0) {
        /* Already placed once, so this is a second definition. */
        cleanup_all(vm, ws);
        DIE(vm, "Duplicate label");
    }
    else {
        /* Patch every placeholder written for this label so far. */
        MVMuint32 *pending = known->resolve;
        MVMuint32  count   = known->num_resolve;
        MVMuint32  n       = 0;
        while (n < count) {
            write_int32(ws->bytecode_seg, pending[n], offset);
            n++;
        }
        known->offset          = offset;
        known->alloc_resolve   = 0;
        known->num_resolve     = 0;
        fs->unresolved_labels -= count;
        MVM_free(known->resolve);
    }
}

/* Returns a label's offset, dying if it's not possible: when the label has
 * no entry in the current frame, or its offset is still unknown, the writer
 * state is cleaned up and an exception carrying `error` is thrown. */
static MVMuint32 demand_label_offset(VM, WriterState *ws, MAST_Label *l,
                                     const char *error) {
    FrameState *fs        = ws->cur_frame;
    LabelInfo  *entry     = fs->labels;
    MVMuint32   remaining = fs->num_labels;

    for (; remaining > 0; remaining--, entry++) {
        if (entry->label != l)
            continue;
        if (entry->offset >= 0)
            return entry->offset;
        /* Known label, but not placed yet. */
        break;
    }

    cleanup_all(vm, ws);
    DIE(vm, "%s", error);
}

/* Compiles the operand to an instruction; this involves checking
 * that we have a node of the correct type for it and writing out
 * the appropriate thing to the bytecode stream.
 *
 *   op_flags - the op's descriptor for this operand; masked with
 *              MVM_operand_rw_mask for the access kind and with
 *              MVM_operand_type_mask for the value type.
 *   operand  - the MAST node supplied for this operand.
 *
 * What gets written:
 *   literal int64 / num64 - 8 bytes
 *   literal int16         - 2 bytes (value must fit in 16 signed bits)
 *   literal str           - 4-byte string heap index
 *   literal ins (label)   - 4-byte offset, or a placeholder plus a fixup
 *   literal coderef       - 2-byte frame index
 *   register read/write   - 2-byte local index
 *   lexical read/write    - 2-byte index then 2-byte frames-out count
 *
 * Every failure path releases the writer state with cleanup_all and then
 * throws via DIE. Anything needed for the message is copied out of ws first,
 * because ws is gone once cleanup_all returns. On success
 * ws->current_operand_idx is advanced by one. */
static void compile_operand(VM, WriterState *ws, unsigned char op_flags, MASTNode *operand) {
    unsigned char op_rw   = op_flags & MVM_operand_rw_mask;
    unsigned char op_type = op_flags & MVM_operand_type_mask;
    unsigned short int local_type;
    if (op_rw == MVM_operand_literal) {
        /* Literal; go by type. */
        switch (op_type) {
            case MVM_operand_int64: {
                if (ISTYPE(vm, operand, ws->types->IVal)) {
                    MAST_IVal *iv = GET_IVal(operand);
                    ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 8);
                    write_int64(ws->bytecode_seg, ws->bytecode_pos, iv->value);
                    ws->bytecode_pos += 8;
                }
                else {
                    cleanup_all(vm, ws);
                    DIE(vm, "Expected MAST::IVal, but didn't get one");
                }
                break;
            }
            case MVM_operand_int16: {
                if (ISTYPE(vm, operand, ws->types->IVal)) {
                    MAST_IVal *iv = GET_IVal(operand);
                    ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 2);
                    if (iv->value > 32767 || iv->value < -32768) {
                        cleanup_all(vm, ws);
                        DIE(vm, "Value outside range of 16-bit MAST::IVal");
                    }
                    write_int16(ws->bytecode_seg, ws->bytecode_pos, (short)iv->value);
                    ws->bytecode_pos += 2;
                }
                else {
                    cleanup_all(vm, ws);
                    DIE(vm, "Expected MAST::IVal, but didn't get one");
                }
                break;
            }
            case MVM_operand_num64: {
                if (ISTYPE(vm, operand, ws->types->NVal)) {
                    MAST_NVal *nv = GET_NVal(operand);
                    ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 8);
                    write_double(ws->bytecode_seg, ws->bytecode_pos, nv->value);
                    ws->bytecode_pos += 8;
                }
                else {
                    cleanup_all(vm, ws);
                    DIE(vm, "Expected MAST::NVal, but didn't get one");
                }
                break;
            }
            case MVM_operand_str: {
                if (ISTYPE(vm, operand, ws->types->SVal)) {
                    MAST_SVal *sv = GET_SVal(operand);
                    ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 4);
                    write_int32(ws->bytecode_seg, ws->bytecode_pos,
                        get_string_heap_index(vm, ws, sv->value));
                    ws->bytecode_pos += 4;
                }
                else {
                    cleanup_all(vm, ws);
                    DIE(vm, "Expected MAST::SVal, but didn't get one");
                }
                break;
            }
            case MVM_operand_ins: {
                if (ISTYPE(vm, operand, ws->types->Label)) {
                    write_label_or_add_fixup(vm, ws, GET_Label(operand));
                }
                else {
                    cleanup_all(vm, ws);
                    DIE(vm, "Expected MAST::Label, but didn't get one");
                }
                break;
            }
            case MVM_operand_coderef: {
                if (ISTYPE(vm, operand, ws->types->Frame)) {
                    /* Find the frame index in the compilation unit. */
                    ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 2);
                    write_int16(ws->bytecode_seg, ws->bytecode_pos,
                        get_frame_index(vm, ws, operand));
                    ws->bytecode_pos += 2;
                }
                else {
                    cleanup_all(vm, ws);
                    DIE(vm, "Expected MAST::Frame, but didn't get one");
                }
                break;
            }
            default:
                cleanup_all(vm, ws);
                DIE(vm, "Unhandled literal type in MAST compiler");
        }
    }
    else if (op_rw == MVM_operand_read_reg || op_rw == MVM_operand_write_reg) {
        /* The operand node had best be a MAST::Local. */
        if (ISTYPE(vm, operand, ws->types->Local)) {
            MAST_Local *l = GET_Local(operand);

            /* Ensure it's within the set of known locals. This has to happen
             * before local_types is indexed below. The explicit negative
             * test matters if index is a signed type wider than unsigned
             * int: the comparison against num_locals is then done signed and
             * a negative index would slip through. */
            if (l->index < 0 || l->index >= ws->cur_frame->num_locals) {
                cleanup_all(vm, ws);
                DIE(vm, "MAST::Local index out of range");
            }

            /* Check the type matches. */
            local_type = ws->cur_frame->local_types[l->index];
            if (op_type != local_type << 3 && op_type != MVM_operand_type_var) {
                unsigned int  current_frame_idx = ws->current_frame_idx;
                unsigned int  current_ins_idx = ws->current_ins_idx;
                const char *name = ws->current_op_info->name;
                unsigned int  current_operand_idx = ws->current_operand_idx;
                cleanup_all(vm, ws);
                DIE(vm, "At Frame %u, Instruction %u, op '%s', operand %u, "
                    "MAST::Local of wrong type (%u) specified; expected %u",
                    current_frame_idx, current_ins_idx,
                    name, current_operand_idx,
                    local_type, (op_type >> 3));
            }

            /* Write the local's index, after checking it is encodable. */
            if (l->index < 0 || l->index > 32768) {
                unsigned int current_frame_idx = ws->current_frame_idx;
                cleanup_all(vm, ws);
                DIE(vm, "Frame %u local access out of range", current_frame_idx);
            }
            ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 2);
            write_int16(ws->bytecode_seg, ws->bytecode_pos, (unsigned short)l->index);
            ws->bytecode_pos += 2;
        }
        else {
            unsigned int  current_frame_idx = ws->current_frame_idx;
            unsigned int  current_ins_idx = ws->current_ins_idx;
            const char *name = ws->current_op_info->name;
            unsigned int  current_operand_idx = ws->current_operand_idx;
            cleanup_all(vm, ws);
            DIE(vm, "At Frame %u, Instruction %u, op '%s', operand %u, expected MAST::Local, but didn't get one",
                current_frame_idx, current_ins_idx, name, current_operand_idx);
        }
    }
    else if (op_rw == MVM_operand_read_lex || op_rw == MVM_operand_write_lex) {
        /* The operand node should be a MAST::Lexical. */
        if (ISTYPE(vm, operand, ws->types->Lexical)) {
            MAST_Lexical *l = GET_Lexical(operand);

            /* Write the index, then the frame count. */
            ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 4);
            write_int16(ws->bytecode_seg, ws->bytecode_pos, (unsigned short)l->index);
            ws->bytecode_pos += 2;
            write_int16(ws->bytecode_seg, ws->bytecode_pos, (unsigned short)l->frames_out);
            ws->bytecode_pos += 2;
        }
        else {
            cleanup_all(vm, ws);
            DIE(vm, "Expected MAST::Lexical, but didn't get one");
        }
    }
    else {
        cleanup_all(vm, ws);
        DIE(vm, "Unknown operand type cannot be compiled");
    }
    ws->current_operand_idx++;
}

/* Takes a set of flags describing a callsite. Writes out a callsite
 * descriptor and returns the index of it.
 *
 *   flag_node - list of per-argument flag values.
 *   args      - the call's argument nodes; for an argument flagged
 *               MVM_CALLSITE_ARG_NAMED the node at the matching position
 *               must be a MAST::SVal holding the argument name.
 *
 * Callsites are de-duplicated: the flag bytes followed by the string heap
 * indexes of the named arguments form a key into the reuse hash, and a
 * callsite with a known key returns the previously assigned index without
 * writing anything. Otherwise the descriptor is appended to the callsite
 * segment as a 16-bit flag count, the flag bytes (padded to an even count),
 * and one 32-bit string index per named argument.
 *
 * Throws, after releasing the temporaries and the writer state, if a named
 * argument's name node is not a MAST::SVal.
 *
 * NOTE(review): the returned index is truncated to 16 bits; nothing here
 * rejects a compilation unit with more callsites than that. */
static unsigned short get_callsite_id(VM, WriterState *ws, MASTNode *flag_node, MASTNode *args) {
    unsigned int        num_nameds = 0;
    /* Up to 65535 flags plus 4 bytes per named argument can exceed 16 bits,
     * so the key length is kept in a full unsigned int. */
    unsigned int        identifier_len;
    unsigned short      i;
    unsigned char      *flags, *identifier;
    unsigned int       *named_idxs;
    CallsiteReuseEntry *entry = NULL;

    /* Get callsite elements and work out if a padding byte will be needed. */
    unsigned short elems = (unsigned short)ELEMS(vm, flag_node);
    unsigned short align = elems % 2;

    /* See if the callsite has any named args, and get string pool entries
     * for them if so. */
    flags      = (unsigned char *)MVM_malloc(elems);
    named_idxs = (unsigned int *)MVM_malloc(elems * sizeof(int));
    for (i = 0; i < elems; i++) {
        flags[i] = (unsigned char)ATPOS_I_C(vm, flag_node, i);
        if (flags[i] & (MVM_CALLSITE_ARG_NAMED)) {
            /* Each earlier named argument contributed one extra node (its
             * name) to args, hence the shifted position. */
            MASTNode *argname = ATPOS(vm, args, i + num_nameds);
            if (ISTYPE(vm, argname, ws->types->SVal)) {
                named_idxs[num_nameds] = get_string_heap_index(vm, ws,
                    ((MAST_SVal *)argname)->value);
                num_nameds++;
            }
            else {
                /* Release everything before throwing, as the other error
                 * paths in this file do. */
                MVM_free(flags);
                MVM_free(named_idxs);
                cleanup_all(vm, ws);
                DIE(vm, "Malformed callsite args: missing MAST::SVal for argument name");
            }
        }
    }

    /* See if we already know this callsite. */
    identifier_len = elems + num_nameds * sizeof(int);
    identifier     = MVM_malloc(identifier_len);
    memcpy(identifier, flags, elems);
    memcpy(identifier + elems, named_idxs, identifier_len - elems);
    HASH_FIND(hash_handle, ws->callsite_reuse_head, identifier, identifier_len, entry);
    if (entry) {
        MVM_free(flags);
        MVM_free(named_idxs);
        MVM_free(identifier);
        return entry->callsite_id;
    }

    /* New callsite: remember it. The hash entry keeps the identifier buffer
     * as its key; cleanup_all frees it. */
    entry = (CallsiteReuseEntry *)MVM_malloc(sizeof(CallsiteReuseEntry));
    entry->callsite_id = (unsigned short)ws->num_callsites;
    entry->identifier = identifier;
    HASH_ADD_KEYPTR(hash_handle, ws->callsite_reuse_head, identifier, identifier_len, entry);

    /* Emit callsite; be sure to pad if there's uneven number of flags. */
    ensure_space(vm, &ws->callsite_seg, &ws->callsite_alloc, ws->callsite_pos,
        2 + elems + align);
    write_int16(ws->callsite_seg, ws->callsite_pos, elems);
    ws->callsite_pos += 2;
    for (i = 0; i < elems; i++)
        write_int8(ws->callsite_seg, ws->callsite_pos++, flags[i]);
    if (align)
        write_int8(ws->callsite_seg, ws->callsite_pos++, 0);

    /* Emit any nameds. */
    if (num_nameds) {
        ensure_space(vm, &ws->callsite_seg, &ws->callsite_alloc, ws->callsite_pos,
            4 * num_nameds);
        for (i = 0; i < num_nameds; i++) {
            write_int32(ws->callsite_seg, ws->callsite_pos, named_idxs[i]);
            ws->callsite_pos += 4;
        }
    }

    MVM_free(flags);
    MVM_free(named_idxs);

    return (unsigned short)ws->num_callsites++;
}

/* Values for compile_instruction's override_second_argument (0 there means
 * no override). For a const_i64 op, OVERRIDE_WITH_16 is chosen when the
 * literal fits in the INT16 range and OVERRIDE_WITH_32 when it only fits in
 * the INT32 range. */
#define OVERRIDE_WITH_32 1
#define OVERRIDE_WITH_16 2

/* Compiles an instruction (which may actually be any of the
 * nodes valid directly in a Frame's instruction list, which
 * means labels are valid too). */
static void compile_instruction(VM, WriterState *ws, MASTNode *node) {
    if (ISTYPE(vm, node, ws->types->Op)) {
        MAST_Op   *o = GET_Op(node);
        const MVMOpInfo *info;
        int        i;
        unsigned char override_second_argument = 0;

        /* Look up opcode and get argument info. */
        unsigned short op   = o->op;
        info = MVM_op_get_op(op);
        if (!info)
            DIE(vm, "Invalid op specified in instruction %d", op);
        ws->current_op_info = info;
        ws->current_operand_idx = 0;

        /* Ensure argument count matches up. */
        if (info->num_operands != 0 && ELEMS(vm, o->operands) != info->num_operands) {
            unsigned int  current_frame_idx = ws->current_frame_idx;
            unsigned int  current_ins_idx = ws->current_ins_idx;
            const char *name = ws->current_op_info->name;
            cleanup_all(vm, ws);
            DIE(vm, "At Frame %u, Instruction %u, op '%s' has invalid number (%u) of operands; needs %u.",
                current_frame_idx, current_ins_idx, name,
                ELEMS(vm, o->operands), info->num_operands);
        }

        /* If we're outputting a const_i64 instruction, we may want to */
        /* turn it into a const_i64_32 or const_i64_16 instead if it fits */
        if (op == MVM_OP_const_i64) {
            MASTNode *operand = ATPOS(vm, o->operands, 1);
            MAST_IVal *iv = GET_IVal(operand);
            if (INT16_MIN <= iv->value && iv->value <= INT16_MAX) {
                override_second_argument = OVERRIDE_WITH_16;
            } else if (INT32_MIN <= iv->value && iv->value <= INT32_MAX) {
                override_second_argument = OVERRIDE_WITH_32;
            }
        }

        /* Write opcode. */
        ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 2);
        if (override_second_argument == 0)
            write_int16(ws->bytecode_seg, ws->bytecode_pos, op);
        else if (override_second_argument == OVERRIDE_WITH_16)
            write_int16(ws->bytecode_seg, ws->bytecode_pos, MVM_OP_const_i64_16);
        else if (override_second_argument == OVERRIDE_WITH_32)
            write_int16(ws->bytecode_seg, ws->bytecode_pos, MVM_OP_const_i64_32);
        ws->bytecode_pos += 2;

        /* Write operands. */
        for (i = 0; i < info->num_operands; i++) {
            if (i == 1 && override_second_argument) {
                MASTNode *operand = ATPOS(vm, o->operands, 1);
                MAST_IVal *iv = GET_IVal(operand);
                if (override_second_argument == OVERRIDE_WITH_32) {
                    ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 4);
                    write_int32(ws->bytecode_seg, ws->bytecode_pos, (MVMint32)iv->value);
                    ws->bytecode_pos += 4;
                } else {
                    ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 2);
                    write_int16(ws->bytecode_seg, ws->bytecode_pos, (MVMint16)iv->value);
                    ws->bytecode_pos += 2;
                }
            } else {
                compile_operand(vm, ws, info->operands[i], ATPOS(vm, o->operands, i));
            }
        }
    }
    else if (ISTYPE(vm, node, ws->types->ExtOp)) {
        MAST_ExtOp *o = GET_ExtOp(node);
        MASTNode   *operands;
        int         i, num_operands;

        /* Look up opcode and get argument info. */
        unsigned short op = o->op;
        if (op < EXTOP_BASE || (op - EXTOP_BASE) >= ELEMS(vm, ws->cu->extop_sigs))
            DIE(vm, "Invalid extension op %d specified", op);
        operands = ATPOS(vm, ws->cu->extop_sigs, op - EXTOP_BASE);
        if (VM_OBJ_IS_NULL(operands))
            DIE(vm, "Missing extension op operand array for instruction %d", op);
        ws->current_op_info = NULL;
        ws->current_operand_idx = 0;

        /* Ensure argument count matches up. */
        num_operands = ELEMS(vm, operands);
        if (ELEMS(vm, o->operands) != num_operands) {
            unsigned int  current_frame_idx = ws->current_frame_idx;
            unsigned int  current_ins_idx = ws->current_ins_idx;
            char *c_name = VM_STRING_TO_C_STRING(vm, o->name);
            char *waste[] = { c_name, NULL };
            cleanup_all(vm, ws);
            DIE_FREE(vm, waste, "At Frame %u, Instruction %u, op '%s' has invalid number (%u) of operands; needs %u.",
                current_frame_idx, current_ins_idx,
                c_name,
                ELEMS(vm, o->operands), num_operands);
        }

        /* Write opcode. */
        ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 2);
        write_int16(ws->bytecode_seg, ws->bytecode_pos, op);
        ws->bytecode_pos += 2;

        /* Write operands. */
        for (i = 0; i < num_operands; i++)
            compile_operand(vm, ws, ATPOS_I(vm, operands, i), ATPOS(vm, o->operands, i));
    }
    else if (ISTYPE(vm, node, ws->types->Label)) {
        add_label_and_resolve_fixups(vm, ws, GET_Label(node));
    }
    else if (ISTYPE(vm, node, ws->types->Call)) {
        MAST_Call *c           = GET_Call(node);
        unsigned short call_op  = c->op == 0 ? MVM_OP_invoke_v : MVM_OP_nativeinvoke_v;
        unsigned char res_type = 0;
        unsigned short num_flags, flag_pos, arg_pos, arg_out_pos;

        /* Emit callsite (may re-use existing one) and emit loading of it. */
        unsigned short callsite_id = get_callsite_id(vm, ws, c->flags, c->args);
        ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 4);
        write_int16(ws->bytecode_seg, ws->bytecode_pos, MVM_OP_prepargs);
        ws->bytecode_pos += 2;
        write_int16(ws->bytecode_seg, ws->bytecode_pos, callsite_id);
        ws->bytecode_pos += 2;

        /* for errors */
        ws->current_op_info = MVM_op_get_op(MVM_OP_prepargs);
        ws->current_operand_idx = 0;

        /* Set up args. */
        num_flags = (unsigned short)ELEMS(vm, c->flags);
        arg_pos = c->op == 0 ? 0 : 1;
        arg_out_pos = 0;
        for (flag_pos = 0; flag_pos < num_flags; flag_pos++) {
            /* Handle any special flags. */
            unsigned char flag = (unsigned char)ATPOS_I_C(vm, c->flags, flag_pos);
            if (flag & MVM_CALLSITE_ARG_NAMED) {
                ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 6);
                write_int16(ws->bytecode_seg, ws->bytecode_pos, MVM_OP_argconst_s);
                ws->bytecode_pos += 2;
                write_int16(ws->bytecode_seg, ws->bytecode_pos, arg_out_pos);
                ws->bytecode_pos += 2;
                compile_operand(vm, ws, MVM_operand_str, ATPOS(vm, c->args, arg_pos));
                arg_pos++;
                arg_out_pos++;
            }
            else if (flag & MVM_CALLSITE_ARG_FLAT) {
                /* don't need to do anything special */
            }

            /* Now go by flag type. */
            ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 6);
            if (flag & MVM_CALLSITE_ARG_OBJ) {
                write_int16(ws->bytecode_seg, ws->bytecode_pos, MVM_OP_arg_o);
                ws->bytecode_pos += 2;
                write_int16(ws->bytecode_seg, ws->bytecode_pos, arg_out_pos);
                ws->bytecode_pos += 2;
                compile_operand(vm, ws, MVM_operand_read_reg | MVM_operand_obj,
                    ATPOS(vm, c->args, arg_pos));
            }
            else if (flag & MVM_CALLSITE_ARG_STR) {
                write_int16(ws->bytecode_seg, ws->bytecode_pos, MVM_OP_arg_s);
                ws->bytecode_pos += 2;
                write_int16(ws->bytecode_seg, ws->bytecode_pos, arg_out_pos);
                ws->bytecode_pos += 2;
                compile_operand(vm, ws, MVM_operand_read_reg | MVM_operand_str,
                    ATPOS(vm, c->args, arg_pos));
            }
            else if (flag & MVM_CALLSITE_ARG_INT) {
                write_int16(ws->bytecode_seg, ws->bytecode_pos, MVM_OP_arg_i);
                ws->bytecode_pos += 2;
                write_int16(ws->bytecode_seg, ws->bytecode_pos, arg_out_pos);
                ws->bytecode_pos += 2;
                compile_operand(vm, ws, MVM_operand_read_reg | MVM_operand_int64,
                    ATPOS(vm, c->args, arg_pos));
            }
            else if (flag & MVM_CALLSITE_ARG_NUM) {
                write_int16(ws->bytecode_seg, ws->bytecode_pos, MVM_OP_arg_n);
                ws->bytecode_pos += 2;
                write_int16(ws->bytecode_seg, ws->bytecode_pos, arg_out_pos);
                ws->bytecode_pos += 2;
                compile_operand(vm, ws, MVM_operand_read_reg | MVM_operand_num64,
                    ATPOS(vm, c->args, arg_pos));
            }
            else {
                unsigned int  current_frame_idx = ws->current_frame_idx;
                unsigned int  current_ins_idx = ws->current_ins_idx;
                const char *name = ws->current_op_info->name;
                cleanup_all(vm, ws);
                /*
                DIE(vm, "At Frame %u, Instruction %u, op '%s', "
                        "file %s, line %u, unhandled arg type %u.",
                    current_frame_idx, current_ins_idx, name,
                    ws->last_annotated ? VM_STRING_TO_C_STRING(vm, ws->last_annotated->file) : "",
                    ws->last_annotated ? ws->last_annotated->line : 0,
                    flag);
                */
                DIE(vm, "At Frame %u, Instruction %u, op '%s', unhandled arg type %u.",
                    current_frame_idx, current_ins_idx, name, flag);
            }

            arg_pos++;
            arg_out_pos++;
        }

        /* Select operation based on return type. */
        if (ISTYPE(vm, c->result, ws->types->Local)) {
            MAST_Local *l = GET_Local(c->result);

            /* Ensure it's within the set of known locals. */
            if (l->index >= ws->cur_frame->num_locals) {
                cleanup_all(vm, ws);
                DIE(vm, "MAST::Local index out of range");
            }

            /* Go by type. */
            switch (ws->cur_frame->local_types[l->index]) {
                case MVM_reg_int64:
                    call_op = c->op == 0 ? MVM_OP_invoke_i : MVM_OP_nativeinvoke_i;
                    res_type = MVM_operand_int64;
                    break;
                case MVM_reg_num64:
                    call_op = c->op == 0 ? MVM_OP_invoke_n : MVM_OP_nativeinvoke_n;
                    res_type = MVM_operand_num64;
                    break;
                case MVM_reg_str:
                    call_op = c->op == 0 ? MVM_OP_invoke_s : MVM_OP_nativeinvoke_s;
                    res_type = MVM_operand_str;
                    break;
                case MVM_reg_obj:
                    call_op = c->op == 0 ? MVM_OP_invoke_o : MVM_OP_nativeinvoke_o;
                    res_type = MVM_operand_obj;
                    break;
                default:
                    cleanup_all(vm, ws);
                    DIE(vm, "Invalid MAST::Local type for return value");
            }
        }

        /* Emit the invocation op. */
        ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, c->op == 0 ? 6 : 8);
        write_int16(ws->bytecode_seg, ws->bytecode_pos, call_op);
        ws->bytecode_pos += 2;
        if (call_op != MVM_OP_invoke_v && call_op != MVM_OP_nativeinvoke_v)
            compile_operand(vm, ws, MVM_operand_read_reg | res_type, c->result);
        compile_operand(vm, ws, MVM_operand_read_reg | MVM_operand_obj, c->target);
        if (c->op != 0)
            compile_operand(vm, ws, MVM_operand_read_reg | MVM_operand_obj, ATPOS(vm, c->args, 0));
    }
    else if (ISTYPE(vm, node, ws->types->Annotated)) {
        MAST_Annotated *a = GET_Annotated(node);
        unsigned int i;
        unsigned int num_ins = ELEMS(vm, a->instructions);
        unsigned int offset = ws->bytecode_pos - ws->cur_frame->bytecode_start;

        ws->last_annotated = a;
        ensure_space(vm, &ws->annotation_seg, &ws->annotation_alloc, ws->annotation_pos, 12);
        write_int32(ws->annotation_seg, ws->annotation_pos, offset);
        write_int32(ws->annotation_seg, ws->annotation_pos + 4, get_string_heap_index(vm, ws, a->file));
        write_int32(ws->annotation_seg, ws->annotation_pos + 8, (unsigned int)a->line);
        ws->annotation_pos += 12;
        ws->cur_frame->num_annotations++;

        for (i = 0; i < num_ins; i++)
            compile_instruction(vm, ws, ATPOS(vm, a->instructions, i));
    }
    else if (ISTYPE(vm, node, ws->types->HandlerScope)) {
        MAST_HandlerScope *hs = GET_HandlerScope(node);
        unsigned int i;
        unsigned int num_ins = ELEMS(vm, hs->instructions);
        unsigned int start   = ws->bytecode_pos - ws->cur_frame->bytecode_start;
        unsigned int end;

        for (i = 0; i < num_ins; i++)
            compile_instruction(vm, ws, ATPOS(vm, hs->instructions, i));
        end = ws->bytecode_pos - ws->cur_frame->bytecode_start;

        ws->cur_frame->num_handlers++;
        if (ws->cur_frame->handlers)
            ws->cur_frame->handlers = (FrameHandler *)MVM_realloc(ws->cur_frame->handlers,
                ws->cur_frame->num_handlers * sizeof(FrameHandler));
        else
            ws->cur_frame->handlers = (FrameHandler *)MVM_malloc(
                ws->cur_frame->num_handlers * sizeof(FrameHandler));

        i = ws->cur_frame->num_handlers - 1;
        ws->cur_frame->handlers[i].start_offset = start;
        ws->cur_frame->handlers[i].end_offset = end;
        ws->cur_frame->handlers[i].category_mask = (unsigned int)hs->category_mask;
        ws->cur_frame->handlers[i].action = (unsigned short)hs->action;
        if (ws->cur_frame->handlers[i].category_mask & MVM_EX_CAT_LABELED) {
            if (ISTYPE(vm, hs->label_local, ws->types->Local)) {
                MAST_Local *l = GET_Local(hs->label_local);

                /* Ensure it's within the set of known locals and an object. */
                if (l->index >= ws->cur_frame->num_locals) {
                    cleanup_all(vm, ws);
                    DIE(vm, "MAST::Local index out of range in HandlerScope");
                }
                if (ws->cur_frame->local_types[l->index] != MVM_reg_obj) {
                    cleanup_all(vm, ws);
                    DIE(vm, "MAST::Local for HandlerScope must be an object");
                }

                /* Stash local index. */
                ws->cur_frame->handlers[i].label_reg = (unsigned short)l->index;
            }
            else {
                cleanup_all(vm, ws);
                DIE(vm, "MAST::Local required for HandlerScope with loop label");
            }
        }

        /* Ensure we have a label. */
        if (ISTYPE(vm, hs->goto_label, ws->types->Label)) {
            ws->cur_frame->handlers[i].label = hs->goto_label;
        }
        else {
            cleanup_all(vm, ws);
            DIE(vm, "MAST::Label required for HandlerScope goto");
        }

        /* May need a block also. */
        if (hs->action == HANDLER_INVOKE) {
            if (ISTYPE(vm, hs->block_local, ws->types->Local)) {
                MAST_Local *l = GET_Local(hs->block_local);

                /* Ensure it's within the set of known locals and an object. */
                if (l->index >= ws->cur_frame->num_locals) {
                    cleanup_all(vm, ws);
                    DIE(vm, "MAST::Local index out of range in HandlerScope");
                }
                if (ws->cur_frame->local_types[l->index] != MVM_reg_obj) {
                    cleanup_all(vm, ws);
                    DIE(vm, "MAST::Local for HandlerScope must be an object");
                }

                /* Stash local index. */
                ws->cur_frame->handlers[i].local = (unsigned short)l->index;
            }
            else {
                cleanup_all(vm, ws);
                DIE(vm, "MAST::Local required for HandlerScope invoke action");
            }
        }
        else if (hs->action == HANDLER_UNWIND_GOTO || hs->action == HANDLER_UNWIND_GOTO_OBJ) {
            ws->cur_frame->handlers[i].local = 0;
        }
        else {
            cleanup_all(vm, ws);
            DIE(vm, "Invalid action code for handler scope");
        }
    }
    else {
        cleanup_all(vm, ws);
        DIE(vm, "Invalid MAST node in instruction list (must be Op, ExtOp, Call, Label, or Annotated)");
    }
    ws->current_ins_idx++;
}

/* Compiles a frame.
 *
 * Writes the frame's header, local and lexical tables into the frame
 * segment, compiles its instructions into the bytecode segment (appending
 * a bare `return` when the last top-level instruction is not a return op),
 * back-patches the bytecode length and the annotation/handler counts into
 * the header, and finally appends the handler table and static lexical
 * values.
 *
 *   node - must be a MAST::Frame, otherwise the compilation is aborted.
 *   idx  - this frame's own index; written as the outer index to mean
 *          "no outer" when f->outer is not a Frame.
 */
static void compile_frame(VM, WriterState *ws, MASTNode *node, unsigned short idx) {
    MAST_Frame  *f;
    FrameState  *fs;
    unsigned int i, num_ins, instructions_start;
    MASTNode *last_inst = NULL;
    MVMuint16 num_slvs;

    /* Ensure we have a node of the right type. */
    if (!ISTYPE(vm, node, ws->types->Frame)) {
        cleanup_all(vm, ws);
        DIE(vm, "Child of CompUnit must be a Frame");
    }
    f = GET_Frame(node);

    /* Allocate frame state. MVM_malloc hands back uninitialised memory and
     * the error paths below call cleanup_all while ws->cur_frame already
     * points at this state, so every pointer and counter is given a defined
     * value up front rather than at its first use.
     * NOTE(review): cleanup_all is defined elsewhere; presumably it releases
     * the current frame's arrays - confirm. */
    fs = ws->cur_frame    = (FrameState *)MVM_malloc(sizeof(FrameState));
    fs->bytecode_start    = ws->bytecode_pos;
    fs->frame_start       = ws->frame_pos;
    fs->labels            = NULL;
    fs->num_labels        = 0;
    fs->alloc_labels      = 0;
    fs->unresolved_labels = 0;
    fs->local_types       = NULL;
    fs->lexical_types     = NULL;
    fs->handlers          = NULL;
    fs->num_handlers      = 0;
    fs->num_annotations   = 0;

    /* Count locals and lexicals. */
    fs->num_locals   = ELEMS(vm, f->local_types);
    fs->num_lexicals = ELEMS(vm, f->lexical_types);

    if (fs->num_locals > (1 << 16)) {
        cleanup_all(vm, ws);
        DIE(vm, "Too many locals in this frame.");
    }

    if (ELEMS(vm, f->lexical_names) != fs->num_lexicals) {
        cleanup_all(vm, ws);
        DIE(vm, "Lexical types list and lexical names list have unequal length");
    }

    /* Ensure space is available to write frame entry, and write the
     * header, apart from the bytecode length, which we'll fill in
     * later. */
    ensure_space(vm, &ws->frame_seg, &ws->frame_alloc, ws->frame_pos,
        FRAME_HEADER_SIZE + fs->num_locals * 2 + fs->num_lexicals * 6);
    write_int32(ws->frame_seg, ws->frame_pos, fs->bytecode_start);
    write_int32(ws->frame_seg, ws->frame_pos + 4, 0); /* Filled in later. */
    write_int32(ws->frame_seg, ws->frame_pos + 8, fs->num_locals);
    write_int32(ws->frame_seg, ws->frame_pos + 12, fs->num_lexicals);
    write_int32(ws->frame_seg, ws->frame_pos + 16,
        get_string_heap_index(vm, ws, f->cuuid));
    write_int32(ws->frame_seg, ws->frame_pos + 20,
        get_string_heap_index(vm, ws, f->name));

    /* Handle outer. The current index means "no outer". */
    if (ISTYPE(vm, f->outer, ws->types->Frame)) {
        /* First, see if we have the index cached. If not, go hunting. */
        if (((MAST_Frame *)f->outer)->flags & FRAME_FLAG_HAS_INDEX) {
            write_int16(ws->frame_seg, ws->frame_pos + 24,
                ((MAST_Frame *)f->outer)->index);
        }
        else {
            unsigned short j, found, num_frames;
            found = 0;
            num_frames = (unsigned short)ELEMS(vm, ws->cu->frames);
            for (j = 0; j < num_frames; j++) {
                if (ATPOS(vm, ws->cu->frames, j) == f->outer) {
                    write_int16(ws->frame_seg, ws->frame_pos + 24, j);
                    found = 1;
                    break;
                }
            }
            if (!found) {
                cleanup_all(vm, ws);
                DIE(vm, "Could not locate outer frame in frame list");
            }
        }
    }
    else {
        write_int16(ws->frame_seg, ws->frame_pos + 24, idx);
    }

    write_int32(ws->frame_seg, ws->frame_pos + 26, ws->annotation_pos);
    write_int32(ws->frame_seg, ws->frame_pos + 30, 0); /* number of annotation; fill in later */
    write_int32(ws->frame_seg, ws->frame_pos + 34, 0); /* number of handlers; fill in later */
    write_int16(ws->frame_seg, ws->frame_pos + 38, (MVMint16)f->flags);
    /* static_lex_values is a flat list with four entries per value. Note
     * the cast binds tighter than the division, so the element count is
     * truncated to 16 bits before being divided. */
    num_slvs = f->flags & FRAME_FLAG_HAS_SLV
        ? (MVMuint16)ELEMS(vm, f->static_lex_values) / 4
        : 0;
    write_int16(ws->frame_seg, ws->frame_pos + 40, num_slvs);

    /* Code object reference; the SC dependency index is stored off by one
     * and both fields are zero when the frame has no code object. */
    if (f->flags & FRAME_FLAG_HAS_CODE_OBJ) {
        write_int32(ws->frame_seg, ws->frame_pos + 42, f->code_obj_sc_dep_idx + 1);
        write_int32(ws->frame_seg, ws->frame_pos + 46, f->code_obj_sc_idx);
    }
    else {
        write_int32(ws->frame_seg, ws->frame_pos + 42, 0);
        write_int32(ws->frame_seg, ws->frame_pos + 46, 0);
    }

    ws->frame_pos += FRAME_HEADER_SIZE;

    /* Write locals, as well as collecting our own array of type info. */
    fs->local_types = (short unsigned int *)MVM_malloc(sizeof(unsigned short) * fs->num_locals);
    for (i = 0; i < fs->num_locals; i++) {
        unsigned short local_type = type_to_local_type(vm, ws, ATPOS(vm, f->local_types, i));
        fs->local_types[i] = local_type;
        write_int16(ws->frame_seg, ws->frame_pos, local_type);
        ws->frame_pos += 2;
    }

    /* Write lexicals: a 16-bit type followed by a 32-bit name index. */
    fs->lexical_types = (short unsigned int *)MVM_malloc(sizeof(unsigned short) * fs->num_lexicals);
    for (i = 0; i < fs->num_lexicals; i++) {
        unsigned short lexical_type = type_to_local_type(vm, ws, ATPOS(vm, f->lexical_types, i));
        fs->lexical_types[i] = lexical_type;
        write_int16(ws->frame_seg, ws->frame_pos, lexical_type);
        ws->frame_pos += 2;
        write_int32(ws->frame_seg, ws->frame_pos,
            get_string_heap_index(vm, ws, ATPOS_S_C(vm, f->lexical_names, i)));
        ws->frame_pos += 4;
    }

    /* Save the location of the start of instructions */
    instructions_start = ws->bytecode_pos;

    /* Compile the instructions. */
    ws->current_ins_idx = 0;
    num_ins = ELEMS(vm, f->instructions);
    for (i = 0; i < num_ins; i++)
        compile_instruction(vm, ws, last_inst = ATPOS(vm, f->instructions, i));

    /* Fixup frames that don't have a return instruction, so
     * we don't have to check against bytecode length every
     * time through the runloop. */
    if (!last_inst || !ISTYPE(vm, last_inst, ws->types->Op)
            || (   GET_Op(last_inst)->op != MVM_OP_return
                && GET_Op(last_inst)->op != MVM_OP_return_i
                && GET_Op(last_inst)->op != MVM_OP_return_n
                && GET_Op(last_inst)->op != MVM_OP_return_s
                && GET_Op(last_inst)->op != MVM_OP_return_o
            )) {
        ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 2);
        write_int16(ws->bytecode_seg, ws->bytecode_pos, MVM_OP_return);
        ws->bytecode_pos += 2;
    }

    /* Fill in bytecode length. */
    write_int32(ws->frame_seg, fs->frame_start + 4, ws->bytecode_pos - instructions_start);

    /* Fill in number of annotations. */
    write_int32(ws->frame_seg, fs->frame_start + 30, fs->num_annotations);

    /* Fill in number of handlers. */
    write_int32(ws->frame_seg, fs->frame_start + 34, fs->num_handlers);

    /* Write handlers. Each is FRAME_HANDLER_SIZE bytes, plus a 16-bit label
     * register for handlers whose category mask includes LABELED. */
    for (i = 0; i < fs->num_handlers; i++) {
        ensure_space(vm, &ws->frame_seg, &ws->frame_alloc, ws->frame_pos,
            FRAME_HANDLER_SIZE);
        write_int32(ws->frame_seg, ws->frame_pos, fs->handlers[i].start_offset);
        ws->frame_pos += 4;
        write_int32(ws->frame_seg, ws->frame_pos, fs->handlers[i].end_offset);
        ws->frame_pos += 4;
        write_int32(ws->frame_seg, ws->frame_pos, fs->handlers[i].category_mask);
        ws->frame_pos += 4;
        write_int16(ws->frame_seg, ws->frame_pos, fs->handlers[i].action);
        ws->frame_pos += 2;
        write_int16(ws->frame_seg, ws->frame_pos, fs->handlers[i].local);
        ws->frame_pos += 2;
        if (fs->handlers[i].label)
            write_int32(ws->frame_seg, ws->frame_pos,
                demand_label_offset(vm, ws, GET_Label(fs->handlers[i].label),
                    "HandlerScope uses unresolved label"));
        else
            write_int32(ws->frame_seg, ws->frame_pos, 0);
        ws->frame_pos += 4;
        if (fs->handlers[i].category_mask & MVM_EX_CAT_LABELED) {
            ensure_space(vm, &ws->frame_seg, &ws->frame_alloc, ws->frame_pos, 2);
            write_int16(ws->frame_seg, ws->frame_pos, fs->handlers[i].label_reg);
            ws->frame_pos += 2;
        }
    }

    /* Write static lex values: two 16-bit and two 32-bit fields each. */
    ensure_space(vm, &ws->frame_seg, &ws->frame_alloc, ws->frame_pos,
        FRAME_SLV_SIZE * num_slvs);
    for (i = 0; i < num_slvs; i++) {
        write_int16(ws->frame_seg, ws->frame_pos,
            (MVMuint16)ATPOS_I(vm, f->static_lex_values, 4 * i));
        write_int16(ws->frame_seg, ws->frame_pos + 2,
            (MVMuint16)ATPOS_I(vm, f->static_lex_values, 4 * i + 1));
        write_int32(ws->frame_seg, ws->frame_pos + 4,
            (MVMuint32)ATPOS_I(vm, f->static_lex_values, 4 * i + 2));
        write_int32(ws->frame_seg, ws->frame_pos + 8,
            (MVMuint32)ATPOS_I(vm, f->static_lex_values, 4 * i + 3));
        ws->frame_pos += FRAME_SLV_SIZE;
    }

    /* Any leftover labels? */
    if (fs->unresolved_labels) {
        cleanup_all(vm, ws);
        DIE(vm, "Frame has %u unresolved labels", fs->unresolved_labels);
    }

    /* Free the frame state. */
    cleanup_frame(vm, fs);
    ws->cur_frame = NULL;

    /* Increment frame count. */
    ws->num_frames++;
}

/* Takes all of the strings and joins them into a heap. Each entry is a
 * 32-bit word holding (byte length << 1) | is_utf8, followed by the encoded
 * bytes (Latin-1 or UTF-8), zero-padded to a multiple of four bytes.
 *
 * Returns a newly allocated buffer that the caller must release with
 * MVM_free, and stores its used length in *string_heap_size. Returns NULL
 * (with a size of 0) when there are no strings. */
static char * form_string_heap(VM, WriterState *ws, unsigned int *string_heap_size) {
    char         *heap;
    unsigned int  i, num_strings, heap_size, heap_alloc;

    /* If we've nothing to do, just return immediately. */
    num_strings = ELEMS(vm, ws->strings);
    if (num_strings == 0) {
        *string_heap_size = 0;
        return NULL;
    }

    /* Allocate heap starting point (just a guess). */
    heap_size = 0;
    heap_alloc = num_strings * 32;
    heap = (char *)MVM_malloc(heap_alloc);

    /* Add each string to the heap. */
    for (i = 0; i < num_strings; i++) {
        MVMuint64 bytelen;
        char *encoded;
        MVMGraphemeIter gi;
        unsigned short align;
        unsigned int need;

        /* Decide if we can get away with Latin-1, with an assumption of the
         * string already being in NFG. Any negative grapheme, any grapheme
         * at or above 0xFF, or a \r forces UTF-8 instead. */
        MVMint32   need_utf8 = 0;
        MVMString *str       = ATPOS_S(vm, ws->strings, i);
        MVM_string_gi_init(tc, &gi, str);
        while (MVM_string_gi_has_more(tc, &gi)) {
            MVMGrapheme32 g = MVM_string_gi_get_grapheme(tc, &gi);
            if (g < 0 || g >= 0xFF || g == 0x0D) {
                need_utf8 = 1;
                break;
            }
        }

        /* Encode it with the chosen algorithm. */
        encoded = need_utf8
            ? MVM_string_utf8_encode(tc, str, &bytelen, 0)
            : MVM_string_latin1_encode(tc, str, &bytelen, 0);
        if (bytelen > 0x3FFFFFFF) {
            /* Both buffers are owned by this function only, so release them
             * here before bailing out; otherwise they would leak. */
            MVM_free(encoded);
            MVM_free(heap);
            cleanup_all(vm, ws);
            DIE(vm, "String too long for string constants segment");
        }

        /* Ensure we have space. */
        align = bytelen & 3 ? 4 - (bytelen & 3) : 0;
        need  = 4 + bytelen + align;
        if (heap_size + need >= heap_alloc) {
            heap_alloc = umax(heap_alloc * 2, heap_size + need);
            heap = (char *)MVM_realloc(heap, heap_alloc);
        }

        /* Write byte length and UTF-8 flag into heap. */
        write_int32(heap, heap_size, (bytelen << 1) | need_utf8);
        heap_size += 4;

        /* Write string. */
        memcpy(heap + heap_size, encoded, bytelen);
        MVM_free(encoded);
        heap_size += bytelen;

        /* Add alignment. Whilst we never read this memory, it's useful to
           ensure it is initialised, otherwise valgrind (and similar tools)
           will rightly complain that we're writing garbage to disk. */
        if (align) {
            memset(heap + heap_size, 0, align);
            heap_size += align;
        }
    }

    *string_heap_size = heap_size;
    return heap;
}

/* Takes all the pieces and forms the bytecode output.
 *
 * Lays the segments out after the header in this order: SC dependencies,
 * extension ops, frames, callsites, string heap, serialized SC data (if
 * any), bytecode, annotations. Each segment's start offset and its count or
 * length are recorded at the segment's header offset, and every segment is
 * padded with MVM_ALIGN_SECTION.
 *
 * Returns a newly allocated buffer and stores its size in *bytecode_size.
 * Consumes vm->serialized: it is freed and reset once copied. */
static char * form_bytecode_output(VM, WriterState *ws, unsigned int *bytecode_size) {
    MVMuint32     size    = 0;
    MVMuint32     pos     = 0;
    char         *output;
    unsigned int  string_heap_size;
    char         *string_heap;
    unsigned int  hll_str_idx;

    /* Store HLL name string, if any. This has to happen before the string
     * heap is built so the name is part of it. */
    if (!VM_STRING_IS_NULL(ws->cu->hll))
        hll_str_idx = get_string_heap_index(vm, ws, ws->cu->hll);
    else
        hll_str_idx = get_string_heap_index(vm, ws, EMPTY_STRING(vm));

    /* Build string heap. */
    string_heap = form_string_heap(vm, ws, &string_heap_size);

    /* Work out total size. */
    size += MVM_ALIGN_SECTION(HEADER_SIZE);
    size += MVM_ALIGN_SECTION(string_heap_size);
    size += MVM_ALIGN_SECTION(ws->scdep_bytes);
    size += MVM_ALIGN_SECTION(ws->extops_bytes);
    size += MVM_ALIGN_SECTION(ws->frame_pos);
    size += MVM_ALIGN_SECTION(ws->callsite_pos);
    size += MVM_ALIGN_SECTION(ws->bytecode_pos);
    size += MVM_ALIGN_SECTION(ws->annotation_pos);
    if (vm->serialized)
        size += MVM_ALIGN_SECTION(vm->serialized_size);

    /* Allocate space for the bytecode output. Zero-filled, so alignment
     * padding and unwritten header fields are zero. */
    output = (char *)MVM_calloc(1, size);

    /* Generate start of header. */
    memcpy(output, "MOARVM\r\n", 8);
    write_int32(output, 8, BYTECODE_VERSION);
    pos += MVM_ALIGN_SECTION(HEADER_SIZE);

    /* Add SC dependencies section and its header entries. The segment
     * pointer is NULL when there are no dependencies, and passing NULL to
     * memcpy is undefined even for a zero length, so skip the copy then. */
    write_int32(output, SCDEP_HEADER_OFFSET, pos);
    write_int32(output, SCDEP_HEADER_OFFSET + 4, ELEMS(vm, ws->cu->sc_handles));
    if (ws->scdep_bytes)
        memcpy(output + pos, ws->scdep_seg, ws->scdep_bytes);
    pos += MVM_ALIGN_SECTION(ws->scdep_bytes);

    /* Add extension ops section and its header entries. */
    write_int32(output, EXTOP_HEADER_OFFSET, pos);
    write_int32(output, EXTOP_HEADER_OFFSET + 4, ws->num_extops);
    memcpy(output + pos, ws->extops_seg, ws->extops_bytes);
    pos += MVM_ALIGN_SECTION(ws->extops_bytes);

    /* Add frames section and its header entries. */
    write_int32(output, FRAME_HEADER_OFFSET, pos);
    write_int32(output, FRAME_HEADER_OFFSET + 4, ws->num_frames);
    memcpy(output + pos, ws->frame_seg, ws->frame_pos);
    pos += MVM_ALIGN_SECTION(ws->frame_pos);

    /* Add callsites section and its header entries. */
    write_int32(output, CALLSITE_HEADER_OFFSET, pos);
    write_int32(output, CALLSITE_HEADER_OFFSET + 4, ws->num_callsites);
    memcpy(output + pos, ws->callsite_seg, ws->callsite_pos);
    pos += MVM_ALIGN_SECTION(ws->callsite_pos);

    /* Add strings heap section and its header entries. The heap is NULL
     * when there are no strings; only copy and free it when present. */
    write_int32(output, STRING_HEADER_OFFSET, pos);
    write_int32(output, STRING_HEADER_OFFSET + 4, ELEMS(vm, ws->strings));
    if (string_heap) {
        memcpy(output + pos, string_heap, string_heap_size);
        MVM_free(string_heap);
        string_heap = NULL;
    }
    pos += MVM_ALIGN_SECTION(string_heap_size);

    /* SC data. Write it if we have it. */
    if (vm->serialized) {
        write_int32(output, SCDATA_HEADER_OFFSET, pos);
        write_int32(output, SCDATA_HEADER_OFFSET + 4, vm->serialized_size);
        memcpy(output + pos, vm->serialized, vm->serialized_size);
        pos += MVM_ALIGN_SECTION(vm->serialized_size);
        MVM_free(vm->serialized);
        vm->serialized = NULL;
        vm->serialized_size = 0;
    }

    /* Add bytecode section and its header entries (offset, length). */
    write_int32(output, BYTECODE_HEADER_OFFSET, pos);
    write_int32(output, BYTECODE_HEADER_OFFSET + 4, ws->bytecode_pos);
    memcpy(output + pos, ws->bytecode_seg, ws->bytecode_pos);
    pos += MVM_ALIGN_SECTION(ws->bytecode_pos);

    /* Add annotation section and its header entries (offset, length). */
    write_int32(output, ANNOTATION_HEADER_OFFSET, pos);
    write_int32(output, ANNOTATION_HEADER_OFFSET + 4, ws->annotation_pos);
    memcpy(output + pos, ws->annotation_seg, ws->annotation_pos);
    pos += MVM_ALIGN_SECTION(ws->annotation_pos);

    /* Add HLL and special frame indexes. Frame indexes are stored off by
     * one so that zero can mean "no such frame". */
    write_int32(output, HLL_NAME_HEADER_OFFSET, hll_str_idx);
    if (VM_OBJ_IS_NULL(ws->cu->main_frame))
        write_int32(output, SPECIAL_FRAME_HEADER_OFFSET, 0);
    else
        write_int32(output, SPECIAL_FRAME_HEADER_OFFSET, 1 + get_frame_index(vm, ws, ws->cu->main_frame));
    if (VM_OBJ_IS_NULL(ws->cu->load_frame))
        write_int32(output, SPECIAL_FRAME_HEADER_OFFSET + 4, 0);
    else
        write_int32(output, SPECIAL_FRAME_HEADER_OFFSET + 4, 1 + get_frame_index(vm, ws, ws->cu->load_frame));
    if (VM_OBJ_IS_NULL(ws->cu->deserialize_frame))
        write_int32(output, SPECIAL_FRAME_HEADER_OFFSET + 8, 0);
    else
        write_int32(output, SPECIAL_FRAME_HEADER_OFFSET + 8, 1 + get_frame_index(vm, ws, ws->cu->deserialize_frame));

    /* Sanity...should never fail. If it does, release the output buffer and
     * the writer state first, as the other error paths in this file do. */
    if (pos != size) {
        MVM_free(output);
        cleanup_all(vm, ws);
        DIE(vm, "Bytecode generated did not match expected size");
    }

    *bytecode_size = size;
    return output;
}

/* Main entry point to the MAST to bytecode compiler.
 *
 * node  - top-level MAST node; must be a CompUnit, otherwise we DIE.
 * types - MAST node type table; used for the CompUnit check and stored in
 *         the writer state for the rest of the compilation.
 * size  - out parameter; receives the size reported by
 *         form_bytecode_output for the assembled output.
 *
 * Returns the buffer produced by form_bytecode_output. The writer state
 * is handed to cleanup_all before returning. As side effects on the VM,
 * vm->serialized_string_heap is reset to NULL once it has been consumed. */
char * MVM_mast_compile(VM, MASTNode *node, MASTNodeTypes *types, unsigned int *size) {
    MAST_CompUnit  *cu;
    WriterState    *ws;
    char           *bytecode;
    /* NOTE(review): these counters are 16-bit, so dependent SC and frame
     * counts above 65535 are truncated here - confirm the bytecode format
     * never needs more. */
    unsigned short  i, num_depscs, num_frames;
    unsigned int    bytecode_size;

    /* Ensure we have a compilation unit. */
    if (!ISTYPE(vm, node, types->CompUnit))
        DIE(vm, "Top-level MAST node must be a CompUnit");
    cu = GET_CompUnit(node);

    /* Initialize the writer state structure. The frame, bytecode and
     * annotation segments get an initial allocation proportional to the
     * number of frames in the compilation unit. */
    ws = (WriterState *)MVM_malloc(sizeof(WriterState));
    ws->types            = types;
    ws->strings          = NEWLIST_S(vm);
    ws->seen_strings     = NEWHASH(vm);
    ws->cur_frame        = NULL;
    ws->scdep_bytes      = ELEMS(vm, cu->sc_handles) * SC_DEP_SIZE;
    ws->scdep_seg        = ws->scdep_bytes ? (char *)MVM_malloc(ws->scdep_bytes) : NULL;
    ws->num_extops       = ELEMS(vm, cu->extop_names);
    ws->extops_bytes     = ws->num_extops * EXTOP_SIZE;
    ws->extops_seg       = (char *)MVM_malloc(ws->extops_bytes);
    ws->frame_pos        = 0;
    ws->frame_alloc      = 192 * ELEMS(vm, cu->frames);
    ws->frame_seg        = (char *)MVM_malloc(ws->frame_alloc);
    ws->num_frames       = 0;
    ws->callsite_pos     = 0;
    ws->callsite_alloc   = 4096;
    ws->callsite_seg     = (char *)MVM_malloc(ws->callsite_alloc);
    ws->num_callsites    = 0;
    ws->bytecode_pos     = 0;
    ws->bytecode_alloc   = 128 * ELEMS(vm, cu->frames);
    ws->bytecode_seg     = (char *)MVM_malloc(ws->bytecode_alloc);
    ws->annotation_pos   = 0;
    ws->annotation_alloc = 64 * ELEMS(vm, cu->frames);
    ws->annotation_seg   = (char *)MVM_malloc(ws->annotation_alloc);
    ws->cu               = cu;
    ws->current_frame_idx= 0;

    /* If we have any strings from serializing, then we'll seed our own string
     * heap with them. This means the compilation unit string heap will align
     * perfectly with what the serialization blob needs, and thus we can use
     * it in deserialization. Note we use get_string_heap_index for its side
     * effects only here. Start from 1, as 0 means NULL string. */
    if (vm->serialized_string_heap) {
        MVMint64 elems = ELEMS(vm, vm->serialized_string_heap);
        /* The index must be as wide as the element count: a 16-bit counter
         * would wrap back to 0 after 65535 and, for a larger heap, the loop
         * condition would never become false. */
        MVMint64 heap_idx;
        for (heap_idx = 1; heap_idx < elems; heap_idx++)
            (void)get_string_heap_index(vm, ws, ATPOS_S(vm, vm->serialized_string_heap, heap_idx));
        vm->serialized_string_heap = NULL;
    }

    /* Initialize callsite reuse cache */
    ws->callsite_reuse_head = NULL;

    /* Store each of the dependent SCs: one string heap index per handle. */
    num_depscs = ELEMS(vm, ws->cu->sc_handles);
    for (i = 0; i < num_depscs; i++)
        write_int32(ws->scdep_seg, i * SC_DEP_SIZE,
            get_string_heap_index(vm, ws,
                ATPOS_S_C(vm, ws->cu->sc_handles, i)));

    /* Store each of the extop names and signatures. Each entry is the
     * string heap index of the name followed by 8 operand bytes; unused
     * operand slots are written as 0. */
    for (i = 0; i < ws->num_extops; i++) {
        MASTNode *sig_array;
        int num_operands, j;

        write_int32(ws->extops_seg, i * EXTOP_SIZE,
            get_string_heap_index(vm, ws,
                ATPOS_S_C(vm, ws->cu->extop_names, i)));

        sig_array = ATPOS(vm, ws->cu->extop_sigs, i);
        num_operands = ELEMS(vm, sig_array);
        for (j = 0; j < 8; j++)
            write_int8(ws->extops_seg, i * EXTOP_SIZE + 4 + j,
                j < num_operands
                    ? ATPOS_I(vm, sig_array, j)
                    : 0);
    }

    /* Visit and compile each of the frames, recording the index of the
     * frame being compiled in the writer state first. */
    num_frames = (unsigned short)ELEMS(vm, cu->frames);
    for (i = 0; i < num_frames; i++) {
        ws->current_frame_idx = i;
        compile_frame(vm, ws, ATPOS(vm, cu->frames, i), ws->current_frame_idx);
    }

    /* Join all the pieces into a bytecode file. */
    bytecode = form_bytecode_output(vm, ws, &bytecode_size);

    /* Cleanup and hand back result. */
    cleanup_all(vm, ws);

    *size = bytecode_size;
    return bytecode;
}