Mercurial > hg > Members > anatofuz > MoarVM
view src/jit/linear_scan.c @ 0:2cf249471370
convert mercurial for git
author | Takahiro SHIMIZU <anatofuz@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 08 May 2018 16:09:12 +0900 |
parents | |
children |
line wrap: on
line source
#include "moar.h" #include "internal.h" #define __COMMA__ , static MVMint8 available_gpr[] = { MVM_JIT_ARCH_AVAILABLE_GPR(MVM_JIT_REG) }; static MVMint8 available_num[] = { MVM_JIT_ARCH_NUM(MVM_JIT_REG) }; /* bitmap, so make it '|' to combine the shifted register numbers */ #undef __COMMA__ #define __COMMA__ | #define SHIFT(x) (1 << (MVM_JIT_REG(x))) static const MVMBitmap NVR_GPR_BITMAP = MVM_JIT_ARCH_NONVOLATILE_GPR(SHIFT); static const MVMBitmap AVAILABLE_GPR_BITMAP = MVM_JIT_ARCH_AVAILABLE_GPR(SHIFT); #undef SHIFT #undef __COMMA__ #define MAX_ACTIVE sizeof(available_gpr) #define NYI(x) MVM_oops(tc, #x "not yet implemented") #define _ASSERT(b, msg) if (!(b)) do { MVM_panic(1, msg); } while (0) #if MVM_JIT_DEBUG #define _DEBUG(fmt, ...) do { fprintf(stderr, fmt "%s", __VA_ARGS__, "\n"); } while(0) #else #define _DEBUG(fmt, ...) do {} while(0) #endif typedef struct { MVMint32 key; MVMint32 idx; } UnionFind; typedef struct ValueRef ValueRef; struct ValueRef { MVMint32 tile_idx; MVMint32 value_idx; ValueRef *next; }; struct Hole { MVMint32 start, end; struct Hole *next; }; typedef struct { /* double-ended queue of value refs */ ValueRef *first, *last; /* order number of first and last refs */ MVMint32 start, end; /* list of holes in ascending order */ struct Hole *holes; /* We can have at most two synthetic tiles, one attached to the first * definition and one to the last use... we could also point directly into * the values array of the tile, but it is not directly necessary */ MVMJitTile *synthetic[2]; MVMint8 register_spec; MVMJitStorageClass reg_cls; MVMint32 reg_num; MVMint8 reg_type; MVMint32 spill_pos; MVMint32 spill_idx; } LiveRange; typedef struct { MVMJitCompiler *compiler; /* Sets of values */ UnionFind *sets; /* single buffer for uses, definitions */ ValueRef *refs; MVMint32 refs_num; /* single buffer for number of holes */ struct Hole *holes; MVMint32 holes_top; /* All values ever defined by the register allcoator */ MVM_VECTOR_DECL(LiveRange, values); /* 'Currently' active values */ MVMint32 active_top; MVMint32 active[MAX_ACTIVE]; /* Values still left to do (heap) */ MVM_VECTOR_DECL(MVMint32, worklist); /* Retired values (to be assigned registers) (heap) */ MVM_VECTOR_DECL(MVMint32, retired); /* Spilled values */ MVM_VECTOR_DECL(MVMint32, spilled); /* Register handout ring */ MVMint8 reg_ring[MAX_ACTIVE]; MVMint32 reg_give, reg_take; } RegisterAllocator; /* For first/last ref comparison, the tile indexes are doubled, and the indexes * of synthetics are biased with +1/-1. We use this extra space on the number * line to ensure consistent ordering and expiring behavior for 'synthetic' live * ranges that either start before an instruction (loading a required value) or * end just after one (storing the produced value). Without this, ordering * problems can cause two 'atomic' live ranges to be allocated and expired * before their actual last use */ MVM_STATIC_INLINE MVMint32 order_nr(MVMint32 tile_idx) { return tile_idx * 2; } MVM_STATIC_INLINE MVMint32 is_definition(ValueRef *v) { return (v->value_idx == 0); } MVM_STATIC_INLINE MVMint32 is_arglist_ref(MVMJitTileList *list, ValueRef *v) { return (list->items[v->tile_idx]->op == MVM_JIT_ARGLIST); } MVM_STATIC_INLINE MVMint32 live_range_is_empty(LiveRange *range) { return (range->first == NULL && range->synthetic[0] == NULL && range->synthetic[1] == NULL); } /* allocate a new live range value by pointer-bumping */ MVMint32 live_range_init(RegisterAllocator *alc) { LiveRange *range; MVMint32 idx = alc->values_num++; MVM_VECTOR_ENSURE_SIZE(alc->values, idx); alc->values[idx].spill_idx = INT32_MAX; alc->values[idx].start = INT32_MAX; return idx; } /* append ref to end of queue */ static void live_range_add_ref(RegisterAllocator *alc, LiveRange *range, MVMint32 tile_idx, MVMint32 value_idx) { ValueRef *ref = alc->refs + alc->refs_num++; ref->tile_idx = tile_idx; ref->value_idx = value_idx; if (range->first == NULL) { range->first = ref; } if (range->last != NULL) { range->last->next = ref; } range->last = ref; ref->next = NULL; range->start = MIN(order_nr(tile_idx), range->start); range->end = MAX(order_nr(tile_idx), range->end); } /* merge value ref sets */ static void live_range_merge(LiveRange *a, LiveRange *b) { ValueRef *head = NULL, *tail = NULL; MVMint32 i; _DEBUG("Merging live ranges (%d-%d) and (%d-%d)", (a)->start, (a)->end, (b)->start, (b)->end); if (a->start <= b->start) { head = a->first; a->first = a->first->next; } else { head = b->first; b->first = b->first->next; } tail = head; while (a->first != NULL && b->first != NULL) { if (a->first->tile_idx <= b->first->tile_idx) { tail->next = a->first; a->first = a->first->next; } else { tail->next = b->first; b->first = b->first->next; } tail = tail->next; } while (a->first != NULL) { tail->next = a->first; a->first = a->first->next; tail = tail->next; } while (b->first != NULL) { tail->next = b->first; b->first = b->first->next; tail = tail->next; } a->first = head; a->last = tail; for (i = 0; i < 2; i++) { if (b->synthetic[i] == NULL) { continue; } if (a->synthetic[i] != NULL) { MVM_panic(1, "Can't merge the same synthetic!"); } } a->start = MIN(a->start, b->start); a->end = MAX(a->end, b->end); /* deinitialize the live range */ b->start = INT32_MAX; b->end = 0; } static struct Hole * live_range_has_hole(LiveRange *value, MVMint32 order_nr) { struct Hole *h; /* By construction these are in linear ascending order, and never overlap */ for (h = value->holes; h != NULL && h->start <= order_nr; h = h->next) { if (h->end >= order_nr) return h; } return NULL; } UnionFind * value_set_find(UnionFind *sets, MVMint32 key) { while (sets[key].key != key) { key = sets[key].key; } return sets + key; } MVMint32 value_set_union(UnionFind *sets, LiveRange *values, MVMint32 a, MVMint32 b) { /* dereference the sets to their roots */ a = value_set_find(sets, a)->key; b = value_set_find(sets, b)->key; if (a == b) { /* secretly the same set anyway, could happen in some combinations of * IF, COPY, and DO. */ return a; } if (values[sets[b].idx].start < values[sets[a].idx].start) { /* ensure we're picking the first one to start so that we maintain the * first-definition heap order */ MVMint32 t = a; a = b; b = t; } sets[b].key = a; /* point b to a */ live_range_merge(values + sets[a].idx, values + sets[b].idx); return a; } MVM_STATIC_INLINE void heap_swap(MVMint32 *heap, MVMint32 a, MVMint32 b) { MVMint32 t = heap[a]; heap[a] = heap[b]; heap[b] = t; } /* Functions to maintain a heap of references to the live ranges */ void live_range_heap_down(LiveRange *values, MVMint32 *heap, MVMint32 top, MVMint32 item, MVMint32 (*cmp)(LiveRange *values, MVMint32 a, MVMint32 b)) { while (item < top) { MVMint32 left = item * 2 + 1; MVMint32 right = left + 1; MVMint32 swap; if (right < top) { swap = cmp(values, heap[left], heap[right]) < 0 ? left : right; } else if (left < top) { swap = left; } else { break; } if (cmp(values, heap[swap], heap[item]) < 0) { heap_swap(heap, swap, item); item = swap; } else { break; } } } void live_range_heap_up(LiveRange *values, MVMint32 *heap, MVMint32 item, MVMint32 (*cmp)(LiveRange* values, MVMint32 a, MVMint32 b)) { while (item > 0) { MVMint32 parent = (item-1)/2; if (cmp(values, heap[parent], heap[item]) > 0) { heap_swap(heap, item, parent); item = parent; } else { break; } } } MVMint32 live_range_heap_pop(LiveRange *values, MVMint32 *heap, size_t *top, MVMint32 (*cmp)(LiveRange* values, MVMint32 a, MVMint32 b)) { MVMint32 v = heap[0]; MVMint32 t = --(*top); /* pop by swap and heap-down */ heap[0] = heap[t]; live_range_heap_down(values, heap, t, 0, cmp); return v; } void live_range_heap_push(LiveRange *values, MVMint32 *heap, size_t *top, MVMint32 v, MVMint32 (*cmp)(LiveRange* values, MVMint32 a, MVMint32 b)) { /* NB, caller should use MVM_ENSURE_SPACE prior to calling */ MVMint32 t = (*top)++; heap[t] = v; live_range_heap_up(values, heap, t, cmp); } MVMint32 live_range_heap_peek(LiveRange *values, MVMint32 *heap) { return values[heap[0]].start; } void live_range_heapify(LiveRange *values, MVMint32 *heap, MVMint32 top, MVMint32 (*cmp)(LiveRange* values, MVMint32 a, MVMint32 b)) { MVMint32 i = top, mid = top/2; while (i-- > mid) { live_range_heap_up(values, heap, i, cmp); } } MVMint32 values_cmp_first_ref(LiveRange *values, MVMint32 a, MVMint32 b) { return values[a].start - values[b].start; } MVMint32 values_cmp_last_ref(LiveRange *values, MVMint32 a, MVMint32 b) { return values[a].end - values[b].end; } /* register assignment logic */ #define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0])) #define NEXT_IN_RING(a,x) (((x)+1) == ARRAY_SIZE(a) ? 0 : ((x)+1)) MVMint8 get_register(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitStorageClass reg_cls) { /* ignore storage class for now */ MVMint8 reg_num; reg_num = alc->reg_ring[alc->reg_take]; if (reg_num >= 0) { /* not empty */ alc->reg_ring[alc->reg_take] = -1; /* mark used */ alc->reg_take = NEXT_IN_RING(alc->reg_ring, alc->reg_take); } return reg_num; } void free_register(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitStorageClass reg_cls, MVMint8 reg_num) { if (alc->reg_ring[alc->reg_give] != -1) { MVM_oops(tc, "No space to release register %d to ring", reg_num); } alc->reg_ring[alc->reg_give] = reg_num; alc->reg_give = NEXT_IN_RING(alc->reg_ring, alc->reg_give); } void assign_register(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list, MVMint32 lv, MVMJitStorageClass reg_cls, MVMint8 reg_num) { /* What to do here: * - update tiles using this live range to refer to this register * - update allocator to mark this register as used by this live range */ LiveRange *range = alc->values + lv; ValueRef *ref; MVMint32 i; range->reg_cls = reg_cls; range->reg_num = reg_num; for (ref = range->first; ref != NULL; ref = ref->next) { if (is_arglist_ref(list, ref)) { /* don't assign registers to ARGLIST references, that will never * work */ continue; } else { MVMJitTile *tile = list->items[ref->tile_idx]; tile->values[ref->value_idx] = reg_num; } } for (i = 0; i < 2; i++) { MVMJitTile *tile = range->synthetic[i]; if (tile != NULL) { tile->values[i] = reg_num; } } } MVM_STATIC_INLINE void close_hole(RegisterAllocator *alc, MVMint32 ref, MVMint32 tile_idx) { LiveRange *v = alc->values + ref; if (v->holes && v->holes->start < order_nr(tile_idx)) { v->holes->start = order_nr(tile_idx); _DEBUG("Closed hole in live range %d at %d", ref, order_nr(tile_idx)); } } MVM_STATIC_INLINE void open_hole(RegisterAllocator *alc, MVMint32 ref, MVMint32 tile_idx) { LiveRange *v = alc->values + ref; if (v->start < order_nr(tile_idx) && (v->holes == NULL || v->holes->start > order_nr(tile_idx))) { struct Hole *hole = alc->holes + alc->holes_top++; hole->next = v->holes; hole->start = 0; hole->end = order_nr(tile_idx); v->holes = hole; _DEBUG("Opened hole in live range %d at %d", ref, order_nr(tile_idx)); } } /* Find holes in live ranges, as per Wimmer (2010). This is required only * because the spill-strategy arround CALLs is (sometimes) to load-and-restore, * rather than do a full spill, in the not-so-rare case that many of the live * values will be temporaries and the call is only necessary in a branch */ static void find_holes(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list) { MVMint32 i, j, k; MVMint32 bitmap_size = (alc->values_num >> 5) + 1; /* convenience macros */ #define _BITMAP(_a) (bitmaps + (_a)*bitmap_size) #define _SUCC(_a, _z) (list->blocks[(_a)].succ[(_z)]) MVMBitmap *bitmaps = MVM_calloc(list->blocks_num + 1, sizeof(MVMBitmap) * bitmap_size); /* last bitmap is allocated to hold diff, which is how we know which live * ranges holes potentially need to be closed */ MVMBitmap *diff = _BITMAP(list->blocks_num); for (j = list->blocks_num - 1; j >= 0; j--) { MVMBitmap *live_in = _BITMAP(j); MVMint32 start = list->blocks[j].start, end = list->blocks[j].end; if (list->blocks[j].num_succ == 2) { /* live out is union of successors' live_in */ MVMBitmap *a = _BITMAP(_SUCC(j, 0)), *b = _BITMAP(_SUCC(j, 1)); MVM_bitmap_union(live_in, a, b, bitmap_size); MVM_bitmap_difference(diff, a, b, bitmap_size); for (k = 0; k < bitmap_size; k++) { MVMBitmap additions = diff[k]; while (additions) { MVMint32 bit = MVM_FFS(additions) - 1; MVMint32 val = (k << 6) + bit; close_hole(alc, val, end); MVM_bitmap_delete(&additions, bit); } } } else if (list->blocks[j].num_succ == 1) { memcpy(live_in, _BITMAP(_SUCC(j, 0)), sizeof(MVMBitmap) * bitmap_size); } for (i = end - 1; i >= start; i--) { MVMJitTile *tile = list->items[i]; if (tile->op == MVM_JIT_ARGLIST) { /* list of uses, all very real */ MVMint32 nchild = list->tree->nodes[tile->node + 1]; for (k = 0; k < nchild; k++) { MVMint32 carg = list->tree->nodes[tile->node + 2 + k]; MVMint32 ref = value_set_find(alc->sets, list->tree->nodes[carg + 1])->idx; if (!MVM_bitmap_get(live_in, ref)) { MVM_bitmap_set(live_in, ref); close_hole(alc, ref, i); } } } else if (tile->op == MVM_JIT_IF || tile->op == MVM_JIT_DO || tile->op == MVM_JIT_COPY) { /* not a real use, no work needed here (we already merged them) */ } else { /* If a value is used and defined by the same tile, then the * 'hole' only covers that single tile. The definitions must * therefore be handled before the uses */ if (MVM_JIT_TILE_YIELDS_VALUE(tile)) { MVMint32 ref = value_set_find(alc->sets, tile->node)->idx; open_hole(alc, ref, i); MVM_bitmap_delete(live_in, ref); } for (k = 0; k < tile->num_refs; k++) { if (MVM_JIT_REGISTER_IS_USED(MVM_JIT_REGISTER_FETCH(tile->register_spec, k+1))) { MVMint32 ref = value_set_find(alc->sets, tile->refs[k])->idx; if (!MVM_bitmap_get(live_in, ref)) { MVM_bitmap_set(live_in, ref); close_hole(alc, ref, i); } } } } } } MVM_free(bitmaps); #undef _BITMAP #undef _SUCC } static void determine_live_ranges(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list) { MVMJitExprTree *tree = list->tree; MVMint32 i, j, n; MVMint32 num_phi = 0; /* pessimistic but correct upper bound of number of holes */ alc->sets = MVM_calloc(tree->nodes_num, sizeof(UnionFind)); /* up to 4 refs per tile (1 out, 3 in) plus the number of refs per arglist */ alc->refs = MVM_calloc(list->items_num * 4 + list->num_arglist_refs, sizeof(ValueRef)); alc->refs_num = 0; MVM_VECTOR_INIT(alc->values, list->items_num); MVM_VECTOR_INIT(alc->worklist, list->items_num); for (i = 0; i < list->items_num; i++) { MVMJitTile *tile = list->items[i]; MVMint32 node = tile->node; /* Each of the following counts as either an alias or as a PHI (in case * of IF), and thus these are not actual definitions */ if (tile->op == MVM_JIT_COPY) { MVMint32 ref = tree->nodes[tile->node + 1]; _DEBUG("Unify COPY node (%d -> %d)", tile->node, ref); alc->sets[node].key = ref; /* point directly to actual definition */ } else if (tile->op == MVM_JIT_DO) { MVMint32 nchild = tree->nodes[tile->node + 1]; MVMint32 ref = tree->nodes[tile->node + 1 + nchild]; _DEBUG("Unify COPY DO (%d -> %d)", tile->node, ref); alc->sets[node].key = ref; } else if (tile->op == MVM_JIT_IF) { MVMint32 left_cond = tree->nodes[tile->node + 2]; MVMint32 right_cond = tree->nodes[tile->node + 3]; /* NB; this may cause a conflict in register requirements, in which * case we should resolve it by creating a new live range or inserting * a copy */ alc->sets[node].key = value_set_union(alc->sets, alc->values, left_cond, right_cond); _DEBUG("Merging nodes %d and %d to %d (result key = %d)", left_cond, right_cond, node, alc->sets[node].key); num_phi++; } else if (tile->op == MVM_JIT_ARGLIST) { MVMint32 num_args = tree->nodes[tile->node + 1]; MVMJitExprNode *refs = tree->nodes + tile->node + 2; _DEBUG("Adding %d references to ARGLIST node", num_args); for (j = 0; j < num_args; j++) { MVMint32 carg = refs[j]; MVMint32 value = list->tree->nodes[carg+1]; MVMint32 idx = value_set_find(alc->sets, value)->idx; _DEBUG(" Reference %d", idx); live_range_add_ref(alc, alc->values + idx, i, j + 1); /* include the CALL node into the arglist child range, so we * don't release them too early */ alc->values[idx].end = MAX(alc->values[idx].end, order_nr(i + 1)); } } else { /* create a live range if necessary */ if (MVM_JIT_TILE_YIELDS_VALUE(tile)) { MVMint8 register_spec = MVM_JIT_REGISTER_FETCH(tile->register_spec, 0); MVMint32 idx = live_range_init(alc); alc->sets[node].key = node; alc->sets[node].idx = idx; _DEBUG("Create live range %d (tile=%d, node=%d)", idx,i, node); live_range_add_ref(alc, alc->values + idx, i, 0); if (MVM_JIT_REGISTER_HAS_REQUIREMENT(register_spec)) { alc->values[idx].register_spec = register_spec; } MVM_VECTOR_PUSH(alc->worklist, idx); } /* account for uses */ for (j = 0; j < tile->num_refs; j++) { MVMint8 register_spec = MVM_JIT_REGISTER_FETCH(tile->register_spec, j+1); /* any 'use' register requirements are handled in the allocation step */ if (MVM_JIT_REGISTER_IS_USED(register_spec)) { MVMint32 idx = value_set_find(alc->sets, tile->refs[j])->idx; _DEBUG("Adding reference to live range %d from tile %d", idx, i); live_range_add_ref(alc, alc->values + idx, i, j + 1); } } } if (MVM_JIT_TILE_YIELDS_VALUE(tile) && tree->info[node].opr_type != 0) { LiveRange *range = alc->values + value_set_find(alc->sets, node)->idx; _ASSERT(range->reg_type == 0 || (range->reg_type << 3) == tree->info[node].opr_type, "Register types do not match between value and node"); /* shift to match MVM_reg_types. should arguably be a macro maybe */ range->reg_type = tree->info[node].opr_type >> 3; _DEBUG( "Assigned type: %d to live range %d\n", range->reg_type, range - alc->values); } } if (num_phi > 0) { /* If there are PHI nodes, there will be holes. * The array allocated here will be used to construct linked lists */ alc->holes = MVM_malloc(num_phi * sizeof(struct Hole)); alc->holes_top = 0; find_holes(tc, alc, list); /* eliminate empty values from the worklist */ for (i = 0, j = 0; j < alc->worklist_num; j++) { if (!live_range_is_empty(alc->values + alc->worklist[j])) { alc->worklist[i++] = alc->worklist[j]; } } alc->worklist_num = i; } else { alc->holes = NULL; alc->holes_top = 0; } } /* The code below needs some thinking... */ static void active_set_add(MVMThreadContext *tc, RegisterAllocator *alc, MVMint32 a) { /* the original linear-scan heuristic for spilling is to take the last value * in the set to expire, freeeing up the largest extent of code... that is a * reasonably good heuristic, albeit not essential to the concept of linear * scan. It makes sense to keep the stack ordered at all times (simplest by * use of insertion sort). Although insertion sort is O(n^2), n is never * large in this case (32 for RISC architectures, maybe, if we ever support * them; 7 for x86-64. So the time spent on insertion sort is always small * and bounded by a constant, hence O(1). Yes, algorithmics works this way * :-) */ MVMint32 i; for (i = 0; i < alc->active_top; i++) { MVMint32 b = alc->active[i]; if (alc->values[b].end > alc->values[a].end) { /* insert a before b */ memmove(alc->active + i + 1, alc->active + i, sizeof(MVMint32)*(alc->active_top - i)); alc->active[i] = a; alc->active_top++; return; } } /* append at the end */ alc->active[alc->active_top++] = a; } /* Take live ranges from active_set whose last use was after position and append them to the retired list */ static void active_set_expire(MVMThreadContext *tc, RegisterAllocator *alc, MVMint32 order_nr) { MVMint32 i; for (i = 0; i < alc->active_top; i++) { MVMint32 v = alc->active[i]; MVMint8 reg_num = alc->values[v].reg_num; if (alc->values[v].end > order_nr) { break; } else { _DEBUG("Live range %d is out of scope (last ref %d, %d) and releasing register %d", v, alc->values[v].end, order_nr, reg_num); free_register(tc, alc, MVM_JIT_STORAGE_GPR, reg_num); } } /* shift off the first x values from the live set. */ if (i > 0) { MVM_VECTOR_APPEND(alc->retired, alc->active, i); alc->active_top -= i; if (alc->active_top > 0) { memmove(alc->active, alc->active + i, sizeof(alc->active[0]) * alc->active_top); } } } /* Compute the earliest live range that is still active. */ static MVMint32 earliest_active_value(MVMThreadContext *tc, RegisterAllocator *alc, MVMint32 tile_order_nr) { /* can we cache this, and does it make sense to do so? */ int i; for (i = 0; i < alc->active_top; i++) { tile_order_nr = MIN(tile_order_nr, alc->values[alc->active[i]].start); } return tile_order_nr; } static void active_set_splice(MVMThreadContext *tc, RegisterAllocator *alc, MVMint32 to_splice) { MVMint32 i ; /* find (reverse, because it's usually the last); predecrement alc->active_top because we're removing one item */ for (i = --alc->active_top; i >= 0; i--) { if (alc->active[i] == to_splice) break; } if (i >= 0 && i < alc->active_top) { /* shift out */ memmove(alc->active + i, alc->active + i + 1, sizeof(alc->active[0]) * alc->active_top - i); } } static void spill_set_release(MVMThreadContext *tc, RegisterAllocator *alc, MVMint32 order_nr) { while (alc->spilled_num > 0 && alc->values[alc->spilled[0]].end <= order_nr) { MVMint32 spilled = live_range_heap_pop(alc->values, alc->spilled, &alc->spilled_num, values_cmp_last_ref); LiveRange *value = alc->values + spilled; _DEBUG("VM Register %d for live range %d can be released", value->spill_pos / sizeof(MVMRegister), spilled); MVM_jit_spill_memory_release(tc, alc->compiler, value->spill_pos, value->reg_type); } } static MVMint32 insert_load_before_use(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list, ValueRef *ref, MVMint32 load_pos) { MVMint32 n = live_range_init(alc); MVMJitTile *tile = MVM_jit_tile_make(tc, alc->compiler, MVM_jit_compile_load, 2, 1, MVM_JIT_STORAGE_LOCAL, load_pos, 0); LiveRange *range = alc->values + n; MVM_jit_tile_list_insert(tc, list, tile, ref->tile_idx - 1, +1); /* insert just prior to use */ range->synthetic[0] = tile; range->first = range->last = ref; range->start = order_nr(ref->tile_idx) - 1; range->end = order_nr(ref->tile_idx); return n; } static MVMint32 insert_store_after_definition(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list, ValueRef *ref, MVMint32 store_pos) { MVMint32 n = live_range_init(alc); MVMJitTile *tile = MVM_jit_tile_make(tc, alc->compiler, MVM_jit_compile_store, 2, 2, MVM_JIT_STORAGE_LOCAL, store_pos, 0, 0); LiveRange *range = alc->values + n; MVM_jit_tile_list_insert(tc, list, tile, ref->tile_idx, -1); /* insert just after storage */ range->synthetic[1] = tile; range->first = range->last = ref; range->start = order_nr(ref->tile_idx); range->end = order_nr(ref->tile_idx) + 1; return n; } static MVMint32 select_live_range_for_spill(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list, MVMint32 code_pos) { return alc->active[alc->active_top-1]; } static void live_range_spill(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list, MVMint32 to_spill, MVMint32 spill_pos, MVMint32 code_pos) { MVMint8 reg_spilled = alc->values[to_spill].reg_num; /* loop over all value refs */ _DEBUG("Spilling live range value %d to memory position %d at %d", to_spill, spill_pos, code_pos); while (alc->values[to_spill].first != NULL) { /* make a new live range */ MVMint32 n; /* shift current ref */ ValueRef *ref = alc->values[to_spill].first; alc->values[to_spill].first = ref->next; ref->next = NULL; if (is_arglist_ref(list, ref) && order_nr(ref->tile_idx) > code_pos) { /* Never insert a load before a future ARGLIST; ARGLIST may easily * consume more registers than we have available. Past ARGLISTs have * already been handled, so we do need to insert a load a before * them (or modify in place, but, complex!). */ continue; } else if (is_definition(ref)) { n = insert_store_after_definition(tc, alc, list, ref, spill_pos); } else { n = insert_load_before_use(tc, alc, list, ref, spill_pos); } if (order_nr(ref->tile_idx) < code_pos) { /* in the past, which means we can safely use the spilled register * and immediately retire this live range */ assign_register(tc, alc, list, n, MVM_JIT_STORAGE_GPR, reg_spilled); MVM_VECTOR_PUSH(alc->retired, n); } else { /* in the future, which means we need to add it to the worklist */ MVM_VECTOR_ENSURE_SPACE(alc->worklist, 1); live_range_heap_push(alc->values, alc->worklist, &alc->worklist_num, n, values_cmp_first_ref); } } /* clear value references */ alc->values[to_spill].last = NULL; /* mark as spilled and store the spill position */ alc->values[to_spill].spill_pos = spill_pos; alc->values[to_spill].spill_idx = code_pos; free_register(tc, alc, MVM_JIT_STORAGE_GPR, reg_spilled); MVM_VECTOR_ENSURE_SPACE(alc->spilled, 1); live_range_heap_push(alc->values, alc->spilled, &alc->spilled_num, to_spill, values_cmp_last_ref); } static void prepare_arglist_and_call(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list, MVMint32 arglist_idx, MVMint32 call_idx) { MVMJitTile *arglist_tile = list->items[arglist_idx], *call_tile = list->items[call_idx]; MVMJitExprTree *tree = list->tree; MVMint32 num_args = tree->nodes[arglist_tile->node + 1]; MVMint32 arg_values[16]; MVMJitStorageRef storage_refs[16]; struct { MVMint8 in_reg; /* register number that is to be moved in */ MVMint8 num_out; /* how many values need to make room for me */ } topological_map[MVM_JIT_ARCH_NUM_GPR]; /* reverse map for topological sort of moves */ struct { MVMint8 reg_num; MVMint8 stack_pos; } stack_transfer[16]; MVMint32 stack_transfer_top = 0; MVMint8 transfer_queue[16]; MVMint32 transfer_queue_idx, transfer_queue_top = 0, transfers_required = 0; MVMint8 spilled_args[16]; MVMint32 spilled_args_top = 0; MVMBitmap call_bitmap = 0, arg_bitmap = 0; MVMint8 spare_register; MVMint32 i, j, ins_pos = 2; /* get storage positions for arglist */ MVM_jit_arch_storage_for_arglist(tc, alc->compiler, tree, arglist_tile->node, storage_refs); /* get value refs for arglist */ for (i = 0; i < num_args; i++) { MVMint32 carg = tree->nodes[arglist_tile->node + 2 + i]; /* may refer to spilled live range */ arg_values[i] = value_set_find(alc->sets, tree->nodes[carg + 1])->idx; } _DEBUG("prepare_call: Got %d args", num_args); /* initialize topological map, use -1 as 'undefined' inboud value */ for (i = 0; i < ARRAY_SIZE(topological_map); i++) { topological_map[i].num_out = 0; topological_map[i].in_reg = -1; } for (i = 0, j = 0; i < alc->active_top; i++) { LiveRange *v = alc->values + alc->active[i]; MVMint32 code_pos = order_nr(call_idx); if (v->end > code_pos && live_range_has_hole(v, code_pos) == NULL) { /* surviving values need to be spilled */ MVMint32 spill_pos = MVM_jit_spill_memory_select(tc, alc->compiler, v->reg_type); /* spilling at the CALL idx will mean that the spiller inserts a * LOAD at the current register before the ARGLIST, meaning it * remains 'live' for this ARGLIST */ _DEBUG("Spilling %d to %d at %d", alc->active[i], spill_pos, code_pos); live_range_spill(tc, alc, list, alc->active[i], spill_pos, code_pos); } else { /* compact the active set */ alc->active[j++] = alc->active[i]; } } alc->active_top = j; for (i = 0; i < num_args; i++) { LiveRange *v = alc->values + arg_values[i]; if (v->spill_idx < order_nr(call_idx)) { /* spilled prior to the ARGLIST/CALL */ spilled_args[spilled_args_top++] = i; } else if (storage_refs[i]._cls == MVM_JIT_STORAGE_GPR) { MVMint8 reg_num = storage_refs[i]._pos; if (reg_num != v->reg_num) { _DEBUG("Transfer Rq(%d) -> Rq(%d)", reg_num, v->reg_num); topological_map[reg_num].in_reg = v->reg_num; topological_map[v->reg_num].num_out++; transfers_required++; } else { _DEBUG("Transfer Rq(%d) not required", reg_num); } } else if (storage_refs[i]._cls == MVM_JIT_STORAGE_STACK) { /* enqueue for stack transfer */ stack_transfer[stack_transfer_top].reg_num = v->reg_num; stack_transfer[stack_transfer_top].stack_pos = storage_refs[i]._pos; stack_transfer_top++; /* count the outbound edge */ topological_map[v->reg_num].num_out++; } else { NYI(this_storage_class); } /* set bitmap */ if (storage_refs[i]._cls == MVM_JIT_STORAGE_GPR) { MVMint8 reg_num = storage_refs[i]._pos; MVM_bitmap_set(&arg_bitmap, reg_num); } } _DEBUG("%d transfers required", transfers_required); #define INSERT_TILE(_tile, _pos, _order) MVM_jit_tile_list_insert(tc, list, _tile, _pos, _order) #define INSERT_NEXT_TILE(_tile) INSERT_TILE(_tile, arglist_idx, ins_pos++) #define MAKE_TILE(_code, _narg, _nval, ...) MVM_jit_tile_make(tc, alc->compiler, MVM_jit_compile_ ## _code, _narg, _nval, __VA_ARGS__) #define INSERT_MOVE(_a, _b) INSERT_NEXT_TILE(MAKE_TILE(move, 0, 2, _a, _b)) #define INSERT_COPY_TO_STACK(_s, _r) INSERT_NEXT_TILE(MAKE_TILE(store, 2, 2, MVM_JIT_STORAGE_STACK, _s, 0, _r)) #define INSERT_LOAD_LOCAL(_r, _l) INSERT_NEXT_TILE(MAKE_TILE(load, 2, 1, MVM_JIT_STORAGE_LOCAL, _l, _r)) #define INSERT_LOCAL_STACK_COPY(_s, _l) \ INSERT_NEXT_TILE(MAKE_TILE(memory_copy, 4, 2, MVM_JIT_STORAGE_STACK, _s, MVM_JIT_STORAGE_LOCAL, _l, 0, spare_register)) #define ENQUEUE_TRANSFER(_r) do { \ MVMint8 _c = (_r); \ if (--(topological_map[(_c)].num_out) == 0 && \ topological_map[(_c)].in_reg >= 0) { \ transfer_queue[transfer_queue_top++] = _c; \ } \ } while(0) /* resolve conflicts for CALL; since we're spilling any surviving bits, * we can just move it to any free registers. */ for (i = 0; i < call_tile->num_refs; i++) { MVMint8 spec = MVM_JIT_REGISTER_FETCH(call_tile->register_spec, i + 1); if (MVM_JIT_REGISTER_IS_USED(spec)) { MVMint8 reg = call_tile->values[i+1]; MVM_bitmap_set(&call_bitmap, reg); } } while (call_bitmap & arg_bitmap) { MVMuint32 free_reg = ~(call_bitmap | arg_bitmap | NVR_GPR_BITMAP); /* FFS counts registers starting from 1 */ MVMuint8 src = MVM_FFS(call_bitmap & arg_bitmap) - 1; MVMuint8 dst = MVM_FFS(free_reg) - 1; _ASSERT(free_reg != 0, "JIT: need to move a register but nothing is free"); /* add edge */ topological_map[dst].in_reg = src; topological_map[src].num_out++; /* update bitmap */ MVM_bitmap_delete(&call_bitmap, src); MVM_bitmap_set(&call_bitmap, dst); /* update CALL args */ for (i = 0; i < call_tile->num_refs; i++) { if (call_tile->values[i+1] == src) { call_tile->values[i+1] = dst; } } } /* at this point, all outbound edges have been created, and none have been * processed yet, so we can eqnueue all 'free' transfers */ for (i = 0; i < ARRAY_SIZE(topological_map); i++) { if (topological_map[i].num_out == 0 && topological_map[i].in_reg >= 0) { _DEBUG("Directly transfer %d -> %d", topological_map[i].in_reg, i); transfer_queue[transfer_queue_top++] = i; } } /* with call_bitmap and arg_bitmap given, we can determine the spare * register used for allocation; NB this may only be necessary in some * cases */ spare_register = MVM_FFS(~(call_bitmap | arg_bitmap | NVR_GPR_BITMAP)) - 1; _ASSERT(spare_register >= 0, "JIT: No spare register for moves"); for (i = 0; i < stack_transfer_top; i++) { MVMint8 reg_num = stack_transfer[i].reg_num; MVMint8 stk_pos = stack_transfer[i].stack_pos; INSERT_COPY_TO_STACK(stk_pos, reg_num); _DEBUG("Insert stack parameter: Rq(%d) -> [rsp+%d]", reg_num, stk_pos); ENQUEUE_TRANSFER(reg_num); } for (transfer_queue_idx = 0; transfer_queue_idx < transfer_queue_top; transfer_queue_idx++) { MVMint8 dst = transfer_queue[transfer_queue_idx]; MVMint8 src = topological_map[dst].in_reg; _ASSERT(src >= 0, "No inboud edge (sort)"); _DEBUG("Insert move (toposort): Rq(%d) -> Rq(%d)", src, dst); INSERT_MOVE(dst, src); ENQUEUE_TRANSFER(src); } if (transfer_queue_top < transfers_required) { /* suppose we have a cycle of transfers: a -> b -> c -> a; * since we only keep the one inbound register as a reference, the chain * is really: * a -> c -> b -> a * We can 'break' this cycle by walking the chain in this order, first * moving 'a' out of thee way into a spare register, then moving c to a, * b to c, and finally moving the spare register to 'b' */ for (i = 0; i < MVM_JIT_ARCH_NUM_GPR; i++) { if (topological_map[i].num_out > 0) { MVMint8 src, dst; INSERT_MOVE(spare_register, i); _ASSERT(--(topological_map[i].num_out) == 0, "More than one outbound edge in cycle"); for (dst = i, src = topological_map[i].in_reg; src != i; dst = src, src = topological_map[src].in_reg) { _ASSERT(src >= 0, "No inbound edge (cycle breaking)"); INSERT_MOVE(dst, src); _ASSERT(--(topological_map[src].num_out) == 0, "More than one outbound edge in cycle"); } INSERT_MOVE(dst, spare_register); } } } /* now all that remains is to deal with spilled values */ for (i = 0; i < spilled_args_top; i++) { MVMint32 arg = spilled_args[i]; LiveRange *v = alc->values + arg_values[arg]; if (storage_refs[arg]._cls == MVM_JIT_STORAGE_GPR) { _DEBUG("Loading spilled value to Rq(%d) from [rbx+%d]", storage_refs[arg]._pos, v->spill_pos); INSERT_LOAD_LOCAL(storage_refs[arg]._pos, v->spill_pos); } else if (storage_refs[arg]._cls == MVM_JIT_STORAGE_STACK) { INSERT_LOCAL_STACK_COPY(storage_refs[arg]._pos, v->spill_pos); } else { NYI(storage_class); } } } MVM_STATIC_INLINE void process_tile(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list, MVMint32 tile_cursor) { MVMJitTile *tile = list->items[tile_cursor]; if (tile->op == MVM_JIT_ARGLIST) { MVMint32 arglist_idx = tile_cursor; MVMint32 call_idx = tile_cursor + 1; _ASSERT(call_idx < list->items_num && (list->items[call_idx]->op == MVM_JIT_CALL || list->items[call_idx]->op == MVM_JIT_CALLV), "ARGLIST tiles must be followed by CALL"); } else if (tile->op == MVM_JIT_CALL || tile->op == MVM_JIT_CALLV) { MVMint32 arglist_idx = tile_cursor - 1; MVMint32 call_idx = tile_cursor; _ASSERT(tile_cursor > 0 && list->items[tile_cursor - 1]->op == MVM_JIT_ARGLIST, "CALL must be preceded by ARGLIST"); /* * CALL nodes can use values in registers, for example for dynamic * calls. These registers may conflict with registers used in ARGLIST, * in which case prepare_arglist_and_call will move the values to a free * register and update the call tile. * * However, as regular register-allocated values, the selected register * may be allocated for a synthetic LOAD tile after it had previously * been spilled. To ensure that allocation for the synthetic tile does * not overwrite the free register picked by the resolution code, we * must be sure that prepare_arglist_and_call will be run *after* all * registers have been allocated for the values used by the CALL tile. * * That is why prepare_arglist_and_call, which handles BOTH tiles, is * called for the CALL tile and not for the ARGLIST tile. */ prepare_arglist_and_call(tc, alc, list, arglist_idx, call_idx); } else { MVMint32 i; /* deal with 'use' registers */ for (i = 1; i < tile->num_refs; i++) { MVMint8 spec = MVM_JIT_REGISTER_FETCH(tile->register_spec, i); if (MVM_JIT_REGISTER_IS_USED(spec) && MVM_JIT_REGISTER_HAS_REQUIREMENT(spec)) { /* we could use the register map here, but let's wait and see */ NYI(tile_use_requirements); } } } } static void process_live_range(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list, MVMint32 v) { MVMint8 reg; MVMint32 tile_order_nr = alc->values[v].start; if (MVM_JIT_REGISTER_HAS_REQUIREMENT(alc->values[v].register_spec)) { reg = MVM_JIT_REGISTER_REQUIREMENT(alc->values[v].register_spec); if (MVM_bitmap_get((MVMBitmap*)&NVR_GPR_BITMAP, reg)) { assign_register(tc, alc, list, v, MVM_JIT_STORAGE_NVR, reg); } else { /* TODO; might require swapping / spilling */ NYI(general_purpose_register_spec); } } else { while ((reg = get_register(tc, alc, MVM_JIT_STORAGE_GPR)) < 0) { /* choose a live range, a register to spill, and a spill location */ MVMint32 to_spill = select_live_range_for_spill(tc, alc, list, tile_order_nr); MVMint32 spill_pos = MVM_jit_spill_memory_select(tc, alc->compiler, alc->values[to_spill].reg_type); active_set_splice(tc, alc, to_spill); _DEBUG("Spilling live range %d at %d to %d to free up a register", to_spill, tile_order_nr, spill_pos); live_range_spill(tc, alc, list, to_spill, spill_pos, tile_order_nr); } assign_register(tc, alc, list, v, MVM_JIT_STORAGE_GPR, reg); active_set_add(tc, alc, v); } } static void linear_scan(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list) { MVMint32 tile_cursor = 0; MVM_VECTOR_INIT(alc->retired, alc->worklist_num); MVM_VECTOR_INIT(alc->spilled, 8); _DEBUG("STARTING LINEAR SCAN: %d/%d", tc->instance->jit_seq_nr, list->tree->seq_nr); /* loop over all tiles and peek on the value heap */ while (tile_cursor < list->items_num) { if (alc->worklist_num > 0 && live_range_heap_peek(alc->values, alc->worklist) < order_nr(tile_cursor)) { /* we've processed all tiles prior to this live range */ MVMint32 live_range = live_range_heap_pop(alc->values, alc->worklist, &alc->worklist_num, values_cmp_first_ref); MVMint32 tile_order_nr = alc->values[live_range].start; _DEBUG("Processing live range %d (first ref %d, last ref %d)", live_range, alc->values[live_range].start, alc->values[live_range].end); active_set_expire(tc, alc, tile_order_nr); /* We can release the spill slot only if there is no more active * live range overlapping with its extent. Otherwise, when we reuse * the slot, we risk overwriting a useful value. * * We pass the current tile_order_nr as the upper bound (e.g. when * there may be no active live ranges, slots will still be useful if * they have later uses */ spill_set_release(tc, alc, earliest_active_value(tc,alc, tile_order_nr)); process_live_range(tc, alc, list, live_range); } else { /* still have tiles to process, increment cursor */ process_tile(tc, alc, list, tile_cursor++); } } /* flush active live ranges */ active_set_expire(tc, alc, order_nr(list->items_num) + 1); spill_set_release(tc, alc, order_nr(list->items_num) + 1); _DEBUG("END OF LINEAR SCAN%s","\n"); } void MVM_jit_linear_scan_allocate(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMJitTileList *list) { RegisterAllocator alc; /* initialize allocator */ alc.compiler = compiler; /* restart spill stack */ alc.active_top = 0; memset(alc.active, -1, sizeof(alc.active)); alc.reg_give = alc.reg_take = 0; memcpy(alc.reg_ring, available_gpr, sizeof(available_gpr)); /* run algorithm */ determine_live_ranges(tc, &alc, list); linear_scan(tc, &alc, list); /* deinitialize allocator */ MVM_free(alc.sets); MVM_free(alc.refs); MVM_free(alc.holes); MVM_free(alc.values); MVM_free(alc.worklist); MVM_free(alc.retired); MVM_free(alc.spilled); /* make edits effective */ MVM_jit_tile_list_edit(tc, list); }