view src/strings/unicode_ops.c @ 20:ae67093f0e62

fix code segment
author Takahiro SHIMIZU <anatofuz@cr.ie.u-ryukyu.ac.jp>
date Tue, 30 Oct 2018 18:40:24 +0900
parents 2cf249471370
children
line wrap: on
line source

/* Compares two strings, using the Unicode Collation Algorithm
 * Return values:
 *    0   The strings are identical for the collation levels requested
 * -1/1   String a is less than string b/String a is greater than string b
 *
 * `collation_mode` acts like a bitmask. Each of primary, secondary and tertiary
 * collation levels can be either: disabled, enabled, reversed.
 * In the table below, where + designates sorting normal direction and
 * - indicates reversed sorting for that collation level.
 *
 * Collation level | bitmask value
 *        Primary+ |   1
 *        Primary- |   2
 *      Secondary+ |   4
 *      Secondary- |   8
 *       Tertiary+ |  16
 *       Tertiary- |  32
 *     Quaternary+ |  64
 *     Quaternary- | 128
 */
/* Finds the lowest codepoint of the next subnode. If there's no next subnode,
 * returns -1 */
#define MVM_COLLATION_PRIMARY_POSITIVE      1
#define MVM_COLLATION_PRIMARY_NEGATIVE      2
#define MVM_COLLATION_SECONDARY_POSITIVE    4
#define MVM_COLLATION_SECONDARY_NEGATIVE    8
#define MVM_COLLATION_TERTIARY_POSITIVE    16
#define MVM_COLLATION_TERTIARY_NEGATIVE    32
#define MVM_COLLATION_QUATERNARY_POSITIVE  64
#define MVM_COLLATION_QUATERNARY_NEGATIVE 128

MVM_STATIC_INLINE MVMint64 next_node_min (sub_node node) {
    return node.sub_node_elems
        ? main_nodes[node.sub_node_link].codepoint
        : -1;
}
/* Finds the highest codepoint of the next subnode. If there's no next subnode,
 * returns -1 */
MVM_STATIC_INLINE MVMint64 next_node_max (sub_node node) {
    return node.sub_node_elems
        ? main_nodes[node.sub_node_link + node.sub_node_elems - 1].codepoint
        : -1;
}
typedef union collation_key_u collation_key;
struct collation_stack {
    collation_key *keys;
    MVMint64 stack_top;
    MVMint64 stack_size;
};
typedef struct collation_stack collation_stack;

struct collation_key_s {
    MVMuint32 primary, secondary, tertiary, index;
};
union collation_key_u {
    struct collation_key_s s;
    MVMuint32              a[4];
};
struct level_eval_s2 {
    MVMint32 Less, Same, More;
};
union level_eval_u2 {
    MVMint32 a2[4];
    struct level_eval_s2 s2;
};
struct level_eval_s {
    union level_eval_u2 primary, secondary, tertiary, quaternary;
};
union level_eval_u {
    struct level_eval_s s;
    union  level_eval_u2 a[3];
};
typedef union level_eval_u level_eval;
#define initial_stack_size 100
#define collation_zero 1
struct ring_buffer {
    MVMCodepoint codes[codepoint_sequence_no_max];
    MVMuint32    count;
    MVMint32  location;
    MVMCodepoint codes_out[codepoint_sequence_no_max];
    MVMuint32    codes_out_count;
};
typedef struct ring_buffer ring_buffer;
#ifdef COLLATION_DEBUG
    static void print_sub_node (sub_node subnode) {
        MVMint64 min    = next_node_min(subnode);
        char * min_sign = min < 0 ? "-" : "";
        MVMint64 max    = next_node_max(subnode);
        char * max_sign = max < 0 ? "-" : "";
        max = max < 0 ? -max : max;
        min = min < 0 ? -min : min;
        fprintf(stderr,
            "{codepoint 0x%X, next_node_min %s0x%"PRIX64", next_node_max %s0x%"PRIX64", "
                "sub_node_elems %i, sub_node_link %i, "
                    "collation_key_elems %i, collation_key_link %i}\n",
            subnode.codepoint, min_sign, min, max_sign, max,
                subnode.sub_node_elems, subnode.sub_node_link,
                    subnode.collation_key_elems, subnode.collation_key_link);
    }
    static void print_stack (MVMThreadContext *tc, collation_stack *stack, char *name, char *details) {
        int i = 0;
        fprintf(stderr, "stack_%s ā€œ%sā€ print_stack() stack elems: %li\n", name, details, stack->stack_top + 1);
        for (i = 0; i < stack->stack_top + 1; i++) {
            fprintf(stderr, "stack_%s i: %i [%.4X.%.4X.%.4X]\n", name, i, stack->keys[i].s.primary, stack->keys[i].s.secondary, stack->keys[i].s.tertiary);
            if (30 < i) {
                fprintf(stderr, "Not printing any more of the stack. Too large\n");
                break;
            }
        }
    }
    static void print_ring_buffer(MVMThreadContext *tc, ring_buffer *buffer) {
        MVMint64 i;
        fprintf(stderr, "Buffer: count: %"PRIu32" location %"PRIi32"\nBuffer contents: ", buffer->count, buffer->location);
        for (i = 0; i < buffer->count && i < codepoint_sequence_no_max; i++) {
            fprintf(stderr, "i: %"PRIi64": cp: 0x%X ", i, buffer->codes[i]);
            if (30 < i) {
                fprintf(stderr, "Not printing any more of the buffer. Too large\n");
                break;
            }
        }
        fprintf(stderr, "\n");
    }
    #define DEBUG_COLLATION_MODE_PRINT(level_eval_settings) {\
        fprintf(stderr, "Setting collation_mode: %li\nSetting primary   {%i,%i,%i}\n"\
            "Setting secondary {%i,%i,%i}\nSetting tertiary  {%i,%i,%i}\n", collation_mode,\
            level_eval_settings.a[0].a2[0], level_eval_settings.a[0].a2[1], level_eval_settings.a[0].a2[2],\
            level_eval_settings.a[1].a2[0], level_eval_settings.a[1].a2[1], level_eval_settings.a[1].a2[2],\
            level_eval_settings.a[2].a2[0], level_eval_settings.a[2].a2[1], level_eval_settings.a[2].a2[2]);\
    }
    #define DEBUG_PRINT_SPECIAL_PUSHED(what, name, cp) fprintf(stderr, "Special Pushed 0x%X %s onto stack_%s\n", cp, what, name);
    #define DEBUG_PRINT_SUB_NODE(subnode) print_sub_node(subnode);
    #define DEBUG_SPECIAL_PUSHED(block_pushed, name) block_pushed = name;
    #define DEBUG_PRINT_STACK(tc, stack, name, details) print_stack(tc, stack, name, details);
    #define DEBUG_PRINT_RING_BUFFER(tc, buffer) print_ring_buffer(tc, buffer);
    #define DEBUG_PRINT(...) fprintf (stderr, __VA_ARGS__)
#else
    #define DEBUG_PRINT_SUB_NODE(subnode)
    #define DEBUG_PRINT_STACK(tc, stack, name, details)
    #define DEBUG_SPECIAL_PUSHED(block_pushed, name)
    #define DEBUG_PRINT_SPECIAL_PUSHED(what, name, cp)
    #define DEBUG_PRINT_RING_BUFFER(tc, buffer)
    #define DEBUG_COLLATION_MODE_PRINT(level_eval_settings)
    #define DEBUG_PRINT(...)
#endif
MVMint32 MVM_unicode_collation_primary (MVMThreadContext *tc, MVMint32 codepoint) {
     return MVM_unicode_codepoint_get_property_int(tc, codepoint, MVM_UNICODE_PROPERTY_MVM_COLLATION_PRIMARY);
}
MVMint32 MVM_unicode_collation_secondary (MVMThreadContext *tc, MVMint32 codepoint) {
     return MVM_unicode_codepoint_get_property_int(tc, codepoint, MVM_UNICODE_PROPERTY_MVM_COLLATION_SECONDARY);
}
MVMint32 MVM_unicode_collation_tertiary (MVMThreadContext *tc, MVMint32 codepoint) {
     return MVM_unicode_codepoint_get_property_int(tc, codepoint, MVM_UNICODE_PROPERTY_MVM_COLLATION_TERTIARY);
}
MVMint32 MVM_unicode_collation_quickcheck (MVMThreadContext *tc, MVMint32 codepoint) {
    return MVM_unicode_codepoint_get_property_int(tc, codepoint, MVM_UNICODE_PROPERTY_MVM_COLLATION_QC);
}
static MVMint64 collation_push_cp (MVMThreadContext *tc, collation_stack *stack, MVMCodepointIter *ci, int *cp_maybe, int cp_num, char *name);
static void init_stack (MVMThreadContext *tc, collation_stack *stack) {
    stack->keys       = MVM_malloc(sizeof(collation_key) * initial_stack_size);
    stack->stack_top  = -1;
    stack->stack_size = initial_stack_size;
}
static void cleanup_stack (MVMThreadContext *tc, collation_stack *stack) {
    if (stack->keys != NULL) {
        MVM_free(stack->keys);
        stack->keys = NULL;
    }
}
static void push_key_to_stack(collation_stack *stack, MVMuint32 primary, MVMuint32 secondary, MVMuint32 tertiary) {\
    stack->stack_top++;
    if (stack->stack_size <= stack->stack_top) {
        stack->keys = MVM_realloc(stack->keys,
            (stack->stack_size + initial_stack_size) * sizeof(collation_stack));
        stack->stack_size += initial_stack_size;
    }
    stack->keys[stack->stack_top].s.primary   = primary;
    stack->keys[stack->stack_top].s.secondary = secondary;
    stack->keys[stack->stack_top].s.tertiary  = tertiary;
}
static MVMint64 collation_push_level_separator (MVMThreadContext *tc, collation_stack *stack, char *name) {
    push_key_to_stack(stack, 0, 0, 0);
    DEBUG_PRINT_STACK(tc, stack, name, "After collation_push_level_separator()");
    return 1;
}
/* Pushes collation keys from a collation_key struct and adds 1 to each level. (This is for places where
 * we store the native DUCET values and we add one because values on the stack are one more) */
static MVMint64 push_onto_stack (MVMThreadContext *tc, collation_stack *stack, collation_key *keys, int keys_to_push, char *name) {
    int j;
    DEBUG_PRINT_STACK(tc, stack, name, "push_onto_stack() Before");
    for (j = 0; j < keys_to_push; j++)
        push_key_to_stack(stack, keys[j].s.primary + 1, keys[j].s.secondary + 1, keys[j].s.tertiary + 1);

    DEBUG_PRINT_STACK(tc, stack, name, "push_onto_stack() After");
    return 1;
}
/* TODO write a script to generate this code */
MVM_STATIC_INLINE MVMint32 compute_AAAA(MVMCodepoint cp, int offset) {
    return (offset + (cp >> 15));
}
MVM_STATIC_INLINE MVMint32 compute_BBBB_offset(MVMCodepoint cp, int offset) {
    return ((cp - offset) | 0x8000);
}
MVM_STATIC_INLINE MVMint32 compute_BBBB_and(MVMCodepoint cp) {
    return ((cp & 0x7FFF) | 0x8000);
}
#define initial_collation_norm_buf_size 5
static MVMint32 NFD_and_push_collation_values (MVMThreadContext *tc, MVMCodepoint cp, collation_stack *stack, MVMCodepointIter *ci, char *name) {
    MVMNormalizer norm;
    MVMCodepoint cp_out;
    MVMint32 ready,
             result_pos  = 0;
    MVMCodepoint *result = MVM_malloc(sizeof(MVMCodepoint) * initial_collation_norm_buf_size);
    MVMint32 result_size = initial_collation_norm_buf_size;
    MVMint64 rtrn        = 0;
    MVM_unicode_normalizer_init(tc, &norm, MVM_NORMALIZE_NFD);
    ready = MVM_unicode_normalizer_process_codepoint(tc, &norm, cp, &cp_out);
    if (ready) {
        if (result_size <= result_pos + ready)
            result = MVM_realloc(result, sizeof(MVMCodepoint) * (result_size += initial_collation_norm_buf_size));
        result[result_pos++] = cp_out;
        while (0 < --ready)
            result[result_pos++] = MVM_unicode_normalizer_get_codepoint(tc, &norm);
    }
    MVM_unicode_normalizer_eof(tc, &norm);
    ready = MVM_unicode_normalizer_available(tc, &norm);
    while (ready--) {
        if (result_size <= result_pos + ready + 1)
            result = MVM_realloc(result, sizeof(MVMCodepoint) * (result_size += initial_collation_norm_buf_size));
        result[result_pos++] = MVM_unicode_normalizer_get_codepoint(tc, &norm);
    }
    /* If the codepoint changed or we now have more than before */
    if (result[0] != cp || 1 < result_pos)
        rtrn = collation_push_cp(tc, stack, ci, result, result_pos, name);
    if (result)
        MVM_free(result);
    return rtrn;
}
/* Returns the number of collation elements pushed onto the stack */
static MVMint32 collation_push_MVM_values (MVMThreadContext *tc, MVMCodepoint cp, collation_stack *stack, MVMCodepointIter *ci, char *name) {
    collation_key MVM_coll_key = {
        MVM_unicode_collation_primary(tc, cp), MVM_unicode_collation_secondary(tc, cp), MVM_unicode_collation_tertiary(tc, cp), 0
    };
    /* For some reason some Tangut Block items return values, so test that too.
     * Eventually we might want to restructure this code here */
    if (is_Block_Tangut(cp) || MVM_coll_key.s.primary <= 0 || MVM_coll_key.s.secondary <= 0 || MVM_coll_key.s.tertiary <= 0) {
        MVMuint32 AAAA, BBBB;
        char *block_pushed = NULL;
        collation_key calculated_key[2] = {
            {0, 0x20, 0x2, 0},
            {0, 0x00, 0x0, 0}
        };
        /* Block=Tangut+Block=Tangut_Components 0x17000..0x18AFF */
        if (is_Block_Tangut(cp)) {
            AAAA = 0xFB00;
            BBBB = compute_BBBB_offset(cp, 0x17000);
            DEBUG_SPECIAL_PUSHED(block_pushed, "Block_Tangut_and_Tangut_Components");
        }
        /* Assigned_Block=Nushu 0x1B170..1B2FF (*/
        else if (is_Assigned_Block_Nushu(cp)) {
            AAAA = 0xFB01;
            BBBB = compute_BBBB_offset(cp, 0x1B170);
            DEBUG_SPECIAL_PUSHED(block_pushed, "Assigned_Block_Nushu");
        }
        /* Unified_Ideograph=True */
        else if (is_unified_ideograph(cp)) {
            if (is_Block_CJK_Unified_Ideographs_OR_CJK_Compatibility_Ideographs(cp)) {
                AAAA = compute_AAAA(cp, 0xFB40);
                BBBB = compute_BBBB_and(cp);
                DEBUG_SPECIAL_PUSHED(block_pushed, "Ideograph_CJK_Compatibility_OR_Unified");
            }
            /* All other Unified_Ideograph's */
            else {
                AAAA = compute_AAAA(cp, 0xFB80);
                BBBB = compute_BBBB_and(cp);
                DEBUG_SPECIAL_PUSHED(block_pushed, "Ideograph_NOT_CJK_Compatibility_OR_Unified");
            }
        }
        else {
            MVMint32 NFD_rtrn = NFD_and_push_collation_values(tc, cp, stack, ci, name);
            if (NFD_rtrn) {
                return NFD_rtrn;
            }
            else {
                AAAA = compute_AAAA(cp, 0xFBC0);
                BBBB = compute_BBBB_and(cp);
                DEBUG_SPECIAL_PUSHED(block_pushed, "Unassigned");
            }
        }
        calculated_key[0].s.primary = AAAA;
        calculated_key[1].s.primary = BBBB;
        DEBUG_PRINT_SPECIAL_PUSHED(block_pushed, name, cp);
        push_onto_stack(tc, stack, calculated_key, 2, name);
        return 2;
    }
    else {
        push_key_to_stack(stack,
            MVM_coll_key.s.primary,
            MVM_coll_key.s.secondary,
            MVM_coll_key.s.tertiary);
        return 1;
    }
}
/* This is passed the terminal node and it adds the collation elements linked from
 * that node to the collation stack
 * Returns:
    1 collation elements from last_node were used
    0 collation elements from the first node were used, or it fell back and used collation_push_MVM_values
 * Essentially the return value lets you know if it ended up pushing collation values for the last codepoint
 * in the sequence or if it only pushed collation values for fallback_cp
*/
MVMint64 collation_add_keys_from_node (MVMThreadContext *tc, sub_node *last_node, collation_stack *stack, MVMCodepointIter *ci, char *name, MVMCodepoint fallback_cp, sub_node *first_node) {
    MVMint64 j;
    MVMint64 rtrn = 0;
    sub_node *choosen_node = NULL;
    /* If there are any collation elements */
    if (last_node && last_node->collation_key_elems) {
        choosen_node = last_node;
        rtrn = 1;
    }
    else if (first_node && first_node->collation_key_elems) {
        choosen_node = first_node;
    }
    if (choosen_node) {
        for (j = choosen_node->collation_key_link;
             j < choosen_node->collation_key_link + choosen_node->collation_key_elems;
             j++) {
            push_key_to_stack(stack, special_collation_keys[j].primary   + 1,
                special_collation_keys[j].secondary + 1,
                special_collation_keys[j].tertiary  + 1
            );
        }
        return rtrn;
    }
    /* Terminal node doesn't have any collation data. Fall back to using collation_push_MVM_values() */
    collation_push_MVM_values(tc, fallback_cp, stack, ci, name);
    return rtrn;
}
MVMint64 find_next_node (MVMThreadContext *tc, sub_node node, MVMCodepoint next_cp) {
    MVMint64 next_min, next_max;
    MVMint64 i;
    /* There is nowhere else to go */
    if (!node.sub_node_elems)
        return -1;
    next_min = next_node_min(node);
    next_max = next_node_max(node);
    /* It's not within bounds */
    if (next_cp < next_min || next_max < next_cp)
        return -1;
    for (i = node.sub_node_link; i < node.sub_node_link + node.sub_node_elems; i++) {
        if (main_nodes[i].codepoint == next_cp)
            return i;
    }
    return -1;
}
MVMint64 get_main_node (MVMThreadContext *tc, int cp, int range_min, int range_max) {
    MVMint64 i;
    MVMint64 rtrn = -1;
    int counter   = 0;
    /* Decrement range_min because binary search defaults to 1..* not 0..* */
    range_min--;
    /* starter_main_nodes_elems are all the nodes which are the origin nodes
     * searches using binary search */
    /* Start range_min at -1 since the lowest node we have to find is at 0, not
     * 1 (needed for binary search to work) */
    for (range_min = -1, range_max = starter_main_nodes_elems; 1 < range_max - range_min;) {
        i = (range_min + range_max) / 2;
        if (cp  <= main_nodes[i].codepoint)
            range_max = i;
        else range_min = i;
    }
    /* Final check is here. If we found it, it will match, otherwise not */
    if (main_nodes[range_max].codepoint == cp)
        rtrn = range_max;
    return rtrn;
}
/* Returns the number of added collation keys */
static MVMint64 collation_push_cp (MVMThreadContext *tc, collation_stack *stack, MVMCodepointIter *ci, int *cp_maybe, int cp_num, char *name) {
    MVMint64 rtrn = 0;
    MVMCodepoint cps[10];
    MVMint64 num_cps_processed = 0;
    int query = -1;
    int cp_num_orig = cp_num;
    /* If supplied -1 that means we need to grab it from the codepoint iterator. Otherwise
     * the value we were passed is the codepoint we should process */
    if (cp_num == 0) {
        cps[0] = MVM_string_ci_get_codepoint(tc, ci);
        cp_num = 1;
    }
    else {
        MVMint32 i;
        for (i = 0; i < cp_num; i++) {
            cps[i] = cp_maybe[i];
        }
    }
    query = get_main_node(tc, cps[0], 0, starter_main_nodes_elems);
    if (query != -1) {
        DEBUG_PRINT_SUB_NODE(main_nodes[query]);
        /* If there are no sub_node_elems that means we don't need to look at
         * the next codepoint, we are already at the correct node
         * If there's no more codepoints in the iterator we also are done here */
        if (main_nodes[query].sub_node_elems < 1 || cp_num < 2 && !MVM_string_ci_has_more(tc, ci)) {
            collation_add_keys_from_node(tc, NULL, stack, ci, name, cps[0],  &main_nodes[query]);
            num_cps_processed++;
        }
        /* Otherwise we need to check the next codepoint(s) (0 < sub_node_elems) */
        else {
            MVMint64 last_good_i      =  0,
                     last_good_result = -1;
            MVMint64 i, result = query;
            DEBUG_PRINT_SUB_NODE(main_nodes[query]);
            for (i = 0; result != -1 && MVM_string_ci_has_more(tc, ci) && i < 10;) {
                i++;
                /* Only grab a codepoint if it doesn't already exist in the array */
                if (cp_num <= i) {
                    cps[i] = MVM_string_ci_get_codepoint(tc, ci);
                    cp_num++;
                }
                result = find_next_node(tc, main_nodes[result], cps[i]);
                /* If we got something other than -1 and it has collation elements
                 * store the value so we know how far is valid */
                if (result != -1 && main_nodes[result].collation_key_elems != 0) {
                    last_good_i      = i;
                    last_good_result = result;
                }
                if (result != -1)
                    DEBUG_PRINT_SUB_NODE(main_nodes[result]);
            }
            /* If there is no last_good_result we should return a value from main_nodes */
            DEBUG_PRINT_SUB_NODE( (last_good_result == -1 ? main_nodes[query] : main_nodes[last_good_result]) );
            /* If the terminal_subnode can't be processed then that means it will push the starter codepoint ( cp[0] )'s value onto
             * the stack, and we must set last_good_i to 0 since it didn't work out */
            if (!collation_add_keys_from_node(tc, (last_good_result == -1 ? NULL : &main_nodes[last_good_result]), stack, ci, name, cps[0], &main_nodes[query])) {
                /* If we get 0 from collation_add_keys_from_node then we only processed
                 * a single codepoint so set last_good_i to 0 */
                last_good_i = 0;
            }
            num_cps_processed = last_good_i + 1;
        }
    }
    else {
        /* Push the first codepoint onto the stack */
        rtrn = collation_push_MVM_values(tc, cps[0], stack, ci, name);
        num_cps_processed = 1;
    }
    /* If there are any more codepoints remaining call collation_push_cp on the remaining */
    if (num_cps_processed < cp_num) {
        return num_cps_processed + collation_push_cp(tc, stack, ci, cps + num_cps_processed, cp_num - num_cps_processed, name);
    }
    return num_cps_processed;
}
MVMint64 grab_from_stack(MVMThreadContext *tc, MVMCodepointIter *ci, collation_stack *stack, char *name) {
    if (!MVM_string_ci_has_more(tc, ci))
        return 0;
    collation_push_cp(tc, stack, ci, NULL, 0, name);
    return 1;
}
static void init_ringbuffer (MVMThreadContext *tc,  ring_buffer *buffer) {
    MVMint64 i;
    buffer->count           =  0;
    buffer->location        = -1;
    buffer->codes_out_count =  0;
}
static void add_to_ring_buffer(MVMThreadContext *tc, ring_buffer *buffer, MVMCodepoint cp) {
    buffer->location++;
    if (codepoint_sequence_no_max <= buffer->location)
        buffer->location = 0;
    buffer->codes[buffer->location] = cp;
    buffer->count++;
}
/* Takes the ring buffer and puts the codepoints in order */
static void ring_buffer_done(MVMThreadContext *tc, ring_buffer *buffer) {
    buffer->codes_out_count = codepoint_sequence_no_max < buffer->count ? codepoint_sequence_no_max : buffer->count;
    /* If the buffer hasn't been wraped yet or the last codepoint was pushed onto the last buffer element,
     * use memcpy to copy it to the out buffer */
    if (buffer->count <= codepoint_sequence_no_max || buffer->location == codepoint_sequence_no_max - 1) {
        memcpy(buffer->codes_out, buffer->codes, sizeof(MVMCodepoint) * buffer->codes_out_count);
    }
    /* Otherwise we need to copy it manually */
    else {
        /* Copy backwards from the last copied to the first copied codepoint in the ring buffer */
        MVMint32 out_location     = buffer->codes_out_count - 1;
        MVMint32 buf_location     = buffer->location;
        for (; 0 <= out_location; out_location--) {
            buffer->codes_out[out_location] = buffer->codes[buf_location];
            buf_location--;
            if (buf_location < 0)
                buf_location = codepoint_sequence_no_max - 1;
        }
    }
}
static MVMint64 collation_return_by_quaternary(MVMThreadContext *tc, level_eval *level_eval_settings,
    MVMStringIndex length_a, MVMStringIndex length_b, MVMint64 compare_by_cp_rtrn) {
    if (compare_by_cp_rtrn) {
        return compare_by_cp_rtrn == -1 ? level_eval_settings->s.quaternary.s2.Less :
               compare_by_cp_rtrn ==  1 ? level_eval_settings->s.quaternary.s2.More :
                                          level_eval_settings->s.quaternary.s2.Same ;
    }
    else {
        return length_a < length_b ? level_eval_settings->s.quaternary.s2.Less :
               length_b < length_a ? level_eval_settings->s.quaternary.s2.More :
                                     level_eval_settings->s.quaternary.s2.Same ;
    }
}
/* MVM_unicode_string_compare implements the Unicode Collation Algorthm */
MVMint64 MVM_unicode_string_compare(MVMThreadContext *tc, MVMString *a, MVMString *b,
         MVMint64 collation_mode, MVMint64 lang_mode, MVMint64 country_mode) {
    MVMStringIndex alen, blen;
    /* Iteration variables */
    MVMCodepointIter a_ci, b_ci;
    MVMGrapheme32 ai, bi;
    /* Set it all to 0 to start with. We alter this based on the collation_mode later on */
    level_eval level_eval_settings = {
        { {0,0,0}, {0,0,0}, {0,0,0}, {0,0,0} }
    };
    /* The default level_eval settings, used between two non-equal levels */
    union level_eval_u2 level_eval_default = {
        {-1, 0, 1}
    };
    /* Collation stacks */
    collation_stack stack_a;
    collation_stack stack_b;

    ring_buffer buf_a, buf_b;
    /* This value stores what the return value would be if the strings were compared
     * by codepoint. This is used to break collation value ties */
    MVMint64 compare_by_cp_rtrn = 0;
    MVMint64 pos_a = 0, pos_b = 0, i = 0, rtrn = 0;
    MVMint16 grab_a_done = 0, grab_b_done = 0;
    /* From 0 to 2 for primary, secondary, tertiary levels */
    MVMint16   level_a = 0,   level_b = 0;
    MVMint64 skipped_a = 0, skipped_b = 0;
    /* This code sets up level_eval_settings based on the collation_mode */
    #define setmodeup(mode, level, Less, Same, More) {\
        if (collation_mode & mode) {\
            level_eval_settings.a[level].a2[0] +=  Less;\
            level_eval_settings.a[level].a2[1] +=  Same;\
            level_eval_settings.a[level].a2[2] +=  More;\
        }\
    }
    /* Primary */
    setmodeup(MVM_COLLATION_PRIMARY_POSITIVE,    0, -1, 0,  1);
    setmodeup(MVM_COLLATION_PRIMARY_NEGATIVE,    0,  1, 0, -1);
    /* Secondary */
    setmodeup(MVM_COLLATION_SECONDARY_POSITIVE,  1, -1, 0,  1);
    setmodeup(MVM_COLLATION_SECONDARY_NEGATIVE,  1,  1, 0, -1);
    /* Tertiary */
    setmodeup(MVM_COLLATION_TERTIARY_POSITIVE,   2, -1, 0,  1);
    setmodeup(MVM_COLLATION_TERTIARY_NEGATIVE,   2,  1, 0, -1);
    /* Quaternary */
    setmodeup(MVM_COLLATION_QUATERNARY_POSITIVE, 3, -1, 0,  1);
    setmodeup(MVM_COLLATION_QUATERNARY_NEGATIVE, 3,  1, 0, -1);
    DEBUG_COLLATION_MODE_PRINT(level_eval_settings);

    init_stack(tc, &stack_a);
    init_stack(tc, &stack_b);
    MVM_string_check_arg(tc, a, "compare");
    MVM_string_check_arg(tc, b, "compare");
    /* Simple cases when one or both are zero length. */
    alen = MVM_string_graphs_nocheck(tc, a);
    blen = MVM_string_graphs_nocheck(tc, b);
    if (alen == 0 || blen == 0)
        return collation_return_by_quaternary(tc, &level_eval_settings, alen, blen, 0);

    /* Initialize a codepoint iterator
     * For now we decompose utf8-c8 synthetics. Eventually we may want to pass
     * them back and choose some way to generate sorting info for them, similar
     * to how Unassigned codepoints are dealt with */
    MVMROOT(tc, a_ci, {
        MVM_string_ci_init(tc, &a_ci, a, 0, 0);
        MVMROOT(tc, b_ci, {
            MVM_string_ci_init(tc, &b_ci, b, 0, 0);
        });
    });
    init_ringbuffer(tc, &buf_a);
    init_ringbuffer(tc, &buf_b);
    /* The ring buffers hold the exact number of codepoints which comprise the longest
     * sequence of codepoints which map to its own collation keys in the Unicode
     * Collation Algorithm. As of Unicode 9.0 this number was 3. The number is
     * generated by the script that generates the C data so we only need to retain
     * that many codepoints. TODO actually generate codepoint_sequence_no_max */
    while (MVM_string_ci_has_more(tc, &a_ci) && MVM_string_ci_has_more(tc, &b_ci)) {
        MVMCodepoint cp_a = MVM_string_ci_get_codepoint(tc, &a_ci);
        MVMCodepoint cp_b = MVM_string_ci_get_codepoint(tc, &b_ci);
        add_to_ring_buffer(tc, &buf_a, cp_a);
        add_to_ring_buffer(tc, &buf_b, cp_b);
        if (cp_a != cp_b) {
            compare_by_cp_rtrn = cp_a < cp_b ? -1 :
                                 cp_b < cp_a ?  1 :
                                                0 ;
            break;
        }
    }
    DEBUG_PRINT_RING_BUFFER(tc, &buf_a);
    DEBUG_PRINT_RING_BUFFER(tc, &buf_b);
    ring_buffer_done(tc, &buf_a);
    ring_buffer_done(tc, &buf_b);
    collation_push_cp(tc, &stack_b, &b_ci, buf_b.codes_out, buf_b.codes_out_count, "b");
    DEBUG_PRINT_STACK(tc, &stack_b, "b", "After Initial grab b");
    collation_push_cp(tc, &stack_a, &a_ci, buf_a.codes_out, buf_a.codes_out_count, "a");
    DEBUG_PRINT_STACK(tc, &stack_a, "a", "After Initial grab a");

    while (rtrn == 0) {
        while (pos_a <= stack_a.stack_top && pos_b <= stack_b.stack_top) {
            /* Collation values are set as 1 (collation_zero) higher than what Unicode designates. So a collation value of 1 is ignored as
             * a 0 in DUCET would be. Whereas a collation value of 0 cannot be skipped and is used only for the tertiary level
             * of a level separator so it is evaluated as the end of the string, causing the shorter string to win */
            if (stack_a.keys[pos_a].a[level_a] == collation_zero) {
                pos_a++;
                skipped_a++;
                continue;
            }
            if (stack_b.keys[pos_b].a[level_b] == collation_zero) {
                pos_b++;
                skipped_b++;
                continue;
            }
            /* If collation values are not equal */
            if (stack_a.keys[pos_a].a[level_a] != stack_b.keys[pos_b].a[level_b]) {
                union level_eval_u2 effective_level_eval = level_eval_default;
                /* Aside from ignored (collation_zero) levels, when all primary values
                 * are greater than any secondary values. All secondary values are greater
                 * than any tertiary values. Because of this, we only need to tailor effective_level_evals if the levels match */
                if (level_a == level_b)
                    effective_level_eval = level_eval_settings.a[level_a];
                rtrn = stack_a.keys[pos_a].a[level_a] < stack_b.keys[pos_b].a[level_b] ?  effective_level_eval.s2.Less :
                       stack_a.keys[pos_a].a[level_a] > stack_b.keys[pos_b].a[level_b] ?  effective_level_eval.s2.More :
                                                                                          effective_level_eval.s2.Same ;
                DEBUG_PRINT_STACK(tc, &stack_a, "a", "Collation values found not equal");
                DEBUG_PRINT_STACK(tc, &stack_b, "b", "Collation values found not equal");
            }
            if (rtrn != 0) {
                cleanup_stack(tc, &stack_a);
                cleanup_stack(tc, &stack_b);
                return rtrn;
            }
            pos_a++;
            pos_b++;
        }
        #define if_grab_done(grab_done, stack, ci, pos, level, name) {\
            /* If we haven't grabbed all the collation elements we should grab them */\
            if (!grab_done) {\
                if (!grab_from_stack(tc, &ci, &stack, name)) {\
                    collation_push_level_separator(tc, &stack, name);\
                    grab_done = 1;\
                }\
            }\
            /* Here we check if we've already grabbed everything. If we have \
             * grabbed we need to move to the next collation level for that \
             * stack only      TODO, right hand side of conditional needed or not? */  \
            if (grab_done && stack.stack_top < pos) {\
                /* Only if we're already not on the highest level */\
                if (level < 2) {\
                    pos = 0;\
                    level++;\
                    DEBUG_PRINT("Setting level_%s to %"PRIi16" and pos_%s to %"PRIi64". %s_keys_pushed: %li\n",\
                                            name,      level,   name, pos,name, stack.stack_top + 1);\
                }\
                else {\
                    /* TODO get the names of the strings can't wrap in the debug */ \
                    DEBUG_PRINT_STACK(tc, &stack_a, "a", "Can't wrap string anymore so breaking");\
                    DEBUG_PRINT_STACK(tc, &stack_b, "b", "Can't wrap string anymore so breaking");\
                    break;\
                }\
            }\
        }
        /* Here we wrap to the next level of collation elements if needed */
        if_grab_done(grab_b_done, stack_b, b_ci, pos_b, level_b, "b");
        if_grab_done(grab_a_done, stack_a, a_ci, pos_a, level_a, "a");
    }
    cleanup_stack(tc, &stack_a);
    cleanup_stack(tc, &stack_b);
    /* If we get here, they tied for all levels including the level separator
     * [0001.0001.0000] The primary and secondary of the level separator we push
     * get ignored, but the tertiary level value 0 is not ignored. No other values
     * have 0, so that means those levels must have matched up.*/

    /* The tie must be broken by codepoint or length. Use the return value we computed at
     * the beginning of the function while we were pushing onto the ring buffers */
    return collation_return_by_quaternary(tc, &level_eval_settings, alen, blen, compare_by_cp_rtrn);
}

/* Looks up a codepoint by name. Lazily constructs a hash. */
MVMGrapheme32 MVM_unicode_lookup_by_name(MVMThreadContext *tc, MVMString *name) {
    MVMuint64 size;
    char *cname = MVM_string_utf8_encode_C_string(tc, name);
    size_t cname_len = strlen((const char *) cname );
    MVMUnicodeNameRegistry *result;
    if (!codepoints_by_name) {
        generate_codepoints_by_name(tc);
    }
    HASH_FIND(hash_handle, codepoints_by_name, cname, cname_len, result);
    if (!result) {
        #define prefixes_len 7
        const char *prefixes[prefixes_len] = {
            "CJK UNIFIED IDEOGRAPH-",
            "CJK COMPATIBILITY IDEOGRAPH-",
            "<CONTROL-",
            "<RESERVED-",
            "<SURROGATE-",
            "<PRIVATE-USE-",
            "TANGUT IDEOGRAPH-"
        };
        int i;
        for (i = 0; i < prefixes_len; i++) {
            int str_len = strlen(prefixes[i]);
            if (cname_len <= str_len)
                continue;
            /* Make sure to catch conditions which strtoll is ok with but we
             * don't want to allow. So don't allow leading space, or -/+ and don't
             * allow 0x */
            if (cname[str_len] == '-' || cname[str_len] == '+' || cname[str_len] == ' ')
                continue;
            if (cname_len >= str_len + 2 && cname[str_len+1] == 'X')
                continue;
            if (!strncmp(cname, prefixes[i], str_len)) {
                char *reject = NULL;
                MVMint64 rtrn = strtol(cname + strlen(prefixes[i]), &reject, 16);
                if (prefixes[i][0] == '<' && *reject == '>' && reject - cname + 1 == cname_len) {
                    MVM_free(cname);
                    return rtrn;
                }
                else if ((*reject == '\0' && !(rtrn == 0 && reject == cname + str_len))) {
                    MVM_free(cname);
                    return rtrn;
                }
            }
        }
    }
    MVM_free(cname);
    return result ? result->codepoint : -1;
}
/* Quickly determines the length of a number 6.5x faster than doing log10 after
 * compiler optimization */
MVM_STATIC_INLINE size_t length_of_num (size_t number) {
    if (number < 10) return 1;
    return 1 + length_of_num(number / 10);
}
MVM_STATIC_INLINE size_t length_of_num_16 (size_t number) {
    if (number < 16) return 1;
    return 1 + length_of_num_16(number / 16);
}
MVMString * MVM_unicode_get_name(MVMThreadContext *tc, MVMint64 codepoint) {
    const char *name = NULL;
    size_t name_len = 0;

    /* Catch out-of-bounds code points. */
    if (codepoint < 0) {
        name = "<illegal>";
    }
    else if (0x10FFFF < codepoint) {
        name = "<unassigned>";
    }
    if (name)
        name_len = strlen(name);
    /* Look up name. */
    else {
        MVMuint32 codepoint_row = MVM_codepoint_to_row_index(tc, codepoint);
        if (codepoint_row != -1) {
            name = codepoint_names[codepoint_row];
            if (!name) {
                while (codepoint_row && !codepoint_names[codepoint_row])
                    codepoint_row--;
                name = codepoint_names[codepoint_row];
                if (name && name[0] != '<')
                    name = NULL;
            }
        }
        if (!name) {
            /* U+FDD0..U+FDEF and the last two codepoints of each block
             * are noncharacters (U+FFFE U+FFFF U+1FFFE U+1FFFF U+2FFFE etc.) */
            if ((0xFDD0 <= codepoint && codepoint <= 0xFDEF) || (0xFFFE & codepoint) == 0xFFFE)
                name = "<noncharacter>";
            else
                name = "<reserved>";
        }
        name_len = strlen(name);
        /* Turn non-unique codepoint names into unique ones by adding the
         * codepoint
         * i.e. <CJK UNIFIED IDEOGRAPH> ā†’ CJK UNIFIED IDEOGRAPH-20000
         *      <control> ā†’ <control-0000> */
        if (name && name[0] == '<') {
            size_t i, new_length, num_len = length_of_num_16(codepoint);
            char *new_name = NULL;
            int remove_brack = !strncmp(name, "<CJK", 4) ||
                !strncmp(name, "<TANGUT", 7) ? 1 : 0;
            /* We pad to 4 width, so make sure the number is accurate */
            num_len = num_len < 4 ? 4 : num_len;
            /* The new_length is 1 more than we need since snprintf adds a null */
            new_length = !remove_brack + name_len + num_len * sizeof(char);
            new_name = alloca(new_length);
            for (i = 0; i < name_len; i++) {
                if (name[i] == '>') {
                    snprintf(new_name + i - remove_brack, new_length - (i - remove_brack) ,
                        "-%.4"PRIX32"", (MVMuint32)codepoint);
                    if (!remove_brack) {
                        new_name[new_length-1] = '>';
                    }
                    /* snprintf adds a null terminator at the end. We don't need
                     * this, so replace with a > instead of using snprintf to add
                     * it. Note: new has no NULL terminator */
                    break;
                }
                new_name[i] = name[i+remove_brack];
            }
            name     = new_name;
            name_len = new_length - remove_brack;
        }
    }

    return MVM_string_ascii_decode(tc, tc->instance->VMString, name, name_len);
}

MVMString * MVM_unicode_codepoint_get_property_str(MVMThreadContext *tc, MVMint64 codepoint, MVMint64 property_code) {
    const char * const str = MVM_unicode_get_property_str(tc, codepoint, property_code);

    if (!str)
        return tc->instance->str_consts.empty;

    return MVM_string_ascii_decode(tc, tc->instance->VMString, str, strlen(str));
}

const char * MVM_unicode_codepoint_get_property_cstr(MVMThreadContext *tc, MVMint64 codepoint, MVMint64 property_code) {
    return MVM_unicode_get_property_str(tc, codepoint, property_code);
}

MVMint64 MVM_unicode_codepoint_get_property_int(MVMThreadContext *tc, MVMint64 codepoint, MVMint64 property_code) {
    if (property_code == 0)
        return 0;
    return (MVMint64)MVM_unicode_get_property_int(tc, codepoint, property_code);
}

MVMint64 MVM_unicode_codepoint_get_property_bool(MVMThreadContext *tc, MVMint64 codepoint, MVMint64 property_code) {
    if (property_code == 0)
        return 0;
    return (MVMint64)MVM_unicode_get_property_int(tc, codepoint, property_code) != 0;
}

MVMint64 MVM_unicode_codepoint_has_property_value(MVMThreadContext *tc, MVMint64 codepoint, MVMint64 property_code, MVMint64 property_value_code) {
    if (property_code == 0)
        return 0;
    return (MVMint64)MVM_unicode_get_property_int(tc,
        codepoint, property_code) == property_value_code ? 1 : 0;
}

/* Looks if there is a case change for the provided codepoint. Since a case
 * change may produce multiple codepoints occasionally, then we return 0 if
 * the case change is a no-op, and otherwise the number of codepoints. The
 * codepoints argument will be set to a pointer to a buffer where those code
 * points can be read from. The caller must not mutate the buffer, nor free
 * it. */
MVMuint32 MVM_unicode_get_case_change(MVMThreadContext *tc, MVMCodepoint codepoint, MVMint32 case_,
                                      const MVMCodepoint **result) {
    if (case_ == MVM_unicode_case_change_type_fold) {
        MVMint32 folding_index = MVM_unicode_get_property_int(tc,
            codepoint, MVM_UNICODE_PROPERTY_CASE_FOLDING);
        if (folding_index) {
            MVMint32 is_simple = MVM_unicode_get_property_int(tc,
                codepoint, MVM_UNICODE_PROPERTY_CASE_FOLDING_SIMPLE);
            if (is_simple) {
                *result = &(CaseFolding_simple_table[folding_index]);
                return 1;
            }
            else {
                MVMint32 i = 3;
                while (0 < i && CaseFolding_grows_table[folding_index][i - 1] == 0)
                    i--;
                *result = &(CaseFolding_grows_table[folding_index][0]);
                return i;
            }
        }
    }
    else {
        MVMint32 special_casing_index = MVM_unicode_get_property_int(tc,
            codepoint, MVM_UNICODE_PROPERTY_SPECIAL_CASING);
        if (special_casing_index) {
            MVMint32 i = 3;
                while (0 < i && SpecialCasing_table[special_casing_index][case_][i - 1] == 0)
                    i--;
                *result = SpecialCasing_table[special_casing_index][case_];
                return i;
        }
        else {
            MVMint32 changes_index = MVM_unicode_get_property_int(tc,
                codepoint, MVM_UNICODE_PROPERTY_CASE_CHANGE_INDEX);
            if (changes_index) {
                const MVMCodepoint *found = &(case_changes[changes_index][case_]);
                if (*found != 0) {
                    *result = found;
                    return 1;
                }
            }
        }
    }
    return 0;
}

/* XXX make all the statics members of the global MVM instance instead? */
static MVMUnicodeNameRegistry *property_codes_by_names_aliases;
static MVMUnicodeGraphemeNameRegistry *property_codes_by_seq_names;

static void generate_property_codes_by_names_aliases(MVMThreadContext *tc) {
    MVMuint32 num_names = num_unicode_property_keypairs;

    while (num_names--) {
        MVMUnicodeNameRegistry *entry = MVM_malloc(sizeof(MVMUnicodeNameRegistry));
        entry->name = (char *)unicode_property_keypairs[num_names].name;
        entry->codepoint = unicode_property_keypairs[num_names].value;
        HASH_ADD_KEYPTR(hash_handle, property_codes_by_names_aliases,
            entry->name, strlen(entry->name), entry);
    }
}
static void generate_property_codes_by_seq_names(MVMThreadContext *tc) {
    MVMuint32 num_names = num_unicode_seq_keypairs;

    while (num_names--) {
        MVMUnicodeGraphemeNameRegistry *entry = MVM_malloc(sizeof(MVMUnicodeGraphemeNameRegistry));
        entry->name = (char *)uni_seq_pairs[num_names].name;
        entry->structindex = uni_seq_pairs[num_names].value;
        HASH_ADD_KEYPTR(hash_handle, property_codes_by_seq_names,
            entry->name, strlen(entry->name), entry);
    }
}

MVMint32 MVM_unicode_name_to_property_code(MVMThreadContext *tc, MVMString *name) {
    MVMuint64 size;
    char *cname = MVM_string_ascii_encode(tc, name, &size, 0);
    MVMUnicodeNameRegistry *result;
    if (!property_codes_by_names_aliases) {
        generate_property_codes_by_names_aliases(tc);
    }
    HASH_FIND(hash_handle, property_codes_by_names_aliases, cname, strlen((const char *)cname), result);
    MVM_free(cname); /* not really codepoint, really just an index */
    return result ? result->codepoint : 0;
}

static void generate_unicode_property_values_hashes(MVMThreadContext *tc) {
    MVMUnicodeNameRegistry **hash_array = MVM_calloc(MVM_NUM_PROPERTY_CODES, sizeof(MVMUnicodeNameRegistry *));
    MVMuint32 index = 0;
    MVMUnicodeNameRegistry *entry = NULL, *binaries = NULL;
    for ( ; index < num_unicode_property_value_keypairs; index++) {
        MVMint32 property_code = unicode_property_value_keypairs[index].value >> 24;
        entry = MVM_malloc(sizeof(MVMUnicodeNameRegistry));
        entry->name = (char *)unicode_property_value_keypairs[index].name;
        entry->codepoint = unicode_property_value_keypairs[index].value & 0xFFFFFF;
        HASH_ADD_KEYPTR(hash_handle, hash_array[property_code],
            entry->name, strlen(entry->name), entry);
    }
    for (index = 0; index < MVM_NUM_PROPERTY_CODES; index++) {
        if (!hash_array[index]) {
            if (!binaries) {
                MVMUnicodeNamedValue yes[8] = { {"T",1}, {"Y",1},
                    {"Yes",1}, {"yes",1}, {"True",1}, {"true",1}, {"t",1}, {"y",1} };
                MVMUnicodeNamedValue no [8] = { {"F",0}, {"N",0},
                    {"No",0}, {"no",0}, {"False",0}, {"false",0}, {"f",0}, {"n",0} };
                MVMuint8 i;
                for (i = 0; i < 8; i++) {
                    entry = MVM_malloc(sizeof(MVMUnicodeNameRegistry));
                    entry->name = (char *)yes[i].name;
                    entry->codepoint = yes[i].value;
                    HASH_ADD_KEYPTR(hash_handle, binaries, yes[i].name, strlen(yes[i].name), entry);
                }
                for (i = 0; i < 8; i++) {
                    entry = MVM_malloc(sizeof(MVMUnicodeNameRegistry));
                    entry->name = (char *)no[i].name;
                    entry->codepoint = no[i].value;
                    HASH_ADD_KEYPTR(hash_handle, binaries, no[i].name, strlen(no[i].name), entry);
                }
            }
            hash_array[index] = binaries;
        }
    }
    unicode_property_values_hashes = hash_array;
}
MVMint32 unicode_cname_to_property_value_code(MVMThreadContext *tc, MVMint64 property_code, const char *cname, MVMuint64 cname_length) {
    char *out_str = NULL;
    MVMUnicodeNameRegistry *result = NULL;
                                   /* number + dash + property_value + NULL */
    MVMuint64 out_str_length = length_of_num(property_code) + 1 + cname_length + 1;
    if (1024 < out_str_length)
        MVM_exception_throw_adhoc(tc, "Property value or name queried is larger than allowed.");

    out_str = alloca(sizeof(char) * out_str_length);
    snprintf(out_str, out_str_length, "%"PRIi64"-%s", property_code, cname);

    HASH_FIND(hash_handle, unicode_property_values_hashes[property_code], out_str, out_str_length - 1, result);
    return result ? result->codepoint : 0;
}
MVMint32 MVM_unicode_name_to_property_value_code(MVMThreadContext *tc, MVMint64 property_code, MVMString *name) {
    if (property_code <= 0 || MVM_NUM_PROPERTY_CODES <= property_code)
        return 0;
    else {
        MVMuint64 cname_length;
        char *cname = MVM_string_ascii_encode(tc, name, &cname_length, 0);
        MVMint32 code = unicode_cname_to_property_value_code(tc, property_code, cname, cname_length);
        MVM_free(cname);
        return code;
    }
}
MVMint32 MVM_unicode_cname_to_property_value_code(MVMThreadContext *tc, MVMint64 property_code, const char *cname, size_t cname_length) {
    if (property_code <= 0 || MVM_NUM_PROPERTY_CODES <= property_code)
        return 0;
    else
        return unicode_cname_to_property_value_code(tc, property_code, cname, cname_length);
}

/* Look up the primary composite for a pair of codepoints, if it exists.
 * Returns 0 if not. */
MVMCodepoint MVM_unicode_find_primary_composite(MVMThreadContext *tc, MVMCodepoint l, MVMCodepoint c) {
    MVMint32 lower = l & 0xFF;
    MVMint32 upper = (l >> 8) & 0xFF;
    MVMint32 plane = (l >> 16) & 0xF;
    const MVMint32 *pcs  = comp_p[plane][upper][lower];
    if (pcs) {
        MVMint32 entries = pcs[0];
        MVMint32 i;
        for (i = 1; i < entries; i += 2)
            if (pcs[i] == c)
                return pcs[i + 1];
    }
    return 0;
}

static uv_mutex_t property_hash_count_mutex;
static int property_hash_count = 0;
static uv_once_t property_hash_count_guard = UV_ONCE_INIT;

static void setup_property_mutex(void)
{
    uv_mutex_init(&property_hash_count_mutex);
}

void MVM_unicode_init(MVMThreadContext *tc)
{
    uv_once(&property_hash_count_guard, setup_property_mutex);

    uv_mutex_lock(&property_hash_count_mutex);
    if (property_hash_count == 0) {
        generate_unicode_property_values_hashes(tc);
    }
    property_hash_count++;
    uv_mutex_unlock(&property_hash_count_mutex);
}

void MVM_unicode_release(MVMThreadContext *tc)
{
    uv_mutex_lock(&property_hash_count_mutex);
    property_hash_count--;
    if (property_hash_count == 0) {
        int i;

        for (i = 0; i < MVM_NUM_PROPERTY_CODES; i++) {
            MVMUnicodeNameRegistry *entry = NULL;
            MVMUnicodeNameRegistry *tmp   = NULL;
            unsigned bucket_tmp;
            int j;

            if (!unicode_property_values_hashes[i]) {
                continue;
            }

            for(j = i + 1; j < MVM_NUM_PROPERTY_CODES; j++) {
                if (unicode_property_values_hashes[i] == unicode_property_values_hashes[j]) {
                    unicode_property_values_hashes[j] = NULL;
                }
            }

            HASH_ITER(hash_handle, unicode_property_values_hashes[i], entry, tmp, bucket_tmp) {
                HASH_DELETE(hash_handle, unicode_property_values_hashes[i], entry);
                MVM_free(entry);
            }
            assert(!unicode_property_values_hashes[i]);
        }

        MVM_free(unicode_property_values_hashes);

        unicode_property_values_hashes = NULL;
    }
    uv_mutex_unlock(&property_hash_count_mutex);
}
/* Looks up a codepoint sequence or codepoint by name (case insensitive).
 First tries to look it up by codepoint with MVM_unicode_lookup_by_name and if
 not found as a named codepoint, lazily constructs a hash of the codepoint
 sequences and looks up the sequence name */
MVMString * MVM_unicode_string_from_name(MVMThreadContext *tc, MVMString *name) {
    MVMString * name_uc = MVM_string_uc(tc, name);
    char * cname = NULL;
    MVMUnicodeGraphemeNameRegistry *result;

    MVMGrapheme32 result_graph = MVM_unicode_lookup_by_name(tc, name_uc);
    /* If it's just a codepoint, return that */
    if (0 <= result_graph) {
        return MVM_string_chr(tc, result_graph);
    }
    /* Otherwise look up the sequence */
    else {
        const MVMint32 *uni_seq = NULL;
        cname = MVM_string_utf8_encode_C_string(tc, name_uc);
        if (!property_codes_by_seq_names) {
            generate_property_codes_by_seq_names(tc);
        }
        HASH_FIND(hash_handle, property_codes_by_seq_names, cname, strlen((const char *)cname), result);
        MVM_free(cname);
        /* If we can't find a result return an empty string */
        if (!result)
            return tc->instance->str_consts.empty;

        uni_seq = uni_seq_enum[result->structindex];
        /* The first element is the number of codepoints in the sequence */
        return MVM_unicode_codepoints_c_array_to_nfg_string(tc, (MVMCodepoint *) uni_seq + 1, uni_seq[0]);
    }

}