/*
 * Copyright (C) 2018-2020 Western Digital Corporation or its affiliates
 * Copyright (C) 2017-2018 Wearable, Inc.
 * Copyright (C) 2000-2012 leJOS Contributors
 * Copyright (C) 2000 Jose H. Solorzano and TinyVM Contributors
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
 * 
 * This Source Code Form is “Incompatible With Secondary Licenses”,
 * as defined by the Mozilla Public License, v. 2.0.
 */

/**
 * Memory operations.
 * This file encapsulates many of the low level memory operations required
 * by the VM. In particular it includes the allocator and garbage collector
 * along with functions to allow access to non-aligned storage.
 *
 * The allocator uses two heap areas. One allocates fixed location objects
 * which are referenced directly by other parts of the VM.
 * The other heap contains objects that can be moved; these objects are
 * accessed via a pointer stored in a fixed location object.
 * Both heaps are collected using an incremental
 * concurrent collector. Failed allocation requests will result in the
 * current thread being suspended while the gc runs, after which the
 * request can be retried.
 */





#include <inttypes.h>
#define JINT_FMT PRId32

#include "types.h"
#include "trace.h"
#include "constants.h"
#include "specialsignatures.h"
#include "specialclasses.h"
#include "memory.h"
#include "threads.h"
#include "classes.h"
#include "language.h"
#include "configure.h"
#include "interpreter.h"
#include "exceptions.h"
#include <stdlib.h>
#include "stack.h"

#include <string.h>

// Heap memory needs to be aligned to 4 bytes on ARM
// WORD_SIZE is the allocation granule in bytes; WORD_SHIFT is log2(WORD_SIZE).
#define WORD_SIZE 4
#define WORD_SHIFT 2
// Number of whole words needed to hold b bytes (rounds up).
#define TO_WORDS(b) (((b) + (WORD_SIZE-1)) >> WORD_SHIFT)
// Round w up to the next multiple of WORD_SIZE.
#define MEM_ROUND(w) (((w) + (WORD_SIZE-1)) & ~(WORD_SIZE-1))
// Round w down to a multiple of WORD_SIZE.
#define MEM_TRUNC(w) ((w) & ~(WORD_SIZE -1))

// Sizes and growth when using a split heap.
// NOTE(review): the units (bytes or words) are not established by the code
// visible here - confirm at the point of use.
#define INITIAL_HEAP 2048
#define MIN_HEAP 4096
#define MIN_HEAP_ADJUST 256

// Basic types and sizes
// Table indexed by the T_* type code giving the size of one value of that
// type (presumably in bytes, e.g. 8 for T_LONG/T_DOUBLE - confirm at use).
// Entries of 0 mark codes that have no storage size here.
static const byte typeSize[] = {
    4, // 0 == T_REFERENCE
    0, // 1
    0, // 2 == T_CLASS
    0, // 3
    1, // 4 == T_BOOLEAN
    2, // 5 == T_CHAR
    4, // 6 == T_FLOAT
    8, // 7 == T_DOUBLE
    1, // 8 == T_BYTE
    2, // 9 == T_SHORT
    4, // 10 == T_INT
    8, // 11 == T_LONG
    0, // 12 == T_VOID
    0, // 13
    4, // 14 Used for multidimensional arrays
};

// Header for array heap objects
// This header sits immediately before the element data of an array whose
// storage lives in the moveable array heap (see set_thread, which locates it
// at array_start(obj) - sizeof(AHeapObj)).
typedef struct
{
    // NOTE(review): presumably a link back to the owning fixed-heap header
    // object; the code that fills it in is not visible here - confirm.
    uint16_t object:16;
    // Word offset of the owning Thread from gc_heap_start when this array is
    // a thread's value stack, otherwise 0 (see init_stacks / set_thread).
    uint16_t thread:15;
    // NOTE(review): meaning not visible in this part of the file - confirm.
    uint16_t available:1;
}  AHeapObj;

// Object sizes in words
// AH_OBJ_SIZE: moveable-heap array header, NORM_OBJ_SIZE: standard object
// header, BA_OBJ_SIZE: big-array header.
#define AH_OBJ_SIZE TO_WORDS(sizeof(AHeapObj))
#define NORM_OBJ_SIZE TO_WORDS(HEADER_SIZE)
#define BA_OBJ_SIZE TO_WORDS(sizeof(BigArray))

// Mark queue
// MAX_CALL_DEPTH is presumably the recursion limit handed to mark_object
// (its callLimit parameter) - confirm at the call sites.
#define MAX_CALL_DEPTH 4
// Index following cur in the circular mark queue of MARK_QUEUE_SIZE entries.
#define markQNext(cur) (((cur) + 1) % MARK_QUEUE_SIZE)

// Macro used to indicate when to suspend this mark/sweep step
// True only when the caller allows timeouts and SCHEDULER_REQUESTED(state) holds.
#define GC_TIMEOUT(state, check_timeout) ((check_timeout) && SCHEDULER_REQUESTED(state))

// If you're seeing possible memory corruption, CHANGE THIS FIRST. If making the gc non-preemptable fixes your problem, suspect gc problems.
//#define GC_TIMEOUT(state, check) 0

// Allocation retry states stored in gc_retry_state. alloc_complete advances
// a failed request by switching on (gc_retry_state + 1), so the numeric
// order of these values is significant.
#define GC_NORMAL 0
#define GC_RETRY 1
#define GC_EXCLUSIVE 2
#define GC_ERROR 3
#define GC_ABORT 4

/**
 * Forward declarations.
 * These functions are used by the allocation front end below before their
 * definitions appear later in the file.
 */
void mark_object(nvm_state_t* pState, Object *obj, int callLimit, boolean check_timeout);
Object *try_allocate(nvm_state_t* pState, uint32_t size);
Object *obj_heap_allocate (nvm_state_t* pState, JINT size, nvm_class_index_t class_index, _Bool is_array, byte length);
AHeapObj *array_heap_allocate(nvm_state_t* pState, JINT size, Object *obj);
void array_heap_free(nvm_state_t* pState, AHeapObj *ah);
void compact_array_heap(nvm_state_t* pState);

// Words needed for LEN_ elements of class CLS_ (classSize of an array class
// is used as the per-element size, see array_element_size).
#define array_data_size_words(state, CLS_, LEN_) TO_WORDS(get_class_record(state, CLS_)->classSize*(LEN_))
//#define array_size(state, OBJ_) (nvm_is_std_array(OBJ_) ? NORM_OBJ_SIZE + array_data_size_words(state, nvm_class_index(OBJ_), ((Object *)(OBJ_))->flags.length) : (nvm_is_big_array(OBJ_) ? BA_OBJ_SIZE + array_data_size_words(state, nvm_class_index(OBJ_), ((BigArray *)(OBJ_))->length) : BA_OBJ_SIZE ))
// Size in words of a non-array object, taken from its class record.
#define obj_size_words(state, OBJ_) TO_WORDS(get_class_record(state, nvm_object_class_index(OBJ_))->classSize)
// Size in words occupied in the object heap by any object or array.
#define get_size_words(state, OBJ_) (nvm_object_is_array(OBJ_) ? nvm_array_size_memory(state, OBJ_) : obj_size_words(state, OBJ_))
// Size of one element of the array OBJ_ (the classSize of its array class).
#define array_element_size(state, OBJ_) (get_class_record(state, nvm_object_class_index(OBJ_))->classSize)
// Size in words of a free block: a bare standard header, or a big-array
// header plus the length stored in it (the layout written by set_free_size).
#define get_free_size(OBJ_) (nvm_object_is_array_std((Object *)(OBJ_)) ? NORM_OBJ_SIZE : BA_OBJ_SIZE + ((BigArray *)(OBJ_))->length)

/**
 * Number of words an array occupies in the object heap.
 * Standard and big arrays carry their element data inline after the header;
 * any other kind of array only has a big-array header in this heap.
 */
static inline int nvm_array_size_memory(nvm_state_t* state, Object* object)
{
    // Default: header only, data stored elsewhere.
    int words = BA_OBJ_SIZE;

    if (nvm_object_is_array_std(object)) {
        words = NORM_OBJ_SIZE + array_data_size_words(state, nvm_object_class_index(object), nvm_array_size(object));
    } else if (nvm_object_is_array_big(object)) {
        words = BA_OBJ_SIZE   + array_data_size_words(state, nvm_object_class_index(object), nvm_array_size(object));
    }
    return words;
}

#if NVM_DEBUG_MEMORY > 1
/**
 * Debug aid: walk the object heap from gc_obj_heap_start to gc_obj_heap_end,
 * print one line per object and then a per-class summary (number of
 * occurrences, saturating at 255, and total words used).
 *
 * The statistics tables are heap allocated with calloc and released before
 * returning. If they cannot be allocated nothing is printed except a notice.
 */
void
heap_utilization(nvm_state_t *state)
{
    MasterRecord *mrec = get_master_record(state);
    uint32_t *cursor = (uint32_t *)state->gc_obj_heap_start;
    int last_class = mrec->lastClass;
    unsigned char *occurrences = calloc(last_class + 1, sizeof *occurrences);
    size_t *sizes = calloc(last_class + 1, sizeof *sizes);

    if (occurrences == NULL || sizes == NULL) {
        printf("heap_utilization: unable to allocate statistics tables\n");
        free(occurrences);
        free(sizes);
        return;
    }

    while (cursor < (uint32_t *)state->gc_obj_heap_end) {
        Object *obj = (Object *)cursor;
        int size = get_size_words(state, obj);
        nvm_class_index_t class_index = nvm_object_class_index(obj);

        printf("heap_utilization: %p %08x class = %d, size = %d words\n", (void *)obj, (unsigned int)(*cursor),
                (int)class_index, size);
        if (size <= 0) {
            // A non-positive size would stall the walk forever; give up.
            printf("heap_utilization: invalid object size, stopping walk\n");
            break;
        }
        // Only account for classes that fit in the tables.
        if ((int)class_index >= 0 && (int)class_index <= last_class) {
            sizes[class_index] += size;
            if (occurrences[class_index] < 255) {
                occurrences[class_index]++;
            }
        }
        cursor += size;
    }

    size_t total_words = 0;
    for (int i = 0; i <= last_class; i++) {
        total_words += sizes[i];
    }
    for (int i = 0; i <= last_class; i++) {
        int count = occurrences[i];
        if (count) {
            printf("heap_utilization: class %d, occurrences: ", i);
            if (count < 255) {
                printf("%d ", count);
            } else {
                printf("255 or more ");
            }
            // size_t is printed through unsigned long so the format matches
            // the argument on every target.
            printf("total words: %lu (%lu%%)\n", (unsigned long)sizes[i],
                (unsigned long)(total_words ? 100 * sizes[i] / total_words : 0));
        }
    }

    free(occurrences);
    free(sizes);
}
#endif

#if NVM_DEBUG_MEMORY > 0
/**
 * Debug aid: print the bounds of the whole heap, then the bounds and
 * free/required figures of the array heap and the object heap. The word
 * counters are shifted by WORD_SHIFT so they are shown in bytes. When
 * NVM_DEBUG_MEMORY > 1 the per-object walk of heap_utilization follows.
 */
void describe_heap(nvm_state_t *state)
{
    // %p requires void * and the pointer difference is a ptrdiff_t, so both
    // are converted explicitly to match their format specifiers.
    printf("describe_heap: start %p, end %p, size %ld\n", (void *)state->gc_heap_start,
            (void *)state->gc_heap_end,
            (long)((char *)state->gc_heap_end - (char *)state->gc_heap_start));

    printf("describe_heap: array heap start: %p end: %p free: %d required: %d\n", (void *)state->gc_array_heap_start, (void *)state->gc_array_heap_end,
           ((int)state->gc_array_heap_memory_free) << WORD_SHIFT,
           ((int)state->gc_array_heap_memory_required) << WORD_SHIFT);
    printf("describe_heap: object heap start: %p end: %p free: %d largest free: %d required: %d\n", (void *)state->gc_obj_heap_start, (void *)state->gc_obj_heap_end,
           ((int)state->gc_obj_heap_memory_free) << WORD_SHIFT,
           ((int)state->gc_obj_heap_largest_free) << WORD_SHIFT,
           ((int)state->gc_obj_heap_memory_required) << WORD_SHIFT);
#if NVM_DEBUG_MEMORY > 1
    heap_utilization(state);
#endif
}
#endif

/**
 * The following functions manage the allocation request and retry mechanism.
 * Some requests require multiple objects to be allocated, so we implement
 * the notion of a transaction that is bounded by calls to alloc_start and
 * alloc_complete. If a request fails we schedule a garbage collection and
 * arrange to retry the request. If a subsequent retry fails, we try again
 * but this time stop all other threads from making memory requests. If this
 * retry fails we throw an out of memory exception.
 */

/**
 * Start a memory transaction.
 *
 * Resets the retry state, then works out whether this call is a retry of an
 * earlier failed request (the current thread still owns gc_lock) and whether
 * an exclusive-mode thread is blocking other allocators.
 *
 * @return true if the caller may go ahead and allocate; false if another
 *         thread holds exclusive mode, in which case the program counter is
 *         reset and the current thread is put to sleep.
 */
boolean alloc_start(nvm_state_t* pState)
{
    pState->gc_retry_state = GC_NORMAL;

    // Owning the gc lock means this thread waited on an earlier failure.
    if (pState->gc_lock.sync.threadId == pState->current_thread->threadId) {
        // we must release the monitor
        exit_monitor(pState, pState->current_thread, &(pState->gc_lock));
        // Only count this as a retry when the collector is idle.
        if (pState->gc_phase == GC_IDLE) {
            pState->gc_retry_state = GC_RETRY;
        }
    }

    // No exclusive thread: nothing more to check.
    if (!pState->gc_thread) {
        return true;
    }

    if (pState->gc_thread == pState->current_thread) {
        // We are the exclusive thread; a retry becomes an exclusive retry.
        if (pState->gc_retry_state == GC_RETRY) {
            pState->gc_retry_state = GC_EXCLUSIVE;
        }
        return true;
    }

    // Somebody else has exclusive access, wait and retry
    pState->program_counter = getPc(pState);
    sleep_thread(pState, 1, 0);
    return false;
}

/**
 * Complete a memory transaction.
 * The ret parameter is NULL if the transaction failed.
 *
 * On success the exclusive-mode owner (gc_thread) is cleared and ret is
 * returned unchanged. On failure the request is advanced to the state
 * gc_retry_state + 1 and NULL is returned:
 *   GC_RETRY     - reset the pc, wait on gc_lock and start a collection if
 *                  the collector is idle.
 *   GC_EXCLUSIVE - as GC_RETRY, but first make this thread the exclusive
 *                  allocator (gc_thread).
 *   GC_ERROR     - throw OutOfMemoryError and clear gc_thread.
 *   GC_ABORT     - (reached after alloc_abort) just clear gc_thread.
 */
Object *alloc_complete(nvm_state_t* pState, Object *ret)
{
    // If the request has worked, clean up and we are done
    if (ret != NULL) {
        pState->gc_thread = NULL;
#if NVM_DEBUG_MEMORY > 2
        printf("alloc_complete: state is %d\n", pState->gc_retry_state);
#endif
        return ret;
    }

#if NVM_DEBUG_MEMORY > 2
    printf("alloc_complete: failed to allocate, moving to state %d\n", pState->gc_retry_state + 1);
#endif
    
    // Failed to allocate the memory, move on to the next state.
    switch(pState->gc_retry_state + 1) {
        case GC_ERROR:
#if NVM_DEBUG_MEMORY > 0
	    describe_heap(pState);
#endif
            nvm_throw_new_exception(pState, JAVA_LANG_OUTOFMEMORYERROR);
            /* fallthrough: the exclusive owner must also be released */
        case GC_ABORT:
            pState->gc_thread = NULL;
            break;
        case GC_EXCLUSIVE:
            pState->gc_thread = pState->current_thread;
            /* fallthrough: an exclusive retry is otherwise a normal retry */
        case GC_RETRY:
            // retry the current instruction
            pState->program_counter = getPc(pState);
            system_wait(pState, &(pState->gc_lock));
            // Kick off a collection unless one is already in progress.
            if (pState->gc_phase == GC_IDLE) {
                pState->gc_phase = GC_MARKROOTS;
            }
            break;
    }
    return NULL;
}

/**
 * Notify any threads that are waiting to retry allocations that they can do so.
 * The waiters are the threads parked on gc_lock by alloc_complete.
 */
void alloc_notify(nvm_state_t* pState)
{
    // NOTE(review): the final argument presumably selects "notify all" - confirm.
    system_notify(pState, &pState->gc_lock, true);
}

/**
 * Abort a memory transaction and throw an exception.
 *
 * After throwing, the retry state is set to GC_ERROR so that the following
 * alloc_complete(NULL) lands on its GC_ABORT case (GC_ERROR + 1) and only
 * releases exclusive mode instead of scheduling a retry.
 *
 * @param exception class index of the exception to throw.
 */
void alloc_abort(nvm_state_t* pState, nvm_class_index_t exception)
{
    // NOTE(review): the state is written after the throw; presumably because
    // creating the exception may itself run an allocation transaction that
    // resets gc_retry_state - confirm before reordering.
    nvm_throw_new_exception(pState, exception);
    pState->gc_retry_state = GC_ERROR;
}

/**
 * Allocate an instance of a class, running its static initializer first if
 * the class has not been initialized yet.
 *
 * @param class_index Index of the class to instantiate.
 * @param btAddr Back-track PC address, in case
 *               a static initializer needs to be invoked.
 * @return Object reference, or NULL if dispatching the static initializer
 *         did not return EXEC_CONTINUE or the allocation did not complete.
 */
Object *new_object_checked (nvm_state_t* pState, const nvm_class_index_t class_index, const byte *btAddr)
{
    // The dispatch is only attempted for classes that still need initializing.
    if (!is_initialized_idx (pState, class_index) &&
        dispatch_static_initializer (pState, get_class_record(pState, class_index), btAddr) != EXEC_CONTINUE) {
#if NVM_DEBUG_MEMORY > 2
        printf("new_object_checked: returning null\n");
#endif
        return NULL;
    }
    return new_object_for_class (pState, class_index);
}

/**
 * Allocate an instance of the given class inside one allocation transaction.
 * Static initializers are not dispatched here.
 *
 * @return the new object, or NULL if the transaction could not start or the
 *         allocation failed (alloc_complete then arranges the retry or error).
 */
Object *new_object_for_class (nvm_state_t* pState, const nvm_class_index_t class_index)
{
#if NVM_DEBUG_MEMORY > 2
    printf("new_object_for_class: class %d of size %" PRIu32 " words\n", class_index, TO_WORDS(get_class_record(pState, class_index)->classSize));
#endif
    if (alloc_start(pState)) {
        // Instance size comes straight from the class record.
        Object *instance = obj_heap_allocate(pState, TO_WORDS(get_class_record(pState, class_index)->classSize), class_index, 0, 0);
        return alloc_complete(pState, instance);
    }
    return NULL;
}

/**
 * Allocates an array.
 * To allow compact representation of small arrays but also allow the
 * use of larger objects we support two classes of array. The standard array
 * has a normal object header. Arrays larger than LEN_BIGARRAY have the
 * std length field set to LEN_BIGARRAY and then have an extra length field
 * which is placed immediately at the end of the normal header. This length
 * field contains the real array size.
 *
 * Must be called inside an allocation transaction (alloc_start /
 * alloc_complete). A negative length aborts the transaction with
 * NegativeArraySizeException.
 *
 * @param force_obj_heap when true the data is always placed inline in the
 *        object heap rather than in the moveable array heap.
 * @return the array, or NULL on failure.
 */
Object *new_array (nvm_state_t* pState, const nvm_class_index_t class_index, JINT length, boolean force_obj_heap)
{
    if (length < 0) {
        alloc_abort(pState, JAVA_LANG_NEGATIVEARRAYSIZEEXCEPTION);
        return NULL;
    }

    const JINT dataWords = array_data_size_words(pState, class_index, length);
#if NVM_DEBUG_MEMORY > 2
    printf("new_array: cls %d, length %" JINT_FMT ", allocSize %" PRIu32 "\n", class_index, length, dataWords);
    printf("new_array: size of cls %d is %" PRIu32 "\n", class_index, get_class_record(pState, class_index)->classSize);
#endif

    // Inline allocation: requested explicitly, or no bigger than the
    // headers a moveable array would need anyway.
    if (force_obj_heap || (NORM_OBJ_SIZE + dataWords) <= BA_OBJ_SIZE + AH_OBJ_SIZE) {
        return obj_heap_allocate (pState, NORM_OBJ_SIZE + dataWords, class_index, 1, length);
    }

    // Everything else gets a fixed header plus data on the moveable heap.
    Object *header = obj_heap_allocate(pState, BA_OBJ_SIZE, class_index, 1, LEN_BIGARRAY);
    if (header == NULL) {
        return NULL;
    }

    AHeapObj *storage = array_heap_allocate(pState, dataWords + AH_OBJ_SIZE, header);
    if (storage == NULL) {
        // Record the shortfall; the caller will retry, possibly exclusively.
        pState->gc_array_heap_memory_required += dataWords + AH_OBJ_SIZE;
        return NULL;
    }

    nvm_object_array_size_set(header, LEN_AHARRAY);
    ((BigArray *)header)->length = length;
    // Word offset from the header to the first element, past the AHeapObj.
    ((BigArray *)header)->offset = (short)((uint32_t*)storage - (uint32_t*)header + AH_OBJ_SIZE);
    return header;
}

/**
 * Create a single dimension array as one complete allocation transaction.
 * NOTE: class_index is the array class NOT the class of the elements.
 *
 * @return the array, or NULL if the transaction could not start or failed.
 */
Object *new_single_array (nvm_state_t* pState, const nvm_class_index_t class_index, JINT length, boolean force_obj_heap)
{
    if (alloc_start(pState)) {
        Object *array = new_array(pState, class_index, length, force_obj_heap);
        return alloc_complete(pState, array);
    }
    return NULL;
}

/**
 * Recursively allocate a (possibly partial) multi-dimensional array.
 * Must be called inside an allocation transaction.
 *
 * @param class_index The class index of the array
 * @param reqDimensions Number of requested dimensions for allocation.
 * @param numElemPtr Pointer to first dimension. Next dimension at numElemPtr+1.
 * @return the outermost array, or NULL if reqDimensions is 0 or any
 *         allocation failed.
 */
Object *alloc_multi_array (nvm_state_t* pState, const nvm_class_index_t class_index, byte reqDimensions, STACKWORD *numElemPtr)
{
    ClassRecord *arrayClass = get_class_record(pState, class_index);

    if (reqDimensions == 0) {
        return NULL;
    }

    Object *outer = new_array (pState, class_index, (JINT)*numElemPtr, false);
    // Failure, or the last requested level: nothing to fill in.
    if (outer == NULL || reqDimensions == 1) {
        return outer;
    }

    // Make sure we protect each level from the gc. Once we have returned
    // the ref it will be protected by the level above.
    const nvm_class_index_t elemCls = get_element_class(arrayClass);
    for (JINT slot = (JINT)*numElemPtr - 1; slot >= 0; slot--) {
        Object *inner = alloc_multi_array (pState, elemCls, reqDimensions - 1, numElemPtr + 1);
        if (inner == NULL) {
            return NULL;
        }
        ref_array(outer)[slot] = ptr2ref(inner);
    }
    return outer;
}

/**
 * Allocate a multi-dimensional array as one allocation transaction.
 *
 * @param class_index The class index of the array
 * @param reqDimensions Number of dimensions to allocate (may be fewer than
 *        the class declares).
 * @param numElemPtr Pointer to the first dimension size.
 * @return the array, or NULL when reqDimensions is 0, when the class has
 *         MAX_VM_REFS or more dimensions (OutOfMemoryError is thrown), or
 *         when the allocation did not complete.
 */
Object *new_multi_array (nvm_state_t* pState, const nvm_class_index_t class_index, byte reqDimensions, STACKWORD *numElemPtr)
{
    ClassRecord *arrayClass = get_class_record(pState, class_index);
    byte declaredDims = get_dim(arrayClass);

#if VERIFY
    assert (declaredDims >= 1, MEMORY6);
    assert (reqDimensions <= declaredDims, MEMORY8);
#endif
    if (reqDimensions == 0) {
        return NULL;
    }

    if (declaredDims >= MAX_VM_REFS) {
        nvm_throw_new_exception(pState, JAVA_LANG_OUTOFMEMORYERROR);
        return NULL;
    }

    // A one-dimensional class needs no recursion.
    if (declaredDims == 1) {
        return new_single_array (pState, class_index, (JINT)*numElemPtr, false);
    }

    if (alloc_start(pState)) {
        Object *array = alloc_multi_array(pState, class_index, reqDimensions, numElemPtr);
        return alloc_complete(pState, array);
    }
    return NULL;
}

/**
 * Native array copy method.
 * Copy the (partial) contents of one array to another.
 * Placed here to allow access to element size information.
 *
 * Throws NullPointerException for a NULL array, ArrayStoreException when
 * either argument is not an array, when primitive element classes differ or
 * when an element fails the per-element type check, and
 * ArrayIndexOutOfBoundsException for a negative or out-of-range
 * offset/length.
 *
 * @return EXEC_CONTINUE on success, otherwise the value returned by
 *         nvm_throw_new_exception.
 */
int arraycopy(nvm_state_t* pState, Object *src, int srcOff, Object *dst, int dstOff, int len)
{
    int elemSize;
    nvm_class_index_t srcCls, dstCls;
    boolean primitive;
    // validate things
    if (src == NULL || dst == NULL) {
        return nvm_throw_new_exception(pState, JAVA_LANG_NULLPOINTEREXCEPTION);
    }
    if (!nvm_object_is_array(src) || !nvm_object_is_array(dst)) {
        return nvm_throw_new_exception(pState, JAVA_LANG_ARRAYSTOREEXCEPTION);
    }
    // Check type compatibility...
    srcCls = get_element_class(get_class_record(pState, nvm_object_class_index(src)));
    dstCls = get_element_class(get_class_record(pState, nvm_object_class_index(dst)));
    primitive = is_primitive(srcCls) || is_primitive(dstCls);
    if (primitive && srcCls != dstCls) {
        return nvm_throw_new_exception(pState, JAVA_LANG_ARRAYSTOREEXCEPTION);
    }

    // Range check. The limits are compared as "len > size - offset" rather
    // than "offset + len > size": with all three values known to be
    // non-negative the subtraction cannot overflow, whereas the addition
    // could wrap for large arguments and let a bad request through.
    const int srcLen = (int)nvm_array_size(src);
    const int dstLen = (int)nvm_array_size(dst);
    if (len < 0 || srcOff < 0 || dstOff < 0 ||
        len > srcLen - srcOff ||
        len > dstLen - dstOff) {
        return nvm_throw_new_exception(pState, JAVA_LANG_ARRAYINDEXOUTOFBOUNDSEXCEPTION);
    }

    // write barrier
    if (!primitive) {
        update_array(pState, dst);
    }
    if (primitive || !type_checks_enabled(pState) || is_assignable(pState, srcCls, dstCls)) {
        // copy things the fast way; memmove copes with overlapping ranges
        elemSize = array_element_size(pState, src);
        memmove(get_array_element_ptr(dst, elemSize, dstOff), get_array_element_ptr(src, elemSize, srcOff), (size_t)len * (size_t)elemSize);
    } else {
        // We have to check every element...
        int i;
        REFERENCE *srcPtr = ref_array(src) + srcOff;
        REFERENCE *dstPtr = ref_array(dst) + dstOff;
        for(i = 0; i < len; i++) {
            if (ref_null_p(*srcPtr) || is_assignable(pState, nvm_object_class_index(ref2obj(*srcPtr)), dstCls)) {
                *dstPtr++ = *srcPtr++;
            } else {
                // Elements copied so far stay in the destination.
                return nvm_throw_new_exception(pState, JAVA_LANG_ARRAYSTOREEXCEPTION);
            }
        }
    }
    return EXEC_CONTINUE;
}


/**
 * Build a String object from a constant pool record.
 * If the constant is already a char array (type AC) it is shared directly;
 * otherwise a new char array is allocated and the constant's bytes are
 * widened into it one per character.
 *
 * @return A String instance, or null if the allocation transaction could
 *         not start or either allocation failed.
 */
Object *new_string (nvm_state_t* pState, ConstantRecord *constantRecord)
{
#if NVM_DEBUG_MEMORY > 2
    printf("new_string: start\n");
#endif

    if (!alloc_start(pState)) {
        return NULL;
    }

    String *str = (String*) obj_heap_allocate(pState, TO_WORDS(get_class_record(pState, JAVA_LANG_STRING)->classSize), JAVA_LANG_STRING, 0, 0);

    if (str == NULL) {
        /* call new_array to increase heap memory required, even though it will likely fail */
        if (constantRecord->constantType != AC) {
            new_array (pState, AC, constantRecord->stringLength, false);
        }
        return alloc_complete(pState, NULL);
    }

    // what sort of array do we have?
    if (constantRecord->constantType == AC) {
        str->characters = ptr2ref(get_constant_ptr(pState, constantRecord));
        return alloc_complete(pState, (Object*) str);
    }

    Object *chars = new_array (pState, AC, constantRecord->stringLength, false);
    if (chars == NULL) {
        /* increase memory pressure by the size of a string, so that the allocator only retries if both allocations can succeed */
        pState->gc_obj_heap_memory_required += TO_WORDS(get_class_record(pState, JAVA_LANG_STRING)->classSize);
        return alloc_complete(pState, NULL);
    }

    str->characters = obj2ref(chars);
    // Widen each constant byte to a JCHAR.
    byte *src = get_constant_ptr(pState, constantRecord);
    byte *src_end = src + constantRecord->stringLength;
    for (JCHAR *dst = jchar_array(chars); src < src_end; src++, dst++) {
        *dst = (JCHAR) (*src);
    }
    return alloc_complete(pState, (Object*) str);
}

/**
 * Build a String object from a NUL-terminated C string, one JCHAR per byte.
 *
 * Both the String and its char array must be allocated for the request to
 * count as successful. If the array cannot be allocated the transaction is
 * reported as failed (so that it is retried or raises OutOfMemoryError via
 * alloc_complete) instead of returning a String without characters; this
 * matches new_string.
 *
 * @param cstr the source text; must not be NULL.
 * @return A String instance, or null if there was a problem.
 */
Object *new_string_from_cstr(nvm_state_t* pState, const char *cstr)
{
    String *ref;
    Object *arr;
    JCHAR *dst;
    if (!alloc_start(pState)) {
        return NULL;
    }
    ref = (String*) obj_heap_allocate(pState, TO_WORDS(get_class_record(pState, JAVA_LANG_STRING)->classSize), JAVA_LANG_STRING, 0, 0);
    if (ref != NULL) {
        int len = (int) strlen(cstr);
        arr = new_array(pState, AC, len, false);
        if (arr != NULL) {
            ref->characters = obj2ref(arr);
            dst = jchar_array(arr);
            for (int i=0; i<len; i++) {
                // Go through unsigned char so bytes >= 0x80 are not
                // sign-extended where plain char is signed.
                dst[i] = (JCHAR)(unsigned char)cstr[i];
            }
        } else {
            // The String header alone is useless: fail the transaction.
            ref = NULL;
            /* increase memory pressure by the size of a string, so that the allocator only retries if both allocations can succeed */
            pState->gc_obj_heap_memory_required += TO_WORDS(get_class_record(pState, JAVA_LANG_STRING)->classSize);
        }
    }
    return alloc_complete(pState, (Object*) ref);
}


/**
 * Initialize the run time stacks for the given thread.
 * Allocates the call stack (a byte array sized for INITIAL_STACK_FRAMES
 * frames) and the value stack (an int array of INITIAL_STACK_SIZE entries)
 * in a single allocation transaction.
 *
 * @return 0 on success, -1 if the transaction could not start or either
 *         allocation failed.
 */
int init_stacks(nvm_state_t* pState, Thread *thread)
{
    if (!alloc_start(pState)) {
        return -1;
    }

    // The value stack is only attempted once the call stack exists.
    Object *frames = new_array(pState, AB, INITIAL_STACK_FRAMES*sizeof(StackFrame), false);
    Object *values = frames ? new_array(pState, AI, INITIAL_STACK_SIZE, false) : NULL;
    if (values == NULL) {
        // allocation failed
        alloc_complete(pState, NULL);
        return -1;
    }

    thread->stackFrameArray = ptr2ref(frames);
    thread->stackArray = ptr2ref(values);
    // A moveable value stack records its owner (as a word offset from the
    // heap start) so frame pointers can be fixed up when it moves.
    if (nvm_object_is_array_ah(values)) {
        AHeapObj *header = (AHeapObj *)(array_start(values) - sizeof(AHeapObj));
        header->thread = (uint32_t *)thread - pState->gc_heap_start;
    }
    alloc_complete(pState, frames);
    return 0;
}

/**
 * If the stack is stored in a moveable object we need to keep a link back to
 * the thread so that we can update the frame values if the value stack moves.
 * The link is the thread's word offset from gc_heap_start, or 0 when thread
 * is NULL. Arrays that are not on the moveable heap are left untouched.
 */
void set_thread(nvm_state_t* pState, Object *stack, Thread *thread)
{
    if (!nvm_object_is_array_ah(stack)) {
        return;
    }

    // The AHeapObj header sits directly in front of the element data.
    AHeapObj *header = (AHeapObj *)(array_start(stack) - sizeof(AHeapObj));
    if (thread) {
        header->thread = (uint32_t *)thread - pState->gc_heap_start;
    } else {
        header->thread = 0;
    }
}

/**
 * Release the stacks associated with a thread.
 * Only the back-link from the value stack storage to the thread is cleared,
 * so that no dangling pointer to the thread object remains.
 */
void free_stacks(nvm_state_t* pState, Thread *thread)
{
    set_thread(pState, ref2ptr(thread->stackArray), NULL);
}

/**
 * Called when a value stack has been moved in memory. Every stack frame up
 * to and including stackFrameIndex has its localsBase and stackTop shifted
 * by offset (counted in STACKWORDs). If the thread is the one currently
 * running, the cached interpreter pointers are shifted as well.
 */
void update_stack_frames(nvm_state_t* pState, Thread *thread, ptrdiff_t offset)
{
    const int32_t delta = (int32_t) offset;
    const int lastFrame = thread->stackFrameIndex;
    StackFrame *frames = (StackFrame *)array_start(ref2obj(thread->stackFrameArray));

    for (int i = 0; i <= lastFrame; i++) {
        StackFrame *frame = &frames[i];
        frame->localsBase = ptr2ref(((STACKWORD*) ref2ptr(frame->localsBase)) + delta);
        frame->stackTop = ptr2ref(((STACKWORD*) ref2ptr(frame->stackTop)) + delta);
    }

    if (thread == pState->current_thread) {
        pState->program_stack_top += delta;
        pState->program_locals_base += delta;
    }
}

/**
 * Grow the call stack by INITIAL_STACK_FRAMES frames, up to a limit of 256
 * frames, by allocating a new byte array and copying the old contents.
 *
 * NOTE(review): when the stack is already at the limit a same-sized copy is
 * made and 0 is returned; presumably callers detect overflow themselves -
 * confirm.
 *
 * @return 0 on success, -1 if the new array could not be allocated.
 */
int expand_call_stack(nvm_state_t* pState, Thread *thread)
{
    const size_t limitBytes = 256*sizeof(StackFrame);
    Object *oldFrames = ref2ptr(thread->stackFrameArray);
    const size_t oldBytes = nvm_array_size(oldFrames);
    const size_t wanted = oldBytes + INITIAL_STACK_FRAMES*sizeof(StackFrame);

    Object *grown = new_single_array(pState, AB, wanted > limitBytes ? limitBytes : wanted, false);
    if (grown == NULL) {
        return -1;
    }
    memcpy(array_start(grown), array_start(oldFrames), oldBytes);
    thread->stackFrameArray = ptr2ref(grown);
    return 0;
}

/**
 * Grow the value stack by the larger of minSize and INITIAL_STACK_SIZE
 * entries. The contents are copied to a new int array, the frame pointers
 * are rebased onto it and the thread back-link is moved from the old
 * storage to the new one.
 *
 * @return 0 on success, -1 if the new array could not be allocated.
 */
int expand_value_stack(nvm_state_t* pState, Thread *thread, int minSize)
{
    Object *oldStack = ref2ptr(thread->stackArray);
    int oldWords = nvm_array_size(oldStack);
    int growth = INITIAL_STACK_SIZE;
    if (minSize > INITIAL_STACK_SIZE) {
        growth = minSize;
    }

    Object *grown = new_single_array(pState, AI, oldWords + growth, false);
    if (grown == NULL) {
        return -1;
    }
    memcpy(array_start(grown), array_start(oldStack), oldWords*sizeof(STACKWORD));
    // Rebase every frame by the distance between the two data areas.
    update_stack_frames(pState, thread, (STACKWORD *)array_start(grown) - (STACKWORD *)array_start(oldStack));
    // If the value stack is using a moveable array, we need to allow for updates
    set_thread(pState, grown, thread);
    // We need to remove the reference back to this thread from the old stack.
    set_thread(pState, oldStack, NULL);
    thread->stackArray = ptr2ref(grown);
    return 0;
}

/**
 * Create a shallow copy of an existing Object (including arrays).
 * Arrays are copied element data only; plain objects copy everything after
 * the Object header.
 *
 * @return the copy, or NULL if the allocation did not complete.
 */
Object *clone(nvm_state_t* pState, Object *old)
{
    Object *copy;

    if (!nvm_object_is_array(old)) {
        nvm_class_index_t class_index = nvm_object_class_index(old);
        ClassRecord* classRecord = get_class_record(pState, class_index);
        copy = new_object_for_class(pState, class_index);
        if (copy == NULL) {
            return NULL;
        }
        // Skip the header: the new object already has its own.
        memcpy((byte *) copy + sizeof(Object), (byte *)old + sizeof(Object), classRecord->classSize - sizeof(Object));
        return copy;
    }

    copy = new_single_array(pState, nvm_object_class_index(old), nvm_array_size(old), false);
    if (copy == NULL) {
        return NULL;
    }
    // Copy old array to new
    memcpy(array_start(copy), array_start(old), nvm_array_size(old) * array_element_size(pState, old));
    return copy;
}

/**
 * Note the issue is not just endian. We also need to deal with the fact that
 * java items may not be aligned correctly.
 * Now slightly optmized;
 */
STACKWORD get_word_4_swp(const byte *ptr)
{
    return (((STACKWORD)ptr[0]) << 24) | (((STACKWORD)ptr[1]) << 16) |
    (((STACKWORD)ptr[2]) << 8) | ((STACKWORD)ptr[3]);
}

/**
 * Following allow access to non aligned data fields
 */
STACKWORD get_word_ns(const byte *ptr, int typ)
{
    switch(typ)
    {
        case T_INT:
        case T_FLOAT:
        case T_REFERENCE:
        case T_DOUBLE:
        case T_LONG:
            return (((STACKWORD)ptr[3]) << 24) | (((STACKWORD)ptr[2]) << 16) |
            (((STACKWORD)ptr[1]) << 8) | ((STACKWORD)ptr[0]);
        case T_SHORT:
            return (STACKWORD)(JINT)(JSHORT)(((uint16_t)ptr[1]) << 8) | (ptr[0]);
        case T_CHAR:
            return (STACKWORD)(JCHAR)(((uint16_t)ptr[1]) << 8) | (ptr[0]);
        case T_BOOLEAN:
        case T_BYTE:
            return (STACKWORD)(JINT)(JBYTE)ptr[0];
    }
    return 0;
}

STACKWORD get_word_4_ns(const byte *ptr)
{
    return (((STACKWORD)ptr[3]) << 24) | (((STACKWORD)ptr[2]) << 16) |
    (((STACKWORD)ptr[1]) << 8) | ((STACKWORD)ptr[0]);
}

/**
 * Store a value of the given T_* type at a possibly unaligned address,
 * least-significant byte first. Four bytes are written for int, float,
 * reference, double and long, two for short and char, one for boolean and
 * byte. Unknown types write nothing.
 */
void store_word_ns(byte *ptr, int typ, STACKWORD aWord)
{
    int width;

    switch(typ)
    {
        case T_INT:
        case T_FLOAT:
        case T_REFERENCE:
        case T_DOUBLE:
        case T_LONG:
            width = 4;
            break;
        case T_SHORT:
        case T_CHAR:
            width = 2;
            break;
        case T_BOOLEAN:
        case T_BYTE:
            width = 1;
            break;
        default:
            return;
    }

    // Emit the low byte first, then shift the next one down.
    for (int i = 0; i < width; i++) {
        ptr[i] = (byte)(aWord >> (8 * i));
    }
}


/**
 * Initialize the memory system.
 * Resets the total heap size and the object heap free counter to zero.
 */
void memory_init(nvm_state_t* pState)
{
    pState->gc_obj_heap_memory_free = 0;
    pState->gc_memory_size = 0;
}

/**
 * Total heap size in bytes (gc_memory_size is kept in words).
 */
int getHeapSize(nvm_state_t* pState) {
    const int words = (int)pState->gc_memory_size;
    return words << WORD_SHIFT;
}

/**
 * Free memory in bytes: the free word counts of the object heap and the
 * array heap added together.
 */
int getHeapFree(nvm_state_t* pState) {
    const int objWords = (int)pState->gc_obj_heap_memory_free;
    const int arrayWords = (int)pState->gc_array_heap_memory_free;
    return (objWords + arrayWords) << WORD_SHIFT;
}

/**
 * The concurrent garbage collector implementation starts here.
 * It is an incremental concurrent mark/sweep collector using a snap shot
 * at the beginning and a write barrier. Allocations are made from a single
 * linked list of free areas. A separate bitmap is maintained that identifies
 * the start point of currently allocated objects. This allows the unambiguous
 * identification of pointers into the heap. A collection is triggered when
 * insufficient space is available to make an allocation. When this occurs the
 * thread making the request is suspended and its state is changed to allow a
 * retry of the operation. The collector then runs (effectively on another
 * thread), running for a fixed period of time on each call (currently the
 * collector will run for 1ms, consuming one half of a system time slice). The
 * collector uses an extended version of the classic node coloring scheme. The
 * collector runs through four major stages:
 *
 * MARKROOTS
 * At the start of this stage all items in the heap are colored white. This
 * state is guaranteed by the previous SWEEP stage. The collector now marks
 * each root node by placing it on the mark list (see below for overflow
 * handling). Only objects that contain additional references are placed on the
 * list; other nodes are marked BLACK immediately. Objects on the mark list
 * are colored DARKGREY. The roots consist of static data, threads and thread
 * stacks. We do not have stack maps to help identify references on the stack
 * so they are marked conservatively using the object bitmap described above.
 *
 * MARK
 * The mark phase traces each object marking all of the child references
 * in each object in the mark list. It uses a hybrid marking scheme marking
 * using a recursive call to a pre-set depth and then placing objects beyond
 * this depth on the mark list. This allows fast handling of shallow data
 * structures like arrays. If the mark list overflows the object is colored
 * LIGHTGREY. A separate sweep of the heap will find and mark all LIGHTGREY
 * objects. The mark phase continues until the mark queue is empty and there
 * are no LIGHTGREY objects in the heap. The system runs in parallel to the
 * collector. To allow this we use a snapshot at the beginning by marking all
 * of the roots in one go. After that we ensure that the snapshot is maintained
 * by using a write barrier that will track any modifications made to objects
 * that have not yet been marked. The write barrier is only active during the
 * mark phase.
 *
 * SWEEP
 * At this point any objects that are still colored WHITE are no longer in
 * use and so may be freed. The sweep phase performs this operation. It
 * will merge adjacent free blocks and clears the marked objects back to
 * WHITE. It re-builds the free list as part of the sweep. The sweep operation
 * runs in parallel with allocations, to ensure correct operation any objects
 * that are allocated, during a MARK/SWEEP phase that are ahead of the sweep
 * pointer are colored BLACK. The sweep phase will wake any threads that are
 * waiting for memory as it frees sufficient space to meet the demand. All
 * such threads will be woken when the sweep completes.
 *
 */

/**
 * "Mark" flag manipulation functions.
 */
/* Store the GC colour `col` in the mark field of the object's header flags. */
static inline void set_gc_marked(Object* obj, byte col)
{
    obj->flags.mark = col;
}

/* Return the GC colour currently held in the object's mark field. */
static inline byte get_gc_mark(Object *obj )
{
    return obj->flags.mark;
}

/* Reset the object's GC colour to GC_WHITE, i.e. "not marked". */
static inline void clr_gc_marked(Object* obj)
{
    set_gc_marked(obj, GC_WHITE);
}

/* An object counts as marked when its colour is anything other than GC_WHITE. */
static inline boolean is_gc_marked(Object* obj)
{
    if (obj->flags.mark == GC_WHITE) {
        return false;
    }
    return true;
}

/**
 * Reference bitmap manipulation functions.
 * The bitmap is allocated at the end of the region.
 */
/*
 * Set the reference-bitmap bit for the block at `ptr`. The bit number is the
 * block's distance from gc_heap_start measured in WORD_SIZE units.
 */
static inline void set_reference(nvm_state_t* pState, FreeObject* ptr)
{
    uint32_t bit = ((byte*) ptr - (byte*)pState->gc_heap_start) / (WORD_SIZE);
    byte *cell = &(pState->gc_obj_reference_map)[bit >> 3];

    *cell |= 1 << (bit & 7);
}

/*
 * Clear the reference-bitmap bit for the object at `ptr`. The bit number is
 * the object's distance from gc_heap_start measured in WORD_SIZE units.
 */
static inline void clr_reference(nvm_state_t* pState, Object* ptr)
{
    uint32_t bit = ((byte*) ptr - (byte*)pState->gc_heap_start) / (WORD_SIZE);
    byte *cell = &(pState->gc_obj_reference_map)[bit >> 3];

    *cell &= ~ (1 << (bit & 7));
}

/*
 * Conservative test: does `ptr` address the start of an allocated block?
 *
 * `ptr` may be any stack word reinterpreted as a pointer, so it is handled
 * purely as an integer address: no pointer subtraction is done on a value that
 * may not point into the heap, and no pointer is narrowed to `int`. The bit
 * index is computed in WORD_SIZE units, the same way set_reference() and
 * clr_reference() compute it.
 *
 * Returns true only when the address is word aligned, lies within
 * [gc_heap_start, gc_heap_start + gc_memory_size words) and its bit is set in
 * the reference bitmap.
 */
static inline boolean is_reference(nvm_state_t* pState, Object* ptr)
{
    uintptr_t addr = (uintptr_t) ptr;
    uintptr_t base = (uintptr_t) pState->gc_heap_start;

    /* Misaligned values and addresses below the heap can never be objects. */
    if ((addr & ((WORD_SIZE) - 1)) != 0 || addr < base) {
        return false;
    }

    ptrdiff_t refIndex = (ptrdiff_t) ((addr - base) / (WORD_SIZE));
    if (refIndex >= (ptrdiff_t) pState->gc_memory_size) {
        return false;
    }
    return ((pState->gc_obj_reference_map)[ refIndex >> 3] & (1 << (refIndex & 7))) != 0;
}

/*
 * Turn the block at `object` into a T_FREE block covering `size` words.
 * A block of exactly NORM_OBJ_SIZE words is written as a zero-length array;
 * anything else is written as a big array whose length field holds the
 * size minus BA_OBJ_SIZE.
 */
static inline void set_free_size(nvm_state_t* pState, FreeObject *object, uint16_t size)
{
#ifdef VERIFY
    if (size >= pState->gc_memory_size) {
        assert(size < pState->gc_memory_size, MEMORY2);
    }
#endif
    if (size != NORM_OBJ_SIZE) {
        nvm_object_init((void*)object, T_FREE, 1, LEN_BIGARRAY);
        ((BigArray *)object)->length = (size - BA_OBJ_SIZE);
        return;
    }
    // header-only block: zero length void array
    nvm_object_init((void*)object, T_FREE, 1, 0);
}

/**
 * Initialise the heap over the memory range [start, end).
 *
 * The range is word aligned (start rounded up, end truncated down). The
 * reference bitmap is carved out of the top of the range and zeroed. What is
 * left is split into the array heap (low addresses) followed by the object
 * heap (high addresses). Each heap starts out as one free block, and the
 * incremental collector state is reset to GC_IDLE.
 *
 * @@param pState VM state whose gc_* fields are initialised.
 * @@param start First byte of the region.
 * @@param end End of the region (exclusive).
 * @@param array_heap_size Requested size of the array heap in bytes. Zero
 *        selects half of the usable heap. The request is clamped below.
 */
void memory_add_region (nvm_state_t* pState, byte *start, byte *end, size_t array_heap_size)
{
    /* NOTE(review): both sizes are counted in words and held in 16 bits, so
     a region of more than 65535 words would be truncated here - confirm the
     maximum supported heap size. */
    uint16_t contents_size;
    uint16_t bitmap_size;
    AHeapObj *ah;
    
    /* align upwards */
    pState->gc_heap_start = (uint32_t *) MEM_ROUND((uintptr_t) start);
    
    /* align downwards */
    pState->gc_heap_end = (uint32_t *) MEM_TRUNC((uintptr_t) end);
    /* To be able to quickly identify a reference-like stack slot
     we use a dedicated reference bitmap. With an alignment of 4 bytes
     the map is 32 times smaller than the heap. Allocate
     the map by lowering the heap end pointer by the map size.
     The map must be zeroed. */
    contents_size = pState->gc_heap_end - pState->gc_heap_start;
    /* Calculate the required bitmap size (in words). Note that we divide here
     by 33 rather than 32 to take into account the reduction in size of the
     heap due to the bitmap size!  */
    bitmap_size = contents_size / (((WORD_SIZE) * 8) + 1) + 1;
    pState->gc_heap_end -= bitmap_size;
    memset((byte *)pState->gc_heap_end, 0, bitmap_size*WORD_SIZE);
    pState->gc_obj_reference_map = (byte *)pState->gc_heap_end;
    /* Usable heap size in words, excluding the bitmap */
    pState->gc_memory_size = pState->gc_heap_end - pState->gc_heap_start;
    
    // Split the heap into object and array heaps
    pState->gc_array_heap_start = pState->gc_heap_start;
    
    size_t heap_size_words = pState->gc_heap_end - pState->gc_heap_start;
    if (array_heap_size == 0) {
        // No explicit request: give the array heap half of the words
        array_heap_size = (heap_size_words >> 1);
    } else {
        // Convert the request from bytes to words, rounding up
        array_heap_size = TO_WORDS(array_heap_size);
    }

    // Clamp the request: when the heap is larger than INITIAL_HEAP words keep
    // at least INITIAL_HEAP words for the object heap; otherwise fall back to
    // half of the heap if the request exceeds the whole heap.
    if (heap_size_words > INITIAL_HEAP && array_heap_size >= heap_size_words - INITIAL_HEAP) {
        array_heap_size = heap_size_words - INITIAL_HEAP;
    } else if (array_heap_size > heap_size_words) {
        array_heap_size = (heap_size_words >> 1);
    }

    /* the array heap takes the low array_heap_size words, the object heap
     takes everything from there up to the bitmap */
    pState->gc_array_heap_end = pState->gc_array_heap_start + array_heap_size;
    pState->gc_obj_heap_end = (Object *)pState->gc_heap_end;
    pState->gc_obj_heap_start = (Object*) pState->gc_array_heap_end;
    // Now create the array heap: a single available block spanning all of it
    pState->gc_array_heap_memory_free = pState->gc_array_heap_end - pState->gc_array_heap_start;
    pState->gc_array_heap_alloc = pState->gc_array_heap_start;
    ah = (AHeapObj *)pState->gc_array_heap_start;
    ah->available = 1;
    ah->object = pState->gc_array_heap_memory_free;
    pState->gc_array_heap_freed = 0;
    /* create free block in the heap */
    pState->gc_obj_heap_memory_free = pState->gc_obj_heap_end - pState->gc_obj_heap_start;
    set_free_size(pState, (FreeObject *)pState->gc_obj_heap_start, pState->gc_obj_heap_memory_free);
    // Initialise the free list: it holds just that one block
    ((FreeObject *)pState->gc_obj_heap_start)->next = FL_END;
    pState->gc_free_list = pState->gc_obj_heap_start - (Object *)pState->gc_heap_start;
    pState->gc_alloc_prev = &pState->gc_free_list;
    // Incremental mark/sweep state
    pState->gc_sweep_base = pState->gc_obj_heap_end;
    pState->gc_sweep_free = NULL;
    pState->gc_obj_heap_memory_required = 0;
    pState->gc_phase = GC_IDLE;
    
    // Reset the allocator lock
    pState->gc_lock.sync.monitorCount = 0;
    pState->gc_lock.sync.threadId = 0;
    
    pState->gc_thread = NULL;
}


/**
 * Allocate and initialise a collectable object of `size` words from the
 * object heap.
 *
 * The free list is searched from the current allocation position. If that
 * fails and the search did not begin at the head of the list, it is repeated
 * once from the head. On success the object header is initialised from
 * class_index / is_array / length and the rest of the block is zeroed.
 * On failure `size` is added to gc_obj_heap_memory_required and NULL is
 * returned. Starting a collection and retrying the request is not done in
 * this function.
 */
Object *obj_heap_allocate(nvm_state_t* pState, JINT size, nvm_class_index_t class_index, _Bool is_array, byte length)
{
#if NVM_DEBUG_MEMORY > 2
    printf("obj_heap_allocate: class %d, size %d\n", class_index, size);
#endif
    Object *block = (Object *)try_allocate(pState, size);

    if (block == NULL && pState->gc_alloc_prev != &pState->gc_free_list) {
        // Nothing found from the current position: rewind to the list head.
        pState->gc_alloc_prev = &pState->gc_free_list;
        block = (Object *)try_allocate(pState, size);
    }

    if (block != NULL) {
        nvm_object_init(block, class_index, is_array, length);
        // Clear everything that follows the header
        memset((byte *)(block + 1), 0, size*WORD_SIZE - sizeof(Object));
        return block;
    }

#if NVM_DEBUG_MEMORY > 2
    printf("obj_heap_allocate: failed, adjusting gc_obj_heap_memory_required from %" PRIu32 " to %" PRIu32 "\n",
           pState->gc_obj_heap_memory_required, pState->gc_obj_heap_memory_required + size);
#endif
    // Record the unmet demand
    pState->gc_obj_heap_memory_required += size;
    return NULL;
}

/**
 * First-fit allocation from the object heap free list.
 *
 * The search starts at the link referenced by gc_alloc_prev and follows the
 * `next` offsets (added to gc_heap_start as FreeObject units) until a block of
 * at least `size` words is found or the offset lands at or beyond
 * gc_obj_heap_end. Nothing is searched when the total free count is already
 * smaller than the request.
 *
 * Returns the allocated block, with its first word reset and its bit set in
 * the reference bitmap, or NULL when no block is large enough.
 *
 * @@param size Size of block including header in 4-byte words.
 */
Object *try_allocate(nvm_state_t* pState, uint32_t size)
{
    if(pState->gc_obj_heap_memory_free >= size) {
        FreeObject *regionTop = (FreeObject *)pState->gc_obj_heap_end;
        // prev always addresses the link that leads to ptr
        uint16_t *prev = pState->gc_alloc_prev;
        FreeObject *ptr = (FreeObject *)pState->gc_heap_start + *prev;
#if NVM_DEBUG_MEMORY > 2
        printf("try_allocate: allocate %" PRIu32 ", free %" PRIu32 "\n", size, pState->gc_obj_heap_memory_free-size);
#endif
        while (ptr < regionTop) {
            uint32_t freeSz = get_free_size(ptr);
#if NVM_DEBUG_MEMORY > 2
            printf("try_allocate: size of free chunk is %" PRIu32 ", total free is %" PRIu32 "\n",
                   freeSz, pState->gc_obj_heap_memory_free);
#endif
#ifdef VERIFY
            assert(freeSz < pState->gc_memory_size, MEMORY0);
#endif
            if (size <= freeSz) {
                // The sweeper must not merge into a block we are about to change
                if (ptr == pState->gc_sweep_free) {
                    pState->gc_sweep_free = NULL;
                }
                /* allocate from this block */
                if (size < freeSz) {
                    /* cut into two blocks */
                    /* first block gets remaining free space */
                    freeSz -= size;
                    set_free_size(pState, ptr, freeSz);
                    ptr += freeSz ; /* second block gets allocated */
                    // The shrunken free block stays linked; resume here next time
                    pState->gc_alloc_prev = prev;
                } else {
                    // All used up unlink from free list
                    *prev = ptr->next;
                    // start next search from the head of the list
                    pState->gc_alloc_prev = &pState->gc_free_list;
                }
                // Mark the object as allocated
                pState->gc_obj_heap_memory_free -= size;
                set_reference(pState, ptr);
                // We need to GC mark any object that has not yet been swept
                // we also clear the monitor part of the header all in one go.
                if ((Object *)ptr >= pState->gc_sweep_base) {
                    *((uint32_t *)ptr) = GC_MASK;
                } else {
                    *((uint32_t *)ptr) = 0;
                }
                return (Object *)ptr;
            } else {
                /* continue searching */
                prev = &(ptr->next);
                ptr = (FreeObject *)pState->gc_heap_start + *prev;
            }
        }
    }
#if NVM_DEBUG_MEMORY > 2
    printf("try_allocate: failed to allocate, bailing out\n");
#endif
    return NULL;
}

/**
 * Mark every non-NULL entry of the events table so that registered events
 * are not collected.
 */
void mark_events(nvm_state_t* pState, boolean check_timeout)
{
    int slot = 0;

    while (slot < MAX_EVENTS) {
        if (pState->events[slot] != NULL) {
            mark_object(pState, (Object*) pState->events[slot], 0, check_timeout);
        }
        slot++;
    }
}

/**
 * Walk the static field records of the program and mark the object behind
 * every non-null static reference field. The static state area is read
 * sequentially, advancing by the size of each field's type.
 */
void mark_static_objects(nvm_state_t* pState, boolean check_timeout)
{
    MasterRecord* master = get_master_record(pState);
    STATICFIELD* records = (STATICFIELD*) get_static_fields_base(pState);
    byte* cursor = get_static_state_base(pState);
    int total = master->numStaticFields;

    for (int n = 0; n < total; n++) {
        STATICFIELD record = records[n];
        // The field type lives in bits 12..15 of the record
        byte type = (record >> 12) & 0x0F;
        byte width = typeSize[type];

        if (type == T_REFERENCE) {
            Object* target = word2obj(get_word_4_ns(cursor));
            if (target != NULL) {
                mark_object(pState, target, 0, check_timeout);
            }
        }
        cursor += width;
    }
}

/**
 * Mark everything reachable from the threads.
 * The `threads` root is marked if set. For each thread on every priority
 * queue, the thread object, its stack array, its stack frame array and (if
 * set) the object it is waiting on are marked. The thread's stack slots, from
 * the bottom of the stack up to and including the current frame's stack top,
 * are then scanned conservatively: any word that passes is_reference() is
 * marked.
 */
void mark_local_objects(nvm_state_t* pState, boolean check_timeout)
{
    // Make sure the stack frame for the current thread is up to date.
    if (pState->current_thread != NULL) {
        StackFrame *live = current_stackframe(pState);
        if (live != NULL) {
            update_stack_frame(pState, live);
        }
    }

    if (!ref_null_p(pState->threads)) {
        mark_object(pState, ref2obj(pState->threads), 0, check_timeout);
    }

    for (int prio = 0; prio < MAX_PRIORITY; prio++) {
        Thread* first = (Thread*) ref2obj(pState->thread_queue[prio]);
        Thread* th = first;

        while (th != NULL) {
            mark_object(pState, (Object*) th, 0, check_timeout);
            mark_object(pState, ref2obj(th->stackArray), 0, check_timeout);
            mark_object(pState, ref2obj(th->stackFrameArray), 0, check_timeout);

            if (!ref_null_p(th->waitingOn)) {
                mark_object(pState, ref2obj(th->waitingOn), 0, check_timeout);
            }

            Object* frames = ref2obj(th->stackFrameArray);
            StackFrame* frame = ((StackFrame*) array_start(frames)) + th->stackFrameIndex;
            Object* stack = ref2obj(th->stackArray);
            STACKWORD* slot = (STACKWORD*) jint_array(stack);
            STACKWORD* last = (STACKWORD*) ref2ptr(frame->stackTop);

            while (slot <= last) {
                Object* candidate = word2obj(*slot);

                /* A hit only proves the word equals the address of an
                 allocated object. The slot may really hold an int or a
                 float with that value. Marking it anyway is harmless: at
                 worst an unreachable object survives this collection. */
                if (is_reference(pState, candidate)) {
                    mark_object(pState, candidate, 0, check_timeout);
                }
                slot++;
            }

            // Stop once the walk is back at the thread it started from.
            th = (Thread*) ref2obj(th->nextThread);
            if (th == first) {
                break;
            }
        }
    }
}

/**
 * Trace one object: mark every non-null reference it holds, then colour the
 * object itself BLACK.
 *
 * For arrays every element is treated as a reference. For class instances
 * the fields are scanned backwards from the end of the object, class by class
 * up the parent chain until JAVA_LANG_OBJECT is reached. Children are marked
 * with the remaining recursion budget `callLimit`. Once GC_TIMEOUT reports
 * that time has run out they are marked with a budget of 0 instead, so the
 * object is completed without recursing further.
 */
void process_object(nvm_state_t* pState, Object *obj, int callLimit, boolean check_timeout)
{
    if (!nvm_object_is_array(obj)) {
        // Object is a normal class instance with ref fields
        nvm_class_index_t cls = nvm_object_class_index(obj);
        ClassRecord* rec = get_class_record(pState, cls);
        /* Start just past the last field and walk backwards */
        byte* cursor = (byte*) obj + rec->classSize;

        while (cls != JAVA_LANG_OBJECT) {
            if (get_field_cnt(rec)) {
                for (int f = get_field_cnt(rec) - 1; f >= 0; f--) {
                    byte type = get_field_type(pState, rec, f);
                    byte width = typeSize[type];
                    cursor -= width;
                    if (type != T_REFERENCE) {
                        continue;
                    }
                    Object* child = word2obj(get_word_4_ns(cursor));
                    if (child == NULL) {
                        continue;
                    }
                    int depth = (GC_TIMEOUT(pState, check_timeout)) ? 0 : callLimit;
                    mark_object(pState, child, depth, check_timeout);
                }
            }
            cls = rec->parentClass;
            rec = get_class_record(pState, cls);
        }
    } else {
        REFERENCE* cur = ref_array(obj);
        REFERENCE* stop = cur + nvm_array_size(obj);

        for (; cur < stop; cur++) {
            Object* child = ref2obj(*cur);
            if (child == NULL) {
                continue;
            }
            int depth = (GC_TIMEOUT(pState, check_timeout)) ? 0 : callLimit;
            mark_object(pState, child, depth, check_timeout);
        }
    }
    set_gc_marked(obj, GC_BLACK);
}

/**
 * Mark a single object.
 *
 * Objects already DARKGREY or darker need no work. A WHITE object whose class
 * holds no references is coloured BLACK at once. A WHITE String is coloured
 * BLACK together with its character array. Everything else has fields to
 * trace:
 *  - while recursion budget remains (callLimit > 0) the object is traced
 *    immediately through process_object();
 *  - otherwise it is pushed on the mark queue and coloured DARKGREY;
 *  - if the queue is full, a WHITE object is coloured LIGHTGREY and the
 *    overflow scan start is lowered to it, so that a later heap scan finds it.
 */
void mark_object(nvm_state_t* pState, Object *obj, int callLimit, boolean check_timeout)
{
    int colour = get_gc_mark(obj);

    if (colour >= GC_DARKGREY) {
        return;
    }

    if (colour == GC_WHITE) {
        // Leaf-like objects are finished here without any deeper tracing
        nvm_class_index_t cls = nvm_object_class_index(obj);

        if (has_norefs(get_class_record(pState, cls))) {
            set_gc_marked(obj, GC_BLACK);
            return;
        }
        if (cls == JAVA_LANG_STRING) {
            // Strings are very common: handle the string and its chars together
            Object* chars = word2obj(get_word_4_ns((byte*)(&(((String *)obj)->characters))));
            if (chars != NULL) {
                set_gc_marked(chars, GC_BLACK);
            }
            set_gc_marked(obj, GC_BLACK);
            return;
        }
    }

    // From here on the object has fields that require marking
    if (callLimit > 0) {
        process_object(pState, obj, callLimit - 1, check_timeout);
        return;
    }

    int next = markQNext(pState->gc_mark_queue_hd);
    if (next != pState->gc_mark_queue_tl) {
        // Room on the queue: store the object as an offset from the heap start
        pState->gc_mark_queue[pState->gc_mark_queue_hd] = obj - (Object *)pState->gc_heap_start;
        pState->gc_mark_queue_hd = next;
        set_gc_marked(obj, GC_DARKGREY);
        return;
    }

    // Queue full: fall back on the overflow scan of the heap
    if (colour != GC_WHITE) {
        return;
    }
    if (obj < pState->gc_overflow_scan) {
        pState->gc_overflow_scan = obj;
    }
    set_gc_marked(obj, GC_LIGHTGREY);
}

/**
 * Drain the mark queue: take entries from the tail and trace each one with
 * the full recursion budget, stopping when the queue is empty or when
 * GC_TIMEOUT fires after an entry has been handled.
 */
void process_queue(nvm_state_t* pState, boolean check_timeout)
{
    for (;;) {
        if (pState->gc_mark_queue_tl == pState->gc_mark_queue_hd) {
            return;
        }
        // Queue entries are offsets from the start of the heap
        Object *entry = (Object *)pState->gc_heap_start + pState->gc_mark_queue[pState->gc_mark_queue_tl];
        process_object(pState, entry, MAX_CALL_DEPTH, check_timeout);
        pState->gc_mark_queue_tl = markQNext(pState->gc_mark_queue_tl);
        if (GC_TIMEOUT(pState, check_timeout)) {
            return;
        }
    }
}

/**
 * Perform (one increment of) the sweep phase of the GC. We scan the objects
 * in the object heap starting at gc_sweep_base. Those that are not marked are
 * no longer in use and may be released, unless their monitor is still held.
 * We merge adjacent free objects together as we go and build up the free list
 * as part of this scan. Note that this operation runs in parallel with
 * allocation calls, so we must ensure that any newly allocated objects are
 * colored BLACK if they are ahead of the sweep point. We clear the mark bits
 * of currently live objects as we pass.
 *
 * The scan stops at the top of the object heap or when GC_TIMEOUT fires. The
 * position, the block being merged into and the list head are saved in
 * pState so that the next call can resume.
 */
void sweep(nvm_state_t* pState, boolean check_timeout)
{
    // Words released during this call
    uint32_t mf = 0;
    Object* ptr = pState->gc_sweep_base;
    // Free block directly below ptr that can still be extended, or NULL
    FreeObject* fptr = pState->gc_sweep_free;
    Object* regionTop = (Object *)pState->gc_obj_heap_end;
    // Head of the free list being rebuilt (offset from gc_heap_start)
    uint16_t flist = pState->gc_free_list;
    
    while(ptr < regionTop && !GC_TIMEOUT(pState, check_timeout)) {
        nvm_class_index_t class_index = nvm_object_class_index(ptr);
        uint32_t size = get_size_words(pState, ptr);
#ifdef VERIFY
        assert(size < pState->gc_memory_size, MEMORY1);
#endif
        if(is_gc_marked(ptr)) {
            // Live object: reset its colour; it also ends any run of free blocks
            clr_gc_marked(ptr);
            fptr = NULL;
        } else {
            // Unmarked: release it unless it is a real object whose monitor is held
            if (get_monitor_count(&ptr->sync) == 0 || class_index == T_FREE) {
                // Set object free
                if (class_index != T_FREE) {
                    clr_reference(pState, ptr);
                    // Arrays stored in the array heap give their storage back too
                    if (nvm_object_is_array_ah(ptr)) {
                        array_heap_free(pState, (AHeapObj *)(array_start(ptr) - sizeof(AHeapObj)));
                    }
                }
                mf += size;
                // Got a free block can we merge?
                if (fptr != NULL) {
                    // Grow the free block immediately below to cover this one
                    unsigned int fsize = get_size_words(pState, (Object *)fptr);
                    fsize += size;
                    set_free_size(pState, fptr, fsize);
                    if (fsize > pState->gc_obj_heap_largest_free) {
                        pState->gc_obj_heap_largest_free = fsize;
                    }
                } else {
                    fptr = (FreeObject *)ptr;
                    // add to free list (pushed at the head)
                    fptr->next = flist;
                    flist = ((Object *)fptr - (Object *)pState->gc_heap_start);
                    set_free_size(pState, fptr, size);
                    if (size > pState->gc_obj_heap_largest_free) {
                        pState->gc_obj_heap_largest_free = size;
                    }
                }
            } else {
                // Kept because its monitor count is non-zero; ends the free run
                fptr = NULL;
            }
        }
        ptr += size;
    }
    
#if NVM_DEBUG_MEMORY > 2
    printf("sweep: adjusting gc_obj_heap_memory_free by %" PRIu32 " from %" PRIu32 " to %" PRIu32 "\n",
           mf, pState->gc_obj_heap_memory_free, pState->gc_obj_heap_memory_free + mf);
#endif
    pState->gc_obj_heap_memory_free += mf;
    // Remember current state for next scan
    pState->gc_sweep_base = ptr;
    pState->gc_sweep_free = fptr;
    pState->gc_free_list = flist;
}


/**
 * Scan the object heap, starting at gc_overflow_scan, for objects that
 * overflowed the mark queue (coloured LIGHTGREY). Each one is marked and the
 * mark queue is drained straight away. Marking may create new overflow cases.
 * Those lower gc_overflow_scan again, which was reset to the heap top before
 * the scan started. If the scan stops early on a timeout, the resume point is
 * stored unless an even lower restart point has already been recorded.
 */
void mark_overflow(nvm_state_t* pState, boolean check_timeout)
{
    Object* heapTop = pState->gc_obj_heap_end;
    Object* cursor = pState->gc_overflow_scan;

    pState->gc_overflow_scan = heapTop;

    while (cursor < heapTop) {
        if (GC_TIMEOUT(pState, check_timeout)) {
            break;
        }
        unsigned int words = get_size_words(pState, cursor);
        if (get_gc_mark(cursor) == GC_LIGHTGREY) {
            mark_object(pState, cursor, 0, check_timeout);
            process_queue(pState, check_timeout);
        }
        cursor += words;
    }

    // If we have not finished the scan remember it for next time.
    if (cursor < pState->gc_overflow_scan) {
        pState->gc_overflow_scan = cursor;
    }
}

/**
 * Walk the object heap from `ptr` upwards looking for the first T_FREE block.
 * Returns the address of that block's `next` link so the caller can update
 * the free list there. If no free block is found before the top of the object
 * heap, the address of the list head (gc_free_list) is returned.
 */
uint16_t *get_free_ptr(nvm_state_t* pState, Object *ptr)
{
    Object* heapTop = pState->gc_obj_heap_end;

    for (Object* scan = ptr; scan < heapTop; scan += get_size_words(pState, scan)) {
        if (nvm_object_class_index(scan) == T_FREE) {
            return &(((FreeObject *)scan)->next);
        }
    }
    return &pState->gc_free_list;
}

/**
 * Main garbage collecting function.
 * This is called to perform one incremental step. It will step through each
 * of the following stages making progress on each call and returning after
 * performing one increment of work. The macro GC_TIMEOUT defines when the
 * increment is complete. When all of the phases are complete gc_phase will
 * be reset to GC_IDLE. The phases are:
 * MARKROOTS: All of the events, statics and thread stacks are marked.
 * MARK: All of the refs found from the roots are traced and marked
 * SWEEP: The object heap is swept freeing and merging unused objects.
 * COMPACT: The array heap is compacted if required, waiting threads are
 *          notified and the collector returns to GC_IDLE.
 *
 * check_timeout is handed to GC_TIMEOUT and to the marking functions.
 * force_compact makes the COMPACT phase call compact_array_heap() even when
 * the usual conditions for compaction are not met.
 */
void gc_run_collector(nvm_state_t* pState, boolean check_timeout, boolean force_compact)
{
    switch (pState->gc_phase)
    {
        case GC_IDLE:
            break;
        case GC_MARKROOTS:
            // No overflowed objects yet: the overflow scan starts at the heap top
            pState->gc_overflow_scan = pState->gc_obj_heap_end;
            mark_events(pState, check_timeout);
            mark_static_objects(pState, check_timeout);
            mark_local_objects(pState, check_timeout);
            // The whole object heap is now "not yet swept"
            pState->gc_sweep_base = pState->gc_obj_heap_start;
            pState->gc_sweep_free = NULL;
            pState->gc_phase = GC_MARK;
            // Fall through
        case GC_MARK:
            // Trace all refs. Our job is done when the mark queue is empty and we
            // have completed an overflow scan of the heap.
            while ((pState->gc_overflow_scan != pState->gc_obj_heap_end) || (pState->gc_mark_queue_tl != pState->gc_mark_queue_hd)) {
                process_queue(pState, check_timeout);
                if (GC_TIMEOUT(pState, check_timeout)) {
                    goto exit;
                }
                if (pState->gc_overflow_scan != pState->gc_obj_heap_end) {
                    mark_overflow(pState, check_timeout);
                }
                if (GC_TIMEOUT(pState, check_timeout)) {
                    goto exit;
                }
            }
            // Marking is complete: empty the free list and the free counters,
            // the sweep rebuilds them from scratch.
            pState->gc_obj_heap_memory_free = 0;
            pState->gc_obj_heap_largest_free = 0;
            pState->gc_free_list = FL_END;
            pState->gc_alloc_prev = &pState->gc_free_list;
            pState->gc_phase = GC_SWEEP;
            // Fall through
        case GC_SWEEP:
            sweep(pState, check_timeout);
            if (pState->gc_obj_heap_memory_required > 0 && pState->gc_obj_heap_largest_free > pState->gc_obj_heap_memory_required) {
#if NVM_DEBUG_MEMORY > 2
                printf("sweep: waking threads because %" PRIu32 " is required and %" PRIu32 " is available in one chunk\n",
                       pState->gc_obj_heap_memory_required, pState->gc_obj_heap_largest_free);
#endif
                // We may have enough free space to meet current requests, so wake up
                // any threads that are waiting for memory.
                alloc_notify(pState);
                pState->gc_obj_heap_memory_required = 0;
            }
            // A timeout here means the sweep is unfinished: stay in GC_SWEEP
            if (GC_TIMEOUT(pState, check_timeout)) {
                goto exit;
            }
            // Sweep finished: nothing is "ahead of the sweep point" any more
            pState->gc_alloc_prev = &pState->gc_free_list;
            pState->gc_sweep_base = pState->gc_obj_heap_end;
            pState->gc_sweep_free = NULL;
            pState->gc_phase = GC_COMPACT;
            // Fall through
        case GC_COMPACT:
            /* compact the array heap iff sweep freed an ah-allocated array and we need array heap memory */
            if (force_compact ||
                (pState->gc_array_heap_freed && pState->gc_array_heap_memory_required > 0)) {
                compact_array_heap(pState);
            }
            pState->gc_obj_heap_memory_required = 0;
            pState->gc_array_heap_memory_required = 0;
            // Notify any waiting threads that there may now be more memory
            alloc_notify(pState);
            pState->gc_phase = GC_IDLE;
    }
exit:
    return;
}

/**
 * Request a full GC on behalf of the current thread.
 *
 * Returns EXEC_RETRY when the allocator could not be entered (alloc_start
 * failed), and also on the first pass, after the request has been recorded by
 * bumping gc_array_heap_memory_required and setting gc_retry_state to
 * GC_RETRY. When called again with gc_retry_state already at GC_RETRY or
 * beyond, the request is treated as finished and EXEC_CONTINUE is returned.
 */
int garbage_collect(nvm_state_t* pState)
{
    if (!alloc_start(pState)) {
        return EXEC_RETRY;
    }

    if (pState->gc_retry_state < GC_RETRY) {
        // First pass: register a demand for memory and ask to be retried
        pState->gc_array_heap_memory_required += 1;
        pState->gc_retry_state = GC_RETRY;
        alloc_complete(pState, NULL);
        return EXEC_RETRY;
    }

    // This is the retry attempt, which means we are done
    alloc_complete(pState, (Object *)1);
    return EXEC_CONTINUE;
}

/**
 * Run the collector to completion in the calling context.
 *
 * With force_full set, an idle collector is first moved to GC_MARKROOTS so
 * that a whole cycle runs; otherwise only a cycle already in progress is
 * finished. The collector is stepped with timeouts disabled until it is back
 * at GC_IDLE. With release_locks set, gc_thread is cleared and the work is
 * bracketed by alloc_start() / alloc_complete(). gc_start_hook() and
 * gc_end_hook() are called around the whole operation.
 */
void wait_garbage_collect(nvm_state_t* pState, boolean release_locks, boolean force_full)
{
    gc_start_hook(pState);

    if (release_locks) {
        pState->gc_thread = NULL;
        alloc_start(pState);
    }

    if (force_full && pState->gc_phase == GC_IDLE) {
        pState->gc_phase = GC_MARKROOTS;
    }

    for (; pState->gc_phase != GC_IDLE; ) {
        gc_run_collector(pState, false, force_full);
    }

    if (release_locks) {
        alloc_complete(pState, (Object *)1);
    }

    gc_end_hook(pState);
}

/**
 * The following two functions form the garbage collector "write barrier".
 * They operate to preserve the "snapshot at the beginning" data model used
 * by this collector. They ensure that during the mark phase any update to
 * a non-root item is only made after that item has been marked.
 * NOTE: These functions should not be called directly. Instead use the
 * macros in memory.h which implement the required guard conditions.
 */
/*
 * Write barrier for class instances: mark every non-null reference field of
 * `obj` (queueing, never recursing, and ignoring GC timeouts) and then colour
 * `obj` BLACK. The phase and colour guards are expected to have been applied
 * by the calling macro.
 */
void gc_update_object(nvm_state_t* pState, Object *obj)
{
    nvm_class_index_t cls = nvm_object_class_index(obj);
    ClassRecord* rec = get_class_record(pState, cls);

    if (!has_norefs(rec)) {
        /* Fields are visited last to first, so start at the end of the object */
        byte* cursor = (byte*) obj + rec->classSize;

        while (cls != JAVA_LANG_OBJECT) {
            if (get_field_cnt(rec)) {
                for (int f = get_field_cnt(rec) - 1; f >= 0; f--) {
                    byte type = get_field_type(pState, rec, f);
                    byte width = typeSize[type];
                    cursor -= width;

                    if (type != T_REFERENCE) {
                        continue;
                    }
                    Object* child = word2obj(get_word_4_ns(cursor));
                    if (child != NULL) {
                        // no timeout checks while servicing the write barrier
                        mark_object(pState, child, 0, false);
                    }
                }
            }
            // Continue with the fields declared by the parent class
            cls = rec->parentClass;
            rec = get_class_record(pState, cls);
        }
    }
    set_gc_marked(obj, GC_BLACK);
}

/*
 * Write barrier for reference arrays: mark every non-null element of `obj`
 * (queueing, never recursing, and ignoring GC timeouts) and then colour the
 * array BLACK. The guard conditions are expected to have been applied by the
 * calling macro.
 */
void gc_update_array(nvm_state_t* pState, Object *obj)
{
    REFERENCE* cur = ref_array(obj);
    REFERENCE* stop = cur + nvm_array_size(obj);

    for (; cur < stop; cur++) {
        Object* element = ref2obj(*cur);
        if (element != NULL) {
            // no timeout checks while servicing the write barrier
            mark_object(pState, element, 0, false);
        }
    }
    set_gc_marked(obj, GC_BLACK);
}

/**
 * The following functions implement the moveable array heap.
 * The heap is a simple copy allocator, with a single free space.
 */

/**
 * Carve `sz` words off the front of the array heap's free area.
 * The new block's header is filled in (not available, owner `obj` stored as
 * an offset from the heap start, no thread) and the storage after the header
 * is zeroed. Returns NULL when fewer than `sz` words are free.
 */
AHeapObj *array_heap_allocate(nvm_state_t* pState, JINT sz, Object *obj)
{
    if ((unsigned)sz > pState->gc_array_heap_memory_free) {
        return NULL;
    }

    AHeapObj *hdr = (AHeapObj *)(pState->gc_array_heap_alloc);

    // Advance the allocation point past the new block
    pState->gc_array_heap_alloc += sz;
    pState->gc_array_heap_memory_free -= sz;

    hdr->available = 0;
    hdr->object = obj - (Object *)pState->gc_heap_start;
    hdr->thread = 0;

    memset((byte *)(hdr + 1), 0, sz*WORD_SIZE - sizeof(AHeapObj));
    return hdr;
}

/**
 * Mark an array heap block as no longer in use.
 * The block is flagged available and its `object` field is reused to hold
 * the block size in words (array data plus AH_OBJ_SIZE). gc_array_heap_freed
 * is set to record that freed space now exists. Blocks whose owning array has
 * length zero are left untouched.
 */
void array_heap_free(nvm_state_t* pState, AHeapObj *ah)
{
    BigArray *owner = (BigArray *)((Object *)pState->gc_heap_start + ah->object);

    if (owner->length == 0) {
        return;
    }

    int words = array_data_size_words(pState, nvm_object_class_index(&owner->hdr), owner->length);
    ah->available = 1;
    ah->object = words + AH_OBJ_SIZE;
    pState->gc_array_heap_freed = 1;
}

/**
 * Compact the array heap by copying data down, filling any free spaces.
 * Blocks are visited in address order up to the allocation point. The sizes
 * of the available blocks seen so far are accumulated in `offset`, and every
 * in-use block found after them is moved down by that many words. Afterwards
 * the reclaimed words are returned to the free area.
 */
void compact_array_heap(nvm_state_t* pState)
{
    // Walk the blocks from the start of the array heap
    uint32_t *ptr = pState->gc_array_heap_start;
    // Total words of available blocks passed so far
    int offset = 0;
    while (ptr < pState->gc_array_heap_alloc) {
        AHeapObj *ah = (AHeapObj *)ptr;
        if (ah->available) {
            // Available block: its `object` field holds the block size in words
            offset += ah->object;
            ptr += ah->object;
        } else {
            // In-use block: `object` is the offset of the owning array object
            BigArray *obj = (BigArray *)((Object *)pState->gc_heap_start + ah->object);
            int sz = array_data_size_words(pState, nvm_object_class_index(&obj->hdr), obj->length) + AH_OBJ_SIZE;
            if (offset > 0) {
                // Adjust the owner's offset field before the data is moved down
                obj->offset -= (offset);
                // If this block is a stack frame, update any pointers to it.
                if (ah->thread) {
                    update_stack_frames(pState, (Thread *)(pState->gc_heap_start + ah->thread), -offset);
                }
                // Source and destination may overlap, hence memmove
                memmove((byte *)((uint32_t *)ah - offset), (byte *)ah, sz*WORD_SIZE);
            }
            ptr += sz;
        }
    }
    // Everything that was skipped is now part of the single free area
    pState->gc_array_heap_memory_free += offset;
    pState->gc_array_heap_alloc -= offset;
    pState->gc_array_heap_freed = 0;
}


