C++ Simulate gc Garbage Collection

Posted by undecided name 01 on Tue, 02 Jul 2019 18:58:45 +0200

Description: Code implementation Learn from this article

There are many ways to find and reclaim objects that are no longer needed. The simplest and earliest is called "mark-and-sweep". The process:

  1. Starting with the root objects, traverse the entire object graph. Each time an object is visited, its mark bit is set to true.
  2. Once the traversal is complete, find all unmarked objects and free them.

Suppose we are writing an interpreter for a small language. It is dynamically typed and has two kinds of objects: int and pair. The following enumeration defines the object types:
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
//Kinds of object the toy language can create.
enum Obj_Type
{
    OBJ_INT  = 0, //Integer object
    OBJ_PAIR = 1  //Pair object referring to a head object and a tail object
};

Implement object type:

//A heap object managed by the virtual machine.
struct _Object
{
    Obj_Type type;//Kind of object: OBJ_INT or OBJ_PAIR
    unsigned char marked;//Mark bit: non-zero means the object was reached during marking.
    //NOTE(review): the payload fields below are not wrapped in a union;
    //presumably because push_pair stores a value as well as the head/tail
    //pointers in the same pair object - confirm before merging them.
        int value;//Stored value
        struct  
        {
            _Object* pHead;//First element of a pair
            _Object* pTail;//Second element of a pair
        };
    _Object *pNext;//Next object in the VM's list of all allocated objects
};

Now we can encapsulate them in the structure of a small virtual machine. The purpose of this virtual machine is to hold a stack that stores the variables currently in use. Virtual machines in many languages are either stack-based (such as the JVM and CLR) or register-based (such as Lua). Either way, they still need a stack: it stores local variables and the intermediate values used in expressions.

#define MAX_SIZE 256 //Maximum number of entries on the VM stack
#define INITIAL_GC_THRESHOLD 128 //Number of objects that triggers the first gc
//State of the toy virtual machine: an operand stack plus a list of every object it has allocated.
typedef struct _tagVM
{
    _Object* objs[MAX_SIZE];//Stack storage; slots [0, objs_size) are in use
    int max_objs;//Object-count threshold: new_obj runs gc once num_objs exceeds it
    int num_objs;//Number of objects currently allocated through new_obj
//The virtual machine must be able to see every object it has created, even
//objects that the user's program no longer references and therefore cannot see.
//So every object created through the virtual machine is recorded in a linked list.
    _Object *pFirstObj;//Head pointer of the linked list
    _Object *pLastObj;//Tail pointer of the linked list
    int objs_size;//Number of entries on the stack, i.e. index of the next free slot
}VM;

Functions to create virtual machines:

//Creates a virtual machine with an empty stack and an object list that
//holds a single placeholder head node.
//Returns the new VM, or NULL if memory could not be allocated.
//The VM is released with delete_VM.
VM* new_VM()
{
    VM *pVm=(VM *)malloc(sizeof(VM));
    if (pVm==NULL)
    {
        return NULL;
    }
    //Placeholder head node: it is created already marked, so a sweep that frees
    //unmarked objects leaves it alone, and new_obj can always append through
    //pLastObj without a special case for an empty list.
    _Object *pObj=create_obj(OBJ_INT);
    if (pObj==NULL)
    {
        free(pVm);//Do not leak the VM when the head node cannot be created
        return NULL;
    }
    pObj->value=0;
    pObj->marked=1;
    pObj->pHead=NULL;
    pObj->pTail=NULL;
    pObj->pNext=NULL;
    pVm->objs_size=0;
    pVm->pFirstObj=pObj;
    pVm->pLastObj=pVm->pFirstObj;
    pVm->max_objs=INITIAL_GC_THRESHOLD;
    pVm->num_objs=0;//The placeholder node is not counted
    return pVm;
}

Release Virtual Machine Function:

//Destroys a virtual machine: releases every object on its object list
//(printing each object's value as debug output) and then the VM itself.
//Passing NULL is a no-op. The pointer is passed by value, so the caller's
//copy is left unchanged and must not be used after this call.
void delete_VM(VM *pVM)
{
    if (pVM==NULL)
    {
        return;
    }
    _Object *pObj=pVM->pFirstObj;
    while (pObj!=NULL)
    {
        printf("%i\t",pObj->value);
        _Object *pNextObj=pObj->pNext;//Save the link before the node is released
        //NOTE(review): delete_Obj is not defined in this listing; presumably it
        //releases a single object - confirm.
        delete_Obj(pObj);
        pObj=pNextObj;
    }
    free(pVM);
}

With a virtual machine, we need to manipulate its stack:

//Pushes pObj onto the VM stack.
//The stack holds at most MAX_SIZE entries; pushing onto a full stack is a
//programming error that is caught by the assertion in debug builds.
void push(VM* pVM,_Object *pObj)
{
    //assert takes a single expression, so the message is attached with &&.
    //The bound is strict: objs_size==MAX_SIZE means the stack is already full.
    assert(pVM->objs_size<MAX_SIZE && "Stack overflow!");
    pVM->objs[pVM->objs_size++]=pObj;
}
//Removes the top entry of the VM stack and returns it.
//Popping an empty stack is a programming error that is caught by the
//assertion in debug builds.
_Object * pop(VM*pVM)
{
    //assert takes a single expression, so the message is attached with &&.
    assert(pVM->objs_size>0 && "Stack underflow!");
    return pVM->objs[--pVM->objs_size];
}

Functions that generate objects:

//Allocates one object of the given type that is not linked into any list.
//Every field is initialised: unmarked, value 0, no head/tail, no successor.
//Returns the object, or NULL if memory could not be allocated.
_Object * create_obj(Obj_Type type)
{
    _Object *pObj=(_Object *)malloc(sizeof(_Object));
    if (pObj==NULL)
    {
        return NULL;
    }
    pObj->type=type;
    pObj->marked=0;
    //Give the payload defined contents so it can be read (for example by the
    //debug output) before the caller has filled it in.
    pObj->value=0;
    pObj->pHead=NULL;
    pObj->pTail=NULL;
    pObj->pNext=NULL;
    return pObj;
}

//Allocates an object of the given type and appends it to the VM's list of
//all objects, running a garbage collection first when the object count has
//reached the threshold.
//Returns the new object, or NULL if it could not be allocated. The payload
//is filled in by the caller.
_Object * new_obj(VM*pVM,Obj_Type type)
{
    //Collect BEFORE creating the object. The new object is not reachable from
    //the stack yet, so a collection that ran after linking it in would free it
    //and this function would return a dangling pointer.
    if (pVM->num_objs>=pVM->max_objs)
    {
        gcVM(pVM);
        //The collection may have freed the node pLastObj pointed at, so
        //re-derive the tail from the head before appending.
        _Object *pTailObj=pVM->pFirstObj;
        while (pTailObj->pNext!=NULL)
        {
            pTailObj=pTailObj->pNext;
        }
        pVM->pLastObj=pTailObj;
    }
    _Object *pObj=create_obj(type);
    if (pObj==NULL)
    {
        return NULL;
    }
    pVM->num_objs++;
    pVM->pLastObj->pNext=pObj;
    pVM->pLastObj=pObj;
    return pObj;
}

With it we can push different types of objects onto the stack:

//Creates an integer object holding `value` and pushes it onto the VM stack.
void push_int(VM* pVM,int value)
{
    _Object *pInt=new_obj(pVM,OBJ_INT);

    pInt->value=value;
    push(pVM,pInt);
}

//Creates a pair object holding `value`, takes its tail and head from the top
//two stack entries, pushes the pair and returns it.
_Object * push_pair(VM* pVM,int value)
{
    _Object *pPair=new_obj(pVM,OBJ_PAIR);
    pPair->value=value;

    //The top of the stack becomes the tail, the entry below it the head.
    _Object *pTailObj=pop(pVM);
    _Object *pHeadObj=pop(pVM);
    pPair->pTail=pTailObj;
    pPair->pHead=pHeadObj;

    push(pVM,pPair);
    return pPair;
}

If we had a parser and an interpreter to call these functions, this would be a complete language. Here is the mark-and-sweep process. The first phase is the marking phase.

//Marks pObj and everything reachable from it through pair head/tail links.
//  pObj   - object to start from; NULL is ignored
//  marked - value stored in each visited object's mark bit (callers pass 1)
//Objects that are already marked are skipped, which stops the traversal when
//pairs refer to each other in a cycle.
void mark(_Object *pObj,unsigned char marked)
{
    //Follow the tail chain iteratively and recurse only into heads, so a long
    //list of pairs does not need one stack frame per element.
    while (pObj!=NULL && !pObj->marked)
    {
        printf("%i\t",pObj->value);//Debug Output
        pObj->marked=marked;
        if (pObj->type!=OBJ_PAIR)
        {
            return;
        }
        mark(pObj->pHead,marked);
        pObj=pObj->pTail;
    }
}

//Marking phase: marks every pair on the VM stack together with everything
//reachable from it.
//Stack entries that are not pairs are deliberately not treated as roots.
//NOTE(review): this means an int that is on the stack but not inside a pair is
//freed by the next sweep while the stack still points at it - confirm this is
//the intended policy.
void mark_all(VM*pVM)
{
    int slot=0;
    while (slot<pVM->objs_size)
    {
        _Object *pRoot=pVM->objs[slot++];
        if (pRoot->type!=OBJ_PAIR)
        {
            continue;
        }
        mark(pRoot,1);
    }
}

The next stage is to iterate through all allocated objects, freeing those that are not marked.

//Sweep phase: walks the VM's list of all objects, frees every object that is
//not marked (printing its value as debug output) and decrements num_objs for
//each one. Surviving objects have their mark cleared so that the next
//collection starts from a clean state.
//The head node of the list is a placeholder: it is never examined or freed.
//On return pLastObj points at the last node that remains in the list.
void sweep(VM* pVM)
{
    _Object *pPrev=pVM->pFirstObj;//Last node known to stay in the list
    if (pPrev==NULL)
    {
        return;
    }
    _Object *pCur=pPrev->pNext;
    while (pCur!=NULL)
    {
        if (!pCur->marked)
        {
            printf("%d\t",pCur->value);//Debug Output
            //Unlink the node. pPrev stays where it is, so a run of consecutive
            //unmarked nodes is unlinked correctly.
            pPrev->pNext=pCur->pNext;
            free(pCur);
            pVM->num_objs--;//Current number of objects minus one
        }
        else
        {
            pCur->marked=0;//Must be proven reachable again in the next cycle
            pPrev=pCur;
        }
        pCur=pPrev->pNext;
    }
    pVM->pLastObj=pPrev;//The old tail may have been freed
}

Finally, there is a garbage collector:

//Runs one full garbage collection (mark phase, then sweep phase) and
//re-computes the object-count threshold for the next collection.
void gcVM(VM*pVM)
{
    mark_all(pVM);
    sweep(pVM);
    //After each collection max_objs is set to twice the number of surviving
    //objects, so the heap grows as the number of live objects increases and
    //shrinks again when many objects have been reclaimed.
    int doubled=pVM->num_objs*2;
    //If nothing survived, keep the previous threshold: a threshold of zero
    //would force a collection on every single allocation.
    if (doubled>0)
    {
        pVM->max_objs=doubled;
    }
}

Then we trigger gc in new_obj based on the number of objects generated. Let's generate some objects to test gc:

int _tmain(int argc, _TCHAR* argv[])
{
    VM *pVM=new_VM();
    for (int i=0;i<10;++i)//Produce some objects
    {
        push_int(pVM,10);
        push_int(pVM,15);
        push_pair(pVM,20);//Lose the first two object pointers in the stack
        push_int(pVM,30);//Unreachable
    }
    printf("mark_all:\n");
    mark_all(pVM);
    printf("sweep:\n");
    sweep(pVM);
    printf("delete_VM:\n");
    delete_VM(pVM);
    getchar();
    return 0;
}

The final results are as follows:

Topics: jvm