论坛首页 编程语言技术论坛

云风manualgc源码注解

浏览 3383 次
精华帖 (0) :: 良好帖 (0) :: 新手帖 (0) :: 隐藏帖 (0)
作者 正文
   发表时间:2009-04-03  
C
这是08年8月的老东西了,当时把云风的manualgc的源码打印出来,没事找时间看看,把自己的理解或注解随手写在源码旁边。今天贴这里以此与各位研究过云风GC的朋友交流。不论如何研究这个gc对我这个长期使用java的programmer来说收获良多。
/*
* Copyright (c) 2008 ,
* Cloud Wu . All rights reserved.
*
* http://www.codingnow.com
*
* Use, modification and distribution are subject to the "New BSD License"
* as listed at <url: http://www.opensource.org/licenses/bsd-license.php >.
*/

#include "gc.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#ifdef _MSC_VER
#define bool int
#define true 1
#define false 0
typedef int intptr_t;
#else
#include <stdbool.h>
#include <stdint.h>
#endif
#define my_malloc malloc
#define my_free free
#define my_realloc realloc

#define CACHE_BITS 10

struct link {
int number;        该link的children数量
int children[1];
};

struct node {
int mark;     标记值,用于垃圾回收
union {
struct {
void * mem;     该node所指向的内存块
struct link *children;     该node的link关系
void (*finalizer)(void *);
} n;

struct {
intptr_t mem;
struct link *children;
intptr_t weak;
} c;
    在这里通过union把n与c描述的同一种事物解释成两种不同的东西

int free;     记录下个free node在pool中的句柄位置
} u;
};

/* stack data
+----------+
0 | level 0 | ----> level 0 / root ( node pack )
1 | level 1 | --+-> level 1 ( 1 node ref + node pack )
2 | node ref | <-bottom --+
3 | 2 (lv.2) |
4 | node ref | --+-> level 2 ( 3 node ref )
5 | node ref | |
6 | node ref | --+
7 | 4 (lv.3) | <-current
8 | node ref | --+-> level 3 ( 2 node ref )
9 | node ref | --+
10| nil | <-top
11| nil |
+----------+
*/

union stack_node {
int stack;     堆栈挂接点所引用的pool中的句柄
int number;     距上一个堆栈调挂接点的偏移量
int handle;     句柄,pool中的句柄位置
};

struct stack {
union stack_node *data;
int top;
int bottom;
int current;
};

top代表栈顶,data[top]指向nil或者无效值;

bottom代表当前stack frame的栈底。data[bottom-1].stack指向当前stack frame的堆栈挂接点;而如果存在的话data[bottom-2].stack指向上一级堆栈挂接点,并且两者直接必然存在link;stack frame是由gc_enter创建的;

当current>bottom时,data[current].number代表当前堆栈挂接点据上一个堆栈挂接点的偏移量,而当current<bottom时,data[current].stack同data[bottom-1].stack。


struct hash_node {
int id;     pool中的句柄
struct hash_node *next;     表示下一个hash node,用于当hashcode冲突时,将具有相同hashcode的node形成一个链表,一旦hashcode冲突就执行线性查找。
};

struct hash_map {
struct hash_node **table;
int size;     hashmap的大小
struct hash_node *free;     分配出来的hash_node经map_erase释放的后加入free链表以备重用
int number;     当前hashmap的使用数目
};

在这个hashmap的实现中并没有显示使用装载因子,默认装载因子为1,即完全使用晚所有的hashnode后再扩展大小。

#define CACHE_SIZE (1<<CACHE_BITS)

struct cache_node {
int parent;
int child;
};

static struct {
struct node *pool;
int size;     pool的大小
int free;     第一个free node的句柄
int mark;     当前gc的标识值
bool cache_dirty;
struct stack stack;
struct hash_map map;     加速访问pool,将无论地址映射成为pool中的句柄
struct cache_node cache[CACHE_SIZE];     缓存link关系
} E;

#define WEAK_CONTAINER -1
#define FREED_POINTER -1
#define CACHE_PARENT_BITS (CACHE_BITS/3)
#define CACHE_CHILD_BITS (CACHE_BITS-CACHE_PARENT_BITS)
#define CACHE_PARENT_MASK ((1<<CACHE_PARENT_BITS) -1 )
#define CACHE_CHILD_MASK ((1<<CACHE_CHILD_BITS) -1 )
#define UNSET_MASK 0x80000000
#define UNSET_MASK_BIT(a) ((unsigned)(a)>>31)

/* Insert a parent/child handle pair into cache */
添加或者删除一个缓存中的link关系。由于pool的句柄必是正整数,因此使用符号位来标识是执行删除操作。在后续中多次用到这个“符号位”。

static bool
cache_insert(int parent,int child)
{
int hash = (parent & CACHE_PARENT_MASK) << CACHE_CHILD_BITS | (child & CACHE_CHILD_MASK) ;
struct cache_node *cn = &E.cache[hash];
E.cache_dirty=true;

if (cn->parent == -1) {     不存在则添加该link的缓存
cn->parent=parent;
cn->child=child;
return true;
}
else if (cn->parent == parent && (cn->child ^ child) == UNSET_MASK) {
删除已缓存的link
cn->parent = -1;
return true;
}
return false;     该cache已被使用
}

/* create a new handle for pointer p */
从pool中分配一个handle

static int
node_alloc(void *p)
{
struct node *ret;     指向E.free所表示的可重用的free node
if (E.free==-1) {     与后面的E.pool[sz-1].u.free=-1相呼,应标识pool已用尽
int sz=E.size * 2;
int i;
if (sz==0) {
sz=1024;
}

E.pool=(struct node *)my_realloc(E.pool,sz*sizeof(struct node));
ret=E.pool + E.size;
ret->u.n.children=0;

for (i=E.size+1;i<sz;i++) {
E.pool[i].u.free=i+1;     下个free node的句柄位置
E.pool[i].mark=-1;
E.pool[i].u.n.children=0;
}
E.pool[sz-1].u.free=-1;
E.free=E.size+1;
E.size=sz;
}
else {
ret=E.pool + E.free;
E.free = E.pool[E.free].u.free;     将下一个free node的句柄告诉给E.free
}
ret->u.n.mem=p;
ret->mark=0;
if (ret->u.n.children) {
ret->u.n.children->number=0;
}
else {
ret->u.n.children=0;
}
return ret-E.pool;
}

/* expand link table space . each node has a link table. */
来来来,肉戏开始了。直接引用云风的原话
“利用二进制逻辑运算的性质,我们只需要把新的尺寸和老的尺寸异或,再和老的尺寸比较,就可以知道,新的尺寸的二进制最高是 1 的那一位的位置是否大于老的尺寸。然后分配新尺寸的两倍,空间是绰绰有余的。”

static struct link *
link_expand(struct link *old,int sz)
{
struct link *ret;
if (old!=0) {
sz+=old->number;
if ((sz ^ old->number) <= old->number) {
return old;
}
}

sz=sz*2-1;
ret=(struct link *)my_realloc(old,sizeof(struct link)+(sz-1)*sizeof(int));
if (old==0) {
ret->number=0;
}
return ret;
}

/* cmp function for cache sort */
static int
cache_node_cmp(const void *a,const void *b)
{
const struct cache_node *ca=(const struct cache_node *)a;
const struct cache_node *cb=(const struct cache_node *)b;
if (ca->parent != cb->parent) {
return cb->parent - ca->parent;     按parent的降序排列
}
if (ca->parent == -1 ) {
return 0;
}
return (ca->child & ~ UNSET_MASK) - (cb->child & ~UNSET_MASK);
    当parent相同时,按child的升序排列
}

/* commit cache to the node graph *
static void
cache_flush()
{
int i;
if (!E.cache_dirty)
return;
qsort(E.cache,CACHE_SIZE,sizeof(struct cache_node),cache_node_cmp);     快排
i=0;
while (i<CACHE_SIZE) {
int parent=E.cache[i].parent;
struct cache_node *head;
struct cache_node *next;
struct node *node=&E.pool[parent];
struct link *children;
int sz;
int j,k;

if (parent==-1) {
return;
}

head=&E.cache[i];
next=head;
sz=0;

寻找需要对link进行扩展的理论上的最大值,后面会更具情况减少
while (next->parent == parent && i<CACHE_SIZE) {
sz += 1 - UNSET_MASK_BIT(next->child);
++next;
++i;
}

children=node->u.n.children;
k=j=0;
if (children) {
while (j<children->number) {
int child;
if (head>=next) {
goto copy_next;
}

child=head->child;
children->children[k]=children->children[j];
if (child == (children->children[j] | UNSET_MASK)) {
 取消link关系
--k;     与后面的k++对应,已达到k不增长
head->parent=-1;
--sz;
++head;
}
else if ((child & ~UNSET_MASK) < children->children[j]) {
break;
}     需要添加link且小于当前child,维护有序children
++j;
++k;
}
}
if (sz>0) {
    扩展sz个位置
children=node->u.n.children=link_expand(node->u.n.children,sz);
assert(children);
memmove(children->children + j + sz, children->children +j , (children->number - j) * sizeof(int));
j+=sz;
children->number+=sz;
}

while(j<children->number) {
int child;
if (head>=next) {     cache里的值已经全部处理了
goto copy_next;
}

child=head->child;
if (child == (children->children[j] | UNSET_MASK)) {     取消link
--k;
head->parent=-1;
++head;
}
else if ((child & ~UNSET_MASK) < children->children[j]) {
设置新的link
assert(child >= 0 );
children->children[k]=child;
head->parent=-1;
++head;
--j;
}
else {     复制原有的link关系
children->children[k]=children->children[j];
}
++j;
++k;
}
while(head<next) {     添加剩下的值
int child=head->child;
assert((child & UNSET_MASK) == 0);
children->children[k]=child;
++k;
head->parent=-1;
++head;
}
children->number=k;
continue;
copy_next:
if (k!=j) {
for (;j<children->number;j++,k++) {
children->children[k]=children->children[j];
}
children->number=k;
}
}

E.cache_dirty=false;
}

/* add an edge into (or delete from) graph */

static void
node_add(int parent,int child)
{
while (!cache_insert(parent,child)) {     当cache插入失败时继续插入
cache_flush();
}
}

/* free a node for reuse */
释放一个node并加入free node中

static __inline void
node_free(int id)
{
E.pool[id].mark=-1;
if (E.pool[id].u.n.children) {
E.pool[id].u.n.children->number=0;
}
E.pool[id].u.free=E.free;
E.free=id;
}

/* create a hash value for pointer p */

static __inline int
hash(void *p)
{
intptr_t t=(intptr_t)p;
return (int)((t>>2) ^ (t>>16));
}

/* expand hash map space */

static void
map_expand()
{
struct hash_node **table;
int sz,i;
if (E.map.size==0) {
sz=1024;
}
else {
sz=E.map.size*2;
}
table=(struct hash_node **)my_malloc(sz*sizeof(struct hash_node *));
memset(table,0,sz*sizeof(struct hash_node *));
重新hash
for (i=0;i<E.map.size;i++) {
struct hash_node *t=E.map.table[i];
while (t) {
struct hash_node *tmp=t;
void *p=E.pool[tmp->id].u.n.mem;
int new_hash=hash(p) & (sz-1);
t=t->next;

tmp->next=table[new_hash];
table[new_hash]=tmp;
}
}

my_free(E.map.table);
E.map.table=table;
E.map.size=sz;
}

/* map a existed pointer into a handle , if not exist, create a new one */
把一个物理地址映射成为pool中的句柄

static int
map_id(void *p)
{
int h=hash(p);
求在hashmap中的位置,确保其在size内

struct hash_node *node=E.map.table[h & (E.map.size -1)];
while (node) {
if (E.pool[node->id].u.n.mem==p) {
return node->id;
}
node=node->next;
}

if (E.map.number >= E.map.size) {
map_expand();
}

++E.map.number;

if (E.map.free) {
node=E.map.free;
E.map.free=node->next;
}
else {
node=(struct hash_node *)my_malloc(sizeof(*node));
}
node->id=node_alloc(p);
node->next=E.map.table[h & (E.map.size -1)];
E.map.table[h & (E.map.size -1)]=node;

return node->id;
}

/* Erase a handle from hash map */

static void
map_erase(int id)
{
void *p=E.pool[id].u.n.mem;
if (p) {
int h=hash(p);
struct hash_node **node= &E.map.table[h & (E.map.size -1)];
struct hash_node *find;
while ((*node)->id!=id) {     当hashcode冲突时查找转为线性查找链表
node=&(*node)->next;
assert(*node);
}
    将查找到的hash_node加入free链表以备重用

find=*node;
*node=find->next;
find->next=E.map.free;
E.map.free=find;
--E.map.number;
}
}

/* Expand stack space */
    __inline 函数内联,一些编译器才有,如gcc。在C99中可以直接使用inline

static __inline void
stack_expand()
{
只有当top为2^n – 1时才会进行扩展
if (((E.stack.top + 1) ^ E.stack.top) > E.stack.top) {
E.stack.data = (union stack_node *)my_realloc(E.stack.data,(E.stack.top*2+1) * sizeof(union stack_node));
分配2^(n+1) -1个stack node
}
}

/* Push a handle into the stack */

static void
stack_push(int handle)
{
stack_expand();
E.stack.data[E.stack.top++].handle=handle;
}

/* gc brackets open */

void
gc_enter()
{
stack_expand();
设置新的堆栈挂接点
E.stack.data[E.stack.top].number=E.stack.top-E.stack.current;
E.stack.current=E.stack.top++;
}

/* gc brackets close , extend some pointers' lifetime */
可变参数必须以0结尾

void
gc_leave(void *p,...)
{
void **head;
if (E.stack.current >= E.stack.bottom) {     直接取消该堆栈挂接点,因为此时还未执行stach_pack所以,挂靠在该堆栈挂接点上的所有内存都不会有任何link存在,可以被回收掉。

E.stack.top = E.stack.current;
E.stack.current -= E.stack.data[E.stack.current].number;
}
else {     leave前执行了stack_pack(包括gc_collect的间接调用)
int parent,child;
--E.stack.bottom;
parent=E.stack.data[E.stack.bottom-1].stack;
child=E.stack.data[E.stack.bottom].stack;
node_add(parent, child | UNSET_MASK);     取消该堆栈挂接点与上一级的link
node_free(child);
E.stack.current=E.stack.bottom-1;     恢复到上一级堆栈挂接点
E.stack.top = E.stack.current + 1;
}

head=&p;

while (*head) {     将程序显示要求的保留的内存压栈,以延长起生命周期
stack_push(map_id(*head));
++head;
}
}

/* pack the stack recursively */

static int
stack_pack_internal(int from,int to,int top)
{
if (to < from) {     将to…from内的句柄(内存)与这一级堆栈挂接点建立link关系
int parent = E.stack.data[to].stack;
while (from < top) {
node_add(parent,E.stack.data[from].handle);
++from;
}
return to+1;
}
else {
递归处理上一级堆栈挂点

int bottom=stack_pack_internal(from,to-E.stack.data[to].number,to);
将(to+1)…top内的内存与node建立link关系
int node=node_alloc(0);
++to;
while (to<top) {
node_add(node,E.stack.data[to].handle);
++to;
}
node_add(E.stack.data[bottom-1].stack,node);     挂靠node
E.stack.data[bottom].stack=node;
return bottom+1;
}
}

/* pack the value in the stack */

static void
stack_pack()
{
int bottom=stack_pack_internal(E.stack.bottom,E.stack.current,E.stack.top);
E.stack.top=E.stack.bottom=bottom;
E.stack.current=bottom-1;
}

/* link or unlink two pointer */
建立parent/now的link关系
取消parent/prev的link关系

void
gc_link(void *parent,void *prev,void *now)
{
int parent_id;
if (parent==0) {
parent_id=0;
}
else {
parent_id=map_id(parent);
}
if (prev) {
int prev_id=map_id(prev);
stack_push(prev_id);
node_add(parent_id,prev_id | UNSET_MASK);
}
if (now) {
node_add(parent_id,map_id(now));
}
}

/* init struct E */

void
gc_init()
{
int i;
int root;

E.pool=0;
E.size=0;
E.mark=1;
E.free=-1;
E.cache_dirty=false;
for (i=0;i<CACHE_SIZE;i++) {
E.cache[i].parent=-1;
}
E.stack.data=0;
E.stack.top=0;

root=node_alloc(0);
assert(root==0);
stack_expand();
E.stack.data[E.stack.top++].stack=root;

E.stack.bottom=E.stack.top;
E.stack.current=E.stack.bottom-1;

E.map.table=0;
E.map.size=0;
E.map.free=0;
E.map.number=0;

map_expand();
}

/* release all the resource used in gc */

void
gc_exit()
{
int i;
清空pool

for (i=0;i<E.size;i++) {
my_free(E.pool[i].u.n.children);
if (E.pool[i].mark >= 0) {
void *p=E.pool[i].u.n.mem;
if (E.pool[i].u.n.finalizer && E.pool[i].u.c.weak!=WEAK_CONTAINER) {
E.pool[i].u.n.finalizer(p);
}
if ((intptr_t)p != FREED_POINTER) {
my_free(p);
}
}
}
my_free(E.pool);
清空hashmap

for (i=0;i<E.map.size;i++) {
struct hash_node *p=E.map.table[i];
while (p) {
struct hash_node *n=p->next;
my_free(p);
p=n;
}
}
my_free(E.map.table);
while (E.map.free) {
struct hash_node *p=E.map.free->next;
my_free(E.map.free);
E.map.free=p;
}
my_free(E.stack.data);
E.map.number=0;
}

/* mark the nodes related to root */
WeakContainer里的所有weakReferencemark值设置为当前E.mark

static __inline void
gc_mark_weak(int weak)
{
if (E.pool[weak].mark < E.mark) {
E.pool[weak].mark=E.mark;
}
}

static void
gc_mark(int root)
{
if (E.pool[root].mark < E.mark+1) {
struct link *children=E.pool[root].u.n.children;
E.pool[root].mark=E.mark+1;
if (children) {
int i;
if (E.pool[root].u.c.weak==WEAK_CONTAINER) {
bool merge=false;
for (i=children->number-1;i>=0;i--) {
int child=children->children[i];
if ((intptr_t)E.pool[child].u.n.mem == FREED_POINTER) {
children->children[i]=0;
merge=true;
}
else
gc_mark_weak(child);
}

if (merge) {     合并weakContainer里的free node
int j;
for (i=0;i<children->number;i++) {
if (children->children[i]==0) {     找到第一个为0的child
break;
}
}

for (j=i,++i;i<children->number;i++) {
if (children->children[i]!=0) {     取消所有为0的child
children->children[j++]=children->children[i];
}
}

children->number=j;
}
}
else {
for (i=children->number-1;i>=0;i--) {
gc_mark(children->children[i]);
}
}
}
}
}

/* collect the memory block can no longer be otherwise accessed */

void
gc_collect()
{
int i;
stack_pack();
cache_flush();
stack_pack与cache_flush的调用顺序不能改变。还是原引云风的说法“gc 最重要的一点,就是要对堆栈上的数据进行关联。在收集发生时,堆栈上所有临时分配出来的内存块都不应该被释放掉。”

gc_mark(0);
for (i=0;i<E.size;i++) {
if (E.pool[i].mark < E.mark) {
if (E.pool[i].mark >= 0) {     无视free node和才分配出来的node(这两者的mark均为-1)
void *p=E.pool[i].u.n.mem;
if (E.pool[i].u.n.finalizer && E.pool[i].u.c.weak!=WEAK_CONTAINER) {
E.pool[i].u.n.finalizer(p);
}
if ((intptr_t)p != FREED_POINTER) {
my_free(p);
map_erase(i);
}
node_free(i);
}
}
else if (E.pool[i].mark == E.mark) {     weakContainer里所有的weakReference每次都会被回收,但仍然保留了weakContainer与weanReference的关联关系,由gc_mark/gc_weak_next去负责清除。因此后面才仅仅设置了一个FREED_POINTER,而没有进行node_free

void *p=E.pool[i].u.n.mem;
if (E.pool[i].u.n.finalizer && E.pool[i].u.c.weak!=WEAK_CONTAINER) {
E.pool[i].u.n.finalizer(p);
E.pool[i].u.n.finalizer=0;
}

my_free(p);
map_erase(i);
E.pool[i].u.c.mem=FREED_POINTER;
}
}
E.mark+=2;
}

/* only for debug, print all the relationship of all nodes */

void
gc_dryrun()
{
int i;
printf("------dry run begin----\n");
stack_pack();
cache_flush();
gc_mark(0);
for (i=0;i<E.size;i++) {
struct link *link=E.pool[i].u.n.children;

if (E.pool[i].mark >= E.mark+1) {
if (E.pool[i].u.c.weak == WEAK_CONTAINER) {
printf("%d[weak] -> ",i);
}
else {
printf("%d(%p) -> ",i,E.pool[i].u.n.mem);
}
}
else if (E.pool[i].mark == E.mark) {
printf("w %d: ",i);
}
else if (E.pool[i].mark >= 0 ) {
printf("x %d(%p): ",i,E.pool[i].u.n.mem);
}
else {
continue;
}

if (link) {
int j;
for (j=0;j<link->number;j++) {
printf("%d,",link->children[j]);
}
}

printf("\n");
}
E.mark++;
printf("------dry run end------\n");
}

/* allocate a memory block , and create a node map to it. node will link to parent */

void*
gc_malloc(size_t sz,void *parent,void (*finalizer)(void *))
{
void *ret=my_malloc(sz);
int id=map_id(ret);
E.pool[id].u.n.finalizer=finalizer;
if (parent) {
gc_link(parent,0,ret);
}
else {
stack_push(id);
}
return ret;
}

struct gc_weak_table {
int node_id;
};

/* allocate a weak pointer container */

struct gc_weak_table*
gc_weak_table(void *parent)
{
struct gc_weak_table *ret=my_malloc(sizeof(*ret));
ret->node_id=map_id(ret);
E.pool[ret->node_id].u.c.weak=WEAK_CONTAINER;
if (parent) {
gc_link(parent,0,ret);
}
else {
stack_push(ret->node_id);
}
return ret;
}

/* iterate the weak container */

void*
gc_weak_next(struct gc_weak_table *cont,int *iter)
{
int i,j;
struct link *children;
cache_flush();
children = E.pool[cont->node_id].u.n.children;
if (children==0) {
return 0;
}

for (i = (iter==0 ? 0 : *iter) ;i < children->number; i++) {
int id=children->children[i];
if (id) {
if (E.pool[id].u.c.mem == FREED_POINTER) {
children->children[i] = 0;
}
else {
if (iter) {
*iter=i+1;
}
stack_push(id);
return E.pool[id].u.n.mem;
}
}
}

for (i=0;i<children->number;i++) {
if (children->children[i]==0) {
break;
}
}

for (j=i,++i;i<children->number;i++) {
if (children->children[i]!=0) {
children->children[j++]=children->children[i];
}
}

children->number=j;

return 0;
}

/* clone a memory block , this will copy all the edges linked to orginal node */

void*
gc_clone(void *from,size_t sz)
{
int from_id=map_id(from);
void *to=my_malloc(sz);
int to_id=map_id(to);
struct link *from_children=E.pool[from_id].u.n.children;
stack_push(to_id);

cache_flush();
memcpy(to,from,sz);
E.pool[to_id].u.n.finalizer=E.pool[from_id].u.n.finalizer;
E.pool[to_id].u.n.children=link_expand(E.pool[to_id].u.n.children,from_children->number);
memcpy(E.pool[to_id].u.n.children,from_children,sizeof(*from_children) + (from_children->number-1)*sizeof(int));
return to;
}

/* realloc a memory block , all the edages linked to it will be retained */

void*
gc_realloc(void *p,size_t sz,void *parent)
{
void *ret;
assert(sz>0);

if (p==0) {
return gc_malloc(sz,parent,0);
}

ret=my_realloc(p,sz);
if (ret==p) {
return ret;
}
else {     当重新分配内存后,原内存块被realloc移动到了新的合适的位置,这就导致了必须重新为其分配句柄,并且将原有link关系复制过去
int new_id=map_id(ret);
int old_id=map_id(p);

struct link *tmp=E.pool[new_id].u.n.children;
E.pool[new_id].u.n.children=E.pool[old_id].u.n.children;
E.pool[old_id].u.n.children=tmp;

E.pool[new_id].u.n.finalizer=E.pool[old_id].u.n.finalizer;

if (parent) {
gc_link(parent,p,ret);
}
else {
stack_push(new_id);
}
map_erase(old_id);
E.pool[old_id].u.c.mem=FREED_POINTER;
}

return ret;
}
论坛首页 编程语言技术版

跳转论坛:
Global site tag (gtag.js) - Google Analytics