浏览 3383 次
锁定老帖子 主题:云风manualgc源码注解
精华帖 (0) :: 良好帖 (0) :: 新手帖 (0) :: 隐藏帖 (0)
|
|
---|---|
作者 | 正文 |
发表时间:2009-04-03
/* * Copyright (c) 2008 , * Cloud Wu . All rights reserved. * * http://www.codingnow.com * * Use, modification and distribution are subject to the "New BSD License" * as listed at <url: http://www.opensource.org/licenses/bsd-license.php >. */ #include "gc.h" #include <stdio.h> #include <stdlib.h> #include <string.h> #include <assert.h> #ifdef _MSC_VER #define bool int #define true 1 #define false 0 typedef int intptr_t; #else #include <stdbool.h> #include <stdint.h> #endif #define my_malloc malloc #define my_free free #define my_realloc realloc #define CACHE_BITS 10 struct link { int number; 该link的children数量 int children[1]; }; struct node { int mark; 标记值,用于垃圾回收 union { struct { void * mem; 该node所指向的内存块 struct link *children; 该node的link关系 void (*finalizer)(void *); } n; struct { intptr_t mem; struct link *children; intptr_t weak; } c; 在这里通过union把n与c描述的同一种事物解释成两种不同的东西 int free; 记录下个free node在pool中的句柄位置 } u; }; /* stack data +----------+ 0 | level 0 | ----> level 0 / root ( node pack ) 1 | level 1 | --+-> level 1 ( 1 node ref + node pack ) 2 | node ref | <-bottom --+ 3 | 2 (lv.2) | 4 | node ref | --+-> level 2 ( 3 node ref ) 5 | node ref | | 6 | node ref | --+ 7 | 4 (lv.3) | <-current 8 | node ref | --+-> level 3 ( 2 node ref ) 9 | node ref | --+ 10| nil | <-top 11| nil | +----------+ */ union stack_node { int stack; 堆栈挂接点所引用的pool中的句柄 int number; 距上一个堆栈调挂接点的偏移量 int handle; 句柄,pool中的句柄位置 }; struct stack { union stack_node *data; int top; int bottom; int current; }; top代表栈顶,data[top]指向nil或者无效值; bottom代表当前stack frame的栈底。data[bottom-1].stack指向当前stack frame的堆栈挂接点;而如果存在的话data[bottom-2].stack指向上一级堆栈挂接点,并且两者直接必然存在link;stack frame是由gc_enter创建的; 当current>bottom时,data[current].number代表当前堆栈挂接点据上一个堆栈挂接点的偏移量,而当current<bottom时,data[current].stack同data[bottom-1].stack。 struct hash_node { int id; pool中的句柄 struct hash_node *next; 表示下一个hash node,用于当hashcode冲突时,将具有相同hashcode的node形成一个链表,一旦hashcode冲突就执行线性查找。 }; struct hash_map { struct hash_node **table; int size; hashmap的大小 struct hash_node *free; 分配出来的hash_node经map_erase释放的后加入free链表以备重用 int number; 当前hashmap的使用数目 }; 在这个hashmap的实现中并没有显示使用装载因子,默认装载因子为1,即完全使用晚所有的hashnode后再扩展大小。 #define CACHE_SIZE (1<<CACHE_BITS) struct cache_node { int parent; int child; }; static struct { struct node *pool; int size; pool的大小 int free; 第一个free node的句柄 int mark; 当前gc的标识值 bool cache_dirty; struct stack stack; struct hash_map map; 加速访问pool,将无论地址映射成为pool中的句柄 struct cache_node cache[CACHE_SIZE]; 缓存link关系 } E; #define WEAK_CONTAINER -1 #define FREED_POINTER -1 #define CACHE_PARENT_BITS (CACHE_BITS/3) #define CACHE_CHILD_BITS (CACHE_BITS-CACHE_PARENT_BITS) #define CACHE_PARENT_MASK ((1<<CACHE_PARENT_BITS) -1 ) #define CACHE_CHILD_MASK ((1<<CACHE_CHILD_BITS) -1 ) #define UNSET_MASK 0x80000000 #define UNSET_MASK_BIT(a) ((unsigned)(a)>>31) /* Insert a parent/child handle pair into cache */ 添加或者删除一个缓存中的link关系。由于pool的句柄必是正整数,因此使用符号位来标识是执行删除操作。在后续中多次用到这个“符号位”。 static bool cache_insert(int parent,int child) { int hash = (parent & CACHE_PARENT_MASK) << CACHE_CHILD_BITS | (child & CACHE_CHILD_MASK) ; struct cache_node *cn = &E.cache[hash]; E.cache_dirty=true; if (cn->parent == -1) { 不存在则添加该link的缓存 cn->parent=parent; cn->child=child; return true; } else if (cn->parent == parent && (cn->child ^ child) == UNSET_MASK) { 删除已缓存的link cn->parent = -1; return true; } return false; 该cache已被使用 } /* create a new handle for pointer p */ 从pool中分配一个handle static int node_alloc(void *p) { struct node *ret; 指向E.free所表示的可重用的free node if (E.free==-1) { 与后面的E.pool[sz-1].u.free=-1相呼,应标识pool已用尽 int sz=E.size * 2; int i; if (sz==0) { sz=1024; } E.pool=(struct node *)my_realloc(E.pool,sz*sizeof(struct node)); ret=E.pool + E.size; ret->u.n.children=0; for (i=E.size+1;i<sz;i++) { E.pool[i].u.free=i+1; 下个free node的句柄位置 E.pool[i].mark=-1; E.pool[i].u.n.children=0; } E.pool[sz-1].u.free=-1; E.free=E.size+1; E.size=sz; } else { ret=E.pool + E.free; E.free = E.pool[E.free].u.free; 将下一个free node的句柄告诉给E.free } ret->u.n.mem=p; ret->mark=0; if (ret->u.n.children) { ret->u.n.children->number=0; } else { ret->u.n.children=0; } return ret-E.pool; } /* expand link table space . each node has a link table. */ 来来来,肉戏开始了。直接引用云风的原话 “利用二进制逻辑运算的性质,我们只需要把新的尺寸和老的尺寸异或,再和老的尺寸比较,就可以知道,新的尺寸的二进制最高是 1 的那一位的位置是否大于老的尺寸。然后分配新尺寸的两倍,空间是绰绰有余的。” static struct link * link_expand(struct link *old,int sz) { struct link *ret; if (old!=0) { sz+=old->number; if ((sz ^ old->number) <= old->number) { return old; } } sz=sz*2-1; ret=(struct link *)my_realloc(old,sizeof(struct link)+(sz-1)*sizeof(int)); if (old==0) { ret->number=0; } return ret; } /* cmp function for cache sort */ static int cache_node_cmp(const void *a,const void *b) { const struct cache_node *ca=(const struct cache_node *)a; const struct cache_node *cb=(const struct cache_node *)b; if (ca->parent != cb->parent) { return cb->parent - ca->parent; 按parent的降序排列 } if (ca->parent == -1 ) { return 0; } return (ca->child & ~ UNSET_MASK) - (cb->child & ~UNSET_MASK); 当parent相同时,按child的升序排列 } /* commit cache to the node graph * static void cache_flush() { int i; if (!E.cache_dirty) return; qsort(E.cache,CACHE_SIZE,sizeof(struct cache_node),cache_node_cmp); 快排 i=0; while (i<CACHE_SIZE) { int parent=E.cache[i].parent; struct cache_node *head; struct cache_node *next; struct node *node=&E.pool[parent]; struct link *children; int sz; int j,k; if (parent==-1) { return; } head=&E.cache[i]; next=head; sz=0; 寻找需要对link进行扩展的理论上的最大值,后面会更具情况减少 while (next->parent == parent && i<CACHE_SIZE) { sz += 1 - UNSET_MASK_BIT(next->child); ++next; ++i; } children=node->u.n.children; k=j=0; if (children) { while (j<children->number) { int child; if (head>=next) { goto copy_next; } child=head->child; children->children[k]=children->children[j]; if (child == (children->children[j] | UNSET_MASK)) { 取消link关系 --k; 与后面的k++对应,已达到k不增长 head->parent=-1; --sz; ++head; } else if ((child & ~UNSET_MASK) < children->children[j]) { break; } 需要添加link且小于当前child,维护有序children ++j; ++k; } } if (sz>0) { 扩展sz个位置 children=node->u.n.children=link_expand(node->u.n.children,sz); assert(children); memmove(children->children + j + sz, children->children +j , (children->number - j) * sizeof(int)); j+=sz; children->number+=sz; } while(j<children->number) { int child; if (head>=next) { cache里的值已经全部处理了 goto copy_next; } child=head->child; if (child == (children->children[j] | UNSET_MASK)) { 取消link --k; head->parent=-1; ++head; } else if ((child & ~UNSET_MASK) < children->children[j]) { 设置新的link assert(child >= 0 ); children->children[k]=child; head->parent=-1; ++head; --j; } else { 复制原有的link关系 children->children[k]=children->children[j]; } ++j; ++k; } while(head<next) { 添加剩下的值 int child=head->child; assert((child & UNSET_MASK) == 0); children->children[k]=child; ++k; head->parent=-1; ++head; } children->number=k; continue; copy_next: if (k!=j) { for (;j<children->number;j++,k++) { children->children[k]=children->children[j]; } children->number=k; } } E.cache_dirty=false; } /* add an edge into (or delete from) graph */ static void node_add(int parent,int child) { while (!cache_insert(parent,child)) { 当cache插入失败时继续插入 cache_flush(); } } /* free a node for reuse */ 释放一个node并加入free node中 static __inline void node_free(int id) { E.pool[id].mark=-1; if (E.pool[id].u.n.children) { E.pool[id].u.n.children->number=0; } E.pool[id].u.free=E.free; E.free=id; } /* create a hash value for pointer p */ static __inline int hash(void *p) { intptr_t t=(intptr_t)p; return (int)((t>>2) ^ (t>>16)); } /* expand hash map space */ static void map_expand() { struct hash_node **table; int sz,i; if (E.map.size==0) { sz=1024; } else { sz=E.map.size*2; } table=(struct hash_node **)my_malloc(sz*sizeof(struct hash_node *)); memset(table,0,sz*sizeof(struct hash_node *)); 重新hash for (i=0;i<E.map.size;i++) { struct hash_node *t=E.map.table[i]; while (t) { struct hash_node *tmp=t; void *p=E.pool[tmp->id].u.n.mem; int new_hash=hash(p) & (sz-1); t=t->next; tmp->next=table[new_hash]; table[new_hash]=tmp; } } my_free(E.map.table); E.map.table=table; E.map.size=sz; } /* map a existed pointer into a handle , if not exist, create a new one */ 把一个物理地址映射成为pool中的句柄 static int map_id(void *p) { int h=hash(p); 求在hashmap中的位置,确保其在size内 struct hash_node *node=E.map.table[h & (E.map.size -1)]; while (node) { if (E.pool[node->id].u.n.mem==p) { return node->id; } node=node->next; } if (E.map.number >= E.map.size) { map_expand(); } ++E.map.number; if (E.map.free) { node=E.map.free; E.map.free=node->next; } else { node=(struct hash_node *)my_malloc(sizeof(*node)); } node->id=node_alloc(p); node->next=E.map.table[h & (E.map.size -1)]; E.map.table[h & (E.map.size -1)]=node; return node->id; } /* Erase a handle from hash map */ static void map_erase(int id) { void *p=E.pool[id].u.n.mem; if (p) { int h=hash(p); struct hash_node **node= &E.map.table[h & (E.map.size -1)]; struct hash_node *find; while ((*node)->id!=id) { 当hashcode冲突时查找转为线性查找链表 node=&(*node)->next; assert(*node); } 将查找到的hash_node加入free链表以备重用 find=*node; *node=find->next; find->next=E.map.free; E.map.free=find; --E.map.number; } } /* Expand stack space */ __inline 函数内联,一些编译器才有,如gcc。在C99中可以直接使用inline static __inline void stack_expand() { 只有当top为2^n – 1时才会进行扩展 if (((E.stack.top + 1) ^ E.stack.top) > E.stack.top) { E.stack.data = (union stack_node *)my_realloc(E.stack.data,(E.stack.top*2+1) * sizeof(union stack_node)); 分配2^(n+1) -1个stack node } } /* Push a handle into the stack */ static void stack_push(int handle) { stack_expand(); E.stack.data[E.stack.top++].handle=handle; } /* gc brackets open */ void gc_enter() { stack_expand(); 设置新的堆栈挂接点 E.stack.data[E.stack.top].number=E.stack.top-E.stack.current; E.stack.current=E.stack.top++; } /* gc brackets close , extend some pointers' lifetime */ 可变参数必须以0结尾 void gc_leave(void *p,...) { void **head; if (E.stack.current >= E.stack.bottom) { 直接取消该堆栈挂接点,因为此时还未执行stach_pack所以,挂靠在该堆栈挂接点上的所有内存都不会有任何link存在,可以被回收掉。 E.stack.top = E.stack.current; E.stack.current -= E.stack.data[E.stack.current].number; } else { leave前执行了stack_pack(包括gc_collect的间接调用) int parent,child; --E.stack.bottom; parent=E.stack.data[E.stack.bottom-1].stack; child=E.stack.data[E.stack.bottom].stack; node_add(parent, child | UNSET_MASK); 取消该堆栈挂接点与上一级的link node_free(child); E.stack.current=E.stack.bottom-1; 恢复到上一级堆栈挂接点 E.stack.top = E.stack.current + 1; } head=&p; while (*head) { 将程序显示要求的保留的内存压栈,以延长起生命周期 stack_push(map_id(*head)); ++head; } } /* pack the stack recursively */ static int stack_pack_internal(int from,int to,int top) { if (to < from) { 将to…from内的句柄(内存)与这一级堆栈挂接点建立link关系 int parent = E.stack.data[to].stack; while (from < top) { node_add(parent,E.stack.data[from].handle); ++from; } return to+1; } else { 递归处理上一级堆栈挂点 int bottom=stack_pack_internal(from,to-E.stack.data[to].number,to); 将(to+1)…top内的内存与node建立link关系 int node=node_alloc(0); ++to; while (to<top) { node_add(node,E.stack.data[to].handle); ++to; } node_add(E.stack.data[bottom-1].stack,node); 挂靠node E.stack.data[bottom].stack=node; return bottom+1; } } /* pack the value in the stack */ static void stack_pack() { int bottom=stack_pack_internal(E.stack.bottom,E.stack.current,E.stack.top); E.stack.top=E.stack.bottom=bottom; E.stack.current=bottom-1; } /* link or unlink two pointer */ 建立parent/now的link关系 取消parent/prev的link关系 void gc_link(void *parent,void *prev,void *now) { int parent_id; if (parent==0) { parent_id=0; } else { parent_id=map_id(parent); } if (prev) { int prev_id=map_id(prev); stack_push(prev_id); node_add(parent_id,prev_id | UNSET_MASK); } if (now) { node_add(parent_id,map_id(now)); } } /* init struct E */ void gc_init() { int i; int root; E.pool=0; E.size=0; E.mark=1; E.free=-1; E.cache_dirty=false; for (i=0;i<CACHE_SIZE;i++) { E.cache[i].parent=-1; } E.stack.data=0; E.stack.top=0; root=node_alloc(0); assert(root==0); stack_expand(); E.stack.data[E.stack.top++].stack=root; E.stack.bottom=E.stack.top; E.stack.current=E.stack.bottom-1; E.map.table=0; E.map.size=0; E.map.free=0; E.map.number=0; map_expand(); } /* release all the resource used in gc */ void gc_exit() { int i; 清空pool for (i=0;i<E.size;i++) { my_free(E.pool[i].u.n.children); if (E.pool[i].mark >= 0) { void *p=E.pool[i].u.n.mem; if (E.pool[i].u.n.finalizer && E.pool[i].u.c.weak!=WEAK_CONTAINER) { E.pool[i].u.n.finalizer(p); } if ((intptr_t)p != FREED_POINTER) { my_free(p); } } } my_free(E.pool); 清空hashmap for (i=0;i<E.map.size;i++) { struct hash_node *p=E.map.table[i]; while (p) { struct hash_node *n=p->next; my_free(p); p=n; } } my_free(E.map.table); while (E.map.free) { struct hash_node *p=E.map.free->next; my_free(E.map.free); E.map.free=p; } my_free(E.stack.data); E.map.number=0; } /* mark the nodes related to root */ WeakContainer里的所有weakReferencemark值设置为当前E.mark static __inline void gc_mark_weak(int weak) { if (E.pool[weak].mark < E.mark) { E.pool[weak].mark=E.mark; } } static void gc_mark(int root) { if (E.pool[root].mark < E.mark+1) { struct link *children=E.pool[root].u.n.children; E.pool[root].mark=E.mark+1; if (children) { int i; if (E.pool[root].u.c.weak==WEAK_CONTAINER) { bool merge=false; for (i=children->number-1;i>=0;i--) { int child=children->children[i]; if ((intptr_t)E.pool[child].u.n.mem == FREED_POINTER) { children->children[i]=0; merge=true; } else gc_mark_weak(child); } if (merge) { 合并weakContainer里的free node int j; for (i=0;i<children->number;i++) { if (children->children[i]==0) { 找到第一个为0的child break; } } for (j=i,++i;i<children->number;i++) { if (children->children[i]!=0) { 取消所有为0的child children->children[j++]=children->children[i]; } } children->number=j; } } else { for (i=children->number-1;i>=0;i--) { gc_mark(children->children[i]); } } } } } /* collect the memory block can no longer be otherwise accessed */ void gc_collect() { int i; stack_pack(); cache_flush(); stack_pack与cache_flush的调用顺序不能改变。还是原引云风的说法“gc 最重要的一点,就是要对堆栈上的数据进行关联。在收集发生时,堆栈上所有临时分配出来的内存块都不应该被释放掉。” gc_mark(0); for (i=0;i<E.size;i++) { if (E.pool[i].mark < E.mark) { if (E.pool[i].mark >= 0) { 无视free node和才分配出来的node(这两者的mark均为-1) void *p=E.pool[i].u.n.mem; if (E.pool[i].u.n.finalizer && E.pool[i].u.c.weak!=WEAK_CONTAINER) { E.pool[i].u.n.finalizer(p); } if ((intptr_t)p != FREED_POINTER) { my_free(p); map_erase(i); } node_free(i); } } else if (E.pool[i].mark == E.mark) { weakContainer里所有的weakReference每次都会被回收,但仍然保留了weakContainer与weanReference的关联关系,由gc_mark/gc_weak_next去负责清除。因此后面才仅仅设置了一个FREED_POINTER,而没有进行node_free void *p=E.pool[i].u.n.mem; if (E.pool[i].u.n.finalizer && E.pool[i].u.c.weak!=WEAK_CONTAINER) { E.pool[i].u.n.finalizer(p); E.pool[i].u.n.finalizer=0; } my_free(p); map_erase(i); E.pool[i].u.c.mem=FREED_POINTER; } } E.mark+=2; } /* only for debug, print all the relationship of all nodes */ void gc_dryrun() { int i; printf("------dry run begin----\n"); stack_pack(); cache_flush(); gc_mark(0); for (i=0;i<E.size;i++) { struct link *link=E.pool[i].u.n.children; if (E.pool[i].mark >= E.mark+1) { if (E.pool[i].u.c.weak == WEAK_CONTAINER) { printf("%d[weak] -> ",i); } else { printf("%d(%p) -> ",i,E.pool[i].u.n.mem); } } else if (E.pool[i].mark == E.mark) { printf("w %d: ",i); } else if (E.pool[i].mark >= 0 ) { printf("x %d(%p): ",i,E.pool[i].u.n.mem); } else { continue; } if (link) { int j; for (j=0;j<link->number;j++) { printf("%d,",link->children[j]); } } printf("\n"); } E.mark++; printf("------dry run end------\n"); } /* allocate a memory block , and create a node map to it. node will link to parent */ void* gc_malloc(size_t sz,void *parent,void (*finalizer)(void *)) { void *ret=my_malloc(sz); int id=map_id(ret); E.pool[id].u.n.finalizer=finalizer; if (parent) { gc_link(parent,0,ret); } else { stack_push(id); } return ret; } struct gc_weak_table { int node_id; }; /* allocate a weak pointer container */ struct gc_weak_table* gc_weak_table(void *parent) { struct gc_weak_table *ret=my_malloc(sizeof(*ret)); ret->node_id=map_id(ret); E.pool[ret->node_id].u.c.weak=WEAK_CONTAINER; if (parent) { gc_link(parent,0,ret); } else { stack_push(ret->node_id); } return ret; } /* iterate the weak container */ void* gc_weak_next(struct gc_weak_table *cont,int *iter) { int i,j; struct link *children; cache_flush(); children = E.pool[cont->node_id].u.n.children; if (children==0) { return 0; } for (i = (iter==0 ? 0 : *iter) ;i < children->number; i++) { int id=children->children[i]; if (id) { if (E.pool[id].u.c.mem == FREED_POINTER) { children->children[i] = 0; } else { if (iter) { *iter=i+1; } stack_push(id); return E.pool[id].u.n.mem; } } } for (i=0;i<children->number;i++) { if (children->children[i]==0) { break; } } for (j=i,++i;i<children->number;i++) { if (children->children[i]!=0) { children->children[j++]=children->children[i]; } } children->number=j; return 0; } /* clone a memory block , this will copy all the edges linked to orginal node */ void* gc_clone(void *from,size_t sz) { int from_id=map_id(from); void *to=my_malloc(sz); int to_id=map_id(to); struct link *from_children=E.pool[from_id].u.n.children; stack_push(to_id); cache_flush(); memcpy(to,from,sz); E.pool[to_id].u.n.finalizer=E.pool[from_id].u.n.finalizer; E.pool[to_id].u.n.children=link_expand(E.pool[to_id].u.n.children,from_children->number); memcpy(E.pool[to_id].u.n.children,from_children,sizeof(*from_children) + (from_children->number-1)*sizeof(int)); return to; } /* realloc a memory block , all the edages linked to it will be retained */ void* gc_realloc(void *p,size_t sz,void *parent) { void *ret; assert(sz>0); if (p==0) { return gc_malloc(sz,parent,0); } ret=my_realloc(p,sz); if (ret==p) { return ret; } else { 当重新分配内存后,原内存块被realloc移动到了新的合适的位置,这就导致了必须重新为其分配句柄,并且将原有link关系复制过去 int new_id=map_id(ret); int old_id=map_id(p); struct link *tmp=E.pool[new_id].u.n.children; E.pool[new_id].u.n.children=E.pool[old_id].u.n.children; E.pool[old_id].u.n.children=tmp; E.pool[new_id].u.n.finalizer=E.pool[old_id].u.n.finalizer; if (parent) { gc_link(parent,p,ret); } else { stack_push(new_id); } map_erase(old_id); E.pool[old_id].u.c.mem=FREED_POINTER; } return ret; } 声明:ITeye文章版权属于作者,受法律保护。没有作者书面许可不得转载。
推荐链接
|
|
返回顶楼 | |