reference: Python 源码剖析
Python_memory_management
内存管理架构
小块空间的内存池
循环引用的垃圾收集
Python中的垃圾收集
1.Python的内存管理机制 a.内存管理架构
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 [pymem.h] PyAPI_FUNC(void *) PyMem_Malloc(size_t ); PyAPI_FUNC(void *) PyMem_Realloc(void *, size_t ); PyAPI_FUNC(void ) PyMem_Free(void *); [object.h] void * PyMem_Malloc (size_t nbytes) { return PyMem_MALLOC(nbytes); } void * PyMem_Realloc (void *p, size_t nbytes) { return PyMem_REALLOC(p,nbytes); } void PyMem_Free (void * p) { PyMem_FREE(p); } [pymem.h] #define PyMem_MALLOC(n) malloc((n) ? (n) : 1) #define PyMem_REALLOC(n) realloc((p), (n) ? (n) : 1) #define PyMem_FREE free
1 2 3 4 5 6 7 8 9 10 11 12 13 14 [pymem.h] #define PyMem_New(type,n) ( (type *)PyMem_Malloc((n) * sizeof (type)) ) #define PyMem_NEW(type,n) ( (type *)PyMem_MALLOC((n) * sizeof (type)) ) #define PyMem_Resize(p,type,n) ( (p) = (type *)PyMem_Realloc((p), (n) * sizeof (type)) ) #define PyMem_RESIZE(p,type,n) ( (p) = (type *)PyMem_REALLOC((p), (n) * sizeof (type)) ) #define PyMem_Del PyMem_Free #define PyMem_DEL PyMem_FREE
第二层构建了PyObject_位前缀的函数族,Pymalloc机制
b.小块空间的内存池 PyObject_Malloc、PyObject_Realloc、PyObject_Free
Block(block 只是一个概念上的东西, 实际不与python某个源码中的东西对应)
1 2 3 4 5 6 7 8 [obmalloc.c] #define ALIGNMENT 8 #define ALIGNMENT_SHIFT 3 #define ALIGNMENT_MASK (ALIGNMENT - 1) [obmalloc.c] #define SMALL_REQUEST_THRESHOLD 256 #define NB_SMALL_SIZE_CLASSES (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
1 2 3 4 5 6 [obmalloc.c] #define INDEX2SIZE(I) (((uint)(I)+ 1) << ALIGNMENT_SHIFT) size = (uint )(nbytes - 1 ) >> ALIGNMENT_SHIFT;
1 2 3 4 5 6 [obmalloc.c] #define SYSTEM_PAGE_SIZE (4 * 1024) #define SYSTEM_PAGE_SIZE_MASK (SYSTEM_PAGE_SIZE - 1) #define POOL_SIZE SYSTEM_PAGE_SIZE #define POOL_SIZE_MASK SYSTEM_PAGE_SIZE_MASK
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 [obmalloc.c] typedef uchar block;struct pool_header { union { block* _padding; uint count; } ref; block* freeblock; struct pool_header * nextpool ; struct pool_header * prevpool ; uint arenaindex; uint szidx; uint nextoffset; uint maxnextoffset; };
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 [obmalloc.c]-[convert 4 k raw memory to pool] #define ROUNDUP(x) (((x) + ALIGNMENT_MASK) & ~ALIGNMENT_MASK) #define POOL_OVERHEAD ROUNDUP(sizeof(struct pool_header)) #define struct pool_header* poolp #define uchar block poolp pool; block* bp; ... pool->ref.count = 1 pool->szidx = size; size = INDEX2SIZE(size); bp = (block *)pool + POOL_OVERHEAD pool->nextoffset = POOL_OVERHEAD + (size << 1 ); pool->maxnextoffset = POOL_SIZE - size; pool->freeblock = bp + size; *(block **)(pool->freeblock) = NULL ; return (void *)bp;
1 2 3 4 5 6 7 8 9 10 11 12 13 14 [obmalloc.c]-[allocate block] if (pool != pool->nextpool){ ++pool->ref.count; bp = pool->freeblock; ... if (pool->nextoffset <= pool->maxnextoffset){ pool->freeblock = (block *)pool + pool->nextoffset; pool->nextoffset += INDEX2SIZE(size); *(block **)(pool->freeblock) = NULL ; return (void *)bp; } }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 [obmalloc.c] #define POOL_ADDR(P) ((poolp)((uptr)(P) & ~(uptr)POOL_SIZE_MASK)) void PyObject_Free (void * p) { poolp pool; block* lastfree; poolp next,prev; uint size; pool = POOL_ADDR(p); if (Py_ADDRESS_IN_RANGE(p,pool)){ *(block **)p = lastfree = pool->freeblock; pool->freeblock = (block *)p; ... } }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 [obmalloc.c]-[allocate block] if (pool != pool->nextpool){ ++pool->ref.count; bp = pool->freeblock; if ((pool->freeblock = *(block **)bp) != NULL ){ return (void *bp); } if (pool->nextoffset <= pool->maxnextoffset){ .... } ... }
多个pool聚合的结果就是一个arena,一个arena256KB
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 [obmalloc.c] #define ARENA_SIZE (256 << 10) [obmalloc.c] typedef uchar block;struct arena_object { uptr address; block* pool_address; uint nfreepools; uint ntotalpools; struct pool_header * freepools ; struct arena_object * nextarena ; struct arena_object * prevarena ; }
pool_header与arena_object对比:当pool_header被申请时,它所管理的pool集合的内存一定也被申请;但是当arena_object被申请时,它所管理的pool集合的内存则没有被申请
arena的两种状态: 当一个arena的arena_object没有与pool集合建立联系时,这时的arena处于“未使用”状态,一旦建立了联系,这时arena就转换到了“可用”状态。
”未使用“的arena的链表表头是unused_arena_objects,通过nextarena链接,是一个单项链表
”可用“的arena的链表表头是usable_arenas,通过nextarena与prevarena链接,是一样个双向链表
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 [obmalloc.c] static struct arena_object * arenas = NULL ;static uint maxarena = 0 ;static struct arena_object * unused_arena_objects = NULL ;static struct arena_object * usable_arenas = NULL ;#define INITIAL_ARENA_OBJECTS 16; static struct arena_object* new_arena (void ) { struct arena_object * arenaobj ; uint excess; if (unused_arena_objects == NULL ){ uint i; uint numarenas; size_t nbytes; numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS; if (numarenas <= maxarenas) return NULL ; nbytes = numarenas * sizeof (*arenas); if (nbytes/sizeof (*arenas) != numarenas) return NULL ; arenaobj = (struct arena_object *)realloc (arenas,nbytes); if (arenaobj == NULL ) return NULL ; arenas = arenaobj; for (i = maxarenas; i < numarenas; ++i){ arenas[i].address = 0 ; arenas[i].nextarena = i < numarenas -1 ? &arenas[i+1 ] : NULL ; } unused_arena_objects = &arenas[maxarenas]; maxarenas = numarenas; } arenaobj = unused_arena_objects; unused_arena_objects = arenaobj->nextarena; assert(arenaobj->address == 0 ); arenaobj->address = (uptr)malloc (ARENA_SIZE); ++narenas_currently_allocated; arenaobj->freepools = NULL ; arenaobj->pool_address = (block *)arenaobj->address; arenaobj->nfreepools = ARENA_SIZE/POOL_SIZE; excess = (uint)(arenaobj->address & POOL_SIZE_MASK); if (excess != 0 ){ --arenaobj->nfreepools; arenaobj->pool_address += POOL_SIZE - excess; } arenaobj->ntotalpools = arenaobj->nfreepools; return arenaobj; }
当Python在WITH_MEMORY_LIMITS编译符号打开的情况下进行编译时,Python的另一个符号会被激活,这个名为SMALL_MEMORY_LIMIT的符号限制了整个内存池的大小,同时也就限制了可以创建的arena的个数
1 2 3 4 5 6 7 8 9 10 11 [obmalloc.c] #ifdef WITH_MEMORY_LIMITS #ifndef SMALL_MEMORY_LIMIT #define SMALL_MEMORY_LIMIT (64 * 1024 * 1024) #endif #endif #ifdef WITH_MEMORY_LIMITS #define MAX_ARENAS (SMALL_MEMORY_LIMIT/ARENA_SIZE) #endif
Python申请内存时,最基本的操作单元并不是arena,而是pool
pool的三种状态:used,full,empty
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 [obmalloc.c] typedef uchar block;#define PTA(x) ((poolp)((uchar* )&usedpools[2 * (x)]) - 2 * sizeof(block *))) #define PT(x) PTA(x), PTA(x) static poolp usedpools[2 * (NB_SMALL_SIZE_CLASSES + 7 )/8 * 8 ] = { PT(0 ),PT(1 ),PT(2 ),PT(3 ),PT(4 ),PT(5 ),PT(6 ),PT(7 ) #if NB_SMALL_SIZE_CLASSES > 8 , PT(8 ), PT(9 ), PT(10 ), PT(11 ), PT(12 ), PT(13 ), PT(14 ), PT(15 ) ... #endif } [obmalloc.c] #define NB_SMALL_SIZE_CLASSES (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 [obmalloc.c] void * PyObject_Malloc (size_t nbytes) { block* bp; poolp pool; poolp next; uint size; if ((nbytes - 1 ) < SMALL_REQUEST_THRESHOLD){ LOCK(); size = (uint )(nbytes - 1 ) >> ALIGNMENT_SHIFT pool = usedpools[size + size]; if (pool != pool->nextpool){ .... } ... } }
!!!!!!arena没有size class的属性,而pool才有
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 [obmalloc.c] void * PyObject_Malloc (size_t nbytes) { block* bp; poolp pool; poolp next; uint size; if ((nbytes - 1 ) < SMALL_REQUEST_THRESHOLD){ LOCK(); size = (uint)(nbytes - 1 ) >> ALIGNMENT_SHIFT; pool = usedpools[size + size]; if (pool != pool->nextpool){ ... } if (usable_arenas == NULL ){ usable_arenas = new_arena(); usable_arenas->nextarena = usable_arenas->prevarena = NULL ; } pool = usable_arenas->freepools; if (pool != NULL ){ usable_arenas->freepools = pool->nextpool; --usable_arenas->nfreepools; if (usable_arenas->nfreepools == 0 ){ usable_arenas = usable_arenas->nextarena; if (usable_arenas != NULL ){ usable_arenas->prevarena = NULL ; } } init pool: ... } } }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 [obmalloc.c] #define ROUNDUP(x) (((x) + ALIGNMENT_MASK) & ~ALIGNMENT_MASK) #define POOL_OVERHEAD ROUNDUP(sizeof(struct pool_header)); void *PyObject_Malloc (size_t nbytes) { ... init_pool: next = usedpools[size + size]; pool->nextpool = next; pool->prevpool = next; next->nextpool = pool; next->prevpool = pool; pool->ref.count = 1 ; if (pool->szidx == size){ bp = pool->freeblock; pool->freeblock = *(block **)bp; UNLOCK(); return (void *)bp; } pool->szidx = size; size = INDEX2SIZE(size); bp = (block *)pool + POOL_OVERHEAD; pool->nextoffset = POOL_OVERHEAD + (size << 1 ); pool->maxnextoffset = POOL_SIZE - size; pool->freeblock = bp + size; *(block **)(pool->freeblock) = NULL ; UNLOCK(); return (void *)bp; ... }
empty状态转换到used状态
1 2 3 4 5 6 7 8 [obmalloc.c] ... pool = usable_arenas->freepools; if (pool != NULL ){ usable_arenas->freepools = pool->nextpool; ... [init_pool] }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 [obmalloc.c] #define DUMMY_SIZE_IDX 0xffff void * PyObject_Malloc (size_t nbytes) { block* bp; poolp pool; poolp next; uint size; ... pool = (poolp)usable_arenas->pool_address; pool->arenaindex = usable_arenas - arenas; pool->szidx = DUMMY_SIZE_IDX; usable_arenas->pool_address += POOL_SIZE; --usable_arenas->nfreepools; if (usable_arenas->nfreepools == 0 ){ usable_arenas = usable_arenas->nextarena; if (usable_arenas != NULL ){ usable_arenas->prevarena = NULL ; } } goto init_pool: ... [obmalloc.c] int Py_ADDRESS_IN_RANGE (void * P, poolp pool) { return pool->arenaindex < maxarenas && (uptr)P - arenas[pool->arenaindex].address < (uptr)ARENA_SIZE && arenas[pool->arenaindex].address != 0 ; }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 [obmalloc.c] void * PyObject_Malloc (size_t nbytes) { block* bp; poolp pool; poolp next; uint size; if ((nbytes - 1 ) < SMALL_REQUEST_THRESHOLD){ size = (uint)(nbytes - 1 ) >> ALIGNMENT_SHIFT; pool = usedpools[size + size]; if (pool != pool->next){ ... next = pool->nextpool; pool = pool->prevpool; next->prevpool = pool; pool->nextpool = next; return (void *)bp; } if (usable_arenas == NULL ){ usable_arenas = new_arena(); usable_arenas->nextarena = usable_arenas->prevarena = NULL ; } pool = usable_arenas->freepools; if (pool != NULL ){ init_pool: ...... } ... goto init_pool; } redirect: if (nbytes == 0 ) nbytes = 1 ; return (void *)malloc (nbytes); }
释放一个block后,pool的状态有两种转变情况: 1.used状态转变为empty状态 2.full状态转变为used状态 3.used仍然处于used状态
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 [obmalloc.c] void PyObject_Free (void *p) { poolp pool; block* lastfree; poolp next, prev; uint size; pool = POOL_ADDR(p); if (Py_ADDRESS_IN_RANGE(p,pool)){ *(block **)p = lastfree = pool->freeblock; pool->freeblock = (block *)p; if (lastfree){ if (--pool->ref.count != 0 ){ return ; } ... } ... } free (p); }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 [obmalloc.c] void PyObject_Free (void *p) { poolp pool; block *lastfree; poolp next,prev; uint size; pool = POOL_ADDR(p); if (Py_ADDRESS_IN_RANGE(p,pool)){ ...... --pool->ref.count; size = pool->szidx; next = usedpools[size + size]; prev = next->prevpool; pool->nextpool = next; pool->prevpool = prev; next->prevpool = pool; prev->nextpool = pool; return ; } ... }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 [obmalloc.c] void PyObject_Free (void *p) { poolp pool; block* lastfree; poolp next, prev; uint size; pool = POOL_ADDR(p); if (Py_ADDRESS_IN_RANGE(p,pool)){ *(block **)p = lastfree = pool->freeblock; pool->freeblock = (block *)p; if (lastfree){ struct arena_object * ao ; uint nf; if (--pool->ref.count != 0 ){ return ; } ao = &arenas[pool->arenaindex]; pool->nextpool = ao->freepools; ao->freepools = pool; nf = ++ao->nfreepools; ... } ... } }
对arena的处理分为了4种情况
如果arena中所有pool都是empty的,释放pool集合占用的内存
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 [obmalloc.c] void PyObject_Free (void *p) { poolp pool; block* lastfree; poolp next,prev; uint size; pool = POOL_ADDR(p); struct arena_object * ao ; uint nf; ... ao = &arenas[pool->arenaindex]; pool->nextpool = ao->freepools; ao->freepools = pool; nf = ++ao->nfreepools; if (nf == ao->ntotalpools ){ if (ao->prevarena == NULL ){ usable_arenas = ao->nextarena; }else { ao->prevarena->nextarena = ao->nextarena; } if (ao->nextarena != NULL ){ ao->nextarena->prevarena = ao->prevarena; } ao->nextarena = unused_arena_objects; unused_arena_objects = ao; free ((void *)ao->address); ao->address = 0 ; --narenas_currently_allocated; } .... }
c.循环引用的垃圾收集
python大量使用的面向特定对象的对象内存池机制正是为了竭力弥补引用计数机制的软肋
python中引入了主流垃圾收集技术中的标记-清除 和 分代收集 来填补其内存管理机制中最后也是最致命的漏洞
垃圾收集机制的两个阶段:垃圾检测和垃圾回收
标记-清除方法的两个阶段
在垃圾收集动作被激活之前,系统中所分配的所有对象和对象之间的引用组成了一张有向图,其中对象是图中的节点,对象间的引用是图的边
d.Python中的垃圾收集 解决循环引用只需要去检查container对象(list,dict),而不用去关心PyIntObject…
python采用了一个双向链表,所有的container对象在创建之后,都会被插入到这个链表中
1 2 3 4 5 6 7 8 9 10 [objimpl.h] typedef union _gc_head{ struct { union _gc_head* gc_next; union _gc_head* gc_prev; int gc_refs; } gc; long double dummy; } PyGC_Head;
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 [gcmodule.c] PyObject* _PyObject_GC_New(PyTypeObject *tp){ PyObject* op = _PyObject_GC_Malloc(_PyObject_SIZE(tp)); if (op != NULL ) op = PyObject_INIT(op,tp); return op; } #define _PyGC_REFS_UNTRACKED (-2) #define GC_UNTRACKED _PyGC_REFS_UNTRACKED PyObject* _PyObject_GC_Malloc(size_t basicsize){ PyObject* op; PyGC_Head* g = PyObject_MALLOC(sizeof (PyGC_Head) + basicsize); g->gc.gc_refs = GC_UNTRACKED; generations[0 ].count++; if (generations[0 ].count > generations[0 ].threshold && enabled && generations[0 ].threshold && !collecting && !PyErr_Occured()){ collecting = 1 ; collect_generations(); collecting = 0 ; } op = FROM_GC(g); return op; }
1 2 3 4 5 6 7 8 9 [gcmodule.c] #define AS_GC(o) ((PyGC_HEAD *)(o) -1) #define FROM_GC(g) ((PyObject *)(((PyGC_HEAD *)g) + 1)) [objimpl.h] #define _Py_AS_GC(o) ((PyGC_HEAD *)(o) - 1)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 [dictobject.c] PyObject* PyDict_New (void ) { register dictobject *mp; ... mp = PyObject_GC_New(dictobject,&PyDict_Type); ... _PyObject_GC_TRACK(mp); return (PyObject *)mp; } [objimpl.h] #define _PyObject_GC_TRACK(o) do{ PyGC_Head* g = _Py_AS_GC(o;) if (g->gc.gc_refs != _PyGC_REFS_UNTRACKED) Py_FataError("GC object already tracked" ); g->gc.gc_refs = _PyGC_REFS_REACHABLE; g->gc.gc_next = _PyGC_generation0; g->gc.gc_prev = _PyGC_generation0->gc.gc_prev; g->gc.gc_prev->gc.gc_next = g; _PyGC_generation0->gc.gc_prev = g; }while (0 ); [objimpl.h] #define _PyObject_GC_UNTRACK(o) do{ PyGC_Head* g = _Py_AS_GC(o;) assert(g->gc.gc_refs != _PyGC_REFS_UNTRACKED); g->gc.gc_refs = _PyGC_REFS_UNTRACKED; g->gc.gc_prev->gc.gc_next = g->gc.gc_next; g->gc.gc_next->gc.gc_prev = g->gc.gc_prev; g->gc.gc_next = NULL ; }while (0 );
规律:一定比例的内存块的生存周期都比较短,通常是几百万条机器指令的时间,而剩下的内存块,其生存周期会比较长,甚至会从程序开始一直持续到程序结束
核心:空间换时间的分代技术, java的老底
手法:将系统中的所有内存块根据其存活时间划分为不同的集合,每一个集合就称为一个“代”,垃圾收集的频率随着”代“的存活时间的增大而减小,如果一个对象经过的垃圾收集次数越多,其存活时间就越长
python一共有三代,_PyGC_generation0是python内部维护的一个指针,指向的是Python中第0代的内存块集合
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 [gcmodule.c] struct gc_generation { PyGC_Head head; int threshold; int count; }; [gcmodule.c] #define NUM_GENERATIONS 3 #define GEN_HEAD(n) (&generations[n].head) static struct gc_generation generations [NUM_GENERATIONS ] = { {{{GEN_HEAD(0 ),GEN_HEAD(0 ),0 }},700 ,0 }, {{{GEN_HEAD(1 ),GEN_HEAD(1 ),0 }},10 , 0 }, {{{GEN_HEAD(2 ),GEN_HEAD(2 ),0 }},10 , 0 }, }; PyGC_Head *_PyGC_generation0 = GEN_HEAD(0 );
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 [gcmodule.c] static Py_ssize_t collect_generations (void ) { int i; Py_ssize_t n = 0 ; for (i = NUM_GENERATIONS - 1 ; i >= 0 ; i--){ if (generations[i].count > generations[i].threshold){ n = collect(i); break ; } } return n; }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 \\将比其“年轻”的所有代的内存链表整个链接到第1 代内存链表之后 [gcmodule.c] static void gc_list_init (PyGC_Head* list ) { list ->gc.gc_prev = list ; list ->gc.gc_next = list ; } static void gc_list_merge (PyGC_Head* from, PyGC_Head* to) { PyGC_Head* tail; if (!gc_list_is_empty(from)){ tail = to->gc.gc_prev; tail->gc.gc_next = from->gc.gc_next; tail->gc.gc_next->gc.gc_prev = tail; to->gc.gc_prev = from->gc.gc_prev; to->gc.gc_prev->gc.gc_next = to; } gc_list_init(from); }
root object是不能被删除的对象,有可收集对象链表外部的某个引用在引用这个对象
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 [gcmodule.c] static void update_refs (PyGC_Head* containers) { PyGC_Head* gc = containers->gc.gc_next; for (;gc != containers; gc = gc->gc.gc_next){ assert(gc->gc.gc_refs == GC_REACHABLE); gc->gc.gc_refs = FROM_GC(gc)->ob_refcnt; } } [gcmodule.c] static void subtract_refs (PyGC_Head* containers) { traverseproc traverse; PyGC_Head* gc = containers->gc.gc_next; for (;gc != containers; gc = gc->gc.gc_next){ traverse = FROM_GC(gc)->ob_type->tp_traverse; (void )traverse(FROM_GC(gc),(visitproc)visit_decref,NULL ); } }
以PyDictObject对象所定义的traverse操作为例
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 [object.h] typedef int (*visitproc) (PyObject *, void *) ;typedef int (*traverseproc) (PyObject *, visitproc, void *) ;[dictobject.c] PyTypeObject PyDict_Type = { ... (traverseproc)dict_traverse, ... }; static int dict_traverse (PyObject* op, visitproc visit, void * arg) { int i = 0 , err; PyObject* pk; PyObject* pv; while (PyDict_Next(op,&i,&pk,&pv)) { visit(pk,arg); visit(pv,arg); } } [gcmodule.c] static int visit_decref (PyObject* op, void * data) { if (PyObject_IS_GC(op)){ PyGC_Head* gc = AS_GC(op); if (gc->gc.gc_refs > 0 ) gc->gc.gc_refs--; } return 0 ; }
subtract_refs之后,不为0的就是root object
root object不可回收,分成两条链root链和unreachable链
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 [gcmodule.c] static void move_unreachable (PyGC_Head* young, PyGC_Head* unreachable) { PyGC_Head* gc = young->gc.gc_next; while (gc != young){ PyGC_Head* next; if (gc->gc_refs){ PyObject* op = FROM_GC(gc); traverseproc traverse = op->ob_type->tp_traverse; gc->gc.gc_refs = GC_REACHABLE; (void )traverse(op,(visitproc)visit_reachable,(void *)young); next = gc->gc.gc_next; } else { next = gc->gc.gc_next; gc_list_move(gc,unreachable); gc->gc.gc_refs = GC_TENTATIVELY_UNREACHABLE; } gc = next; } } static int visit_reachable (PyObject* op, PyGC_Head* reachable) { if (PyObject_IS_GC(op)){ PyGC_Head* gc = AS_GC(op); const int gc_refs = gc->gc.refs; if (gc_refs == 0 ){ gc->gc.gc_refs = 1 ; } else if (gc_refs == GC_TENTATIVELY_UNREACHABLE){ gc_list_move(gc,reachable); gc->gc.gc_refs = 1 ; } else { assert(gc_refs > 0 || gc_refs == GC_REACHABLE || gc_refs == GC_UNTRACKED); } } return 0 ; }
特殊的container对象,即从类对象实例化得到的实例对象,有一个特殊方法”del “,被称为finalizer,假如对象B在finalizer中调用对象A的某个操作,Python必须先回收B,再回收A,但python无法保证回收顺序,采取的方法是:!!!将unreachable链表中的拥有finalizer的PyInstanceObject对象统统都移到一个名为garbage的PyListObject对象
将unreachable链表中的每一个对象的ob_refcnt变为0,引发对象的销毁
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 [gcmodule.c] static int gc_list_is_empty (PyGC_Head* list ) { return (list ->gc.gc_next == list ); } static void delete_garbage (PyGC_Head* collectable, PyGC_Head* old) { inquiry clear; while (!gc_list_is_empty(collectable)){ PyGC_Head* gc = collectable->gc.gc_next; PyObject* op = FROM_GC(gc); if ((clear = op->ob_type->tp_clear) != NULL ){ Py_INCREF(op); clear(op); Py_DECREF(op); } if (collectable->gc.gc_next == gc){ gc_list_move(gc,old); gc->gc.gc_refs = GC_REACHABLE; } } }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 [listobject.c] static int list_clear (PyListObject* a) { int i; PyObject** item = a->ob_item; if (item != NULL ){ i = a->ob_size; a->ob_size = 0 ; a->ob_item = NULL ; a->allocated = 0 ; while (--i >= 0 ){ Py_XDECREF(item[i]); } PyMem_FREE(item); } return 0 ; } [listobject.c] static void list_dealloc (PyListObject* op) { int i; PyObject_GC_UnTrack(op); if (op->ob_item != NULL ){ i = op->ob_size; while (--i >= 0 ){ Py_XDECREF(op->ob_item[i]); } PyMem_FREE(op->ob_item); } ... }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 [gcmodule.c] static long collect (int generation) { int i; long m = 0 ; long n = 0 ; PyGC_Head* young; PyGC_Head* old; PyGC_Head unreachable; PyGC_Head finalizers; PyGC_Head* gc; if (delstr == NULL ){ delstr = PyString_InternFromString("__del__" ); if (delstr == NULL ) Py_FataError("gc couldn't allocate \"__del__\"" ); } if (generation + 1 < NUM_GENERATIONS) generations[generation + 1 ].count += 1 ; for (i = 0 ;i < generation; i++) generations[i].count = 0 ; for (i = 0 ; i < generation; i++){ gc_list_merge(GEN_HEAD(i),GEN_HEAD(generation)); } young = GEN_HEAD(generation); if (generation < NUM_GENERATIONS - 1 ) old = GEN_HEAD(generation + 1 ); else old = young; update_refs(young); subtract_refs(young); gc_list_init(&unreachable); move_unreachable(young, &unreachable); if (young != old) gc_list_merge(young,old); gc_list_init(&finalizers); move_finalizer(&unreachable,&finalizers); move_finalizer_reachable(&finalizers); handle_weakrefs(&unreachable,old); delete_garbage(&unreachable,old); (void )handle_finalizers(&finalizers,old); ... }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 [funcobject.c] static void fun_dealloc (PyFunctionObject* op) { _PyObject_GC_UNTRACK(op); ... PyObject_GC_Del(op); } [gcmodule.c] void PyObject_GC_Del (void * op) { PyGC_Head* g = AS_GC(g); if (IS_TRACKED(op)) gc_list_merge(g); if (generation[0 ].count > 0 ){ generations[0 ].count--; } PyObject_FREE(g); }
总之,python的垃圾收集机制单纯是为了服务循环引用
1 2 3 4 5 6 7 8 9 10 11 12 13 14 [gc1.py] import gcclass A (object) : pass class B (object) : pass gc.set_debug(gc.DEBUG_STATS | gc.DEBUG_LEAK) a = A() b = B() del adel bgc.collect()
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 [gc2.py] import gcclass A (object) : pass class B (object) : pass gc.set_debug(gc.DEBUG_STATS | gc.DEBUG_LEAK) a = A() b = B() a.b = b b.a = a del adel bgc.collect()
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 [gc3.py] import gcclass A (object) : def __del__ (self) : pass class B (object) : def __del__ (self) : pass gc.set_debug(gc.DEBUG_STATS | gc.DEBUG_LEAK) a = A() b = B() a.b = b b.a = a del adel bgc.collect()