- reference: Python 源码剖析
- Python_dict_object
- 使用散列表(hash table)
- PyDictOject
- PyDictObject的创建和维护
- PyDictObject对象创建
- PyDictObject中的元素搜索
- 插入与删除
- PyDictObject对象缓冲池
1.Python中的Dict对象
a.使用散列表(hash table)
开放地址法 + 伪删除
b.PyDictOject
1 2 3 4 5 6
| [dictobject.h] typedef struct{ Py_ssize_t me_hash; PyObject* me_key; PyObject* me_value; } PyDictEntry;
|
PyDictObject三种状态转换:Unused态,Active态,Dummy态
1 2 3 4 5 6 7 8 9 10 11 12
| [dictobject.h] #define PyDict_MINSIZE 8 typedef struct _dictobject PyDictObject; struct _dictobject{ PyObject_HEAD Py_ssize_t ma_fill; Py_ssize_t ma_used; Py_ssize_t ma_mask; PyDictEntry* ma_table; PyDictEntry* (*ma_lookup)(PyDictObject* mp, PyObject* key, long hash); PyDictEntry ma_smalltable[PyDict_MINSIZE]; }
|
c.PyDictObject的创建和维护
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
| [dictobject.c] typedef PyDictEntry dictentry; typedef PyDictObject dictobject;
#define INIT_NONZERO_DICT_SLOTS(mp) do{ (mp)->ma_table = (mp)->ma_smalltable; (mp)->ma_mask = PyDict_MINSIZE - 1; }while(0)
#define EMPTY_TO_MINSIZE(mp) do{ memset((mp)->ma_smalltable,0,sizeof((mp)->ma_smalltable)); (mp)->ma_used = (mp)->ma_fill = 0; INIT_NONZERO_DICT_SLOTS(mp); }while (0)
PyObject* PyDIct_New(void){ register dictobject* mp; if(dummy == NULL){ dummy = PyString_FromString("<dummy key>"); }
if(num_free_dicts){ ... } else{ mp = PyObject_GC_New(dictobject,&PyDict_Type); EMPTY_TO_MINSIZE(mp); } mp->ma_lookup = lookdict_string; return (PyObject* )mp; }
|
两种搜索策略,lookdict和lookdict_string(默认搜索策略)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
| [dictobject.c] static dictentry* lookdict(dictobject *mp, PyObject *key, register long hash){ register size_t i; register size_t perturb; register dictentry *freeslot; register size_t mask = mp->ma_mask; dictentry *ep0 = mp->ma_table; register dictentry *ep;
register int restore_error; register int checked_error; register int cmp; PyObject* err_type, *err_value, *err_tb; PyObject* startkey;
i = hash & mask; ep = &ep0[i];
if(ep->me_key == NULL || ep->me_key == key) return ep;
if(ep->me_key == dummy) freeslot = ep; else{ if(ep->me_hash == hash){ startkey = ep->me_key; cmp = PyObject_RichCompareBool(startkey,key,Py_EQ); if(cmp > 0) return ep; } freeslot = NULL; } .... }
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
| [dictobject.c] static dictentry* lookdict(dictobject* mp,PyObject* key, register long hash){ register int i; register unsigned int perturb; register dictentry* freeslot; register unsigned int mask = mp->ma_mask; dictentry *ep0 = mp->ma_table; register dictentry* ep; register int cmp; PyObject* startkey; .... for(perturb = hash;;perturb >>= PERTURB_SHIFT){
i = (i<<2) + i + perturb + 1; ep = &ep0[i & mask];
if(ep->me_key == NULL) return freeslot == NULL ? ep : freeslot;
if(ep->me_key == key) return ep;
if(ep->me_hash == hash && ep->me_key != dummy){ startkey = ep->me_key; cmp = PyObject_RichCompareBool(startkey,key,Py_EQ); if(cmp > 0) return ep; } else if(ep->me_key == dummy && freeslot == NULL) freeslot = ep; }
}
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
|
[dictobject.c] static dictentry* lookdict_string(dictobject* mp, PyObject* key, register long hash){ register int i; register unsigned int perturb; register dictentry* freeslot; register unsigned int mask = mp->ma_mask; dictentry* ep0 = mp->ma_table; register dictentry* ep;
if(!PyString_CheckExact(key)){ mp->ma_lookup = lookdict; return lookdict(mp,key,hash); }
i = hash & mask; ep = &ep0[i];
if(ep->me_key == NULL || ep->me_key == key) return ep;
if(ep->me_key == dummy) freeslot = ep; else{ if(ep->me_hash == hash && _PyString_Eq(ep->me_key,key)){ return ep; } freeslot = NULL; }
for(perturb = hash; ; perturb >> PERTURB_SHIFT){ i = (i << 2) + i + perturb + 1; ep = &ep0[i&mask] if(ep->me_key == NULL) return freeslot == NULL ? ep : freeslot; if(ep->me_key == key || (ep->me_hash == hash && ep->me_hash != dummy && _PyString_Eq(ep->me_key, key))) return ep; if(ep->me_key == dummy && freeslot == NULL) freeslot = ep; } }
|
插入
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
| [dictobject.c] static void insertdict(register dictobject* mp, PyObject* key, long hash , PyObject* value){ PyObject* old_value; register dictentry* ep;
ep = mp->ma_lookup(mp,key,hash);
if(ep->me_value != NULL){ old_value = ep->me_value; ep->me_value = value; Py_DECREF(old_value); Py_DECREF(key); } else{ if(ep->me_key == NULL) mp->ma_fill++; else Py_DECREF(ep->me_key); ep->me_key = key; ep->me_hash = hash; ep->me_value = value; mp->ma_used++; } }
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
| [dictobject.c] int PyDict_SetItem(register PyObject* op, PyObject* key, PyObject* value){ register dictobject* mp; register long hash; register Py_ssize_t n_used;
mp = (dictobject *)op; if(PyString_CheckExact(key)){ hash = ((PyStringObject *)key)->ob_shash; if(hash == -1) hash = PyObject_Hash(key); } else{ hash = PyObject_Hash(key); if(hash == -1) return -1; }
n_used = mp->ma_used; insertdict(mp,key,hash,value);
if(!(mp->ma_used > n_used && mp->ma_fill*3 >= (mp->ma_mask + 1)*2)) return 0; return dictresize(mp,mp->ma_used*(mp->ma_used > 50000 ? 2 : 4)); }
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
| static int dictresize(dictobject* mp, int minused){ Py_ssize_t newsize; dictentry* oldtable, *newtable, *ep; Py_ssize_t i; int is_oldtable_malloced; dictentry small_copy[PyDict_MINSIZE];
for(newsize = PyDict_MINSIZE; newsize <= minused && newsize > 0; newsize <<=1);
oldtable = mp->ma_table; is_oldtable_malloced = (oldtable != mp->ma_smalltable);
if(newsize == PyDict_MINSIZE){ newtable = mp->ma_smalltable; if(newtable == oldtable){ if(mp->ma_fill == mp->ma_used){ return 0; } memcpy(small_copy,oldtable,sizeof(small_copy)); oldtable = small_copy; } }
else{ newtable = PyMem_NEW(dictentry,newsize); }
mp->ma_table = newtable; mp->ma_mask = newsize - 1; memset(newtable,0,sizeof(dictentry)*newsize); mp->ma_used = 0; i = mp->ma_fill; mp->ma_fill = 0;
for(ep = oldtable; i> 0; ep++){ if(ep->me_value != NULL){ --i; insertdict(mp,ep->me_key,ep->me_hash,ep->me_value); } else if(ep->me_key != NULL){ --i; assert(ep->me_key == dummy); Py_DECREF(ep->me_key); } }
if(is_oldtable_malloced) PyMem_DEL(oldtable); return 0; }
|
删除
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
| [dictobject.c] int PyDict_DelItem(PyObject* op, PyObject* key){ register dictobject* mp; register long hash; register dictentry* ep; PyObject* old_value, *old_key;
if(!PyString_CheckExact(key) || (hash = (PyStringObject *)key->ob_shash) == -1){ hash = PyObject_Hash(key); if(hash == -1) return -1; }
mp = (dictobject *)op; ep = (mp->ma_lookup)(mp,key,hash); if(ep->me_value == NULL){ return -1; }
old_key = ep->me_key; ep->me_key = dummy; old_value = ep->me_value; ep->me_value = NULL; mp->ma_used--; Py_DECREF(old_value); Py_DECREF(old_key); return 0; }
|
d.PyDictObject对象缓冲池
1 2 3 4
| [dictobject.c] #define MAXFREEDICTS 80 static PyDictObject* free_dicts[MAXFREEDICTS]; static int num_free_dicts = 0;
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
| [dictobject.c] static void dict_dealloc(register dictobject* mp){ register dictentry* ep; Py_ssize_t fill = mp->ma_fill;
for(ep = mp->ma_table; fill > 0; ep++){ if(ep->me_key){ --fill; Py_DECREF(ep->me_key); Py_XDECREF(ep->me_value); } }
if(mp->ma_table != mp->ma_smalltable) PyMem_DEL(mp->ma_table);
if(num_free_dicts < MAXFREEDICTS && mp->ob_type == &PyDict_Type) free_dicts[num_free_dicts++] = mp; else mp->ob_type->tp_free((PyObject *)mp); }
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14
| [dictobject.c] PyObject* PyDict_New(void){ register dictobject *mp; ..... if(num_free_dicts){ mp = free_dicts[--num_free_dicts]; _Py_NewReference((PyObject *)mp); if(mp->ma_fill){ EMPTY_TO_MINSIZE(mp); } } ..... }
|