2、多線程內(nèi)存池
上一節(jié)很簡(jiǎn)略的說了下單線程內(nèi)存池,單線程內(nèi)存池如果要放在多線程環(huán)境下使用是不安全的,我們需要進(jìn)行保護(hù),如何保護(hù),最簡(jiǎn)單的方法就是加臨界區(qū),云風(fēng)的實(shí)現(xiàn)里面是用原子操作模擬一個(gè)臨界區(qū),我實(shí)測(cè)跟臨界區(qū)性能非常接近,甚至很多時(shí)候不如臨界區(qū),所以我就不追求用原子操作模擬臨界區(qū)了,還是直接用臨界區(qū)最簡(jiǎn)單。
class CMemPool
{
public:
struct memory_list
{
memory_list *_next;
};
struct alloc_node
{
size_t _size;
size_t _number;
size_t _bksize;
long _guard;
memory_list *_free_list;
};
struct chunk_list
{
chunk_list *_next;
memory_list *_data;
size_t _size;
size_t _idx;
};
~CMemPool();
static CMemPool &instance()
{
if(!_instance)
{
create_instance();
}
return *_instance;
}
static int chunk_index(size_t size);
void *allocate(size_t size, size_t *psize=NULL);
void deallocate(void *p, size_t size);
//以下為幾個(gè)檢測(cè)和設(shè)置預(yù)分配參數(shù)的函數(shù),2007.06.08增
size_t getallocnumber(size_t size, size_t *psize=NULL);
size_t setallocnumber(size_t size, size_t number);
static void dumpallocnode();
private:
static alloc_node _vnode[92];
chunk_list *_chunk_list;
long _chunk_guard;
static CMemPool *_instance;
static long _singleton_guard;
static bool _singleton_destroyed;
static void create_instance();
static void build_chunknode();
CMemPool();
memory_list *alloc_chunk(size_t idx);
};
CMemPool *CMemPool::_instance = 0;
long CMemPool::_singleton_guard = 0;
bool CMemPool::_singleton_destroyed = false;
CMemPool::alloc_node CMemPool::_vnode[92];
struct chunk_desc
{
int s; // 區(qū)間起始字節(jié)
int e; // 區(qū)間終止字節(jié)
int align; // 區(qū)間內(nèi)分段對(duì)齊值
int number; // 區(qū)間內(nèi)分段個(gè)數(shù),計(jì)算屬性
}_cd[] =
{
{0, 1024, 16, 0},
{1024, 8192, 256, 0}
};
void CMemPool::create_instance()
{
thread_guard guard(&_singleton_guard);
if(_instance)
return;
assert(!_singleton_destroyed);
static CMemPool obj;
_instance = &obj;
}
void CMemPool::build_chunknode()
{
const int cdnum = sizeof(_cd)/sizeof(_cd[0]);
int index=0, number;
for(int j=0; j<cdnum; j++)
{
_cd[j].number = (_cd[j].e-_cd[j].s)/_cd[j].align;
for(int i=0; i<_cd[j].number; i++)
{
int unitsize = (i+1)*_cd[j].align+_cd[j].s;
_vnode[index]._size = unitsize;
if(unitsize < 512)
number = 4096/unitsize;
else if(unitsize < 1024)
number = 16384/unitsize;
else
number = 65536/unitsize;
_vnode[index]._number = number;
_vnode[index]._bksize = unitsize * number;
_vnode[index]._guard = 0;
_vnode[index]._free_list = NULL;
++index;
}
}
}
size_t CMemPool::getallocnumber(size_t size, size_t *psize/*=NULL*/)
{
int idx = chunk_index(size);
if(idx >= 0)
return _vnode[idx]._number;
return 0;
}
size_t CMemPool::setallocnumber(size_t size, size_t number)
{
int idx = chunk_index(size);
if(idx >= 0)
{
size_t on = _vnode[idx]._number;
_vnode[idx]._number = number;
return on;
}
return 0;
}
void CMemPool::dumpallocnode()
{
int size = sizeof(_vnode)/sizeof(_vnode[0]);
printf("_vnode.size = %d\r\n", size);
for(int i=0; i<size; ++i)
{
printf("vnode.size %d, vnode.number %d\r\n", _vnode[i]._size, _vnode[i]._number);
}
}
int CMemPool::chunk_index(size_t bytes)
{
#if(0)
int idx = 0;
const int cdnum = sizeof(_cd)/sizeof(_cd[0]);
if(bytes > _cd[cdnum-1].e)
idx = -1;
else
{
for(int i=0; i<cdnum; i++)
{
if((bytes > _cd[i].s) && (bytes <= _cd[i].e))
{
idx += (bytes-_cd[i].s+_cd[i].align-1)/_cd[i].align-1;
break;
}
idx += _cd[i].number;
}
}
// printf("bytes %d idx = %d\r\n", bytes, idx);
return idx;
#else
//下面的代碼是根據(jù)靜態(tài)數(shù)據(jù)和上面的代碼做了優(yōu)化的,
//如果修改了基礎(chǔ)數(shù)據(jù)需要對(duì)應(yīng)的修改下面的代碼
if(bytes > 8192)
{
return -1;
}
else if(bytes > 1024)
// idx = _cd[0].number+(bytes-_cd[1].s+_cd[1].align-1)/_cd[1].align-1;
return 64+(bytes-1024+255)/256-1;
// return _cd[0].number+(bytes-1024+255)/256-1;
else
// idx = (bytes-_cd[0].s+_cd[0].align-1)/_cd[0].align-1;
return (bytes+15)/16-1;
#endif
}
CMemPool::CMemPool()
{
_chunk_list = NULL;
_chunk_guard = 0;
build_chunknode();
}
CMemPool::~CMemPool()
{
int s = 0;
chunk_list *temp = _chunk_list;
while(temp)
{
++s;
temp = temp->_next;
}
void **chunk = reinterpret_cast<void **>(malloc(s * sizeof(void *)));
temp = _chunk_list;
int i=0;
while(temp)
{
chunk[i] = temp->_data;
++i;
temp = temp->_next;
}
for(i=0; i<s; i++)
{
free(chunk[i]);
}
free(chunk);
_singleton_destroyed = true;
_instance = 0;
}
CMemPool::memory_list *CMemPool::alloc_chunk(size_t idx)
{
thread_guard guard(&_chunk_guard);
memory_list *¤t_list = _vnode[idx]._free_list;
if(current_list)
return current_list;
const size_t node_size = _vnode[idx]._size;
const size_t number = _vnode[idx]._number;
const size_t chunk_size = node_size * number;
memory_list *ret = current_list = reinterpret_cast<memory_list *>(malloc(chunk_size));
memory_list *iter = ret;
//for(size_t i=0; i<=chunk_size-node_size*2; i+=node_size)
for(size_t i=0; i<number-1; ++i)
{
iter = iter->_next = iter+node_size/sizeof(*iter);
}
iter->_next = 0;
return ret;
}
void *CMemPool::allocate(size_t size, size_t *psize/*=NULL*/)
{
int idx = chunk_index(size);
if(idx < 0)
{
if(psize) *psize = size;
return malloc(size);
}
if(psize)
*psize = _vnode[idx]._size;
thread_guard guard(&_vnode[idx]._guard);
memory_list *&temp = _vnode[idx]._free_list;
if(!temp)
{
memory_list *new_chunk = alloc_chunk(idx);
chunk_list *chunk_node;
if(chunk_index(sizeof(chunk_list))==idx)
{
chunk_node = reinterpret_cast<chunk_list *>(temp);
temp = temp->_next;
}
else
{
chunk_node = reinterpret_cast<chunk_list *>(allocate(sizeof(chunk_list)));
}
thread_guard guard(&_chunk_guard);
chunk_node->_next = _chunk_list;
chunk_node->_data = new_chunk;
chunk_node->_size = _vnode[idx]._bksize;
chunk_node->_idx = idx;
_chunk_list = chunk_node;
}
void *ret = temp;
temp = temp->_next;
return ret;
}
void CMemPool::deallocate(void *p, size_t size)
{
int idx = chunk_index(size);
if(idx < 0)
{
free(p);
}
else
{
memory_list *free_block = reinterpret_cast<memory_list *>(p);
thread_guard guard(&_vnode[idx]._guard);
memory_list *&temp = _vnode[idx]._free_list; //_free_list[idx];
free_block->_next = temp;
temp = free_block;
}
}
以上基本是云風(fēng)內(nèi)存池的一個(gè)簡(jiǎn)單修改版,這種模式的內(nèi)存池由于每次分配釋放都要lock unlock,所以效率很低,大概只相當(dāng)于malloc/free 2-4倍的速度,提速也不是很明顯,大概相當(dāng)于nedmalloc速度的一半左右。
發(fā)表于 @ 2010年02月04日 16:54:00 | | 編輯| 舉報(bào)| 收藏