zlib是一個比較通用的壓縮庫,通過在程序中引入zlib,可以方便的對數據進行壓縮。今天花了點時間研究了一下,在tbuldr中實現了直接將數據庫中的數據導出為gzip文件的功能。
下載zlib源代碼,對于Linux,可以編譯成靜態庫,然后將zlib.h,libz.a(如有必要,再加上zonf.h)和程序代碼放置到同一個目錄,編譯的時候包含libz.a即可。
#./configure
#make
#make install
如果要編譯成動態庫,則只需要第一步改成./configure -s即可。
廢話少說,下面是一段演示代碼,非常簡單:
#include <stdio.h>
#include "zlib.h"
int main()
{
char *data = "this is a gzip test from NinGoo.net";
gzFile *fp=NULL;
fp=gzopen("test_out.gz","wb");
gzwrite(fp,data,strlen(data));
gzclose(fp);
}
編譯:
gcc -o test test.c libz.a
執行:
$./test
$gzip -d test_out.gz
$more test_out
this is a gzip test from NinGoo.net
tbuldr中,只需要在file參數指定的文件名后面加上.gz,就會自動輸出為gzip格式的文件。至此,tbuldr已基本實現了sqluldr2的全部功能,性能測試結果也不相上下了。
另外,zlib也可以在內存中對數據進行壓縮和解壓縮,參考:
http://hispider.googlecode.com/svn/trunk/devel/hispider/src/utils/zstream.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <zlib.h>
/* Compress data */
int zcompress(Bytef *data, uLong ndata,
Bytef *zdata, uLong *nzdata)
{
z_stream c_stream;
int err = 0;
if(data && ndata > 0)
{
c_stream.zalloc = (alloc_func)0;
c_stream.zfree = (free_func)0;
c_stream.opaque = (voidpf)0;
if(deflateInit(&c_stream, Z_DEFAULT_COMPRESSION) != Z_OK) return -1;
c_stream.next_in = data;
c_stream.avail_in = ndata;
c_stream.next_out = zdata;
c_stream.avail_out = *nzdata;
while (c_stream.avail_in != 0 && c_stream.total_out < *nzdata)
{
if(deflate(&c_stream, Z_NO_FLUSH) != Z_OK) return -1;
}
if(c_stream.avail_in != 0) return c_stream.avail_in;
for (;;) {
if((err = deflate(&c_stream, Z_FINISH)) == Z_STREAM_END) break;
if(err != Z_OK) return -1;
}
if(deflateEnd(&c_stream) != Z_OK) return -1;
*nzdata = c_stream.total_out;
return 0;
}
return -1;
}
/* Compress gzip data */
int gzcompress(Bytef *data, uLong ndata,
Bytef *zdata, uLong *nzdata)
{
z_stream c_stream;
int err = 0;
if(data && ndata > 0)
{
c_stream.zalloc = (alloc_func)0;
c_stream.zfree = (free_func)0;
c_stream.opaque = (voidpf)0;
if(deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
-MAX_WBITS, 8, Z_DEFAULT_STRATEGY) != Z_OK) return -1;
c_stream.next_in = data;
c_stream.avail_in = ndata;
c_stream.next_out = zdata;
c_stream.avail_out = *nzdata;
while (c_stream.avail_in != 0 && c_stream.total_out < *nzdata)
{
if(deflate(&c_stream, Z_NO_FLUSH) != Z_OK) return -1;
}
if(c_stream.avail_in != 0) return c_stream.avail_in;
for (;;) {
if((err = deflate(&c_stream, Z_FINISH)) == Z_STREAM_END) break;
if(err != Z_OK) return -1;
}
if(deflateEnd(&c_stream) != Z_OK) return -1;
*nzdata = c_stream.total_out;
return 0;
}
return -1;
}
/* Uncompress data */
int zdecompress(Byte *zdata, uLong nzdata,
Byte *data, uLong *ndata)
{
int err = 0;
z_stream d_stream; /* decompression stream */
d_stream.zalloc = (alloc_func)0;
d_stream.zfree = (free_func)0;
d_stream.opaque = (voidpf)0;
d_stream.next_in = zdata;
d_stream.avail_in = 0;
d_stream.next_out = data;
if(inflateInit(&d_stream) != Z_OK) return -1;
while (d_stream.total_out < *ndata && d_stream.total_in < nzdata) {
d_stream.avail_in = d_stream.avail_out = 1; /* force small buffers */
if((err = inflate(&d_stream, Z_NO_FLUSH)) == Z_STREAM_END) break;
if(err != Z_OK) return -1;
}
if(inflateEnd(&d_stream) != Z_OK) return -1;
*ndata = d_stream.total_out;
return 0;
}
/* HTTP gzip decompress */
int httpgzdecompress(Byte *zdata, uLong nzdata,
Byte *data, uLong *ndata)
{
int err = 0;
z_stream d_stream = {0}; /* decompression stream */
static char dummy_head[2] =
{
0x8 + 0x7 * 0x10,
(((0x8 + 0x7 * 0x10) * 0x100 + 30) / 31 * 31) & 0xFF,
};
d_stream.zalloc = (alloc_func)0;
d_stream.zfree = (free_func)0;
d_stream.opaque = (voidpf)0;
d_stream.next_in = zdata;
d_stream.avail_in = 0;
d_stream.next_out = data;
if(inflateInit2(&d_stream, 47) != Z_OK) return -1;
while (d_stream.total_out < *ndata && d_stream.total_in < nzdata) {
d_stream.avail_in = d_stream.avail_out = 1; /* force small buffers */
if((err = inflate(&d_stream, Z_NO_FLUSH)) == Z_STREAM_END) break;
if(err != Z_OK )
{
if(err == Z_DATA_ERROR)
{
d_stream.next_in = (Bytef*) dummy_head;
d_stream.avail_in = sizeof(dummy_head);
if((err = inflate(&d_stream, Z_NO_FLUSH)) != Z_OK)
{
return -1;
}
}
else return -1;
}
}
if(inflateEnd(&d_stream) != Z_OK) return -1;
*ndata = d_stream.total_out;
return 0;
}
/* Uncompress gzip data */
int gzdecompress(Byte *zdata, uLong nzdata,
Byte *data, uLong *ndata)
{
int err = 0;
z_stream d_stream = {0}; /* decompression stream */
static char dummy_head[2] =
{
0x8 + 0x7 * 0x10,
(((0x8 + 0x7 * 0x10) * 0x100 + 30) / 31 * 31) & 0xFF,
};
d_stream.zalloc = (alloc_func)0;
d_stream.zfree = (free_func)0;
d_stream.opaque = (voidpf)0;
d_stream.next_in = zdata;
d_stream.avail_in = 0;
d_stream.next_out = data;
if(inflateInit2(&d_stream, -MAX_WBITS) != Z_OK) return -1;
//if(inflateInit2(&d_stream, 47) != Z_OK) return -1;
while (d_stream.total_out < *ndata && d_stream.total_in < nzdata) {
d_stream.avail_in = d_stream.avail_out = 1; /* force small buffers */
if((err = inflate(&d_stream, Z_NO_FLUSH)) == Z_STREAM_END) break;
if(err != Z_OK )
{
if(err == Z_DATA_ERROR)
{
d_stream.next_in = (Bytef*) dummy_head;
d_stream.avail_in = sizeof(dummy_head);
if((err = inflate(&d_stream, Z_NO_FLUSH)) != Z_OK)
{
return -1;
}
}
else return -1;
}
}
if(inflateEnd(&d_stream) != Z_OK) return -1;
*ndata = d_stream.total_out;
return 0;
}
#ifdef _DEBUG_ZSTREAM
#define BUF_SIZE 65535
int main()
{
char *data = "kjdalkfjdflkjdlkfjdklfjdlkfjlkdjflkdjflddajfkdjfkdfaskf;ldsfk;ldakf;ldskfl;dskf;ld";
uLong ndata = strlen(data);
Bytef zdata[BUF_SIZE];
uLong nzdata = BUF_SIZE;
Bytef odata[BUF_SIZE];
uLong nodata = BUF_SIZE;
memset(zdata, 0, BUF_SIZE);
//if(zcompress((Bytef *)data, ndata, zdata, &nzdata) == 0)
if(gzcompress((Bytef *)data, ndata, zdata, &nzdata) == 0)
{
fprintf(stdout, "nzdata:%d %s\n", nzdata, zdata);
memset(odata, 0, BUF_SIZE);
//if(zdecompress(zdata, ndata, odata, &nodata) == 0)
if(gzdecompress(zdata, ndata, odata, &nodata) == 0)
{
fprintf(stdout, "%d %s\n", nodata, odata);
}
}
}
#endif