Add LIT_MEM define to use more memory for a small deflate speedup.
A bug fix in zlib 1.2.12 resulted in a slight slowdown (1-2%) of deflate. This commit provides the option to #define LIT_MEM, which uses more memory to reverse most of that slowdown. The memory for the pending buffer and symbol buffers is increased by 25%, which increases the total memory usage with the default parameters by about 6%.
This commit is contained in:
parent
bd9c329c10
commit
ac8f12c97d
21
deflate.c
21
deflate.c
@ -493,7 +493,11 @@ int ZEXPORT deflateInit2_(z_streamp strm, int level, int method,
|
||||
* symbols from which it is being constructed.
|
||||
*/
|
||||
|
||||
#ifdef LIT_MEM
|
||||
s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 5);
|
||||
#else
|
||||
s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 4);
|
||||
#endif
|
||||
s->pending_buf_size = (ulg)s->lit_bufsize * 4;
|
||||
|
||||
if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL ||
|
||||
@ -503,8 +507,14 @@ int ZEXPORT deflateInit2_(z_streamp strm, int level, int method,
|
||||
deflateEnd (strm);
|
||||
return Z_MEM_ERROR;
|
||||
}
|
||||
#ifdef LIT_MEM
|
||||
s->d_buf = (ushf *)(s->pending_buf + (s->lit_bufsize << 1));
|
||||
s->l_buf = s->pending_buf + (s->lit_bufsize << 2);
|
||||
s->sym_end = s->lit_bufsize - 1;
|
||||
#else
|
||||
s->sym_buf = s->pending_buf + s->lit_bufsize;
|
||||
s->sym_end = (s->lit_bufsize - 1) * 3;
|
||||
#endif
|
||||
/* We avoid equality with lit_bufsize*3 because of wraparound at 64K
|
||||
* on 16 bit machines and because stored blocks are restricted to
|
||||
* 64K-1 bytes.
|
||||
@ -720,9 +730,15 @@ int ZEXPORT deflatePrime(z_streamp strm, int bits, int value) {
|
||||
|
||||
if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
|
||||
s = strm->state;
|
||||
#ifdef LIT_MEM
|
||||
if (bits < 0 || bits > 16 ||
|
||||
(uchf *)s->d_buf < s->pending_out + ((Buf_size + 7) >> 3))
|
||||
return Z_BUF_ERROR;
|
||||
#else
|
||||
if (bits < 0 || bits > 16 ||
|
||||
s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3))
|
||||
return Z_BUF_ERROR;
|
||||
#endif
|
||||
do {
|
||||
put = Buf_size - s->bi_valid;
|
||||
if (put > bits)
|
||||
@ -1308,7 +1324,12 @@ int ZEXPORT deflateCopy(z_streamp dest, z_streamp source) {
|
||||
zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);
|
||||
|
||||
ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
|
||||
#ifdef LIT_MEM
|
||||
ds->d_buf = (ushf *)(ds->pending_buf + (ds->lit_bufsize << 1));
|
||||
ds->l_buf = ds->pending_buf + (ds->lit_bufsize << 2);
|
||||
#else
|
||||
ds->sym_buf = ds->pending_buf + ds->lit_bufsize;
|
||||
#endif
|
||||
|
||||
ds->l_desc.dyn_tree = ds->dyn_ltree;
|
||||
ds->d_desc.dyn_tree = ds->dyn_dtree;
|
||||
|
31
deflate.h
31
deflate.h
@ -23,6 +23,10 @@
|
||||
# define GZIP
|
||||
#endif
|
||||
|
||||
/* define LIT_MEM to slightly increase the speed of deflate (order 1% to 2%) at
|
||||
the cost of a larger memory footprint */
|
||||
/* #define LIT_MEM */
|
||||
|
||||
/* ===========================================================================
|
||||
* Internal compression state.
|
||||
*/
|
||||
@ -217,7 +221,12 @@ typedef struct internal_state {
|
||||
/* Depth of each subtree used as tie breaker for trees of equal frequency
|
||||
*/
|
||||
|
||||
#ifdef LIT_MEM
|
||||
ushf *d_buf; /* buffer for distances */
|
||||
uchf *l_buf; /* buffer for literals/lengths */
|
||||
#else
|
||||
uchf *sym_buf; /* buffer for distances and literals/lengths */
|
||||
#endif
|
||||
|
||||
uInt lit_bufsize;
|
||||
/* Size of match buffer for literals/lengths. There are 4 reasons for
|
||||
@ -239,7 +248,7 @@ typedef struct internal_state {
|
||||
* - I can't count above 4
|
||||
*/
|
||||
|
||||
uInt sym_next; /* running index in sym_buf */
|
||||
uInt sym_next; /* running index in symbol buffer */
|
||||
uInt sym_end; /* symbol table full when sym_next reaches this */
|
||||
|
||||
ulg opt_len; /* bit length of current block with optimal trees */
|
||||
@ -318,6 +327,25 @@ void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf,
|
||||
extern const uch ZLIB_INTERNAL _dist_code[];
|
||||
#endif
|
||||
|
||||
#ifdef LIT_MEM
|
||||
# define _tr_tally_lit(s, c, flush) \
|
||||
{ uch cc = (c); \
|
||||
s->d_buf[s->sym_next] = 0; \
|
||||
s->l_buf[s->sym_next++] = cc; \
|
||||
s->dyn_ltree[cc].Freq++; \
|
||||
flush = (s->sym_next == s->sym_end); \
|
||||
}
|
||||
# define _tr_tally_dist(s, distance, length, flush) \
|
||||
{ uch len = (uch)(length); \
|
||||
ush dist = (ush)(distance); \
|
||||
s->d_buf[s->sym_next] = dist; \
|
||||
s->l_buf[s->sym_next++] = len; \
|
||||
dist--; \
|
||||
s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
|
||||
s->dyn_dtree[d_code(dist)].Freq++; \
|
||||
flush = (s->sym_next == s->sym_end); \
|
||||
}
|
||||
#else
|
||||
# define _tr_tally_lit(s, c, flush) \
|
||||
{ uch cc = (c); \
|
||||
s->sym_buf[s->sym_next++] = 0; \
|
||||
@ -337,6 +365,7 @@ void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf,
|
||||
s->dyn_dtree[d_code(dist)].Freq++; \
|
||||
flush = (s->sym_next == s->sym_end); \
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)
|
||||
# define _tr_tally_dist(s, distance, length, flush) \
|
||||
|
18
trees.c
18
trees.c
@ -899,14 +899,19 @@ local void compress_block(deflate_state *s, const ct_data *ltree,
|
||||
const ct_data *dtree) {
|
||||
unsigned dist; /* distance of matched string */
|
||||
int lc; /* match length or unmatched char (if dist == 0) */
|
||||
unsigned sx = 0; /* running index in sym_buf */
|
||||
unsigned sx = 0; /* running index in symbol buffers */
|
||||
unsigned code; /* the code to send */
|
||||
int extra; /* number of extra bits to send */
|
||||
|
||||
if (s->sym_next != 0) do {
|
||||
#ifdef LIT_MEM
|
||||
dist = s->d_buf[sx];
|
||||
lc = s->l_buf[sx++];
|
||||
#else
|
||||
dist = s->sym_buf[sx++] & 0xff;
|
||||
dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8;
|
||||
lc = s->sym_buf[sx++];
|
||||
#endif
|
||||
if (dist == 0) {
|
||||
send_code(s, lc, ltree); /* send a literal byte */
|
||||
Tracecv(isgraph(lc), (stderr," '%c' ", lc));
|
||||
@ -931,8 +936,12 @@ local void compress_block(deflate_state *s, const ct_data *ltree,
|
||||
}
|
||||
} /* literal or match pair ? */
|
||||
|
||||
/* Check that the overlay between pending_buf and sym_buf is ok: */
|
||||
/* Check for no overlay of pending_buf on needed symbols */
|
||||
#ifdef LIT_MEM
|
||||
Assert(s->pending < (s->lit_bufsize << 1) + sx, "pendingBuf overflow");
|
||||
#else
|
||||
Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow");
|
||||
#endif
|
||||
|
||||
} while (sx < s->sym_next);
|
||||
|
||||
@ -1082,9 +1091,14 @@ void ZLIB_INTERNAL _tr_flush_block(deflate_state *s, charf *buf,
|
||||
* the current block must be flushed.
|
||||
*/
|
||||
int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc) {
|
||||
#ifdef LIT_MEM
|
||||
s->d_buf[s->sym_next] = (ush)dist;
|
||||
s->l_buf[s->sym_next++] = (uch)lc;
|
||||
#else
|
||||
s->sym_buf[s->sym_next++] = (uch)dist;
|
||||
s->sym_buf[s->sym_next++] = (uch)(dist >> 8);
|
||||
s->sym_buf[s->sym_next++] = (uch)lc;
|
||||
#endif
|
||||
if (dist == 0) {
|
||||
/* lc is the unmatched char */
|
||||
s->dyn_ltree[lc].Freq++;
|
||||
|
Loading…
Reference in New Issue
Block a user