diff options
Diffstat (limited to 'modules/xz-embedded/src/xz_dec_lzma2.c')
-rw-r--r-- | modules/xz-embedded/src/xz_dec_lzma2.c | 193 |
1 files changed, 181 insertions, 12 deletions
diff --git a/modules/xz-embedded/src/xz_dec_lzma2.c b/modules/xz-embedded/src/xz_dec_lzma2.c index 08c3c80499..c929f1c82b 100644 --- a/modules/xz-embedded/src/xz_dec_lzma2.c +++ b/modules/xz-embedded/src/xz_dec_lzma2.c @@ -2,7 +2,7 @@ * LZMA2 decoder * * Authors: Lasse Collin <lasse.collin@tukaani.org> - * Igor Pavlov <http://7-zip.org/> + * Igor Pavlov <https://7-zip.org/> * * This file has been put into the public domain. * You can do whatever you want with this file. @@ -147,8 +147,8 @@ struct lzma_dec { /* * LZMA properties or related bit masks (number of literal - * context bits, a mask dervied from the number of literal - * position bits, and a mask dervied from the number + * context bits, a mask derived from the number of literal + * position bits, and a mask derived from the number * position bits) */ uint32_t lc; @@ -248,6 +248,10 @@ struct lzma2_dec { * before the first LZMA chunk. */ bool need_props; + +#ifdef XZ_DEC_MICROLZMA + bool pedantic_microlzma; +#endif }; struct xz_dec_lzma2 { @@ -387,7 +391,14 @@ static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b, *left -= copy_size; - memcpy(dict->buf + dict->pos, b->in + b->in_pos, copy_size); + /* + * If doing in-place decompression in single-call mode and the + * uncompressed size of the file is larger than the caller + * thought (i.e. it is invalid input!), the buffers below may + * overlap and cause undefined behavior with memcpy(). + * With valid inputs memcpy() would be fine here. + */ + memmove(dict->buf + dict->pos, b->in + b->in_pos, copy_size); dict->pos += copy_size; if (dict->full < dict->pos) @@ -397,7 +408,11 @@ static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b, if (dict->pos == dict->end) dict->pos = 0; - memcpy(b->out + b->out_pos, b->in + b->in_pos, + /* + * Like above but for multi-call mode: use memmove() + * to avoid undefined behavior with invalid input. + */ + memmove(b->out + b->out_pos, b->in + b->in_pos, copy_size); } @@ -408,6 +423,12 @@ static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b, } } +#ifdef XZ_DEC_MICROLZMA +# define DICT_FLUSH_SUPPORTS_SKIPPING true +#else +# define DICT_FLUSH_SUPPORTS_SKIPPING false +#endif + /* * Flush pending data from dictionary to b->out. It is assumed that there is * enough space in b->out. This is guaranteed because caller uses dict_limit() @@ -421,8 +442,19 @@ static uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b) if (dict->pos == dict->end) dict->pos = 0; - memcpy(b->out + b->out_pos, dict->buf + dict->start, - copy_size); + /* + * These buffers cannot overlap even if doing in-place + * decompression because in multi-call mode dict->buf + * has been allocated by us in this file; it's not + * provided by the caller like in single-call mode. + * + * With MicroLZMA, b->out can be NULL to skip bytes that + * the caller doesn't need. This cannot be done with XZ + * because it would break BCJ filters. + */ + if (!DICT_FLUSH_SUPPORTS_SKIPPING || b->out != NULL) + memcpy(b->out + b->out_pos, dict->buf + dict->start, + copy_size); } dict->start = dict->pos; @@ -484,11 +516,11 @@ static __always_inline void rc_normalize(struct rc_dec *rc) } /* - * Decode one bit. In some versions, this function has been splitted in three + * Decode one bit. In some versions, this function has been split in three * functions so that the compiler is supposed to be able to more easily avoid * an extra branch. In this particular version of the LZMA decoder, this * doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3 - * on x86). Using a non-splitted version results in nicer looking code too. + * on x86). Using a non-split version results in nicer looking code too. * * NOTE: This must return an int. Do not make it return a bool or the speed * of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care, @@ -761,7 +793,7 @@ static bool lzma_main(struct xz_dec_lzma2 *s) } /* - * Reset the LZMA decoder and range decoder state. Dictionary is nore reset + * Reset the LZMA decoder and range decoder state. Dictionary is not reset * here, because LZMA state may be reset without resetting the dictionary. */ static void lzma_reset(struct xz_dec_lzma2 *s) @@ -774,6 +806,7 @@ static void lzma_reset(struct xz_dec_lzma2 *s) s->lzma.rep1 = 0; s->lzma.rep2 = 0; s->lzma.rep3 = 0; + s->lzma.len = 0; /* * All probabilities are initialized to the same value. This hack @@ -1146,6 +1179,7 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props) if (DEC_IS_DYNALLOC(s->dict.mode)) { if (s->dict.allocated < s->dict.size) { + s->dict.allocated = s->dict.size; vfree(s->dict.buf); s->dict.buf = vmalloc(s->dict.size); if (s->dict.buf == NULL) { @@ -1156,8 +1190,6 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props) } } - s->lzma.len = 0; - s->lzma2.sequence = SEQ_CONTROL; s->lzma2.need_dict_reset = true; @@ -1173,3 +1205,140 @@ XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s) kfree(s); } + +#ifdef XZ_DEC_MICROLZMA +/* This is a wrapper struct to have a nice struct name in the public API. */ +struct xz_dec_microlzma { + struct xz_dec_lzma2 s; +}; + +enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s_ptr, + struct xz_buf *b) +{ + struct xz_dec_lzma2 *s = &s_ptr->s; + + /* + * sequence is SEQ_PROPERTIES before the first input byte, + * SEQ_LZMA_PREPARE until a total of five bytes have been read, + * and SEQ_LZMA_RUN for the rest of the input stream. + */ + if (s->lzma2.sequence != SEQ_LZMA_RUN) { + if (s->lzma2.sequence == SEQ_PROPERTIES) { + /* One byte is needed for the props. */ + if (b->in_pos >= b->in_size) + return XZ_OK; + + /* + * Don't increment b->in_pos here. The same byte is + * also passed to rc_read_init() which will ignore it. + */ + if (!lzma_props(s, ~b->in[b->in_pos])) + return XZ_DATA_ERROR; + + s->lzma2.sequence = SEQ_LZMA_PREPARE; + } + + /* + * xz_dec_microlzma_reset() doesn't validate the compressed + * size so we do it here. We have to limit the maximum size + * to avoid integer overflows in lzma2_lzma(). 3 GiB is a nice + * round number and much more than users of this code should + * ever need. + */ + if (s->lzma2.compressed < RC_INIT_BYTES + || s->lzma2.compressed > (3U << 30)) + return XZ_DATA_ERROR; + + if (!rc_read_init(&s->rc, b)) + return XZ_OK; + + s->lzma2.compressed -= RC_INIT_BYTES; + s->lzma2.sequence = SEQ_LZMA_RUN; + + dict_reset(&s->dict, b); + } + + /* This is to allow increasing b->out_size between calls. */ + if (DEC_IS_SINGLE(s->dict.mode)) + s->dict.end = b->out_size - b->out_pos; + + while (true) { + dict_limit(&s->dict, min_t(size_t, b->out_size - b->out_pos, + s->lzma2.uncompressed)); + + if (!lzma2_lzma(s, b)) + return XZ_DATA_ERROR; + + s->lzma2.uncompressed -= dict_flush(&s->dict, b); + + if (s->lzma2.uncompressed == 0) { + if (s->lzma2.pedantic_microlzma) { + if (s->lzma2.compressed > 0 || s->lzma.len > 0 + || !rc_is_finished(&s->rc)) + return XZ_DATA_ERROR; + } + + return XZ_STREAM_END; + } + + if (b->out_pos == b->out_size) + return XZ_OK; + + if (b->in_pos == b->in_size + && s->temp.size < s->lzma2.compressed) + return XZ_OK; + } +} + +struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode, + uint32_t dict_size) +{ + struct xz_dec_microlzma *s; + + /* Restrict dict_size to the same range as in the LZMA2 code. */ + if (dict_size < 4096 || dict_size > (3U << 30)) + return NULL; + + s = kmalloc(sizeof(*s), GFP_KERNEL); + if (s == NULL) + return NULL; + + s->s.dict.mode = mode; + s->s.dict.size = dict_size; + + if (DEC_IS_MULTI(mode)) { + s->s.dict.end = dict_size; + + s->s.dict.buf = vmalloc(dict_size); + if (s->s.dict.buf == NULL) { + kfree(s); + return NULL; + } + } + + return s; +} + +void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, uint32_t comp_size, + uint32_t uncomp_size, int uncomp_size_is_exact) +{ + /* + * comp_size is validated in xz_dec_microlzma_run(). + * uncomp_size can safely be anything. + */ + s->s.lzma2.compressed = comp_size; + s->s.lzma2.uncompressed = uncomp_size; + s->s.lzma2.pedantic_microlzma = uncomp_size_is_exact; + + s->s.lzma2.sequence = SEQ_PROPERTIES; + s->s.temp.size = 0; +} + +void xz_dec_microlzma_end(struct xz_dec_microlzma *s) +{ + if (DEC_IS_MULTI(s->s.dict.mode)) + vfree(s->s.dict.buf); + + kfree(s); +} +#endif |