Taken from https://github.com/Cisco-Talos/clamav/commit/fb541aed643 and applied with a little fuzz to the 0.103.x branch. The bug is not actually exploitable on 0.103.x (according to the release notes), but we might as well apply the readily-available fix. diff --git a/libclamav/pdf.c b/libclamav/pdf.c index b8135f6..fe50146 100644 --- a/libclamav/pdf.c +++ b/libclamav/pdf.c @@ -441,7 +441,7 @@ int pdf_findobj_in_objstm(struct pdf_struct *pdf, struct objstm_struct *objstm, if (CL_SUCCESS != cli_strntol_wrap(index, bytes_remaining, 0, 10, &temp_long)) { /* Failed to find obj offset for next obj */ - cli_dbgmsg("pdf_findobj_in_objstm: Failed to find next obj offset for obj in object stream though there should be {%u} more.\n", objstm->n - objstm->nobjs_found); + cli_dbgmsg("pdf_findobj_in_objstm: Failed to find next obj offset for obj in object stream though there should be {%zu} more.\n", objstm->n - objstm->nobjs_found); status = CL_EPARSE; goto done; } else if (temp_long < 0) { @@ -1551,18 +1551,18 @@ cl_error_t pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t } } - cli_dbgmsg("pdf_extract_obj: calculated length %lld\n", (long long)length); + cli_dbgmsg("pdf_extract_obj: calculated length %zu\n", length); } else { if (obj->stream_size > (size_t)length + 2) { cli_dbgmsg("cli_pdf: calculated length %zu < %zu\n", - (size_t)length, obj->stream_size); + length, obj->stream_size); length = obj->stream_size; } } - if ((0 != orig_length) && (obj->stream_size > (size_t)orig_length + 20)) { - cli_dbgmsg("pdf_extract_obj: orig length: %lld, length: %lld, size: %zu\n", - (long long)orig_length, (long long)length, obj->stream_size); + if ((0 != orig_length) && (obj->stream_size > orig_length + 20)) { + cli_dbgmsg("pdf_extract_obj: orig length: %zu, length: %zu, size: %zu\n", + orig_length, length, obj->stream_size); pdfobj_flag(pdf, obj, BAD_STREAMLEN); } @@ -1613,18 +1613,18 @@ cl_error_t pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t */ dict_len = obj->stream - start; if (NULL != (pstr = pdf_getdict(start, &dict_len, "/Type/ObjStm"))) { - int32_t objstm_first = -1; - int32_t objstm_length = -1; - int32_t objstm_n = -1; + int objstm_first = -1; + int objstm_length = -1; + int objstm_n = -1; cli_dbgmsg("pdf_extract_obj: Found /Type/ObjStm\n"); dict_len = obj->stream - start; - if ((-1 == (objstm_first = pdf_readint(start, dict_len, "/First")))) { + if (-1 == (objstm_first = pdf_readint(start, dict_len, "/First"))) { cli_warnmsg("pdf_extract_obj: Failed to find offset of first object in object stream\n"); - } else if ((-1 == (objstm_length = pdf_readint(start, dict_len, "/Length")))) { + } else if (-1 == (objstm_length = pdf_readint(start, dict_len, "/Length"))) { cli_warnmsg("pdf_extract_obj: Failed to find length of object stream\n"); - } else if ((-1 == (objstm_n = pdf_readint(start, dict_len, "/N")))) { + } else if (-1 == (objstm_n = pdf_readint(start, dict_len, "/N"))) { cli_warnmsg("pdf_extract_obj: Failed to find num objects in object stream\n"); } else { /* Add objstm to pdf struct, so it can be freed eventually */ @@ -1646,19 +1646,19 @@ cl_error_t pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t memset(objstm, 0, sizeof(*objstm)); - objstm->first = (uint32_t)objstm_first; - objstm->current = (uint32_t)objstm_first; + objstm->first = (size_t)objstm_first; + objstm->current = (size_t)objstm_first; objstm->current_pair = 0; - objstm->length = (uint32_t)objstm_length; - objstm->n = (uint32_t)objstm_n; + objstm->length = (size_t)objstm_length; + objstm->n = (size_t)objstm_n; - cli_dbgmsg("pdf_extract_obj: ObjStm first obj at offset %d\n", objstm->first); - cli_dbgmsg("pdf_extract_obj: ObjStm length is %d bytes\n", objstm->length); - cli_dbgmsg("pdf_extract_obj: ObjStm should contain %d objects\n", objstm->n); + cli_dbgmsg("pdf_extract_obj: ObjStm first obj at offset %zu\n", objstm->first); + cli_dbgmsg("pdf_extract_obj: ObjStm length is %zu bytes\n", objstm->length); + cli_dbgmsg("pdf_extract_obj: ObjStm should contain %zu objects\n", objstm->n); } } - sum = pdf_decodestream(pdf, obj, dparams, obj->stream, (uint32_t)length, xref, fout, &rc, objstm); + sum = pdf_decodestream(pdf, obj, dparams, obj->stream, length, xref, fout, &rc, objstm); if ((CL_SUCCESS != rc) && (CL_VIRUS != rc)) { cli_dbgmsg("Error decoding stream! Error code: %d\n", rc); @@ -3341,7 +3341,7 @@ cl_error_t pdf_find_and_parse_objs_in_objstm(struct pdf_struct *pdf, struct objs retval = pdf_findobj_in_objstm(pdf, objstm, &obj); if (retval != CL_SUCCESS) { if (retval != CL_BREAK) { - cli_dbgmsg("pdf_find_and_parse_objs_in_objstm: Fewer objects in stream than expected: %u found, %u expected.\n", + cli_dbgmsg("pdf_find_and_parse_objs_in_objstm: Fewer objects in stream than expected: %zu found, %zu expected.\n", objstm->nobjs_found, objstm->n); badobjects++; pdf->stats.ninvalidobjs++; diff --git a/libclamav/pdf.h b/libclamav/pdf.h index 3a03f19..b5b69ce 100644 --- a/libclamav/pdf.h +++ b/libclamav/pdf.h @@ -25,14 +25,14 @@ #define PDF_FILTERLIST_MAX 64 struct objstm_struct { - uint32_t first; // offset of first obj - uint32_t current; // offset of current obj - uint32_t current_pair; // offset of current pair describing id, location of object - uint32_t length; // total length of all objects (starting at first) - uint32_t n; // number of objects that should be found in the object stream - uint32_t nobjs_found; // number of objects actually found in the object stream - char *streambuf; // address of stream buffer, beginning with first obj pair - size_t streambuf_len; // length of stream buffer, includes pairs followed by actual objects + size_t first; // offset of first obj + size_t current; // offset of current obj + size_t current_pair; // offset of current pair describing id, location of object + size_t length; // total length of all objects (starting at first) + size_t n; // number of objects that should be found in the object stream + size_t nobjs_found; // number of objects actually found in the object stream + char *streambuf; // address of stream buffer, beginning with first obj pair + size_t streambuf_len; // length of stream buffer, includes pairs followed by actual objects }; struct pdf_obj { diff --git a/libclamav/pdfdecode.c b/libclamav/pdfdecode.c index 473cfcd..92ba52d 100644 --- a/libclamav/pdfdecode.c +++ b/libclamav/pdfdecode.c @@ -73,7 +73,7 @@ struct pdf_token { uint32_t flags; /* tracking flags */ uint32_t success; /* successfully decoded filters */ - uint32_t length; /* length of current content; TODO: transition to size_t */ + size_t length; /* length of current content; TODO: transition to size_t */ uint8_t *content; /* content stream */ }; @@ -461,10 +461,16 @@ static cl_error_t filter_ascii85decode(struct pdf_struct *pdf, struct pdf_obj *o uint32_t declen = 0; const uint8_t *ptr = (uint8_t *)token->content; - uint32_t remaining = token->length; + size_t remaining = token->length; int quintet = 0, rc = CL_SUCCESS; uint64_t sum = 0; + /* Check for overflow */ + if (remaining > (SIZE_MAX / 4)) { + cli_dbgmsg("cli_pdf: ascii85decode: overflow detected\n"); + return CL_EFORMAT; + } + /* 5:4 decoding ratio, with 1:4 expansion sequences => (4*length)+1 */ if (!(dptr = decoded = (uint8_t *)cli_malloc((4 * remaining) + 1))) { cli_errmsg("cli_pdf: cannot allocate memory for decoded output\n"); @@ -851,8 +857,8 @@ static cl_error_t filter_asciihexdecode(struct pdf_struct *pdf, struct pdf_obj * uint8_t *decoded; const uint8_t *content = (uint8_t *)token->content; - uint32_t length = token->length; - uint32_t i, j; + size_t length = token->length; + size_t i, j; cl_error_t rc = CL_SUCCESS; if (!(decoded = (uint8_t *)cli_calloc(length / 2 + 1, sizeof(uint8_t)))) { @@ -882,8 +888,8 @@ static cl_error_t filter_asciihexdecode(struct pdf_struct *pdf, struct pdf_obj * if (rc == CL_SUCCESS) { free(token->content); - cli_dbgmsg("cli_pdf: deflated %lu bytes from %lu total bytes\n", - (unsigned long)j, (unsigned long)(token->length)); + cli_dbgmsg("cli_pdf: deflated %zu bytes from %zu total bytes\n", + j, token->length); token->content = decoded; token->length = j; @@ -891,8 +897,8 @@ static cl_error_t filter_asciihexdecode(struct pdf_struct *pdf, struct pdf_obj * if (!(obj->flags & ((1 << OBJ_IMAGE) | (1 << OBJ_TRUNCATED)))) pdfobj_flag(pdf, obj, BAD_ASCIIDECODE); - cli_dbgmsg("cli_pdf: error occurred parsing byte %lu of %lu\n", - (unsigned long)i, (unsigned long)(token->length)); + cli_dbgmsg("cli_pdf: error occurred parsing byte %zu of %zu\n", + i, token->length); free(decoded); } return rc; @@ -933,27 +939,29 @@ static cl_error_t filter_decrypt(struct pdf_struct *pdf, struct pdf_obj *obj, st return CL_EPARSE; /* TODO: what should this value be? CL_SUCCESS would mirror previous behavior */ } - cli_dbgmsg("cli_pdf: decrypted %zu bytes from %u total bytes\n", + cli_dbgmsg("cli_pdf: decrypted %zu bytes from %zu total bytes\n", length, token->length); free(token->content); token->content = (uint8_t *)decrypted; - token->length = (uint32_t)length; /* this may truncate unfortunately, TODO: use 64-bit values internally? */ + token->length = length; return CL_SUCCESS; } static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token) { uint8_t *decoded, *temp; - uint32_t declen = 0, capacity = 0; + size_t declen = 0, capacity = 0; uint8_t *content = (uint8_t *)token->content; uint32_t length = token->length; lzw_stream stream; int echg = 1, lzwstat, rc = CL_SUCCESS; - if (pdf->ctx && !(pdf->ctx->dconf->other & OTHER_CONF_LZW)) - return CL_BREAK; + if (pdf->ctx && !(pdf->ctx->dconf->other & OTHER_CONF_LZW)) { + rc = CL_BREAK; + goto done; + } if (params) { struct pdf_dict_node *node = params->nodes; @@ -984,15 +992,18 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, * Sample 0015315109, it has \r followed by zlib header. * Flag pdf as suspicious, and attempt to extract by skipping the \r. */ - if (!length) - return CL_SUCCESS; + if (!length) { + rc = CL_SUCCESS; + goto done; + } } capacity = INFLATE_CHUNK_SIZE; if (!(decoded = (uint8_t *)cli_malloc(capacity))) { cli_errmsg("cli_pdf: cannot allocate memory for decoded output\n"); - return CL_EMEM; + rc = CL_EMEM; + goto done; } memset(&stream, 0, sizeof(stream)); @@ -1007,7 +1018,8 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, if (lzwstat != Z_OK) { cli_warnmsg("cli_pdf: lzwInit failed\n"); free(decoded); - return CL_EMEM; + rc = CL_EMEM; + goto done; } /* initial inflate */ @@ -1022,16 +1034,23 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, length -= q - content; content = q; - stream.next_in = (Bytef *)content; - stream.avail_in = length; - stream.next_out = (Bytef *)decoded; + stream.next_in = (Bytef *)content; + stream.avail_in = length; + stream.next_out = (Bytef *)decoded; + /* Make sure we don't overflow during type conversion */ + if (capacity > UINT_MAX) { + cli_dbgmsg("cli_pdf: lzwdecode: overflow detected\n"); + rc = CL_EFORMAT; + goto done; + } stream.avail_out = capacity; lzwstat = lzwInit(&stream); if (lzwstat != Z_OK) { cli_warnmsg("cli_pdf: lzwInit failed\n"); free(decoded); - return CL_EMEM; + rc = CL_EMEM; + goto done; } pdfobj_flag(pdf, obj, BAD_FLATESTART); @@ -1044,7 +1063,7 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, /* extend output capacity if needed,*/ if (stream.avail_out == 0) { if ((rc = cli_checklimits("pdf", pdf->ctx, capacity + INFLATE_CHUNK_SIZE, 0, 0)) != CL_SUCCESS) { - cli_dbgmsg("cli_pdf: required buffer size to inflate compressed filter exceeds maximum: %u\n", capacity + INFLATE_CHUNK_SIZE); + cli_dbgmsg("cli_pdf: required buffer size to inflate compressed filter exceeds maximum: %zu\n", capacity + INFLATE_CHUNK_SIZE); break; } @@ -1056,7 +1075,17 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, decoded = temp; stream.next_out = decoded + capacity; stream.avail_out = INFLATE_CHUNK_SIZE; + if (declen > (SIZE_MAX - INFLATE_CHUNK_SIZE)) { + cli_dbgmsg("cli_pdf: lzwdecode: overflow detected\n"); + rc = CL_EFORMAT; + goto done; + } declen += INFLATE_CHUNK_SIZE; + if (capacity > (SIZE_MAX - INFLATE_CHUNK_SIZE)) { + cli_dbgmsg("cli_pdf: lzwdecode: overflow detected\n"); + rc = CL_EFORMAT; + goto done; + } capacity += INFLATE_CHUNK_SIZE; } @@ -1064,6 +1093,12 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, lzwstat = lzwInflate(&stream); } + if (declen > (UINT32_MAX - (INFLATE_CHUNK_SIZE - stream.avail_out))) { + cli_dbgmsg("cli_pdf: lzwdecode: overflow detected\n"); + rc = CL_EFORMAT; + goto done; + } + /* add stream end fragment to decoded length */ declen += (INFLATE_CHUNK_SIZE - stream.avail_out); @@ -1104,6 +1139,7 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, (void)lzwInflateEnd(&stream); +done: if (rc == CL_SUCCESS) { if (declen == 0) { cli_dbgmsg("cli_pdf: empty stream after inflation completed.\n");