/* * LZW algorithm as used in the IBM .DSK floppy disk image format. * Uses a fixed 12bit code size and an LRU dictionary entry replacement policy. * The only special code is 0 and it's used to represent the end of the * compressed stream. Single byte strings (0x00 to 0xff) occupy codes from * 1 to 256. */ #include "lzw.h" #include #include #include #include #include #include void lzw_init(struct lzw_ctx* c) { uint16_t i; c->input_buffer = 0; c->dict[0].usecount = 256; c->dict[0].length = 0; c->dict[0].prefix = c->dict[0].lru_prev = c->dict[0].lru_next = 0; for (i = 1; i <= 256; i++) { c->dict[i].length = 1; c->dict[i].prefix = 0; c->dict[i].last = i - 1; c->dict[i].first = i - 1; c->dict[i].usecount = 1; /* so that this code will never end up in the lru list */ c->dict[i].lru_prev = c->dict[i].lru_next = 0; } for (i = 257; i < 4096; i++) { c->dict[i].length = 0; c->dict[i].prefix = 0; c->dict[i].last = 0; c->dict[i].first = 0; c->dict[i].usecount = 0; c->dict[i].lru_prev = (i == 257 ? 0 : i - 1); c->dict[i].lru_next = (i == 4095 ? 0 : i + 1); } c->lru_head = 257; c->lru_tail = 4095; c->last_emitted_code = 0; c->output_buffer_start = 0; c->output_buffer_used = 0; c->input_code = c->input_code_bits = 0; c->uncompressed_bytes_processed = c->compressed_bytes_processed = c->codes_processed = 0; c->eos = 0; } /* Remove code from the lru list */ static void lzw_lru_unlink(struct lzw_ctx* c, uint16_t code) { uint16_t next, prev; next = c->dict[code].lru_next; prev = c->dict[code].lru_prev; if (prev != 0) c->dict[prev].lru_next = next; else c->lru_head = next; if (next != 0) c->dict[next].lru_prev = prev; else c->lru_tail = prev; } /* Add the given code to the back of the lru list. The code must be removed from the list before calling this function. */ static void lzw_lru_append(struct lzw_ctx* c, uint16_t code) { c->dict[code].lru_next = 0; c->dict[code].lru_prev = c->lru_tail; c->dict[c->lru_tail].lru_next = code; c->lru_tail = code; } #ifdef DEBUG static void lzw_print_lru(struct lzw_ctx* c) { printf("head=%03x tail=%03x\n", c->lru_head, c->lru_tail); for (int i = 0; i < 4096; i++) printf("[%03x] next=%03x prev=%03x\n", i, c->dict[i].lru_next, c->dict[i].lru_prev); } static void lzw_validate_lru(struct lzw_ctx* c) { for (uint16_t code = c->lru_head; code != 0; code = c->dict[code].lru_next) { printf("%03x", code); if (c->dict[code].usecount != 0) putchar('!'); if (c->dict[code].lru_next != 0 && c->dict[c->dict[code].lru_next].lru_prev != code) putchar('>'); putchar(' '); } putchar('\n'); } #endif static int lzw_build_entry(struct lzw_ctx* c, uint16_t code) { uint16_t pos; pos = c->lru_head; lzw_lru_unlink(c, pos); if (c->dict[pos].prefix != 0) { c->dict[c->dict[pos].prefix].usecount--; if (c->dict[c->dict[pos].prefix].usecount == 0) lzw_lru_append(c, c->dict[pos].prefix); } c->dict[pos].prefix = c->last_emitted_code; c->dict[pos].last = c->dict[(code == pos ? c->dict[pos].prefix : code)].first; c->dict[pos].first = c->dict[c->dict[pos].prefix].first; c->dict[pos].length = c->dict[c->dict[pos].prefix].length + 1; c->dict[pos].usecount = 0; if (c->dict[c->dict[pos].prefix].usecount == 0) lzw_lru_unlink(c, c->dict[pos].prefix); c->dict[c->dict[pos].prefix].usecount++; lzw_lru_append(c, pos); return pos; } #define MIN(a, b) ((a)<(b)?(a):(b)) int lzw_decompress(struct lzw_ctx* c, uint8_t* src, size_t src_size, size_t* src_consumed, uint8_t* dst, size_t dst_size, size_t* dst_consumed) { int bytes_written = 0; uint16_t code; uint16_t code_len, current_code; /* First, flush out any remaining data in the string buffer */ /* NOLINTBEGIN(clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling) */ if (c->output_buffer_used > 0) { bytes_written = MIN(c->output_buffer_used, dst_size - *dst_consumed); memcpy(&dst[*dst_consumed], &c->output_buffer[c->output_buffer_start], bytes_written); c->output_buffer_used -= bytes_written; c->output_buffer_start += bytes_written; c->uncompressed_bytes_processed += bytes_written; *dst_consumed += bytes_written; } if (*dst_consumed >= dst_size) return bytes_written; /* NOLINTEND(clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling) */ while (*src_consumed < src_size && *dst_consumed < dst_size && c->eos == 0) { /* Get next 12bit code from the input buffer */ while (c->input_code_bits < 12 && *src_consumed < src_size) { c->input_code = (c->input_code << 8) | src[(*src_consumed)++]; c->input_code_bits += 8; c->compressed_bytes_processed++; } if (c->input_code_bits < 12) return bytes_written; code = (c->input_code >> (c->input_code_bits - 12)) & 0x0fff; c->input_code_bits -= 12; c->codes_processed++; if (code == 0) { c->eos = 1; return bytes_written; } /* Build the new dictionary entry */ if (c->last_emitted_code != 0) lzw_build_entry(c, code); /* Output the corresponding string */ c->output_buffer_start = 0; for (code_len = c->dict[code].length, current_code = code; current_code != 0 && code_len != 0; code_len--, current_code = c->dict[current_code].prefix) { if (code_len <= dst_size - *dst_consumed) { dst[*dst_consumed + code_len - 1] = c->dict[current_code].last; bytes_written++; c->uncompressed_bytes_processed++; } else { c->output_buffer[code_len - (dst_size - *dst_consumed) - 1] = c->dict[current_code].last; c->output_buffer_used++; } } if (c->dict[code].length < dst_size - *dst_consumed) *dst_consumed += c->dict[code].length; else *dst_consumed = dst_size; c->last_emitted_code = code; } return bytes_written; }