dsk/lzw.c

184 lines
5.6 KiB
C

/*
* LZW algorithm as used in the IBM .DSK floppy disk image format.
* Uses a fixed 12-bit code size and an LRU policy for replacing dictionary
* entries. The only special code is 0, which marks the end of the
* compressed stream. Single-byte strings (0x00 to 0xff) occupy codes
* 1 to 256.
*/
#include "lzw.h"
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
/*
 * Put a decoder context into its start-of-stream state.
 *
 * Code 0 is the end-of-stream marker and owns no string. Codes 1..256 are
 * the fixed single-byte strings 0x00..0xff. Codes 257..4095 start out empty
 * and are chained, in ascending order, into the LRU list from which new
 * dictionary slots are taken.
 */
void lzw_init(struct lzw_ctx* c)
{
    enum {
        LITERAL_LO = 1,
        LITERAL_HI = 256,
        DYNAMIC_LO = 257,
        DYNAMIC_HI = 4095
    };
    uint16_t code;

    /* Bit reader, pending-output buffer and stream flags. */
    c->input_buffer = 0;
    c->input_code_bits = 0;
    c->input_code = 0;
    c->output_buffer_start = 0;
    c->output_buffer_used = 0;
    c->last_emitted_code = 0;
    c->eos = 0;

    /* Code 0: end-of-stream marker, also the "no link" value in the list. */
    c->dict[0].usecount = 256;
    c->dict[0].length = 0;
    c->dict[0].lru_next = 0;
    c->dict[0].lru_prev = 0;
    c->dict[0].prefix = 0;

    /* Fixed single-byte entries. */
    code = LITERAL_LO;
    while (code <= LITERAL_HI) {
        c->dict[code].length = 1;
        c->dict[code].prefix = 0;
        c->dict[code].first = code - 1;
        c->dict[code].last = code - 1;
        /* A non-zero use count keeps these codes out of the LRU list. */
        c->dict[code].usecount = 1;
        c->dict[code].lru_next = 0;
        c->dict[code].lru_prev = 0;
        code++;
    }

    /* Empty dynamic entries, linked to their numeric neighbours. */
    code = DYNAMIC_LO;
    while (code <= DYNAMIC_HI) {
        c->dict[code].length = 0;
        c->dict[code].prefix = 0;
        c->dict[code].first = 0;
        c->dict[code].last = 0;
        c->dict[code].usecount = 0;
        c->dict[code].lru_prev = code - 1;
        c->dict[code].lru_next = code + 1;
        code++;
    }
    /* Terminate both ends of the chain with the 0 "no link" value. */
    c->dict[DYNAMIC_LO].lru_prev = 0;
    c->dict[DYNAMIC_HI].lru_next = 0;

    c->lru_head = DYNAMIC_LO;
    c->lru_tail = DYNAMIC_HI;
}
/*
 * Detach `code` from the LRU list.
 *
 * A link value of 0 means "no neighbour": when the entry has no predecessor
 * the list head moves to its successor, and when it has no successor the
 * list tail moves to its predecessor. The entry's own lru_next/lru_prev
 * fields are left as they were.
 */
static void lzw_lru_unlink(struct lzw_ctx* c, uint16_t code)
{
    const uint16_t before = c->dict[code].lru_prev;
    const uint16_t after = c->dict[code].lru_next;

    if (before == 0)
        c->lru_head = after;
    else
        c->dict[before].lru_next = after;

    if (after == 0)
        c->lru_tail = before;
    else
        c->dict[after].lru_prev = before;
}
/*
 * Add the given code to the back of the LRU list. The code must not be on
 * the list when this function is called.
 *
 * A tail of 0 means the list is currently empty. In that case the new entry
 * also becomes the head; dictionary slot 0 (the end-of-stream code) is never
 * written to, so it cannot later be picked up as a list member.
 */
static void lzw_lru_append(struct lzw_ctx* c, uint16_t code)
{
    const uint16_t old_tail = c->lru_tail;

    c->dict[code].lru_next = 0;
    c->dict[code].lru_prev = old_tail;

    if (old_tail != 0)
        c->dict[old_tail].lru_next = code;
    else
        c->lru_head = code; /* first element of a previously empty list */

    c->lru_tail = code;
}
#ifdef DEBUG
/*
 * Debug aid: print the LRU head and tail, then the lru_next/lru_prev links
 * of every dictionary slot (0..4095), whether or not it is on the list.
 */
static void lzw_print_lru(struct lzw_ctx* c)
{
    int slot = 0;

    printf("head=%03x tail=%03x\n", c->lru_head, c->lru_tail);
    while (slot < 4096) {
        printf("[%03x] next=%03x prev=%03x\n", slot, c->dict[slot].lru_next,
               c->dict[slot].lru_prev);
        slot++;
    }
}
/*
 * Debug aid: walk the LRU list from head to tail and print each code on one
 * line. A code is followed by '!' when its use count is non-zero and by '>'
 * when its successor's lru_prev does not point back at it.
 */
static void lzw_validate_lru(struct lzw_ctx* c)
{
    uint16_t code = c->lru_head;

    while (code != 0) {
        const uint16_t succ = c->dict[code].lru_next;

        printf("%03x", code);
        if (c->dict[code].usecount != 0)
            putchar('!');
        if (succ != 0 && c->dict[succ].lru_prev != code)
            putchar('>');
        putchar(' ');

        code = succ;
    }
    putchar('\n');
}
#endif
/*
 * Create the dictionary entry "string of last_emitted_code + first byte of
 * the string of `code`" in the slot at the head of the LRU list, and move
 * that slot to the back of the list.
 *
 * c    - decoder context; c->last_emitted_code becomes the new entry's prefix.
 * code - the code that has just been read from the stream.
 *
 * Returns the dictionary index of the slot that was (re)defined.
 *
 * The usecount field is decremented for the slot's previous prefix and
 * incremented for its new prefix, so it presumably counts the entries that
 * use a code as their prefix (TODO confirm against lzw.h). A code whose count
 * drops to 0 is put on the LRU list; one whose count leaves 0 is taken off.
 *
 * NOTE(review): if the slot taken from the list head is last_emitted_code
 * itself, the entry is made its own prefix and is unlinked a second time by
 * the usecount == 0 check below. Whether a stream can reach that state has
 * not been verified here.
 */
static int lzw_build_entry(struct lzw_ctx* c, uint16_t code)
{
uint16_t pos;
/* Take the slot at the front of the LRU list. */
pos = c->lru_head;
lzw_lru_unlink(c, pos);
/* If the slot held an entry before, release its reference on the old prefix;
   a prefix whose count reaches 0 goes onto the LRU list. */
if (c->dict[pos].prefix != 0) {
c->dict[c->dict[pos].prefix].usecount--;
if (c->dict[c->dict[pos].prefix].usecount == 0)
lzw_lru_append(c, c->dict[pos].prefix);
}
c->dict[pos].prefix = c->last_emitted_code;
/* The appended byte is the first byte of `code`'s string. When `code` is the
   very slot being defined, the first byte of the prefix string is used instead. */
c->dict[pos].last = c->dict[(code == pos ? c->dict[pos].prefix : code)].first;
c->dict[pos].first = c->dict[c->dict[pos].prefix].first;
c->dict[pos].length = c->dict[c->dict[pos].prefix].length + 1;
c->dict[pos].usecount = 0;
/* A new prefix with a zero count is still on the LRU list: take it off before
   counting this entry as one of its users. */
if (c->dict[c->dict[pos].prefix].usecount == 0)
lzw_lru_unlink(c, c->dict[pos].prefix);
c->dict[c->dict[pos].prefix].usecount++;
/* The freshly defined entry goes to the back of the list. */
lzw_lru_append(c, pos);
return pos;
}
/*
 * Decode as much of `src` into `dst` as both buffers allow.
 *
 * c         - decoder context set up by lzw_init(); carries the dictionary,
 *             the partially read code and any decoded bytes that did not fit
 *             into `dst` on an earlier call.
 * src       - compressed input bytes.
 * src_count - in: number of bytes available in `src`;
 *             out: decremented once for every input byte consumed.
 * dst       - destination for decoded bytes.
 * dst_count - in: free space in `dst`;
 *             out: decremented once for every byte stored.
 *
 * Returns the number of bytes stored in `dst` by this call. Returns 0
 * without touching anything once the end-of-stream code (0) has been seen
 * by an earlier call.
 *
 * Decoding stops when the input runs out (possibly in the middle of a code;
 * the bits read so far are kept in the context), when `dst` is full, or when
 * code 0 is read.
 */
int lzw_decompress(struct lzw_ctx* c, uint8_t* src, size_t* src_count, uint8_t* dst, size_t* dst_count)
{
    /*
     * Number of slots in c->output_buffer that this function may touch. The
     * flush loop below wraps its read index at this value, so writes are kept
     * inside the same range.
     */
    enum { PENDING_SLOTS = 4096 };
    int bytes_read = 0;
    int bytes_written = 0;
    uint16_t code;
    uint16_t i, t;

    if (c->eos != 0)
        return 0;

    /* First, hand out decoded bytes left over from the previous call. */
    while ((c->output_buffer_used > 0) && (*dst_count > 0)) {
        dst[bytes_written++] = c->output_buffer[c->output_buffer_start];
        c->output_buffer_start = (c->output_buffer_start + 1) % PENDING_SLOTS;
        c->output_buffer_used--;
        --*dst_count;
    }

    while ((*src_count > 0) && (*dst_count > 0)) {
        /* Collect input bytes until a whole 12-bit code is available. */
        while ((c->input_code_bits < 12) && (*src_count > 0)) {
            c->input_code = (c->input_code << 8) | src[bytes_read++];
            c->input_code_bits += 8;
            --*src_count;
        }
        if (c->input_code_bits < 12)
            return bytes_written; /* input ended in the middle of a code */

        code = (c->input_code >> (c->input_code_bits - 12)) & 0x0fff;
        c->input_code_bits -= 12;
        /*
         * Keep only the bits that have not been consumed yet. Without this the
         * used bits stay in the accumulator and are shifted further left on
         * every refill (the field's type is declared in lzw.h and is not
         * visible here, so it is not safe to rely on them falling off the top).
         */
        c->input_code &= (1u << c->input_code_bits) - 1u;

        if (code == 0) {
            c->eos = 1;
            return bytes_written;
        }

        /* Every code after the first one defines a new dictionary entry. */
        if (c->last_emitted_code != 0)
            lzw_build_entry(c, code);

        /*
         * Emit the string for `code`. The prefix chain yields the bytes last to
         * first, so position i (1-based) is written directly to its final place:
         * the part that fits goes into dst, the remainder into the pending buffer
         * starting at slot 0.
         */
        c->output_buffer_start = 0;
        for (i = c->dict[code].length, t = code; t != 0 && i != 0; i--, t = c->dict[t].prefix) {
            if (i <= *dst_count) {
                dst[bytes_written + i - 1] = c->dict[t].last;
            } else {
                const size_t slot = i - *dst_count - 1;

                /*
                 * The length comes from dictionary state built out of the
                 * input stream; never let a corrupt entry write beyond the
                 * pending buffer.
                 */
                if (slot < PENDING_SLOTS)
                    c->output_buffer[slot] = c->dict[t].last;
            }
        }

        if (c->dict[code].length <= *dst_count) {
            c->output_buffer_used = 0;
            bytes_written += c->dict[code].length;
            *dst_count -= c->dict[code].length;
        } else {
            c->output_buffer_used = c->dict[code].length - *dst_count;
            bytes_written += *dst_count;
            *dst_count = 0;
        }
        c->last_emitted_code = code;
    }
    return bytes_written;
}