183 lines
4.8 KiB
C
183 lines
4.8 KiB
C
/*
 * LZW algorithm as used in the IBM .DSK floppy disk image format.
 * Uses a fixed 12-bit code size and an LRU dictionary entry replacement policy.
 */
|
|
|
|
#include "lzw.h"
|
|
#include <stdint.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <sys/types.h>
|
|
|
|
void lzw_init(struct lzw_ctx* c)
|
|
{
|
|
int i;
|
|
|
|
c->coffset = c->doffset = c->codes = 0L;
|
|
c->input_buffer = 0;
|
|
c->dict[0].usecount = c->dict[0].length = 0;
|
|
c->dict[0].prefix = c->dict[0].lru_prev = c->dict[0].lru_next = 0;
|
|
for (i = 1; i <= 256; i++) {
|
|
c->dict[i].length = 1;
|
|
c->dict[i].prefix = 0;
|
|
c->dict[i].last = i - 1;
|
|
c->dict[i].first = i - 1;
|
|
c->dict[i].usecount = 1; /* so that this code will never end up in the lru list */
|
|
c->dict[i].lru_prev = c->dict[i].lru_next = 0;
|
|
}
|
|
for (i = 257; i < 4096; i++) {
|
|
c->dict[i].length = 0;
|
|
c->dict[i].prefix = 0;
|
|
c->dict[i].last = 0;
|
|
c->dict[i].first = 0;
|
|
c->dict[i].usecount = 0;
|
|
c->dict[i].lru_prev = (i == 257 ? 0 : i - 1);
|
|
c->dict[i].lru_next = (i == 4095 ? 0 : i + 1);
|
|
}
|
|
c->lru_head = 257;
|
|
c->lru_tail = 4095;
|
|
c->last_emitted_code = 0;
|
|
}
|
|
|
|
/* Remove code from the lru list */
|
|
static void lzw_lru_unlink(struct lzw_ctx* c, uint16_t code)
|
|
{
|
|
uint16_t next, prev;
|
|
|
|
next = c->dict[code].lru_next;
|
|
prev = c->dict[code].lru_prev;
|
|
if (prev != 0)
|
|
c->dict[prev].lru_next = next;
|
|
else
|
|
c->lru_head = next;
|
|
if (next != 0)
|
|
c->dict[next].lru_prev = prev;
|
|
else
|
|
c->lru_tail = prev;
|
|
}
|
|
|
|
/* Add the given code to the back of the lru list. The code must be removed from the list before calling this function. */
|
|
static void lzw_lru_append(struct lzw_ctx* c, uint16_t code)
|
|
{
|
|
c->dict[code].lru_next = 0;
|
|
c->dict[code].lru_prev = c->lru_tail;
|
|
c->dict[c->lru_tail].lru_next = code;
|
|
c->lru_tail = code;
|
|
}
|
|
|
|
#if 0
|
|
static void lzw_print_lru(struct lzw_ctx* c)
|
|
{
|
|
printf("head=%03x tail=%03x\n", c->lru_head, c->lru_tail);
|
|
for (int i = 0; i < 4096; i++)
|
|
printf("[%03x] next=%03x prev=%03x\n", i, c->dict[i].lru_next,
|
|
c->dict[i].lru_prev);
|
|
}
|
|
|
|
static void lzw_validate_lru(struct lzw_ctx* c)
|
|
{
|
|
for (uint16_t code = c->lru_head; code != 0; code = c->dict[code].lru_next) {
|
|
printf("%03x", code);
|
|
if (c->dict[code].usecount != 0)
|
|
putchar('!');
|
|
if (c->dict[code].lru_next != 0 && c->dict[c->dict[code].lru_next].lru_prev != code)
|
|
putchar('>');
|
|
putchar(' ');
|
|
}
|
|
putchar('\n');
|
|
}
|
|
#endif
|
|
|
|
static int lzw_build_entry(struct lzw_ctx* c, uint16_t code)
|
|
{
|
|
int pos;
|
|
|
|
pos = c->lru_head;
|
|
lzw_lru_unlink(c, pos);
|
|
|
|
if (c->dict[pos].prefix != 0) {
|
|
c->dict[c->dict[pos].prefix].usecount--;
|
|
if (c->dict[c->dict[pos].prefix].usecount == 0)
|
|
lzw_lru_append(c, c->dict[pos].prefix);
|
|
}
|
|
|
|
c->dict[pos].prefix = c->last_emitted_code;
|
|
c->dict[pos].last = c->dict[(code == pos ? c->dict[pos].prefix : code)].first;
|
|
c->dict[pos].first = c->dict[c->dict[pos].prefix].first;
|
|
c->dict[pos].length = c->dict[c->dict[pos].prefix].length + 1;
|
|
c->dict[pos].usecount = 0;
|
|
|
|
if (c->dict[c->dict[pos].prefix].usecount == 0)
|
|
lzw_lru_unlink(c, c->dict[pos].prefix);
|
|
c->dict[c->dict[pos].prefix].usecount++;
|
|
lzw_lru_append(c, pos);
|
|
|
|
return pos;
|
|
}
|
|
|
|
static void lzw_emit(struct lzw_ctx* c, uint16_t code, FILE* f)
|
|
{
|
|
int i;
|
|
uint16_t t;
|
|
|
|
for (i = c->dict[code].length, t = code; t != 0 && i > 0; i--, t = c->dict[t].prefix)
|
|
c->output_buffer[i - 1] = c->dict[t].last;
|
|
fwrite(c->output_buffer, c->dict[code].length, 1, f);
|
|
|
|
c->doffset += c->dict[code].length;
|
|
c->last_emitted_code = code;
|
|
}
|
|
|
|
static int lzw_get_next_code(struct lzw_ctx* c, FILE* f)
|
|
{
|
|
uint16_t result;
|
|
int next_byte;
|
|
|
|
next_byte = fgetc(f);
|
|
if (next_byte == EOF)
|
|
return -1;
|
|
c->coffset++;
|
|
|
|
if ((c->codes & 1) == 0) {
|
|
c->input_buffer = fgetc(f);
|
|
if (c->input_buffer == EOF)
|
|
return -1;
|
|
c->coffset++;
|
|
result = (next_byte << 4) | (c->input_buffer >> 4);
|
|
} else {
|
|
result = ((c->input_buffer & 0x0f) << 8) | next_byte;
|
|
}
|
|
|
|
c->codes++;
|
|
return result & 0x0fff;
|
|
}
|
|
|
|
int lzw_decompress_file(FILE* infp, FILE* outfp)
|
|
{
|
|
struct lzw_ctx* ctx;
|
|
int code;
|
|
|
|
ctx = (struct lzw_ctx*)malloc(sizeof(struct lzw_ctx));
|
|
if (ctx == NULL)
|
|
return -1;
|
|
|
|
lzw_init(ctx);
|
|
|
|
while ((code = lzw_get_next_code(ctx, infp)) != 0) {
|
|
if (ctx->last_emitted_code != 0)
|
|
lzw_build_entry(ctx, code);
|
|
lzw_emit(ctx, code, outfp);
|
|
}
|
|
|
|
/*
|
|
printf(
|
|
"\nEnd of file after reading %ld bytes (%ld symbols) and writing %ld "
|
|
"bytes\n",
|
|
ctx.coffset, ctx.codes, ctx.doffset);
|
|
*/
|
|
free(ctx);
|
|
|
|
return 0;
|
|
}
|