Rework LZW code to work on memory rather than files

This commit is contained in:
Maurizio Porrato 2023-10-19 23:45:44 +01:00
parent e272387f8d
commit adf779af8f
6 changed files with 289 additions and 189 deletions

View File

@ -1,5 +1,5 @@
CC = clang
CFLAGS ?= -Wall -pedantic -std=c89 -O0 -g
CFLAGS ?= -Wall -Wextra -pedantic -std=c89 -Og -g
STRIP = strip
FORMAT = clang-format -i
BIN = dsk2img
@ -20,4 +20,4 @@ clean:
$(RM) $(BIN) $(TESTS) *.o *~ *%
format:
$(FORMAT) *.c
$(FORMAT) *.c *.h

View File

@ -2,6 +2,7 @@
export WATCOM=/opt/ow
PATH="$WATCOM/binl:$PATH"
export INCLUDE="$WATCOM/h"
CFLAGS=(-Wextra -std=c89 -O3 -g0 -s)
CFILES=(dsk2img.c lzw.c)

300
dsk2img.c
View File

@ -19,6 +19,9 @@
#define DIRSEP '/'
#endif
#define READ_BUFFER_SIZE 4096
#define DECOMPRESS_BUFFER_SIZE 4096
#pragma pack(1)
struct dskheader {
uint16_t magic; /* magic identifier (0x58aa, 0x59aa or 0x5aaa) */
@ -127,8 +130,7 @@ void pad_file_to_size(FILE* f, unsigned long size)
int valid_header(const struct dskheader* h)
{
unsigned int final_sectors;
unsigned long int final_bytes;
unsigned long int final_sectors, cluster_sectors, final_bytes;
if (!(h->magic == MAGIC_DSK_COMPRESSED || h->magic == MAGIC_DSK_UNCOMPRESSED || h->magic == MAGIC_DSK_OLD))
return 0;
@ -144,6 +146,7 @@ int valid_header(const struct dskheader* h)
return 0;
final_sectors = h->cylinders * h->heads * h->sectors;
cluster_sectors = ((unsigned long int)h->imageclusters) << h->clustershift;
final_bytes = final_sectors * h->sectorsize;
if (final_bytes > 4000000UL)
@ -153,7 +156,7 @@ int valid_header(const struct dskheader* h)
return 0;
if (h->reservedsectors + h->fatcopies * h->sectorsperfat != h->rootdirsector)
return 0;
if (h->firstclustersector - 1 + (h->imageclusters << h->clustershift) != final_sectors)
if (h->firstclustersector - 1 + cluster_sectors != final_sectors)
return 0;
if (h->firstclustersector != h->reservedsectors + h->fatcopies * h->sectorsperfat + h->rootentries * 32 / h->sectorsize)
return 0;
@ -165,7 +168,7 @@ int valid_header(const struct dskheader* h)
return 1;
}
#if 0
#ifdef DEBUG
void dump_dsk_header(struct dskheader* h)
{
printf("=== header =====================================\n");
@ -193,22 +196,163 @@ void dump_dsk_header(struct dskheader* h)
}
#endif
/*
 * Add the contents of a buffer to the running image checksum.
 *
 * The data is summed as a sequence of 16-bit little-endian words. When size
 * is odd, the final unpaired byte is added shifted left by 8 bits.
 *
 * sum:    running checksum, updated in place
 * buffer: data to add to the checksum
 * size:   number of bytes in buffer
 *
 * Words are assembled byte by byte instead of being read through a
 * uint16_t pointer, so the result does not depend on the host byte order
 * or on the alignment of buffer.
 */
void update_checksum(uint32_t* sum, uint8_t* buffer, size_t size)
{
    size_t i;

    for (i = 0; i + 1 < size; i += 2)
        *sum += (uint32_t)buffer[i] | ((uint32_t)buffer[i + 1] << 8);
    if (size & 1)
        *sum += (uint32_t)buffer[size - 1] << 8;
}
/*
 * Copy exactly size bytes of image data from fin to fout, adding every
 * copied byte to *checksum via update_checksum().
 *
 * fin:      input stream, positioned at the start of the image data
 * fout:     output stream
 * size:     number of bytes to copy
 * checksum: running checksum, updated in place
 *
 * Returns 0 on success, -1 if the transfer buffer cannot be allocated,
 * -2 if the input fails or ends before size bytes were read, and -3 on a
 * short write.
 */
int copy_image_data(FILE* fin, FILE* fout, size_t size, uint32_t* checksum)
{
    size_t remaining, chunk, rres, wres;
    uint8_t* buffer;
    int result = 0;

    buffer = (uint8_t*)malloc(READ_BUFFER_SIZE);
    if (buffer == NULL) {
        perror("malloc()");
        return -1;
    }

    for (remaining = size; remaining > 0; remaining -= rres) {
        /* Never ask for more than what is left, otherwise bytes that follow
           the image data would be copied and checksummed as well. */
        chunk = remaining < READ_BUFFER_SIZE ? remaining : READ_BUFFER_SIZE;
        rres = fread(buffer, 1, chunk, fin);
        if (rres == 0) {
            /* errno is only meaningful when the stream reports an error */
            if (ferror(fin))
                perror("fread()");
            else
                fputs("fread(): unexpected end of file\n", stderr);
            result = -2;
            break;
        }
        update_checksum(checksum, buffer, rres);
        wres = fwrite(buffer, 1, rres, fout);
        if (rres != wres) {
            perror("fwrite()");
            result = -3;
            break;
        }
    }

    free(buffer);
    return result;
}
/*
 * Copy everything from the current position of fin up to end of file to
 * fout without interpreting it.
 *
 * fin:  input stream
 * fout: output stream
 *
 * Returns 0 on success (including when there is nothing left to copy),
 * -1 if the transfer buffer cannot be allocated, -2 on a read error and
 * -3 on a short write.
 */
int copy_compressed_image_data(FILE* fin, FILE* fout)
{
    size_t rres, wres;
    uint8_t* buffer;
    int result = 0;

    buffer = (uint8_t*)malloc(READ_BUFFER_SIZE);
    if (buffer == NULL) {
        perror("malloc()");
        return -1;
    }

    for (;;) {
        rres = fread(buffer, 1, READ_BUFFER_SIZE, fin);
        if (rres > 0) {
            wres = fwrite(buffer, 1, rres, fout);
            if (rres != wres) {
                perror("fwrite()");
                result = -3;
                break;
            }
        }
        if (rres < READ_BUFFER_SIZE) {
            /* A short (possibly empty) read is the normal way to reach the
               end of the input; it is a failure only if the stream says so.
               This also covers inputs whose length is an exact multiple of
               the buffer size, where the last read returns 0 bytes. */
            if (ferror(fin)) {
                perror("fread()");
                result = -2;
            }
            break;
        }
    }

    free(buffer);
    return result;
}
/*
 * Write count bytes of decompressed data to fout and add them to *checksum.
 * Returns 0 on success, -1 on a short write.
 */
static int flush_decompressed(FILE* fout, uint8_t* data, size_t count, uint32_t* checksum)
{
    update_checksum(checksum, data, count);
    if (fwrite(data, 1, count, fout) != count) {
        perror("fwrite()");
        return -1;
    }
    return 0;
}

/*
 * Decompress LZW data read from fin and write the result to fout, adding
 * the decompressed bytes to *checksum.
 *
 * fin:      input stream, positioned at the start of the compressed data
 * fout:     output stream
 * size:     expected size in bytes of the decompressed data
 * checksum: running checksum, updated in place
 *
 * Decompression stops when the decoder reports end of stream, when at
 * least size bytes have been written, or when the input is exhausted.
 * A warning is printed if the amount written differs from size.
 *
 * Returns 0 on completion, -1/-2/-3 if the input buffer, the output buffer
 * or the decoder context cannot be allocated, -4 on a read error and -5 on
 * a short write.
 */
int decompress_image_data(FILE* fin, FILE* fout, size_t size, uint32_t* checksum)
{
    unsigned long int copied_bytes = 0;
    size_t to_decompress = 0, space = DECOMPRESS_BUFFER_SIZE;
    size_t available;
    uint8_t *buffer = NULL, *decompress_buffer = NULL;
    uint8_t *pin, *pout;
    struct lzw_ctx* ctx = NULL;
    int result = 0;

    buffer = (uint8_t*)malloc(READ_BUFFER_SIZE);
    if (buffer == NULL) {
        perror("malloc()");
        result = -1;
        goto cleanup;
    }

    decompress_buffer = (uint8_t*)malloc(DECOMPRESS_BUFFER_SIZE);
    if (decompress_buffer == NULL) {
        perror("malloc()");
        result = -2;
        goto cleanup;
    }

    ctx = (struct lzw_ctx*)malloc(sizeof(struct lzw_ctx));
    if (ctx == NULL) {
        perror("malloc()");
        result = -3;
        goto cleanup;
    }

    lzw_init(ctx);

    pin = buffer;
    pout = decompress_buffer;

    while ((ctx->eos == 0) && (copied_bytes < size)) {
        if (to_decompress == 0) {
            to_decompress = fread(buffer, 1, READ_BUFFER_SIZE, fin);
            pin = buffer;
            if (to_decompress == 0) {
                if (ferror(fin)) {
                    perror("fread()");
                    result = -4;
                    goto cleanup;
                }
                /* Input ended without an end-of-stream code: stop here
                   instead of spinning forever on an empty buffer. */
                break;
            }
        }
        if (space == 0) {
            if (flush_decompressed(fout, decompress_buffer, DECOMPRESS_BUFFER_SIZE, checksum) < 0) {
                result = -5;
                goto cleanup;
            }
            copied_bytes += DECOMPRESS_BUFFER_SIZE;
            space = DECOMPRESS_BUFFER_SIZE;
            pout = decompress_buffer;
        }
        available = to_decompress;
        lzw_decompress(ctx, pin, &to_decompress, pout, &space);
        /* Advance by what was actually consumed; the last read may have
           filled only part of the input buffer. */
        pin += available - to_decompress;
        pout = decompress_buffer + DECOMPRESS_BUFFER_SIZE - space;
    }

    /* Write out whatever is left in a partially filled output buffer */
    if (space < DECOMPRESS_BUFFER_SIZE) {
        if (flush_decompressed(fout, decompress_buffer, DECOMPRESS_BUFFER_SIZE - space, checksum) < 0) {
            result = -5;
            goto cleanup;
        }
        copied_bytes += DECOMPRESS_BUFFER_SIZE - space;
    }

    if (copied_bytes != size)
        printf("WARNING: Decompressed image size (%lu) does not match expected (%lu)", copied_bytes, (unsigned long int)size);

cleanup:
    free(buffer);
    free(decompress_buffer);
    free(ctx);
    return result;
}
void dsk2img(const char* filename, int no_lzw)
{
FILE *inf, *outf;
FILE *inf = NULL, *outf = NULL;
struct dskheader header;
char comment[600];
char *c, *outname;
char *c, *outname = NULL;
unsigned long rres;
int sres;
unsigned long int final_sectors, image_sectors, sector;
unsigned long int final_size;
unsigned long int final_sectors;
unsigned long int final_size, saved_size;
long int start_offset;
uint8_t* buffer;
uint32_t checksum;
int i;
int type_geom;
char *rbuf = NULL, *wbuf = NULL;
char *rbuf = NULL, *wbuf = NULL; /* for setvbuf() */
inf = fopen(filename, "rb");
if (inf == NULL)
@ -223,24 +367,21 @@ void dsk2img(const char* filename, int no_lzw)
rres = fread(&header, sizeof(header), 1, inf);
if (rres != 1) {
printf("Short read\n");
fclose(inf);
if (rbuf)
free(rbuf);
return;
goto done;
}
/* dump_dsk_header(&header); */
#ifdef DEBUG
dump_dsk_header(&header);
#endif
if (!valid_header(&header)) {
fclose(inf);
puts("Not a valid DSK file!");
if (rbuf)
free(rbuf);
return;
goto done;
}
final_sectors = header.cylinders * header.heads * header.sectors;
final_size = final_sectors * header.sectorsize;
saved_size = header.imagesectors * header.sectorsize;
printf("DSK format: ");
switch (header.magic) {
@ -264,19 +405,13 @@ void dsk2img(const char* filename, int no_lzw)
sres = fseek(inf, header.commentoffset, SEEK_SET);
if (sres != 0) {
perror("comment seek()");
fclose(inf);
if (rbuf)
free(rbuf);
return;
goto done;
}
c = fgets(comment, sizeof(comment), inf);
if (c == NULL) {
perror("fgets()");
fclose(inf);
if (rbuf)
free(rbuf);
return;
goto done;
}
if (strlen(c) > 0)
printf("comment: %s", c);
@ -285,10 +420,7 @@ void dsk2img(const char* filename, int no_lzw)
sres = fseek(inf, start_offset, SEEK_SET);
if (sres != 0) {
perror("start seek()");
fclose(inf);
if (rbuf)
free(rbuf);
return;
goto done;
}
outname = guess_output_filename(filename, (header.magic == MAGIC_DSK_COMPRESSED) && (no_lzw != 0));
@ -296,11 +428,7 @@ void dsk2img(const char* filename, int no_lzw)
outf = fopen(outname, "wb+");
if (outf == NULL) {
perror("fopen()");
fclose(inf);
free(outname);
if (rbuf)
free(rbuf);
return;
goto done;
}
wbuf = (char*)malloc(4096);
@ -309,94 +437,32 @@ void dsk2img(const char* filename, int no_lzw)
else
printf("WARNING: can't allocate write buffer.");
buffer = (uint8_t*)malloc(header.sectorsize);
if (buffer == NULL) {
perror("malloc()");
fclose(outf);
fclose(inf);
free(outname);
if (rbuf)
free(rbuf);
if (wbuf)
free(wbuf);
return;
}
/* image_sectors = (header.imageclusters << header.clustershift) +
header.firstclustersector - 1; */
image_sectors = header.imagesectors;
checksum = 0;
if (header.magic != MAGIC_DSK_COMPRESSED) {
for (sector = 0; sector < image_sectors; sector++) {
rres = fread(buffer, header.sectorsize, 1, inf);
if (rres != 1) {
perror("fread()");
fclose(outf);
fclose(inf);
free(buffer);
free(outname);
if (rbuf)
free(rbuf);
if (wbuf)
free(wbuf);
return;
}
for (i = 0; i < header.sectorsize / 2; i++)
checksum += buffer[i * 2] + 256 * buffer[i * 2 + 1];
rres = fwrite(buffer, header.sectorsize, 1, outf);
if (rres != 1) {
perror("fwrite()");
fclose(outf);
fclose(inf);
free(buffer);
free(outname);
if (rbuf)
free(rbuf);
if (wbuf)
free(wbuf);
return;
}
}
pad_file_to_size(outf, final_size);
if (checksum != header.checksum)
printf("ERROR: image checksum does not match. Expected 0x%08x but got 0x%08x\n", header.checksum, checksum);
else
puts("Done.");
if (copy_image_data(inf, outf, saved_size, &checksum) < 0)
goto done;
} else {
if (no_lzw) {
size_t wres;
for (sector = 0;; sector++) {
rres = fread(buffer, 1, header.sectorsize, inf);
for (i = 0; i < rres; i++)
checksum += buffer[i];
wres = fwrite(buffer, 1, rres, outf);
if (wres != rres) {
perror("fwrite()");
fclose(outf);
fclose(inf);
free(buffer);
free(outname);
if (rbuf)
free(rbuf);
if (wbuf)
free(wbuf);
return;
}
if (rres < header.sectorsize)
break;
}
copy_compressed_image_data(inf, outf);
goto done;
} else {
/* TODO: checksum */
lzw_decompress_file(inf, outf);
pad_file_to_size(outf, final_size);
if (decompress_image_data(inf, outf, saved_size, &checksum) < 0)
goto done;
}
}
pad_file_to_size(outf, final_size);
if (checksum != header.checksum)
printf("ERROR: image checksum does not match. Expected 0x%08x but got 0x%08x\n", header.checksum, checksum);
else
puts("Done.");
fclose(outf);
fclose(inf);
free(buffer);
free(outname);
done:
if (outf)
fclose(outf);
if (inf)
fclose(inf);
if (outname)
free(outname);
if (rbuf)
free(rbuf);
if (wbuf)

127
lzw.c
View File

@ -1,9 +1,13 @@
/*
* LZW algorithm as used in the IBM .DSK floppy disk image format.
* Uses a fixed 12-bit code size and an LRU dictionary entry replacement policy.
* The only special code is 0, which marks the end of the compressed stream.
* Single-byte strings (0x00 to 0xff) occupy codes 1 to 256.
*/
#include "lzw.h"
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
@ -12,11 +16,11 @@
void lzw_init(struct lzw_ctx* c)
{
int i;
uint16_t i;
c->coffset = c->doffset = c->codes = 0L;
c->input_buffer = 0;
c->dict[0].usecount = c->dict[0].length = 0;
c->dict[0].usecount = 256;
c->dict[0].length = 0;
c->dict[0].prefix = c->dict[0].lru_prev = c->dict[0].lru_next = 0;
for (i = 1; i <= 256; i++) {
c->dict[i].length = 1;
@ -38,6 +42,10 @@ void lzw_init(struct lzw_ctx* c)
c->lru_head = 257;
c->lru_tail = 4095;
c->last_emitted_code = 0;
c->output_buffer_start = 0;
c->output_buffer_used = 0;
c->input_code = c->input_code_bits = 0;
c->eos = 0;
}
/* Remove code from the lru list */
@ -66,7 +74,7 @@ static void lzw_lru_append(struct lzw_ctx* c, uint16_t code)
c->lru_tail = code;
}
#if 0
#ifdef DEBUG
static void lzw_print_lru(struct lzw_ctx* c)
{
printf("head=%03x tail=%03x\n", c->lru_head, c->lru_tail);
@ -91,7 +99,7 @@ static void lzw_validate_lru(struct lzw_ctx* c)
static int lzw_build_entry(struct lzw_ctx* c, uint16_t code)
{
int pos;
uint16_t pos;
pos = c->lru_head;
lzw_lru_unlink(c, pos);
@ -116,67 +124,60 @@ static int lzw_build_entry(struct lzw_ctx* c, uint16_t code)
return pos;
}
static void lzw_emit(struct lzw_ctx* c, uint16_t code, FILE* f)
int lzw_decompress(struct lzw_ctx* c, uint8_t* src, size_t* src_count, uint8_t* dst, size_t* dst_count)
{
int i;
uint16_t t;
int bytes_read = 0;
int bytes_written = 0;
uint16_t code;
uint16_t i, t;
for (i = c->dict[code].length, t = code; t != 0 && i > 0; i--, t = c->dict[t].prefix)
c->output_buffer[i - 1] = c->dict[t].last;
fwrite(c->output_buffer, c->dict[code].length, 1, f);
if (c->eos != 0)
return 0;
c->doffset += c->dict[code].length;
c->last_emitted_code = code;
}
static int lzw_get_next_code(struct lzw_ctx* c, FILE* f)
{
uint16_t result;
int next_byte;
next_byte = fgetc(f);
if (next_byte == EOF)
return -1;
c->coffset++;
if ((c->codes & 1) == 0) {
c->input_buffer = fgetc(f);
if (c->input_buffer == EOF)
return -1;
c->coffset++;
result = (next_byte << 4) | (c->input_buffer >> 4);
} else {
result = ((c->input_buffer & 0x0f) << 8) | next_byte;
/* First, flush out any remaining data in the string buffer */
while ((c->output_buffer_used > 0) && (*dst_count > 0)) {
dst[bytes_written++] = c->output_buffer[c->output_buffer_start];
c->output_buffer_start = (c->output_buffer_start + 1) % 4096;
c->output_buffer_used--;
--*dst_count;
}
while ((*src_count > 0) && (*dst_count > 0)) {
/* Get next 12bit code from the input buffer */
while ((c->input_code_bits < 12) && (*src_count > 0)) {
c->input_code = (c->input_code << 8) | src[bytes_read++];
c->input_code_bits += 8;
--*src_count;
}
if (c->input_code_bits < 12)
return bytes_written;
code = (c->input_code >> (c->input_code_bits - 12)) & 0x0fff;
c->input_code_bits -= 12;
if (code == 0) {
c->eos = 1;
return bytes_written;
}
/* Build the new dictionary entry */
if (c->last_emitted_code != 0)
lzw_build_entry(c, code);
/* Output the corresponding string */
c->output_buffer_start = 0;
for (i = c->dict[code].length, t = code; t != 0 && i != 0; i--, t = c->dict[t].prefix) {
if (i <= *dst_count)
dst[bytes_written + i - 1] = c->dict[t].last;
else
c->output_buffer[i - *dst_count - 1] = c->dict[t].last;
}
if (c->dict[code].length <= *dst_count) {
c->output_buffer_used = 0;
bytes_written += c->dict[code].length;
*dst_count -= c->dict[code].length;
} else {
c->output_buffer_used = c->dict[code].length - *dst_count;
bytes_written += *dst_count;
*dst_count = 0;
}
c->last_emitted_code = code;
}
c->codes++;
return result & 0x0fff;
}
int lzw_decompress_file(FILE* infp, FILE* outfp)
{
struct lzw_ctx* ctx;
int code;
ctx = (struct lzw_ctx*)malloc(sizeof(struct lzw_ctx));
if (ctx == NULL)
return -1;
lzw_init(ctx);
while ((code = lzw_get_next_code(ctx, infp)) != 0) {
if (ctx->last_emitted_code != 0)
lzw_build_entry(ctx, code);
lzw_emit(ctx, code, outfp);
}
/*
printf(
"\nEnd of file after reading %ld bytes (%ld symbols) and writing %ld "
"bytes\n",
ctx.coffset, ctx.codes, ctx.doffset);
*/
free(ctx);
return 0;
return bytes_written;
}

10
lzw.h
View File

@ -18,16 +18,18 @@ struct lzw_dict_entry {
struct lzw_ctx {
struct lzw_dict_entry dict[4096]; /* Dictionary entries */
uint8_t output_buffer[4096]; /* Temporary buffer used to collect the byte string corresponding to a code */
size_t coffset; /* Position in the compressed stream */
size_t doffset; /* Position in the decompressed stream */
size_t codes; /* Number of codes processed so far */
int eos; /* Flag indicating that the end of stream code was encountered */
int input_buffer; /* Temporary buffer used in the extraction of 12bit codes from the compressed stream */
uint16_t lru_head; /* Index of the first entry in the lru list */
uint16_t lru_tail; /* Index of the last entry in the lru list */
uint16_t last_emitted_code; /* Code emitted in the previous round */
uint16_t output_buffer_start;
uint16_t output_buffer_used;
uint16_t input_code;
uint16_t input_code_bits;
};
void lzw_init(struct lzw_ctx* c);
int lzw_decompress_file(FILE* infp, FILE* outfp);
int lzw_decompress(struct lzw_ctx* c, uint8_t* src, size_t* src_count, uint8_t* dst, size_t* dst_count);
#endif

View File

@ -1,14 +1,25 @@
#include "lzw.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int decompress(char* filename)
int decompress_file(char* filename)
{
char* outfilename;
FILE *fp, *outfp;
int i;
unsigned int ifnlen, ofnlen;
struct lzw_ctx* ctx;
uint8_t bufin[4096], bufout[4096];
uint8_t *pin, *pout;
size_t to_decompress, space;
int res;
ctx = (struct lzw_ctx*)malloc(sizeof(struct lzw_ctx));
if (ctx == NULL)
return 0;
lzw_init(ctx);
fp = fopen(filename, "rb");
if (fp == NULL) {
@ -38,7 +49,26 @@ int decompress(char* filename)
return -1;
}
lzw_decompress_file(fp, outfp);
to_decompress = 0;
space = sizeof(bufout);
pout = bufout;
while (ctx->eos == 0) {
if (to_decompress == 0) {
to_decompress = fread(bufin, 1, sizeof(bufin), fp);
pin = bufin;
}
if (space == 0) {
fwrite(bufout, 1, sizeof(bufout), outfp);
space = sizeof(bufout);
pout = bufout;
}
res = lzw_decompress(ctx, pin, &to_decompress, pout, &space);
pin = bufin + sizeof(bufin) - to_decompress;
pout = bufout + sizeof(bufout) - space;
}
if (space < sizeof(bufout))
fwrite(bufout, 1, sizeof(bufout) - space, outfp);
free(outfilename);
fclose(outfp);
fclose(fp);
@ -52,7 +82,7 @@ int main(int argc, char* argv[])
for (i = 1; i < argc; i++) {
printf("Decompressing %s\n", argv[i]);
decompress(argv[i]);
decompress_file(argv[i]);
}
return EXIT_SUCCESS;