From adf779af8fa6b3be28563717c660077eeb13ff73 Mon Sep 17 00:00:00 2001 From: Maurizio Porrato Date: Thu, 19 Oct 2023 23:45:44 +0100 Subject: [PATCH] Rework LZW code to work on memory rather than files --- Makefile | 4 +- build-legacy.sh | 1 + dsk2img.c | 300 +++++++++++++++++++++++++++++------------------- lzw.c | 127 ++++++++++---------- lzw.h | 10 +- test-lzw.c | 36 +++++- 6 files changed, 289 insertions(+), 189 deletions(-) diff --git a/Makefile b/Makefile index 4cf324a..e690944 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ CC = clang -CFLAGS ?= -Wall -pedantic -std=c89 -O0 -g +CFLAGS ?= -Wall -Wextra -pedantic -std=c89 -Og -g STRIP = strip FORMAT = clang-format -i BIN = dsk2img @@ -20,4 +20,4 @@ clean: $(RM) $(BIN) $(TESTS) *.o *~ *% format: - $(FORMAT) *.c + $(FORMAT) *.c *.h diff --git a/build-legacy.sh b/build-legacy.sh index b6128a4..949759e 100755 --- a/build-legacy.sh +++ b/build-legacy.sh @@ -2,6 +2,7 @@ export WATCOM=/opt/ow PATH="$WATCOM/binl:$PATH" +export INCLUDE="$WATCOM/h" CFLAGS=(-Wextra -std=c89 -O3 -g0 -s) CFILES=(dsk2img.c lzw.c) diff --git a/dsk2img.c b/dsk2img.c index 129333a..cfdd0e1 100644 --- a/dsk2img.c +++ b/dsk2img.c @@ -19,6 +19,9 @@ #define DIRSEP '/' #endif +#define READ_BUFFER_SIZE 4096 +#define DECOMPRESS_BUFFER_SIZE 4096 + #pragma pack(1) struct dskheader { uint16_t magic; /* magic identifier (0x58aa, 0x59aa or 0x5aaa) */ @@ -127,8 +130,7 @@ void pad_file_to_size(FILE* f, unsigned long size) int valid_header(const struct dskheader* h) { - unsigned int final_sectors; - unsigned long int final_bytes; + unsigned long int final_sectors, cluster_sectors, final_bytes; if (!(h->magic == MAGIC_DSK_COMPRESSED || h->magic == MAGIC_DSK_UNCOMPRESSED || h->magic == MAGIC_DSK_OLD)) return 0; @@ -144,6 +146,7 @@ int valid_header(const struct dskheader* h) return 0; final_sectors = h->cylinders * h->heads * h->sectors; + cluster_sectors = ((unsigned long int)h->imageclusters) << h->clustershift; final_bytes = final_sectors * h->sectorsize; if (final_bytes > 4000000UL) @@ -153,7 +156,7 @@ int valid_header(const struct dskheader* h) return 0; if (h->reservedsectors + h->fatcopies * h->sectorsperfat != h->rootdirsector) return 0; - if (h->firstclustersector - 1 + (h->imageclusters << h->clustershift) != final_sectors) + if (h->firstclustersector - 1 + cluster_sectors != final_sectors) return 0; if (h->firstclustersector != h->reservedsectors + h->fatcopies * h->sectorsperfat + h->rootentries * 32 / h->sectorsize) return 0; @@ -165,7 +168,7 @@ int valid_header(const struct dskheader* h) return 1; } -#if 0 +#ifdef DEBUG void dump_dsk_header(struct dskheader* h) { printf("=== header =====================================\n"); @@ -193,22 +196,163 @@ void dump_dsk_header(struct dskheader* h) } #endif +void update_checksum(uint32_t* sum, uint8_t* buffer, size_t size) +{ + uint16_t* p = (uint16_t*)buffer; + size_t i; + + for (i = 0; i < size / 2; i++) + *sum += p[i]; + + if (size & 1) + *sum += buffer[size - 1] << 8; +} + +int copy_image_data(FILE* fin, FILE* fout, size_t size, uint32_t* checksum) +{ + unsigned long int copied_bytes; + size_t rres, wres; + uint8_t* buffer = NULL; + + buffer = (uint8_t*)malloc(READ_BUFFER_SIZE); + if (buffer == NULL) { + perror("malloc()"); + return -1; + } + for (copied_bytes = 0; copied_bytes < size;) { + rres = fread(buffer, 1, READ_BUFFER_SIZE, fin); + if (rres == 0) { + perror("fread()"); + free(buffer); + return -2; + } + update_checksum(checksum, buffer, rres); + wres = fwrite(buffer, 1, rres, fout); + if (rres != wres) { + perror("fwrite()"); + free(buffer); + return -3; + } + copied_bytes += rres; + } + + free(buffer); + return 0; +} + +int copy_compressed_image_data(FILE* fin, FILE* fout) +{ + size_t rres, wres; + uint8_t* buffer = NULL; + + buffer = (uint8_t*)malloc(READ_BUFFER_SIZE); + if (buffer == NULL) { + perror("malloc()"); + return -1; + } + + for (;;) { + rres = fread(buffer, 1, READ_BUFFER_SIZE, fin); + if (rres == 0) { + perror("fread()"); + free(buffer); + return -2; + } + wres = fwrite(buffer, 1, rres, fout); + if (rres != wres) { + perror("fwrite()"); + free(buffer); + return -3; + } + if (rres < READ_BUFFER_SIZE) + break; + } + + free(buffer); + return 0; +} + +int decompress_image_data(FILE* fin, FILE* fout, size_t size, uint32_t* checksum) +{ + unsigned long int copied_bytes = 0; + size_t to_decompress, space; + uint8_t *buffer, *decompress_buffer; + uint8_t *pin, *pout; + struct lzw_ctx* ctx = NULL; + + buffer = (uint8_t*)malloc(READ_BUFFER_SIZE); + if (buffer == NULL) { + perror("malloc()"); + return -1; + } + + decompress_buffer = (uint8_t*)malloc(DECOMPRESS_BUFFER_SIZE); + if (decompress_buffer == NULL) { + perror("malloc()"); + free(buffer); + return -2; + } + + ctx = (struct lzw_ctx*)malloc(sizeof(struct lzw_ctx)); + if (ctx == NULL) { + perror("malloc()"); + free(buffer); + free(decompress_buffer); + return -3; + } + + lzw_init(ctx); + + to_decompress = 0; + space = DECOMPRESS_BUFFER_SIZE; + pin = buffer; + pout = decompress_buffer; + while ((ctx->eos == 0) && (copied_bytes < size)) { + if (to_decompress == 0) { + to_decompress = fread(buffer, 1, READ_BUFFER_SIZE, fin); + pin = buffer; + } + if (space == 0) { + update_checksum(checksum, decompress_buffer, DECOMPRESS_BUFFER_SIZE); + fwrite(decompress_buffer, 1, DECOMPRESS_BUFFER_SIZE, fout); + copied_bytes += DECOMPRESS_BUFFER_SIZE; + space = DECOMPRESS_BUFFER_SIZE; + pout = decompress_buffer; + } + lzw_decompress(ctx, pin, &to_decompress, pout, &space); + pin = buffer + READ_BUFFER_SIZE - to_decompress; + pout = decompress_buffer + DECOMPRESS_BUFFER_SIZE - space; + } + if (space < DECOMPRESS_BUFFER_SIZE) { + update_checksum(checksum, decompress_buffer, DECOMPRESS_BUFFER_SIZE - space); + fwrite(decompress_buffer, 1, DECOMPRESS_BUFFER_SIZE - space, fout); + copied_bytes += DECOMPRESS_BUFFER_SIZE - space; + } + + if (copied_bytes != size) + printf("WARNING: Decompressed image size (%lu) does not match expected (%lu)", copied_bytes, size); + + free(buffer); + free(decompress_buffer); + free(ctx); + + return 0; +} + void dsk2img(const char* filename, int no_lzw) { - FILE *inf, *outf; + FILE *inf = NULL, *outf = NULL; struct dskheader header; char comment[600]; - char *c, *outname; + char *c, *outname = NULL; unsigned long rres; int sres; - unsigned long int final_sectors, image_sectors, sector; - unsigned long int final_size; + unsigned long int final_sectors; + unsigned long int final_size, saved_size; long int start_offset; - uint8_t* buffer; uint32_t checksum; - int i; int type_geom; - char *rbuf = NULL, *wbuf = NULL; + char *rbuf = NULL, *wbuf = NULL; /* for setvbuf() */ inf = fopen(filename, "rb"); if (inf == NULL) @@ -223,24 +367,21 @@ void dsk2img(const char* filename, int no_lzw) rres = fread(&header, sizeof(header), 1, inf); if (rres != 1) { printf("Short read\n"); - fclose(inf); - if (rbuf) - free(rbuf); - return; + goto done; } - /* dump_dsk_header(&header); */ +#ifdef DEBUG + dump_dsk_header(&header); +#endif if (!valid_header(&header)) { - fclose(inf); puts("Not a valid DSK file!"); - if (rbuf) - free(rbuf); - return; + goto done; } final_sectors = header.cylinders * header.heads * header.sectors; final_size = final_sectors * header.sectorsize; + saved_size = header.imagesectors * header.sectorsize; printf("DSK format: "); switch (header.magic) { @@ -264,19 +405,13 @@ void dsk2img(const char* filename, int no_lzw) sres = fseek(inf, header.commentoffset, SEEK_SET); if (sres != 0) { perror("comment seek()"); - fclose(inf); - if (rbuf) - free(rbuf); - return; + goto done; } c = fgets(comment, sizeof(comment), inf); if (c == NULL) { perror("fgets()"); - fclose(inf); - if (rbuf) - free(rbuf); - return; + goto done; } if (strlen(c) > 0) printf("comment: %s", c); @@ -285,10 +420,7 @@ void dsk2img(const char* filename, int no_lzw) sres = fseek(inf, start_offset, SEEK_SET); if (sres != 0) { perror("start seek()"); - fclose(inf); - if (rbuf) - free(rbuf); - return; + goto done; } outname = guess_output_filename(filename, (header.magic == MAGIC_DSK_COMPRESSED) && (no_lzw != 0)); @@ -296,11 +428,7 @@ void dsk2img(const char* filename, int no_lzw) outf = fopen(outname, "wb+"); if (outf == NULL) { perror("fopen()"); - fclose(inf); - free(outname); - if (rbuf) - free(rbuf); - return; + goto done; } wbuf = (char*)malloc(4096); @@ -309,94 +437,32 @@ void dsk2img(const char* filename, int no_lzw) else printf("WARNING: can't allocate write buffer."); - buffer = (uint8_t*)malloc(header.sectorsize); - if (buffer == NULL) { - perror("malloc()"); - fclose(outf); - fclose(inf); - free(outname); - if (rbuf) - free(rbuf); - if (wbuf) - free(wbuf); - return; - } - - /* image_sectors = (header.imageclusters << header.clustershift) + - header.firstclustersector - 1; */ - image_sectors = header.imagesectors; checksum = 0; if (header.magic != MAGIC_DSK_COMPRESSED) { - for (sector = 0; sector < image_sectors; sector++) { - rres = fread(buffer, header.sectorsize, 1, inf); - if (rres != 1) { - perror("fread()"); - fclose(outf); - fclose(inf); - free(buffer); - free(outname); - if (rbuf) - free(rbuf); - if (wbuf) - free(wbuf); - return; - } - for (i = 0; i < header.sectorsize / 2; i++) - checksum += buffer[i * 2] + 256 * buffer[i * 2 + 1]; - rres = fwrite(buffer, header.sectorsize, 1, outf); - if (rres != 1) { - perror("fwrite()"); - fclose(outf); - fclose(inf); - free(buffer); - free(outname); - if (rbuf) - free(rbuf); - if (wbuf) - free(wbuf); - return; - } - } - pad_file_to_size(outf, final_size); - if (checksum != header.checksum) - printf("ERROR: image checksum does not match. Expected 0x%08x but got 0x%08x\n", header.checksum, checksum); - else - puts("Done."); + if (copy_image_data(inf, outf, saved_size, &checksum) < 0) + goto done; } else { if (no_lzw) { - size_t wres; - - for (sector = 0;; sector++) { - rres = fread(buffer, 1, header.sectorsize, inf); - for (i = 0; i < rres; i++) - checksum += buffer[i]; - wres = fwrite(buffer, 1, rres, outf); - if (wres != rres) { - perror("fwrite()"); - fclose(outf); - fclose(inf); - free(buffer); - free(outname); - if (rbuf) - free(rbuf); - if (wbuf) - free(wbuf); - return; - } - if (rres < header.sectorsize) - break; - } + copy_compressed_image_data(inf, outf); + goto done; } else { - /* TODO: checksum */ - lzw_decompress_file(inf, outf); - pad_file_to_size(outf, final_size); + if (decompress_image_data(inf, outf, saved_size, &checksum) < 0) + goto done; } } + pad_file_to_size(outf, final_size); + if (checksum != header.checksum) + printf("ERROR: image checksum does not match. Expected 0x%08x but got 0x%08x\n", header.checksum, checksum); + else + puts("Done."); - fclose(outf); - fclose(inf); - free(buffer); - free(outname); +done: + if (outf) + fclose(outf); + if (inf) + fclose(inf); + if (outname) + free(outname); if (rbuf) free(rbuf); if (wbuf) diff --git a/lzw.c b/lzw.c index fa4abe8..5480c9b 100644 --- a/lzw.c +++ b/lzw.c @@ -1,9 +1,13 @@ /* * LZW algorithm as used in the IBM .DSK floppy disk image format. * Uses a fixed 12bit code size and an LRU dictionary entry replacement policy. + * The only special code is 0 and it's used to represent the end of the + * compressed stream. Single byte strings (0x00 to 0xff) occupy codes from + * 1 to 256. */ #include "lzw.h" +#include #include #include #include @@ -12,11 +16,11 @@ void lzw_init(struct lzw_ctx* c) { - int i; + uint16_t i; - c->coffset = c->doffset = c->codes = 0L; c->input_buffer = 0; - c->dict[0].usecount = c->dict[0].length = 0; + c->dict[0].usecount = 256; + c->dict[0].length = 0; c->dict[0].prefix = c->dict[0].lru_prev = c->dict[0].lru_next = 0; for (i = 1; i <= 256; i++) { c->dict[i].length = 1; @@ -38,6 +42,10 @@ void lzw_init(struct lzw_ctx* c) c->lru_head = 257; c->lru_tail = 4095; c->last_emitted_code = 0; + c->output_buffer_start = 0; + c->output_buffer_used = 0; + c->input_code = c->input_code_bits = 0; + c->eos = 0; } /* Remove code from the lru list */ @@ -66,7 +74,7 @@ static void lzw_lru_append(struct lzw_ctx* c, uint16_t code) c->lru_tail = code; } -#if 0 +#ifdef DEBUG static void lzw_print_lru(struct lzw_ctx* c) { printf("head=%03x tail=%03x\n", c->lru_head, c->lru_tail); @@ -91,7 +99,7 @@ static void lzw_validate_lru(struct lzw_ctx* c) static int lzw_build_entry(struct lzw_ctx* c, uint16_t code) { - int pos; + uint16_t pos; pos = c->lru_head; lzw_lru_unlink(c, pos); @@ -116,67 +124,60 @@ static int lzw_build_entry(struct lzw_ctx* c, uint16_t code) return pos; } -static void lzw_emit(struct lzw_ctx* c, uint16_t code, FILE* f) +int lzw_decompress(struct lzw_ctx* c, uint8_t* src, size_t* src_count, uint8_t* dst, size_t* dst_count) { - int i; - uint16_t t; + int bytes_read = 0; + int bytes_written = 0; + uint16_t code; + uint16_t i, t; - for (i = c->dict[code].length, t = code; t != 0 && i > 0; i--, t = c->dict[t].prefix) - c->output_buffer[i - 1] = c->dict[t].last; - fwrite(c->output_buffer, c->dict[code].length, 1, f); + if (c->eos != 0) + return 0; - c->doffset += c->dict[code].length; - c->last_emitted_code = code; -} - -static int lzw_get_next_code(struct lzw_ctx* c, FILE* f) -{ - uint16_t result; - int next_byte; - - next_byte = fgetc(f); - if (next_byte == EOF) - return -1; - c->coffset++; - - if ((c->codes & 1) == 0) { - c->input_buffer = fgetc(f); - if (c->input_buffer == EOF) - return -1; - c->coffset++; - result = (next_byte << 4) | (c->input_buffer >> 4); - } else { - result = ((c->input_buffer & 0x0f) << 8) | next_byte; + /* First, flush out any remaining data in the string buffer */ + while ((c->output_buffer_used > 0) && (*dst_count > 0)) { + dst[bytes_written++] = c->output_buffer[c->output_buffer_start]; + c->output_buffer_start = (c->output_buffer_start + 1) % 4096; + c->output_buffer_used--; + --*dst_count; + } + while ((*src_count > 0) && (*dst_count > 0)) { + /* Get next 12bit code from the input buffer */ + while ((c->input_code_bits < 12) && (*src_count > 0)) { + c->input_code = (c->input_code << 8) | src[bytes_read++]; + c->input_code_bits += 8; + --*src_count; + } + if (c->input_code_bits < 12) + return bytes_written; + code = (c->input_code >> (c->input_code_bits - 12)) & 0x0fff; + c->input_code_bits -= 12; + if (code == 0) { + c->eos = 1; + return bytes_written; + } + /* Build the new dictionary entry */ + if (c->last_emitted_code != 0) + lzw_build_entry(c, code); + /* Output the corresponding string */ + c->output_buffer_start = 0; + for (i = c->dict[code].length, t = code; t != 0 && i != 0; i--, t = c->dict[t].prefix) { + if (i <= *dst_count) + dst[bytes_written + i - 1] = c->dict[t].last; + else + c->output_buffer[i - *dst_count - 1] = c->dict[t].last; + } + if (c->dict[code].length <= *dst_count) { + c->output_buffer_used = 0; + bytes_written += c->dict[code].length; + *dst_count -= c->dict[code].length; + } else { + c->output_buffer_used = c->dict[code].length - *dst_count; + bytes_written += *dst_count; + *dst_count = 0; + } + c->last_emitted_code = code; } - c->codes++; - return result & 0x0fff; -} - -int lzw_decompress_file(FILE* infp, FILE* outfp) -{ - struct lzw_ctx* ctx; - int code; - - ctx = (struct lzw_ctx*)malloc(sizeof(struct lzw_ctx)); - if (ctx == NULL) - return -1; - - lzw_init(ctx); - - while ((code = lzw_get_next_code(ctx, infp)) != 0) { - if (ctx->last_emitted_code != 0) - lzw_build_entry(ctx, code); - lzw_emit(ctx, code, outfp); - } - - /* - printf( - "\nEnd of file after reading %ld bytes (%ld symbols) and writing %ld " - "bytes\n", - ctx.coffset, ctx.codes, ctx.doffset); - */ - free(ctx); - - return 0; + return bytes_written; } diff --git a/lzw.h b/lzw.h index 21d5d62..eaab9dc 100644 --- a/lzw.h +++ b/lzw.h @@ -18,16 +18,18 @@ struct lzw_dict_entry { struct lzw_ctx { struct lzw_dict_entry dict[4096]; /* Dictionary entries */ uint8_t output_buffer[4096]; /* Temporary buffer used to collect the byte string corresponding to a code */ - size_t coffset; /* Position in the compressed stream */ - size_t doffset; /* Position in the decompressed stream */ - size_t codes; /* Number of codes processed so far */ + int eos; /* Flag indicating that the end of stream code was encountered */ int input_buffer; /* Temporary buffer used in the extraction of 12bit codes from the compressed stream */ uint16_t lru_head; /* Index of the first entry in the lru list */ uint16_t lru_tail; /* Index of the last entry in the lru list */ uint16_t last_emitted_code; /* Code emitted in the previous round */ + uint16_t output_buffer_start; + uint16_t output_buffer_used; + uint16_t input_code; + uint16_t input_code_bits; }; void lzw_init(struct lzw_ctx* c); -int lzw_decompress_file(FILE* infp, FILE* outfp); +int lzw_decompress(struct lzw_ctx* c, uint8_t* src, size_t* src_count, uint8_t* dst, size_t* dst_count); #endif diff --git a/test-lzw.c b/test-lzw.c index daf8ad3..6456ea8 100644 --- a/test-lzw.c +++ b/test-lzw.c @@ -1,14 +1,25 @@ #include "lzw.h" +#include #include #include #include -int decompress(char* filename) +int decompress_file(char* filename) { char* outfilename; FILE *fp, *outfp; int i; unsigned int ifnlen, ofnlen; + struct lzw_ctx* ctx; + uint8_t bufin[4096], bufout[4096]; + uint8_t *pin, *pout; + size_t to_decompress, space; + int res; + + ctx = (struct lzw_ctx*)malloc(sizeof(struct lzw_ctx)); + if (ctx == NULL) + return 0; + lzw_init(ctx); fp = fopen(filename, "rb"); if (fp == NULL) { @@ -38,7 +49,26 @@ int decompress(char* filename) return -1; } - lzw_decompress_file(fp, outfp); + to_decompress = 0; + space = sizeof(bufout); + pout = bufout; + while (ctx->eos == 0) { + if (to_decompress == 0) { + to_decompress = fread(bufin, 1, sizeof(bufin), fp); + pin = bufin; + } + if (space == 0) { + fwrite(bufout, 1, sizeof(bufout), outfp); + space = sizeof(bufout); + pout = bufout; + } + res = lzw_decompress(ctx, pin, &to_decompress, pout, &space); + pin = bufin + sizeof(bufin) - to_decompress; + pout = bufout + sizeof(bufout) - space; + } + if (space < sizeof(bufout)) + fwrite(bufout, 1, sizeof(bufout) - space, outfp); + free(outfilename); fclose(outfp); fclose(fp); @@ -52,7 +82,7 @@ int main(int argc, char* argv[]) for (i = 1; i < argc; i++) { printf("Decompressing %s\n", argv[i]); - decompress(argv[i]); + decompress_file(argv[i]); } return EXIT_SUCCESS;