From c183681289721406f9e558556b27667640855df8 Mon Sep 17 00:00:00 2001 From: Maurizio Porrato Date: Sat, 21 Oct 2023 22:26:03 +0100 Subject: [PATCH] Change lzw_decompress() signature --- Makefile | 6 ++--- dsk2img.c | 70 ++++++++++++++++++++++++--------------------------- lzw.c | 61 +++++++++++++++++++++++++-------------------- lzw.h | 5 +++- test-lzw.c | 56 ++++++++++++++++++++--------------------- utils.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ utils.h | 15 +++++++++++ 7 files changed, 190 insertions(+), 96 deletions(-) create mode 100644 utils.c create mode 100644 utils.h diff --git a/Makefile b/Makefile index 9ed3c11..c56f66b 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ CC = clang -CFLAGS ?= -Wall -Wextra -pedantic -std=c89 -Og -g -pg +CFLAGS ?= -Wall -Wextra -pedantic -std=c89 -O0 -g -pg STRIP = strip FORMAT = clang-format -i BIN = dsk2img @@ -13,8 +13,8 @@ tests: $(TESTS) strip: $(BIN) $(STRIP) $^ -test-lzw: lzw.o -dsk2img: lzw.o +test-lzw: lzw.o utils.o +dsk2img: lzw.o utils.o clean: $(RM) $(BIN) $(TESTS) *.o *~ *% diff --git a/dsk2img.c b/dsk2img.c index e77c5ed..217de2a 100644 --- a/dsk2img.c +++ b/dsk2img.c @@ -1,5 +1,6 @@ #define _POSIX_C_SOURCE 2 #include "lzw.h" +#include "utils.h" #include #include #include @@ -7,6 +8,8 @@ #include #include +#undef DEBUG + #define MAGIC_DSK_OLD 0x58aa #define MAGIC_DSK_UNCOMPRESSED 0x59aa #define MAGIC_DSK_COMPRESSED 0x5aaa @@ -58,10 +61,10 @@ const struct media_type { { 512, 80, 2, 18, "3.5\" DSHD 1.44MB" }, { 512, 80, 2, 9, "3.5\" DSDD 720KB" }, { 512, 80, 2, 15, "5.25\" DSHD 1.2MB" }, + { 512, 40, 2, 9, "5.25\" DSDD 360KB" }, { 512, 40, 1, 8, "5.25\" SSDD 160KB" }, { 512, 40, 1, 9, "5.25\" SSDD 180KB" }, { 512, 40, 2, 8, "5.25\" DSDD 320KB" }, - { 512, 40, 2, 9, "5.25\" DSDD 360KB" }, { 512, 80, 1, 8, "5.25\" SSQD 320KB" }, { 512, 80, 2, 8, "5.25\" DSQD 640KB" }, { 512, 80, 1, 8, "3.5\" SSDD 320KB" }, @@ -78,11 +81,10 @@ char* guess_output_filename(const char* input_filename, int is_compressed) unsigned int ifnlen, ofnlen; int i; - /* NOLINTBEGIN(clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling) */ ifnlen = strlen(input_filename); ofnlen = ifnlen + 9; outname = (char*)malloc(ofnlen); - strncpy(outname, input_filename, ofnlen); + strlcpy(outname, input_filename, ofnlen); /* strip extension */ for (i = ifnlen; i > 0; i--) { if (outname[i - 1] == '.') { @@ -93,15 +95,14 @@ char* guess_output_filename(const char* input_filename, int is_compressed) } #ifdef DOSLIKE if (is_compressed) /* use filename.lzw instead of filename.img.lzw on dos and win */ - strcat(outname, ".lzw"); + strlcat(outname, ".lzw", ofnlen); else - strcat(outname, ".img"); + strlcat(outname, ".img", ofnlen); #else - strncat(outname, ".img", ofnlen); + strlcat(outname, ".img", ofnlen); if (is_compressed) - strncat(outname, ".lzw", ofnlen); + strlcat(outname, ".lzw", ofnlen); #endif - /* NOLINTEND(clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling) */ return outname; } @@ -156,8 +157,10 @@ int valid_header(const struct dskheader* h) return 0; if (h->reservedsectors + h->fatcopies * h->sectorsperfat != h->rootdirsector) return 0; + /* if (h->firstclustersector - 1 + cluster_sectors != final_sectors) return 0; + */ if (h->firstclustersector != h->reservedsectors + h->fatcopies * h->sectorsperfat + h->rootentries * 32 / h->sectorsize) return 0; if (h->imagesectors > final_sectors) @@ -275,9 +278,8 @@ int copy_compressed_image_data(FILE* fin, FILE* fout) int decompress_image_data(FILE* fin, FILE* fout, size_t size, uint32_t* checksum) { unsigned long int copied_bytes = 0; - size_t read_count, decompress_count; + size_t read_available, read_consumed, decompress_consumed; uint8_t *read_buffer, *decompress_buffer; - uint8_t *read_ptr, *decompress_ptr; struct lzw_ctx* lzw = NULL; read_buffer = (uint8_t*)malloc(READ_BUFFER_SIZE); @@ -303,34 +305,25 @@ int decompress_image_data(FILE* fin, FILE* fout, size_t size, uint32_t* checksum lzw_init(lzw); - read_count = 0; - decompress_count = DECOMPRESS_BUFFER_SIZE; - read_ptr = read_buffer; - decompress_ptr = decompress_buffer; - while ((lzw->eos == 0) && (copied_bytes < size)) { - if (read_count == 0) { - read_count = fread(read_buffer, 1, READ_BUFFER_SIZE, fin); - read_ptr = read_buffer; + read_available = read_consumed = decompress_consumed = 0; + while (1) { + if (read_consumed >= read_available) { + read_available = fread(read_buffer, 1, READ_BUFFER_SIZE, fin); + read_consumed = 0; } - if (decompress_count == 0) { - update_checksum(checksum, decompress_buffer, DECOMPRESS_BUFFER_SIZE); - fwrite(decompress_buffer, 1, DECOMPRESS_BUFFER_SIZE, fout); - copied_bytes += DECOMPRESS_BUFFER_SIZE; - decompress_count = DECOMPRESS_BUFFER_SIZE; - decompress_ptr = decompress_buffer; + if (decompress_consumed >= DECOMPRESS_BUFFER_SIZE || ((lzw->eos != 0 || read_available == 0) && decompress_consumed > 0)) { + update_checksum(checksum, decompress_buffer, decompress_consumed); + fwrite(decompress_buffer, 1, decompress_consumed, fout); + copied_bytes += decompress_consumed; + decompress_consumed = 0; } - lzw_decompress(lzw, read_ptr, &read_count, decompress_ptr, &decompress_count); - read_ptr = read_buffer + READ_BUFFER_SIZE - read_count; - decompress_ptr = decompress_buffer + DECOMPRESS_BUFFER_SIZE - decompress_count; - } - if (decompress_count < DECOMPRESS_BUFFER_SIZE) { - update_checksum(checksum, decompress_buffer, DECOMPRESS_BUFFER_SIZE - decompress_count); - fwrite(decompress_buffer, 1, DECOMPRESS_BUFFER_SIZE - decompress_count, fout); - copied_bytes += DECOMPRESS_BUFFER_SIZE - decompress_count; + if (lzw->eos || read_available == 0) + break; + lzw_decompress(lzw, read_buffer, read_available, &read_consumed, decompress_buffer, DECOMPRESS_BUFFER_SIZE, &decompress_consumed); } if (copied_bytes != size) - printf("WARNING: Decompressed image size (%lu) does not match expected (%lu)", copied_bytes, size); + printf("WARNING: Decompressed image size (%lu) does not match expected (%lu)\n", copied_bytes, size); free(read_buffer); free(decompress_buffer); @@ -354,6 +347,10 @@ void dsk2img(const char* filename, int no_lzw) int type_geom; char *rbuf = NULL, *wbuf = NULL; /* for setvbuf() */ +#ifdef DEBUG + fprintf(stderr, "Extracting %s\n", filename); +#endif + inf = fopen(filename, "rb"); if (inf == NULL) return; @@ -370,12 +367,11 @@ void dsk2img(const char* filename, int no_lzw) goto done; } -#ifdef DEBUG - dump_dsk_header(&header); -#endif - if (!valid_header(&header)) { puts("Not a valid DSK file!"); +#ifdef DEBUG + dump_dsk_header(&header); +#endif goto done; } diff --git a/lzw.c b/lzw.c index 7401b7a..cb9d5bc 100644 --- a/lzw.c +++ b/lzw.c @@ -45,6 +45,7 @@ void lzw_init(struct lzw_ctx* c) c->output_buffer_start = 0; c->output_buffer_used = 0; c->input_code = c->input_code_bits = 0; + c->uncompressed_bytes_processed = c->compressed_bytes_processed = c->codes_processed = 0; c->eos = 0; } @@ -124,34 +125,41 @@ static int lzw_build_entry(struct lzw_ctx* c, uint16_t code) return pos; } -int lzw_decompress(struct lzw_ctx* c, uint8_t* src, size_t* src_count, uint8_t* dst, size_t* dst_count) +#define MIN(a, b) ((a)<(b)?(a):(b)) + +int lzw_decompress(struct lzw_ctx* c, + uint8_t* src, size_t src_size, size_t* src_consumed, + uint8_t* dst, size_t dst_size, size_t* dst_consumed) { - int bytes_read = 0; int bytes_written = 0; uint16_t code; uint16_t code_len, current_code; - if (c->eos != 0) - return 0; - /* First, flush out any remaining data in the string buffer */ - while ((c->output_buffer_used > 0) && (*dst_count > 0)) { - dst[bytes_written++] = c->output_buffer[c->output_buffer_start]; - c->output_buffer_start = (c->output_buffer_start + 1) % 4096; - c->output_buffer_used--; - --*dst_count; + /* NOLINTBEGIN(clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling) */ + if (c->output_buffer_used > 0) { + bytes_written = MIN(c->output_buffer_used, dst_size - *dst_consumed); + memcpy(&dst[*dst_consumed], &c->output_buffer[c->output_buffer_start], bytes_written); + c->output_buffer_used -= bytes_written; + c->output_buffer_start += bytes_written; + c->uncompressed_bytes_processed += bytes_written; + *dst_consumed += bytes_written; } - while ((*src_count > 0) && (*dst_count > 0)) { + if (*dst_consumed >= dst_size) + return bytes_written; + /* NOLINTEND(clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling) */ + while (*src_consumed < src_size && *dst_consumed < dst_size && c->eos == 0) { /* Get next 12bit code from the input buffer */ - while ((c->input_code_bits < 12) && (*src_count > 0)) { - c->input_code = (c->input_code << 8) | src[bytes_read++]; + while (c->input_code_bits < 12 && *src_consumed < src_size) { + c->input_code = (c->input_code << 8) | src[(*src_consumed)++]; c->input_code_bits += 8; - --*src_count; + c->compressed_bytes_processed++; } if (c->input_code_bits < 12) return bytes_written; code = (c->input_code >> (c->input_code_bits - 12)) & 0x0fff; c->input_code_bits -= 12; + c->codes_processed++; if (code == 0) { c->eos = 1; return bytes_written; @@ -162,20 +170,19 @@ int lzw_decompress(struct lzw_ctx* c, uint8_t* src, size_t* src_count, uint8_t* /* Output the corresponding string */ c->output_buffer_start = 0; for (code_len = c->dict[code].length, current_code = code; current_code != 0 && code_len != 0; code_len--, current_code = c->dict[current_code].prefix) { - if (code_len <= *dst_count) - dst[bytes_written + code_len - 1] = c->dict[current_code].last; - else - c->output_buffer[code_len - *dst_count - 1] = c->dict[current_code].last; - } - if (c->dict[code].length <= *dst_count) { - c->output_buffer_used = 0; - bytes_written += c->dict[code].length; - *dst_count -= c->dict[code].length; - } else { - c->output_buffer_used = c->dict[code].length - *dst_count; - bytes_written += *dst_count; - *dst_count = 0; + if (code_len <= dst_size - *dst_consumed) { + dst[*dst_consumed + code_len - 1] = c->dict[current_code].last; + bytes_written++; + c->uncompressed_bytes_processed++; + } else { + c->output_buffer[code_len - (dst_size - *dst_consumed) - 1] = c->dict[current_code].last; + c->output_buffer_used++; + } } + if (c->dict[code].length < dst_size - *dst_consumed) + *dst_consumed += c->dict[code].length; + else + *dst_consumed = dst_size; c->last_emitted_code = code; } diff --git a/lzw.h b/lzw.h index eaab9dc..6d6cd6e 100644 --- a/lzw.h +++ b/lzw.h @@ -27,9 +27,12 @@ struct lzw_ctx { uint16_t output_buffer_used; uint16_t input_code; uint16_t input_code_bits; + size_t compressed_bytes_processed; + size_t uncompressed_bytes_processed; + size_t codes_processed; }; void lzw_init(struct lzw_ctx* c); -int lzw_decompress(struct lzw_ctx* c, uint8_t* src, size_t* src_count, uint8_t* dst, size_t* dst_count); +int lzw_decompress(struct lzw_ctx* c, uint8_t* src, size_t src_count, size_t* src_consumed, uint8_t* dst, size_t dst_count, size_t* dst_consumed); #endif diff --git a/test-lzw.c b/test-lzw.c index 6456ea8..e93a5c4 100644 --- a/test-lzw.c +++ b/test-lzw.c @@ -1,25 +1,29 @@ #include "lzw.h" +#include "utils.h" #include #include #include #include +#define READ_BUFFER_SIZE 4096 +#define DECOMPRESS_BUFFER_SIZE 4096 + +uint8_t read_buffer[READ_BUFFER_SIZE], decompress_buffer[DECOMPRESS_BUFFER_SIZE]; + int decompress_file(char* filename) { char* outfilename; FILE *fp, *outfp; int i; unsigned int ifnlen, ofnlen; - struct lzw_ctx* ctx; - uint8_t bufin[4096], bufout[4096]; - uint8_t *pin, *pout; - size_t to_decompress, space; - int res; + struct lzw_ctx* lzw; + unsigned long int copied_bytes = 0; + size_t read_available, read_consumed, decompress_consumed; - ctx = (struct lzw_ctx*)malloc(sizeof(struct lzw_ctx)); - if (ctx == NULL) + lzw = (struct lzw_ctx*)malloc(sizeof(struct lzw_ctx)); + if (lzw == NULL) return 0; - lzw_init(ctx); + lzw_init(lzw); fp = fopen(filename, "rb"); if (fp == NULL) { @@ -27,19 +31,17 @@ int decompress_file(char* filename) return -1; } - /* NOLINTBEGIN(clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling) */ ifnlen = strlen(filename); ofnlen = ifnlen + 5; outfilename = (char*)malloc(ofnlen); - strncpy(outfilename, filename, ofnlen); + strlcpy(outfilename, filename, ofnlen); for (i = ifnlen - 1; i > 0 && outfilename[i] != '/'; i--) if (outfilename[i] == '.') { outfilename[i] = '\0'; break; } if (strcmp(filename, outfilename) == 0) - strncat(outfilename, ".out", ofnlen); - /* NOLINTEND(clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling) */ + strlcat(outfilename, ".out", ofnlen); printf("Writing to %s\n", outfilename); outfp = fopen(outfilename, "wb"); @@ -49,30 +51,28 @@ int decompress_file(char* filename) return -1; } - to_decompress = 0; - space = sizeof(bufout); - pout = bufout; - while (ctx->eos == 0) { - if (to_decompress == 0) { - to_decompress = fread(bufin, 1, sizeof(bufin), fp); - pin = bufin; + read_available = read_consumed = decompress_consumed = 0; + while (1) { + if (read_consumed >= read_available) { + read_available = fread(read_buffer, 1, READ_BUFFER_SIZE, fp); + read_consumed = 0; } - if (space == 0) { - fwrite(bufout, 1, sizeof(bufout), outfp); - space = sizeof(bufout); - pout = bufout; + if (decompress_consumed >= DECOMPRESS_BUFFER_SIZE || ((lzw->eos != 0 || read_available == 0) && decompress_consumed > 0)) { + fwrite(decompress_buffer, 1, decompress_consumed, outfp); + copied_bytes += decompress_consumed; + decompress_consumed = 0; } - res = lzw_decompress(ctx, pin, &to_decompress, pout, &space); - pin = bufin + sizeof(bufin) - to_decompress; - pout = bufout + sizeof(bufout) - space; + if (lzw->eos || read_available == 0) + break; + lzw_decompress(lzw, read_buffer, read_available, &read_consumed, decompress_buffer, DECOMPRESS_BUFFER_SIZE, &decompress_consumed); } - if (space < sizeof(bufout)) - fwrite(bufout, 1, sizeof(bufout) - space, outfp); free(outfilename); fclose(outfp); fclose(fp); + printf("Extracted %lu bytes", copied_bytes); + return 0; } diff --git a/utils.c b/utils.c new file mode 100644 index 0000000..4c4d344 --- /dev/null +++ b/utils.c @@ -0,0 +1,73 @@ +#include +#include + +/* Taken from the FreeBSD source code */ + +#ifndef strlcpy +/* + * Copy string src to buffer dst of size dsize. At most dsize-1 + * chars will be copied. Always NUL terminates (unless dsize == 0). + * Returns strlen(src); if retval >= dsize, truncation occurred. + */ +size_t +strlcpy(char* __restrict dst, const char* __restrict src, size_t dsize) +{ + const char* osrc = src; + size_t nleft = dsize; + + /* Copy as many bytes as will fit. */ + if (nleft != 0) { + while (--nleft != 0) { + if ((*dst++ = *src++) == '\0') + break; + } + } + + /* Not enough room in dst, add NUL and traverse rest of src. */ + if (nleft == 0) { + if (dsize != 0) + *dst = '\0'; /* NUL-terminate dst */ + while (*src++) + ; + } + + return (src - osrc - 1); /* count does not include NUL */ +} +#endif + +#ifndef strlcat +/* + * Appends src to string dst of size siz (unlike strncat, siz is the + * full size of dst, not space left). At most siz-1 characters + * will be copied. Always NUL terminates (unless siz <= strlen(dst)). + * Returns strlen(src) + MIN(siz, strlen(initial dst)). + * If retval >= siz, truncation occurred. + */ +size_t +strlcat(char* dst, const char* src, size_t siz) +{ + char* d = dst; + const char* s = src; + size_t n = siz; + size_t dlen; + + /* Find the end of dst and adjust bytes left but don't go past end */ + while (n-- != 0 && *d != '\0') + d++; + dlen = d - dst; + n = siz - dlen; + + if (n == 0) + return (dlen + strlen(s)); + while (*s != '\0') { + if (n != 1) { + *d++ = *s; + n--; + } + s++; + } + *d = '\0'; + + return (dlen + (s - src)); /* count does not include NUL */ +} +#endif diff --git a/utils.h b/utils.h new file mode 100644 index 0000000..d8d5e9d --- /dev/null +++ b/utils.h @@ -0,0 +1,15 @@ +#ifndef UTILS_H_ +#define UTILS_H_ + +#include + +#ifndef strlcpy +size_t +strlcpy(char* __restrict dst, const char* __restrict src, size_t dsize); +#endif +#ifndef strlcat +size_t +strlcat(char* dst, const char* src, size_t siz); +#endif + +#endif