summaryrefslogtreecommitdiffstats
path: root/lisiblepng
diff options
context:
space:
mode:
authorClement Sibille <clements+git@lisible.xyz>2024-02-27 17:23:33 +0900
committerClement Sibille <clements+git@lisible.xyz>2024-02-27 17:23:33 +0900
commite1e5b4e92bcd460b43ce1b852560751b6525593e (patch)
tree4b8bef35e0621240c6531ee28abb55a42e1a70f4 /lisiblepng
parent228854f8672d5015550ab6f6c76e806d099bb4db (diff)
Implement Deflate decompression
This patch adds the decompression code for zlib compressed data streams that are compressed using Deflate based on RFC-1950 and RFC-1951. This implementation lacks support for zlib prefix dictionaries and for non-compressed Deflate blocks as these are less common.
Diffstat (limited to 'lisiblepng')
-rw-r--r--lisiblepng/src/bitstream.c3
-rw-r--r--lisiblepng/src/deflate.c364
-rw-r--r--lisiblepng/src/deflate.h8
-rw-r--r--lisiblepng/src/lisiblepng.c3
4 files changed, 361 insertions, 17 deletions
diff --git a/lisiblepng/src/bitstream.c b/lisiblepng/src/bitstream.c
index b34a890..e7ddb79 100644
--- a/lisiblepng/src/bitstream.c
+++ b/lisiblepng/src/bitstream.c
@@ -30,8 +30,7 @@ void Bitstream_skip(Bitstream *bitstream, size_t bit_count) {
uint16_t Bitstream_next_bits(Bitstream *bitstream, int bit_count) {
ASSERT(bitstream != NULL);
ASSERT(bit_count <= 16);
- ASSERT(bitstream->current_byte_index +
- (bitstream->current_bit_offset + bit_count) % 8 <=
+ ASSERT((bitstream->current_bit_offset + (size_t)bit_count) / 8 <=
bitstream->data_size);
int bit_to_read = bit_count;
diff --git a/lisiblepng/src/deflate.c b/lisiblepng/src/deflate.c
index 3f8ce2e..d894a35 100644
--- a/lisiblepng/src/deflate.c
+++ b/lisiblepng/src/deflate.c
@@ -1,6 +1,7 @@
#include "deflate.h"
#include "assert.h"
#include "bitstream.h"
+#include <string.h>
#define CM_LENGTH_BITS 4
#define CINFO_LENGTH_BITS 4
@@ -8,10 +9,335 @@
#define FDICT_LENGTH_BITS 1
#define FLEVEL_LENGTH_BITS 2
-bool deflate_decompress(Bitstream *bitstream);
+#define BFINAL_LENGTH_BITS 1
+#define BTYPE_LENGTH_BITS 2
-bool zlib_decompress(const uint8_t *compressed_data_buffer,
- const size_t compressed_data_length) {
+#define CODE_LENGTH_ALPHABET_MAX_SYMBOL_COUNT 19
+#define MAX_HUFFMAN_CODE_LENGTH 15
+#define MAX_LENGTH_CODES 286
+#define MAX_DISTANCE_CODES 32
+#define FIXED_LENGTH_LITERAL_CODE_COUNT 288
+
+typedef enum {
+ DeflateBlockType_NoCompression,
+ DeflateBlockType_FixedHuffman,
+ DeflateBlockType_DynamicHuffman,
+ DeflateBlockType_Error
+} DeflateBlockType;
+
+typedef struct {
+ uint16_t counts[CODE_LENGTH_ALPHABET_MAX_SYMBOL_COUNT];
+ uint16_t symbols[CODE_LENGTH_ALPHABET_MAX_SYMBOL_COUNT];
+} CodeLengthTable;
+
+typedef struct {
+ uint16_t counts[MAX_LENGTH_CODES];
+ uint16_t symbols[MAX_LENGTH_CODES];
+} LengthLiteralTable;
+
+typedef struct {
+ uint16_t counts[MAX_DISTANCE_CODES];
+ uint16_t symbols[MAX_DISTANCE_CODES];
+} DistanceTable;
+
+#define DEFLATE_OUTPUT_BUFFER_INITIAL_CAPACITY (1024 * 256)
+
+typedef struct {
+ char *buffer;
+ size_t len;
+ size_t cap;
+} OutputBuffer;
+
+void OutputBuffer_init(OutputBuffer *output_buffer) {
+ ASSERT(output_buffer != NULL);
+ output_buffer->buffer = calloc(DEFLATE_OUTPUT_BUFFER_INITIAL_CAPACITY, 1);
+ if (!output_buffer->buffer) {
+ LOG0("Couldn't allocate deflate output buffer (out of memory?)");
+ abort();
+ }
+ output_buffer->cap = DEFLATE_OUTPUT_BUFFER_INITIAL_CAPACITY;
+ output_buffer->len = 0;
+}
+
+void OutputBuffer_expand(OutputBuffer *output_buffer) {
+ ASSERT(output_buffer != NULL);
+ size_t new_cap = output_buffer->cap * 2;
+ output_buffer->buffer = realloc(output_buffer->buffer, new_cap);
+ if (!output_buffer->buffer) {
+ LOG0("Couldn't reallocate deflate output buffer (out of memory?)");
+ abort();
+ }
+
+ output_buffer->cap = new_cap;
+}
+
+void OutputBuffer_push(OutputBuffer *output_buffer, char byte) {
+ ASSERT(output_buffer != NULL);
+ if (output_buffer->len == output_buffer->cap) {
+ OutputBuffer_expand(output_buffer);
+ }
+
+ output_buffer->buffer[output_buffer->len++] = byte;
+}
+
+size_t OutputBuffer_length(const OutputBuffer *output_buffer) {
+ ASSERT(output_buffer != NULL);
+ return output_buffer->len;
+}
+
+int huffman_table_decode(const uint16_t *symbols, const uint16_t *counts,
+ Bitstream *bitstream) {
+ ASSERT(symbols != NULL);
+ ASSERT(counts != NULL);
+ ASSERT(bitstream != NULL);
+ int code = 0;
+ int index = 0;
+ int first = 0;
+ for (int i = 1; i <= MAX_HUFFMAN_CODE_LENGTH; i++) {
+ code |= Bitstream_next_bits(bitstream, 1);
+ int count = counts[i];
+ if (code - count < first) {
+ return symbols[index + (code - first)];
+ }
+
+ index += count;
+ first += count;
+ first <<= 1;
+ code <<= 1;
+ }
+
+ return -1;
+}
+
+void build_huffman_table_from_codelengths(uint16_t *symbols, uint16_t *counts,
+ const uint16_t *codelengths,
+ uint16_t codelength_count) {
+ ASSERT(symbols != NULL);
+ ASSERT(counts != NULL);
+ ASSERT(codelengths != NULL);
+
+ for (int i = 0; i < codelength_count; i++) {
+ counts[codelengths[i]]++;
+ }
+ counts[0] = 0;
+
+ uint16_t next_code[MAX_HUFFMAN_CODE_LENGTH + 1] = {0};
+ for (int i = 1; i < MAX_HUFFMAN_CODE_LENGTH; i++) {
+ next_code[i + 1] = next_code[i] + counts[i];
+ }
+
+ for (int i = 0; i < codelength_count; i++) {
+ if (codelengths[i] == 0) {
+ continue;
+ }
+
+ symbols[next_code[codelengths[i]]++] = i;
+ }
+}
+void CodeLengthTable_build_from_codelengths(
+ CodeLengthTable *table,
+ uint16_t codelength_codelengths[CODE_LENGTH_ALPHABET_MAX_SYMBOL_COUNT]) {
+ ASSERT(table != NULL);
+ ASSERT(codelength_codelengths != NULL);
+ static const uint8_t CODELENGTH_MAPPING[] = {
+ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+ uint16_t mapped_codelengths[CODE_LENGTH_ALPHABET_MAX_SYMBOL_COUNT] = {0};
+ for (int i = 0; i < CODE_LENGTH_ALPHABET_MAX_SYMBOL_COUNT; i++) {
+ mapped_codelengths[CODELENGTH_MAPPING[i]] = codelength_codelengths[i];
+ }
+
+ build_huffman_table_from_codelengths(table->symbols, table->counts,
+ mapped_codelengths,
+ CODE_LENGTH_ALPHABET_MAX_SYMBOL_COUNT);
+}
+
+void build_codelength_table(CodeLengthTable *codelength_table,
+ Bitstream *bitstream, const uint8_t hclen) {
+ ASSERT(codelength_table != NULL);
+ ASSERT(bitstream != NULL);
+
+ uint16_t codelengths_codelengths[CODE_LENGTH_ALPHABET_MAX_SYMBOL_COUNT] = {0};
+ for (int i = 0; i < hclen; i++) {
+ codelengths_codelengths[i] = Bitstream_next_bits(bitstream, 3);
+ }
+
+ CodeLengthTable_build_from_codelengths(codelength_table,
+ codelengths_codelengths);
+}
+
+bool deflate_decompress_(Bitstream *bitstream,
+ const LengthLiteralTable *length_literal_table,
+ const DistanceTable *distance_table,
+ OutputBuffer *output) {
+ ASSERT(bitstream != NULL);
+ ASSERT(output != NULL);
+ ASSERT(length_literal_table != NULL);
+ ASSERT(distance_table != NULL);
+ static const uint16_t length_size_base[29] = {
+ 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27,
+ 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258};
+ static const uint16_t length_extra_bits[29] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
+ 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
+ 4, 4, 4, 4, 5, 5, 5, 5, 0};
+ static const uint16_t distance_offset_base[30] = {
+ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25,
+ 33, 49, 65, 97, 129, 193, 257, 385, 513, 769,
+ 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577};
+ static const uint16_t distance_extra_bits[30] = {
+ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6,
+ 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13};
+
+ int symbol = 0;
+ while (symbol != 256) {
+ symbol = huffman_table_decode(length_literal_table->symbols,
+ length_literal_table->counts, bitstream);
+ if (symbol < 0) {
+ LOG0("Unknown symbol decoded");
+ return false;
+ }
+ if (symbol < 256) {
+ OutputBuffer_push(output, (char)symbol);
+ } else if (symbol > 256) {
+ symbol -= 257;
+ int length = length_size_base[symbol] +
+ Bitstream_next_bits(bitstream, length_extra_bits[symbol]);
+ symbol = huffman_table_decode(distance_table->symbols,
+ distance_table->counts, bitstream);
+ if (symbol < 0) {
+ return false;
+ }
+
+ int distance =
+ distance_offset_base[symbol] +
+ Bitstream_next_bits(bitstream, distance_extra_bits[symbol]);
+ while (length > 0) {
+ size_t output_buffer_length = OutputBuffer_length(output);
+ OutputBuffer_push(
+ output, (char)output->buffer[output_buffer_length - distance]);
+ length--;
+ }
+ }
+ }
+
+ return true;
+}
+
+bool deflate_decompress(Bitstream *bitstream, OutputBuffer *output) {
+ ASSERT(bitstream != NULL);
+ ASSERT(output != NULL);
+
+ uint8_t b_final = 0;
+ while (!b_final) {
+ LOG0("Parse deflate block");
+ b_final = Bitstream_next_bits(bitstream, BFINAL_LENGTH_BITS);
+ const uint8_t b_type = Bitstream_next_bits(bitstream, BTYPE_LENGTH_BITS);
+ if (b_type == DeflateBlockType_NoCompression) {
+ LOG0("Uncompressed deflate blocks aren't supported");
+ abort();
+ } else {
+ if (b_type == DeflateBlockType_FixedHuffman) {
+ static bool are_tables_blank = true;
+ static LengthLiteralTable length_literal_table = {0};
+ static DistanceTable distance_table = {0};
+
+ uint16_t lenlit_codelengths[FIXED_LENGTH_LITERAL_CODE_COUNT];
+
+ if (are_tables_blank) {
+ for (int symbol = 0; symbol < 144; symbol++) {
+ lenlit_codelengths[symbol] = 8;
+ }
+
+ for (int symbol = 144; symbol < 256; symbol++) {
+ lenlit_codelengths[symbol] = 9;
+ }
+
+ for (int symbol = 256; symbol < 280; symbol++) {
+ lenlit_codelengths[symbol] = 7;
+ }
+
+ for (int symbol = 280; symbol < FIXED_LENGTH_LITERAL_CODE_COUNT;
+ symbol++) {
+ lenlit_codelengths[symbol] = 8;
+ }
+
+ build_huffman_table_from_codelengths(
+ length_literal_table.symbols, length_literal_table.counts,
+ lenlit_codelengths, FIXED_LENGTH_LITERAL_CODE_COUNT);
+
+ uint16_t distance_codelengths[MAX_DISTANCE_CODES] = {0};
+ for (int symbol = 0; symbol < MAX_DISTANCE_CODES; symbol++) {
+ distance_codelengths[symbol] = 5;
+ }
+
+ build_huffman_table_from_codelengths(
+ distance_table.symbols, distance_table.counts,
+ distance_codelengths, MAX_DISTANCE_CODES);
+ are_tables_blank = false;
+ }
+ deflate_decompress_(bitstream, &length_literal_table, &distance_table,
+ output);
+ } else {
+ LengthLiteralTable length_literal_table = {0};
+ DistanceTable distance_table = {0};
+ const uint16_t hlit = Bitstream_next_bits(bitstream, 5) + 257;
+ const uint8_t hdist = Bitstream_next_bits(bitstream, 5) + 1;
+ const uint8_t hclen = Bitstream_next_bits(bitstream, 4) + 4;
+ LOGN("Dynamic table:\n"
+ "hlit: %d\n"
+ "hdist: %d\n"
+ "hclen: %d",
+ hlit, hdist, hclen);
+ CodeLengthTable codelength_table = {0};
+ build_codelength_table(&codelength_table, bitstream, hclen);
+
+ uint16_t lenlit_dist_codelengths[MAX_LENGTH_CODES +
+ MAX_DISTANCE_CODES] = {0};
+ int index = 0;
+ while (index < hlit + hdist) {
+ int symbol = huffman_table_decode(codelength_table.symbols,
+ codelength_table.counts, bitstream);
+ if (symbol < 0) {
+ LOG0("Unknown symbol decoded using huffman table");
+ return false;
+ } else if (symbol < 16) {
+ lenlit_dist_codelengths[index++] = symbol;
+ } else {
+ int repeat = 0;
+ int codelength = 0;
+ if (symbol == 16) {
+ repeat = 3 + Bitstream_next_bits(bitstream, 2);
+ codelength = lenlit_dist_codelengths[index - 1];
+ } else if (symbol == 17) {
+ repeat = 3 + Bitstream_next_bits(bitstream, 3);
+ } else {
+ repeat = 11 + Bitstream_next_bits(bitstream, 7);
+ }
+
+ for (int i = 0; i < repeat; i++) {
+ lenlit_dist_codelengths[index++] = codelength;
+ }
+ }
+ }
+
+ // The block must have an end
+ ASSERT(lenlit_dist_codelengths[256] > 0);
+
+ build_huffman_table_from_codelengths(length_literal_table.symbols,
+ length_literal_table.counts,
+ lenlit_dist_codelengths, hlit);
+ build_huffman_table_from_codelengths(
+ distance_table.symbols, distance_table.counts,
+ lenlit_dist_codelengths + hlit, hdist);
+ deflate_decompress_(bitstream, &length_literal_table, &distance_table,
+ output);
+ }
+ }
+ }
+ return true;
+}
+
+char *zlib_decompress(const uint8_t *compressed_data_buffer,
+ const size_t compressed_data_length) {
ASSERT(compressed_data_buffer != NULL);
Bitstream bitstream;
@@ -37,20 +363,32 @@ bool zlib_decompress(const uint8_t *compressed_data_buffer,
return false;
}
- uint32_t dictionary_identifier = 0;
if (fdict != 0) {
- uint16_t dictionary_identifier_0 = Bitstream_next_bits(&bitstream, 16);
- uint16_t dictionary_identifier_1 = Bitstream_next_bits(&bitstream, 16);
- dictionary_identifier =
- (dictionary_identifier_0 << 0) + (dictionary_identifier_1 << 16);
- LOGN("Dictionary identifier: %d", dictionary_identifier);
+ LOG0("preset dictionnaries are unsupported");
+ return NULL;
}
- if (!deflate_decompress(&bitstream)) {
- return false;
+ OutputBuffer output;
+ OutputBuffer_init(&output);
+ if (!deflate_decompress(&bitstream, &output)) {
+ LOG0("deflate decompression failed");
+ return NULL;
}
- // TODO adler32 checksum
+ int adler32 = bitstream.data[bitstream.current_byte_index + 4] +
+ (bitstream.data[bitstream.current_byte_index + 3] << 8) +
+ (bitstream.data[bitstream.current_byte_index + 2] << 16) +
+ (bitstream.data[bitstream.current_byte_index + 1] << 24);
+ LOGN("Adler32 checksum: %d", adler32);
- return true;
+ uint32_t a = 1;
+ uint32_t b = 0;
+ for (size_t i = 0; i < output.len; i++) {
+ a = (a + (unsigned char)output.buffer[i]) % 65521;
+ b = (b + a) % 65521;
+ }
+ int computed_adler32 = (b << 16) | a;
+ LOGN("Computed Adler32 checksum: %d", computed_adler32);
+
+ return output.buffer;
}
diff --git a/lisiblepng/src/deflate.h b/lisiblepng/src/deflate.h
index e40f816..77100c6 100644
--- a/lisiblepng/src/deflate.h
+++ b/lisiblepng/src/deflate.h
@@ -5,7 +5,11 @@
#include <stdint.h>
#include <stdlib.h>
-bool zlib_decompress(const uint8_t *compressed_data_buffer,
- const size_t compressed_data_length);
+/// Decompresses zlib compressed data as defined by RFC 1950
+///
+/// @return A pointer to the decompressed data, the caller is responsible to
+/// deallocate it using free(). In case of error, NULL is returned.
+char *zlib_decompress(const uint8_t *compressed_data_buffer,
+ const size_t compressed_data_length);
#endif // LISIBLE_PNG_DEFLATE_H
diff --git a/lisiblepng/src/lisiblepng.c b/lisiblepng/src/lisiblepng.c
index 3dfda24..5ccd300 100644
--- a/lisiblepng/src/lisiblepng.c
+++ b/lisiblepng/src/lisiblepng.c
@@ -76,6 +76,9 @@ void DeflateDecompressor_init(DeflateDecompressor *ctx, FILE *stream) {
ASSERT(ctx != NULL);
ASSERT(stream != NULL);
ctx->stream = stream;
+#ifdef LISIBLE_PNG_COMPUTE_CRC
+ ctx->computed_crc = 0xFFFFFFFFu;
+#endif // LISIBLE_PNG_COMPUTE_CRC
}
void ParsingContext_crc_reset(DeflateDecompressor *ctx) {
Go back to lisible.xyz