diff options
| author | Clement Sibille <clements+git@lisible.xyz> | 2024-02-27 17:23:33 +0900 | 
|---|---|---|
| committer | Clement Sibille <clements+git@lisible.xyz> | 2024-02-27 17:23:33 +0900 | 
| commit | e1e5b4e92bcd460b43ce1b852560751b6525593e (patch) | |
| tree | 4b8bef35e0621240c6531ee28abb55a42e1a70f4 | |
| parent | 228854f8672d5015550ab6f6c76e806d099bb4db (diff) | |
Implement Deflate decompression
This patch adds the decompression code for zlib compressed data streams
that are compressed using Deflate based on RFC-1950 and RFC-1951.
This implementation lacks support for zlib prefix dictionaries and for
non-compressed Deflate blocks as these are less common.
| -rw-r--r-- | lisiblepng/src/bitstream.c | 3 | ||||
| -rw-r--r-- | lisiblepng/src/deflate.c | 364 | ||||
| -rw-r--r-- | lisiblepng/src/deflate.h | 8 | ||||
| -rw-r--r-- | lisiblepng/src/lisiblepng.c | 3 | 
4 files changed, 361 insertions, 17 deletions
diff --git a/lisiblepng/src/bitstream.c b/lisiblepng/src/bitstream.c index b34a890..e7ddb79 100644 --- a/lisiblepng/src/bitstream.c +++ b/lisiblepng/src/bitstream.c @@ -30,8 +30,7 @@ void Bitstream_skip(Bitstream *bitstream, size_t bit_count) {  uint16_t Bitstream_next_bits(Bitstream *bitstream, int bit_count) {    ASSERT(bitstream != NULL);    ASSERT(bit_count <= 16); -  ASSERT(bitstream->current_byte_index + -             (bitstream->current_bit_offset + bit_count) % 8 <= +  ASSERT((bitstream->current_bit_offset + (size_t)bit_count) / 8 <=           bitstream->data_size);    int bit_to_read = bit_count; diff --git a/lisiblepng/src/deflate.c b/lisiblepng/src/deflate.c index 3f8ce2e..d894a35 100644 --- a/lisiblepng/src/deflate.c +++ b/lisiblepng/src/deflate.c @@ -1,6 +1,7 @@  #include "deflate.h"  #include "assert.h"  #include "bitstream.h" +#include <string.h>  #define CM_LENGTH_BITS 4  #define CINFO_LENGTH_BITS 4 @@ -8,10 +9,335 @@  #define FDICT_LENGTH_BITS 1  #define FLEVEL_LENGTH_BITS 2 -bool deflate_decompress(Bitstream *bitstream); +#define BFINAL_LENGTH_BITS 1 +#define BTYPE_LENGTH_BITS 2 -bool zlib_decompress(const uint8_t *compressed_data_buffer, -                     const size_t compressed_data_length) { +#define CODE_LENGTH_ALPHABET_MAX_SYMBOL_COUNT 19 +#define MAX_HUFFMAN_CODE_LENGTH 15 +#define MAX_LENGTH_CODES 286 +#define MAX_DISTANCE_CODES 32 +#define FIXED_LENGTH_LITERAL_CODE_COUNT 288 + +typedef enum { +  DeflateBlockType_NoCompression, +  DeflateBlockType_FixedHuffman, +  DeflateBlockType_DynamicHuffman, +  DeflateBlockType_Error +} DeflateBlockType; + +typedef struct { +  uint16_t counts[CODE_LENGTH_ALPHABET_MAX_SYMBOL_COUNT]; +  uint16_t symbols[CODE_LENGTH_ALPHABET_MAX_SYMBOL_COUNT]; +} CodeLengthTable; + +typedef struct { +  uint16_t counts[MAX_LENGTH_CODES]; +  uint16_t symbols[MAX_LENGTH_CODES]; +} LengthLiteralTable; + +typedef struct { +  uint16_t counts[MAX_DISTANCE_CODES]; +  uint16_t symbols[MAX_DISTANCE_CODES]; +} DistanceTable; + +#define DEFLATE_OUTPUT_BUFFER_INITIAL_CAPACITY (1024 * 256) + +typedef struct { +  char *buffer; +  size_t len; +  size_t cap; +} OutputBuffer; + +void OutputBuffer_init(OutputBuffer *output_buffer) { +  ASSERT(output_buffer != NULL); +  output_buffer->buffer = calloc(DEFLATE_OUTPUT_BUFFER_INITIAL_CAPACITY, 1); +  if (!output_buffer->buffer) { +    LOG0("Couldn't allocate deflate output buffer (out of memory?)"); +    abort(); +  } +  output_buffer->cap = DEFLATE_OUTPUT_BUFFER_INITIAL_CAPACITY; +  output_buffer->len = 0; +} + +void OutputBuffer_expand(OutputBuffer *output_buffer) { +  ASSERT(output_buffer != NULL); +  size_t new_cap = output_buffer->cap * 2; +  output_buffer->buffer = realloc(output_buffer->buffer, new_cap); +  if (!output_buffer->buffer) { +    LOG0("Couldn't reallocate deflate output buffer (out of memory?)"); +    abort(); +  } + +  output_buffer->cap = new_cap; +} + +void OutputBuffer_push(OutputBuffer *output_buffer, char byte) { +  ASSERT(output_buffer != NULL); +  if (output_buffer->len == output_buffer->cap) { +    OutputBuffer_expand(output_buffer); +  } + +  output_buffer->buffer[output_buffer->len++] = byte; +} + +size_t OutputBuffer_length(const OutputBuffer *output_buffer) { +  ASSERT(output_buffer != NULL); +  return output_buffer->len; +} + +int huffman_table_decode(const uint16_t *symbols, const uint16_t *counts, +                         Bitstream *bitstream) { +  ASSERT(symbols != NULL); +  ASSERT(counts != NULL); +  ASSERT(bitstream != NULL); +  int code = 0; +  int index = 0; +  int first = 0; +  for (int i = 1; i <= MAX_HUFFMAN_CODE_LENGTH; i++) { +    code |= Bitstream_next_bits(bitstream, 1); +    int count = counts[i]; +    if (code - count < first) { +      return symbols[index + (code - first)]; +    } + +    index += count; +    first += count; +    first <<= 1; +    code <<= 1; +  } + +  return -1; +} + +void build_huffman_table_from_codelengths(uint16_t *symbols, uint16_t *counts, +                                          const uint16_t *codelengths, +                                          uint16_t codelength_count) { +  ASSERT(symbols != NULL); +  ASSERT(counts != NULL); +  ASSERT(codelengths != NULL); + +  for (int i = 0; i < codelength_count; i++) { +    counts[codelengths[i]]++; +  } +  counts[0] = 0; + +  uint16_t next_code[MAX_HUFFMAN_CODE_LENGTH + 1] = {0}; +  for (int i = 1; i < MAX_HUFFMAN_CODE_LENGTH; i++) { +    next_code[i + 1] = next_code[i] + counts[i]; +  } + +  for (int i = 0; i < codelength_count; i++) { +    if (codelengths[i] == 0) { +      continue; +    } + +    symbols[next_code[codelengths[i]]++] = i; +  } +} +void CodeLengthTable_build_from_codelengths( +    CodeLengthTable *table, +    uint16_t codelength_codelengths[CODE_LENGTH_ALPHABET_MAX_SYMBOL_COUNT]) { +  ASSERT(table != NULL); +  ASSERT(codelength_codelengths != NULL); +  static const uint8_t CODELENGTH_MAPPING[] = { +      16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; +  uint16_t mapped_codelengths[CODE_LENGTH_ALPHABET_MAX_SYMBOL_COUNT] = {0}; +  for (int i = 0; i < CODE_LENGTH_ALPHABET_MAX_SYMBOL_COUNT; i++) { +    mapped_codelengths[CODELENGTH_MAPPING[i]] = codelength_codelengths[i]; +  } + +  build_huffman_table_from_codelengths(table->symbols, table->counts, +                                       mapped_codelengths, +                                       CODE_LENGTH_ALPHABET_MAX_SYMBOL_COUNT); +} + +void build_codelength_table(CodeLengthTable *codelength_table, +                            Bitstream *bitstream, const uint8_t hclen) { +  ASSERT(codelength_table != NULL); +  ASSERT(bitstream != NULL); + +  uint16_t codelengths_codelengths[CODE_LENGTH_ALPHABET_MAX_SYMBOL_COUNT] = {0}; +  for (int i = 0; i < hclen; i++) { +    codelengths_codelengths[i] = Bitstream_next_bits(bitstream, 3); +  } + +  CodeLengthTable_build_from_codelengths(codelength_table, +                                         codelengths_codelengths); +} + +bool deflate_decompress_(Bitstream *bitstream, +                         const LengthLiteralTable *length_literal_table, +                         const DistanceTable *distance_table, +                         OutputBuffer *output) { +  ASSERT(bitstream != NULL); +  ASSERT(output != NULL); +  ASSERT(length_literal_table != NULL); +  ASSERT(distance_table != NULL); +  static const uint16_t length_size_base[29] = { +      3,  4,  5,  6,  7,  8,  9,  10, 11,  13,  15,  17,  19,  23, 27, +      31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258}; +  static const uint16_t length_extra_bits[29] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, +                                                 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, +                                                 4, 4, 4, 4, 5, 5, 5, 5, 0}; +  static const uint16_t distance_offset_base[30] = { +      1,    2,    3,    4,    5,    7,    9,    13,    17,    25, +      33,   49,   65,   97,   129,  193,  257,  385,   513,   769, +      1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577}; +  static const uint16_t distance_extra_bits[30] = { +      0, 0, 0, 0, 1, 1, 2, 2,  3,  3,  4,  4,  5,  5,  6, +      6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13}; + +  int symbol = 0; +  while (symbol != 256) { +    symbol = huffman_table_decode(length_literal_table->symbols, +                                  length_literal_table->counts, bitstream); +    if (symbol < 0) { +      LOG0("Unknown symbol decoded"); +      return false; +    } +    if (symbol < 256) { +      OutputBuffer_push(output, (char)symbol); +    } else if (symbol > 256) { +      symbol -= 257; +      int length = length_size_base[symbol] + +                   Bitstream_next_bits(bitstream, length_extra_bits[symbol]); +      symbol = huffman_table_decode(distance_table->symbols, +                                    distance_table->counts, bitstream); +      if (symbol < 0) { +        return false; +      } + +      int distance = +          distance_offset_base[symbol] + +          Bitstream_next_bits(bitstream, distance_extra_bits[symbol]); +      while (length > 0) { +        size_t output_buffer_length = OutputBuffer_length(output); +        OutputBuffer_push( +            output, (char)output->buffer[output_buffer_length - distance]); +        length--; +      } +    } +  } + +  return true; +} + +bool deflate_decompress(Bitstream *bitstream, OutputBuffer *output) { +  ASSERT(bitstream != NULL); +  ASSERT(output != NULL); + +  uint8_t b_final = 0; +  while (!b_final) { +    LOG0("Parse deflate block"); +    b_final = Bitstream_next_bits(bitstream, BFINAL_LENGTH_BITS); +    const uint8_t b_type = Bitstream_next_bits(bitstream, BTYPE_LENGTH_BITS); +    if (b_type == DeflateBlockType_NoCompression) { +      LOG0("Uncompressed deflate blocks aren't supported"); +      abort(); +    } else { +      if (b_type == DeflateBlockType_FixedHuffman) { +        static bool are_tables_blank = true; +        static LengthLiteralTable length_literal_table = {0}; +        static DistanceTable distance_table = {0}; + +        uint16_t lenlit_codelengths[FIXED_LENGTH_LITERAL_CODE_COUNT]; + +        if (are_tables_blank) { +          for (int symbol = 0; symbol < 144; symbol++) { +            lenlit_codelengths[symbol] = 8; +          } + +          for (int symbol = 144; symbol < 256; symbol++) { +            lenlit_codelengths[symbol] = 9; +          } + +          for (int symbol = 256; symbol < 280; symbol++) { +            lenlit_codelengths[symbol] = 7; +          } + +          for (int symbol = 280; symbol < FIXED_LENGTH_LITERAL_CODE_COUNT; +               symbol++) { +            lenlit_codelengths[symbol] = 8; +          } + +          build_huffman_table_from_codelengths( +              length_literal_table.symbols, length_literal_table.counts, +              lenlit_codelengths, FIXED_LENGTH_LITERAL_CODE_COUNT); + +          uint16_t distance_codelengths[MAX_DISTANCE_CODES] = {0}; +          for (int symbol = 0; symbol < MAX_DISTANCE_CODES; symbol++) { +            distance_codelengths[symbol] = 5; +          } + +          build_huffman_table_from_codelengths( +              distance_table.symbols, distance_table.counts, +              distance_codelengths, MAX_DISTANCE_CODES); +          are_tables_blank = false; +        } +        deflate_decompress_(bitstream, &length_literal_table, &distance_table, +                            output); +      } else { +        LengthLiteralTable length_literal_table = {0}; +        DistanceTable distance_table = {0}; +        const uint16_t hlit = Bitstream_next_bits(bitstream, 5) + 257; +        const uint8_t hdist = Bitstream_next_bits(bitstream, 5) + 1; +        const uint8_t hclen = Bitstream_next_bits(bitstream, 4) + 4; +        LOGN("Dynamic table:\n" +             "hlit: %d\n" +             "hdist: %d\n" +             "hclen: %d", +             hlit, hdist, hclen); +        CodeLengthTable codelength_table = {0}; +        build_codelength_table(&codelength_table, bitstream, hclen); + +        uint16_t lenlit_dist_codelengths[MAX_LENGTH_CODES + +                                         MAX_DISTANCE_CODES] = {0}; +        int index = 0; +        while (index < hlit + hdist) { +          int symbol = huffman_table_decode(codelength_table.symbols, +                                            codelength_table.counts, bitstream); +          if (symbol < 0) { +            LOG0("Unknown symbol decoded using huffman table"); +            return false; +          } else if (symbol < 16) { +            lenlit_dist_codelengths[index++] = symbol; +          } else { +            int repeat = 0; +            int codelength = 0; +            if (symbol == 16) { +              repeat = 3 + Bitstream_next_bits(bitstream, 2); +              codelength = lenlit_dist_codelengths[index - 1]; +            } else if (symbol == 17) { +              repeat = 3 + Bitstream_next_bits(bitstream, 3); +            } else { +              repeat = 11 + Bitstream_next_bits(bitstream, 7); +            } + +            for (int i = 0; i < repeat; i++) { +              lenlit_dist_codelengths[index++] = codelength; +            } +          } +        } + +        // The block must have an end +        ASSERT(lenlit_dist_codelengths[256] > 0); + +        build_huffman_table_from_codelengths(length_literal_table.symbols, +                                             length_literal_table.counts, +                                             lenlit_dist_codelengths, hlit); +        build_huffman_table_from_codelengths( +            distance_table.symbols, distance_table.counts, +            lenlit_dist_codelengths + hlit, hdist); +        deflate_decompress_(bitstream, &length_literal_table, &distance_table, +                            output); +      } +    } +  } +  return true; +} + +char *zlib_decompress(const uint8_t *compressed_data_buffer, +                      const size_t compressed_data_length) {    ASSERT(compressed_data_buffer != NULL);    Bitstream bitstream; @@ -37,20 +363,32 @@ bool zlib_decompress(const uint8_t *compressed_data_buffer,      return false;    } -  uint32_t dictionary_identifier = 0;    if (fdict != 0) { -    uint16_t dictionary_identifier_0 = Bitstream_next_bits(&bitstream, 16); -    uint16_t dictionary_identifier_1 = Bitstream_next_bits(&bitstream, 16); -    dictionary_identifier = -        (dictionary_identifier_0 << 0) + (dictionary_identifier_1 << 16); -    LOGN("Dictionary identifier: %d", dictionary_identifier); +    LOG0("preset dictionnaries are unsupported"); +    return NULL;    } -  if (!deflate_decompress(&bitstream)) { -    return false; +  OutputBuffer output; +  OutputBuffer_init(&output); +  if (!deflate_decompress(&bitstream, &output)) { +    LOG0("deflate decompression failed"); +    return NULL;    } -  // TODO adler32 checksum +  int adler32 = bitstream.data[bitstream.current_byte_index + 4] + +                (bitstream.data[bitstream.current_byte_index + 3] << 8) + +                (bitstream.data[bitstream.current_byte_index + 2] << 16) + +                (bitstream.data[bitstream.current_byte_index + 1] << 24); +  LOGN("Adler32 checksum: %d", adler32); -  return true; +  uint32_t a = 1; +  uint32_t b = 0; +  for (size_t i = 0; i < output.len; i++) { +    a = (a + (unsigned char)output.buffer[i]) % 65521; +    b = (b + a) % 65521; +  } +  int computed_adler32 = (b << 16) | a; +  LOGN("Computed Adler32 checksum: %d", computed_adler32); + +  return output.buffer;  } diff --git a/lisiblepng/src/deflate.h b/lisiblepng/src/deflate.h index e40f816..77100c6 100644 --- a/lisiblepng/src/deflate.h +++ b/lisiblepng/src/deflate.h @@ -5,7 +5,11 @@  #include <stdint.h>  #include <stdlib.h> -bool zlib_decompress(const uint8_t *compressed_data_buffer, -                     const size_t compressed_data_length); +/// Decompresses zlib compressed data as defined by RFC 1950 +/// +/// @return A pointer to the decompressed data, the caller is responsible to +/// deallocate it using free(). In case of error, NULL is returned. +char *zlib_decompress(const uint8_t *compressed_data_buffer, +                      const size_t compressed_data_length);  #endif // LISIBLE_PNG_DEFLATE_H diff --git a/lisiblepng/src/lisiblepng.c b/lisiblepng/src/lisiblepng.c index 3dfda24..5ccd300 100644 --- a/lisiblepng/src/lisiblepng.c +++ b/lisiblepng/src/lisiblepng.c @@ -76,6 +76,9 @@ void DeflateDecompressor_init(DeflateDecompressor *ctx, FILE *stream) {    ASSERT(ctx != NULL);    ASSERT(stream != NULL);    ctx->stream = stream; +#ifdef LISIBLE_PNG_COMPUTE_CRC +  ctx->computed_crc = 0xFFFFFFFFu; +#endif // LISIBLE_PNG_COMPUTE_CRC  }  void ParsingContext_crc_reset(DeflateDecompressor *ctx) {  | 
