xtool/contrib/grittibanzli/grittibanzli.cc

// Copyright 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "grittibanzli.h"

#include <limits.h>

#ifdef GRITTIBANZLI_PRINT_DEBUG
#include <iostream>
#endif  // GRITTIBANZLI_PRINT_DEBUG
#include <algorithm>

namespace grittibanzli {

namespace {

#define UNUSED(x) (void)(x)

bool PrintError(int line) {
#ifdef GRITTIBANZLI_PRINT_DEBUG
  std::cout << "error on line: " << line << std::endl;
#else  // GRITTIBANZLI_PRINT_DEBUG
  UNUSED(line);
#endif  // GRITTIBANZLI_PRINT_DEBUG
  return false;
}

#define FAILURE PrintError(__LINE__)

// returns index of last element that is <= value, or 0 if none
int BinarySearch(int value, const std::vector<int>& values) {
  if (value > values.back()) return values.size() - 1;
  int result = std::lower_bound(values.begin(),
      values.end(), value) - values.begin();
  if (result > 0 && values[result] > value) result--;
  return result;
}

int PeekBit(const uint8_t* data, size_t size, size_t bitpos) {
  UNUSED(size);
  return (data[bitpos >> 3] >> (bitpos & 7)) & 1;
}

int ReadBit(const uint8_t* data, size_t size, size_t* bitpos) {
  int result = PeekBit(data, size, *bitpos);
  (*bitpos)++;
  return result;
}

int ReadBits(int num, const uint8_t* data, size_t size, size_t* bitpos) {
  int result = 0;
  for (int i = 0; i < num; i++) {
    result |= ReadBit(data, size, bitpos) << i;
  }
  return result;
}

int ReadBitsInv(int num, const uint8_t* data, size_t size, size_t* bitpos) {
  int result = 0;
  for (int i = 0; i < num; i++) {
    result = (result << 1) | ReadBit(data, size, bitpos);
  }
  return result;
}

int PeekBitsInvSafe(int num, const uint8_t* data, size_t size, size_t bitpos) {
  int safe_num = std::min<int>(num, (size << 3) - bitpos);
  return ReadBitsInv(safe_num, data, size, &bitpos) << (num - safe_num);
}

void AppendBit(int bit, std::vector<uint8_t>* data, size_t* bitpos) {
  int m = (*bitpos) & 7;
  if (m == 0) {
    data->push_back(0);
  }
  data->back() |= bit << m;
  (*bitpos)++;
}

void AppendBits(int bits, int num, std::vector<uint8_t>* data, size_t* bitpos) {
  for (int i = 0; i < num; i++) {
    AppendBit((bits >> i) & 1, data, bitpos);
  }
}

void AppendBitsInv(int bits, int num, std::vector<uint8_t>* data,
    size_t* bitpos) {
  for (int i = 0; i < num; i++) {
    AppendBit((bits >> (num - 1 - i)) & 1, data, bitpos);
  }
}

////////////////////////////////////////////////////////////////////////////////

struct BlockChoices {
  int type;

  // amount of LZ77 references or literals (for type 0, amount of bytes)
  int blocksize;

  std::vector<int> lengths;  // 1 means literal
  std::vector<int> dists;  // 0 means literal

  // huffman trees
  int hlit;
  int hdist;
  int hclen;
  std::vector<int> ht_lengths;  // code length code lengths
  std::vector<int> all_rle;
  std::vector<int> all_extra;

  // Blocks of btype 0 can have unspecified filler bits in the first byte, and
  // bfinal blocks of btype 1 or 2 can have unspecified filler bits in the last
  // byte. This are bits between a not-fully used byte and byte-aligned data.
  // The deflate spec does not require these to be zero but ignores them, so
  // their values must be remembered by grittibanzli to reproduce them
  // exactly.
  int filler_bits;
};

// choices of a single deflate stream
struct DeflateChoices {
  // The deflate stream itself, as blocks
  std::vector<BlockChoices> blocks;
};

////////////////////////////////////////////////////////////////////////////////

// Huffman coding

// Given huffman code lengths, gives the huffman symbol bits, as in the deflate
// specification
bool CodeLengthsToSymbols(const std::vector<int>& lengths, int maxbits,
    std::vector<int>* symbols) {
  std::vector<int> bl_count(maxbits + 1, 0);
  std::vector<int> next_code(maxbits + 1);
  int code;
  symbols->resize(lengths.size(), 0);

  // 1) Count the number of codes for each code length. Let bl_count[N] be the
  // number of codes of length N, N >= 1.
  for (size_t i = 0; i < lengths.size(); i++) {
    if (lengths[i] > maxbits) return FAILURE;
    bl_count[lengths[i]]++;
  }
  // 2) Find the numerical value of the smallest code for each code length.
  code = 0;
  bl_count[0] = 0;
  for (int bits = 1; bits <= maxbits; bits++) {
    code = (code + bl_count[bits - 1]) << 1;
    // Impossible huffman tree
    if (code + bl_count[bits] >= (1 << bits) + 1) return FAILURE;
    next_code[bits] = code;
  }
  // 3) Assign numerical values to all codes, using consecutive values for all
  // codes of the same length with the base values determined at step 2.
  for (size_t i = 0; i < lengths.size(); i++) {
    int len = lengths[i];
    if (len != 0) {
      (*symbols)[i] = next_code[len];
      next_code[len]++;
    }
  }

  return true;
}

static const int kRootBits = 8;

// Makes huffman table for decoding, based on "root" bits
bool DecodableHuffmanTree(const std::vector<int>& lengths,
    int maxbits, std::vector<std::pair<int, int>>* result) {
  std::vector<int> symbols;
  if (!CodeLengthsToSymbols(lengths, maxbits, &symbols)) {
    return FAILURE;
  }
  int rootnum = (1 << kRootBits);
  int rootmask = rootnum - 1;
  *result = std::vector<std::pair<int, int>>(rootnum, {-1, -1});

  // Longest symbol length for symbols with size > kRootBits
  std::vector<int> maxlengths(rootnum, -1);
  for (size_t i = 0; i < symbols.size(); i++) {
    if (lengths[i] > kRootBits) {
      int prefix = (symbols[i] >> (lengths[i] - kRootBits)) & rootmask;
      maxlengths[prefix] = std::max(maxlengths[prefix], lengths[i]);
    }
  }

  for (size_t i = 0; i < symbols.size(); i++) {
    if (lengths[i] == 0) {
      continue;
    } else if (lengths[i] == kRootBits) {
      // Symbol bits exactly matches table bits
      (*result)[symbols[i]] = {lengths[i], i};
    } else if (lengths[i] < kRootBits) {
      // Multiple root table entries with the same prefix for this symbol.
      int shift = (kRootBits - lengths[i]);
      int num = 1 << shift;
      for (int j = 0; j < num; j++) {
        int b = (symbols[i] << shift) + j;
        if (!(b <= rootmask)) return FAILURE;
        (*result)[b] = {lengths[i], i};
      }
    } else {
      // kRootBits is now just a prefix.
      int prefix = (symbols[i] >> (lengths[i] - kRootBits)) & rootmask;
      int maxlen = maxlengths[prefix];
      int num = 1 << (maxlen - kRootBits);
      int mask = num - 1;
      int pointer = (*result)[prefix].second;
      if (pointer == -1) {
        pointer = result->size() - prefix;
        result->resize(result->size() + num, {-1, -1});
        (*result)[prefix] = {maxlen, pointer};
      }
      int postfix = (symbols[i] << (maxlen - lengths[i])) & mask;
      int index = prefix + pointer + postfix;
      if (lengths[i] == maxlen) {
        // Symbol bits exactly match subtable bits
        (*result)[index] = {lengths[i], i};
      } else {
        if (!(lengths[i] < maxlen)) return FAILURE;
        int shift = (maxlen - lengths[i]);
        int num2 = 1 << shift;
        for (int j = 0; j < num2; j++) {
          (*result)[index + j] = {lengths[i], i};
        }
      }
    }
  }
  return true;
}


// Decodes a single huffman symbol. Returns -1 on error.
int HuffmanDecodeSymbol(const uint8_t* compressed, size_t size,
    const std::vector<std::pair<int, int>>& tree, size_t* bitpos) {
  int index = PeekBitsInvSafe(kRootBits, compressed, size, *bitpos);

  if (tree[index].first <= kRootBits) {
    *bitpos += tree[index].first;
    if (*bitpos > size * 8) return -1;
    return tree[index].second;
  }

  *bitpos += kRootBits;
  if (*bitpos > size * 8) return -1;
  int numbits = tree[index].first - kRootBits;

  int index2 = index + tree[index].second +
      PeekBitsInvSafe(numbits, compressed, size, *bitpos);
  // tree[index2].first is length, tree[index2].second is the value.
  *bitpos += (tree[index2].first - kRootBits);
  if (*bitpos > size * 8) return -1;
  return tree[index2].second;
}


////////////////////////////////////////////////////////////////////////////////

// base lengths for codes 257-285
static const std::vector<int> kLengthBase = {
    3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
    67, 83, 99, 115, 131, 163, 195, 227, 258};

// num extra bits for length codes 257-285
static const std::vector<int> kLengthExtra = {
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5,
    5, 5, 5, 0};

// base distances for distance codes
static const std::vector<int> kDistanceBase = {
    1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513,
    769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577};

// num extra bits for distance codes
static const std::vector<int> kDistanceExtra = {
    0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10,
    11, 11, 12, 12, 13, 13};

// Order of code length alphabet code lengths
static const std::vector<int> kClClOrder = {
    16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};

void GetSymbol(int length, int distance, int* lengthsymbol, int* lengthextra,
    int* lengthextranum, int* distsymbol, int* distextra, int* distextranum) {
  *lengthsymbol = BinarySearch(length, kLengthBase);
  *lengthextra = length - kLengthBase[*lengthsymbol];
  *lengthextranum = kLengthExtra[*lengthsymbol];
  *distsymbol = BinarySearch(distance, kDistanceBase);
  *distextra = distance - kDistanceBase[*distsymbol];
  *distextranum = kDistanceExtra[*distsymbol];
}

////////////////////////////////////////////////////////////////////////////////

bool DeflateEncodeDynamicHuffmanHeader(
    const BlockChoices& block, size_t* bitpos, std::vector<uint8_t>* result) {
  std::vector<int> ht_lengths = block.ht_lengths;
  std::vector<int> ht_symbols;
  if (!CodeLengthsToSymbols(ht_lengths, 7, &ht_symbols)) {
    return FAILURE;
  }
  int hlit = block.hlit;
  int hdist = block.hdist;
  int hclen = block.hclen;

  std::vector<int> all_rle = block.all_rle;
  std::vector<int> all_extra = block.all_extra;

  AppendBits(hlit, 5, result, bitpos);
  AppendBits(hdist, 5, result, bitpos);
  AppendBits(hclen, 4, result, bitpos);
  for (int i = 0; i < hclen + 4; i++) {
    AppendBits(ht_lengths[kClClOrder[i]], 3, result, bitpos);
  }

  for (size_t i = 0; i < all_rle.size(); i++) {
    int s = all_rle[i];
    AppendBitsInv(ht_symbols[s], ht_lengths[s], result, bitpos);
    if (s == 16) AppendBits(all_extra[i], 2, result, bitpos);
    if (s == 17) AppendBits(all_extra[i], 3, result, bitpos);
    if (s == 18) AppendBits(all_extra[i], 7, result, bitpos);
  }

  return true;
}

void MakeFixedCodes(
    std::vector<int>* ll_lengths, std::vector<int>* dist_lengths) {
  for (int i = 0; i <= 143; i++) ll_lengths->push_back(8);
  for (int i = 144; i <= 255; i++) ll_lengths->push_back(9);
  for (int i = 256; i <= 279; i++) ll_lengths->push_back(7);
  for (int i = 280; i <= 287; i++) ll_lengths->push_back(8);
  for (int i = 0; i <= 31; i++) dist_lengths->push_back(5);
}

void MakeDynamicCodesFromBlock(const BlockChoices& block,
    std::vector<int>* ll_lengths, std::vector<int>* dist_lengths) {
  std::vector<int> all_lengths;
  for (size_t i = 0; i < block.all_rle.size(); i++) {
    int s = block.all_rle[i];
    int e = block.all_extra[i];
    if (s < 16) {
      all_lengths.push_back(s);
    } else {
      int rep = 0;
      if (s == 16) {
        rep = e + 3;
        s = all_lengths.empty() ? 0 : all_lengths.back();
      } else if (s == 17) {
        rep = e + 3;
        s = 0;
      } else if (s == 18) {
        rep = e + 11;
        s = 0;
      }
      for (int i = 0; i < rep; i++) all_lengths.push_back(s);
    }
    if (all_lengths.size() >=
        static_cast<size_t>(block.hlit + 257 + block.hdist + 1)) {
      break;
    }
  }
  ll_lengths->assign(
      all_lengths.begin(), all_lengths.begin() + block.hlit + 257);
  dist_lengths->assign(
      all_lengths.begin() + block.hlit + 257, all_lengths.end());
  ll_lengths->resize(288, 0);
  dist_lengths->resize(32, 0);
}


bool DeflateDecodeHuffmanHeader(
    const uint8_t* data, size_t size, size_t* bitpos,
    BlockChoices* blockchoices, std::vector<int>* ll_lengths,
    std::vector<int>* dist_lengths) {
  size_t data_bits = size * 8;
  if (*bitpos + 14 > data_bits) return FAILURE;
  int hlit = ReadBits(5, data, size, bitpos);
  int hdist = ReadBits(5, data, size, bitpos);
  int hclen = ReadBits(4, data, size, bitpos);
  std::vector<int> ht_lengths(19, 0);

  for (int i = 0; i < hclen + 4; i++) {
    if (*bitpos + 3 > data_bits) return FAILURE;
    ht_lengths[kClClOrder[i]] = ReadBits(3, data, size, bitpos);
  }
  std::vector<int> all_lengths;
  std::vector<std::pair<int, int>> ht_tree;
  if (!DecodableHuffmanTree(ht_lengths, 15, &ht_tree)) {
    return FAILURE;
  }
  for (;;) {
    int s = HuffmanDecodeSymbol(data, size, ht_tree, bitpos);
    if (s < 0) return FAILURE;
    blockchoices->all_rle.push_back(s);
    blockchoices->all_extra.push_back(0);
    if (s < 16) {
      all_lengths.push_back(s);
    } else {
      int rep = 0;
      if (s == 16) {
        if (*bitpos + 2 > data_bits) return FAILURE;
        rep = ReadBits(2, data, size, bitpos) + 3;
        blockchoices->all_extra.back() = rep - 3;
        s = all_lengths.empty() ? 0 : all_lengths.back();
      } else if (s == 17) {
        if (*bitpos + 3 > data_bits) return FAILURE;
        rep = ReadBits(3, data, size, bitpos) + 3;
        blockchoices->all_extra.back() = rep - 3;
        s = 0;
      } else if (s == 18) {
        if (*bitpos + 7 > data_bits) return FAILURE;
        rep = ReadBits(7, data, size, bitpos) + 11;
        blockchoices->all_extra.back() = rep - 11;
        s = 0;
      }
      for (int i = 0; i < rep; i++) all_lengths.push_back(s);
    }
    if (all_lengths.size() >= static_cast<size_t>(hlit + 257 + hdist + 1)) {
      break;
    }
  }

  blockchoices->hlit = hlit;
  blockchoices->hdist = hdist;
  blockchoices->hclen = hclen;
  blockchoices->ht_lengths = ht_lengths;
  ll_lengths->assign(all_lengths.begin(), all_lengths.begin() + hlit + 257);
  dist_lengths->assign(all_lengths.begin() + hlit + 257, all_lengths.end());
  return true;
}

bool DeflateDecodeHuffmanHeader(const uint8_t* data, size_t size,
    size_t* bitpos, BlockChoices* blockchoices) {
  std::vector<int> dummy_ll, dummy_dist;
  return DeflateDecodeHuffmanHeader(data, size, bitpos, blockchoices,
                                    &dummy_ll, &dummy_dist);
}

bool DeflateDecode(const uint8_t* deflated, size_t size,
                   std::vector<uint8_t>* result, DeflateChoices* choices) {
  size_t bitpos = 0;
  size_t deflated_bits = size * 8;

  bool bfinal = false;
  while (!bfinal) {
    if (bitpos + 3 > deflated_bits) return FAILURE;
    bfinal = ReadBits(1, deflated, size, &bitpos);
    int btype = ReadBits(2, deflated, size, &bitpos);
    if (btype < 0 || btype > 2) return FAILURE;

    choices->blocks.resize(choices->blocks.size() + 1);
    BlockChoices* blockchoices = &choices->blocks.back();
    blockchoices->type = btype;
    blockchoices->filler_bits = 0;

    if (btype == 0) {
      size_t pos = (bitpos & 7) ? (bitpos >> 3) + 1 : (bitpos >> 3);
      if (pos + 4 > size) return FAILURE;

      int bits_in_byte = (bitpos & 7);
      if (bits_in_byte != 0) {
        blockchoices->filler_bits = deflated[bitpos >> 3] >> bits_in_byte;
      }

      int len = deflated[pos] + 256 * deflated[pos + 1];
      pos += 2;
      int nlen = deflated[pos] + 256 * deflated[pos + 1];
      pos += 2;
      if (pos + len > size) return FAILURE;
      if (len != 65535 - nlen) return FAILURE;

      for (int i = 0; i < len; i++) result->push_back(deflated[pos++]);

      blockchoices->blocksize = len;
      bitpos = pos * 8;
    } else {
      // huffman trees
      std::vector<int> ll_lengths;
      std::vector<int> dist_lengths;

      if (btype == 1) {
        MakeFixedCodes(&ll_lengths, &dist_lengths);
      } else {
        if (!DeflateDecodeHuffmanHeader(deflated, size, &bitpos, blockchoices,
            &ll_lengths, &dist_lengths)) {
          return FAILURE;
        }
      }
      std::vector<std::pair<int, int>> ll_tree;
      std::vector<std::pair<int, int>> dist_tree;
      if (!DecodableHuffmanTree(ll_lengths, 15, &ll_tree)) {
        return FAILURE;
      }
      if (!DecodableHuffmanTree(dist_lengths, 15, &dist_tree)) {
        return FAILURE;
      }

      for (;;) {
        int s = HuffmanDecodeSymbol(deflated, size, ll_tree, &bitpos);
        if (s < 0 || s > 285) return FAILURE;
        if (s == 256) {
          break;
        } else if (s > 256) {
          if (bitpos + kLengthExtra[s - 257] > deflated_bits) {
            return FAILURE;
          }
          int ll_extra =
              ReadBits(kLengthExtra[s - 257], deflated, size, &bitpos);
          int d = HuffmanDecodeSymbol(deflated, size, dist_tree, &bitpos);
          if (d < 0 || d > 29) return FAILURE;
          if (bitpos + kDistanceExtra[d] > deflated_bits) {
            return FAILURE;
          }
          int dist_extra = ReadBits(kDistanceExtra[d], deflated, size, &bitpos);
          int length = kLengthBase[s - 257] + ll_extra;
          if (s == 284 && ll_extra == 31) {
            // There are two ways to make length 258: with symbol 285, or with
            // symbol 284 and extra bits set to 31. Assume that using symbol
            // 284 is invalid according to the deflate spec, so treat as error.
            // If we would support this, we would need to output one more
            // choice byte on length 258 to indicate the representation.
            return FAILURE;
          }
          int dist = kDistanceBase[d] + dist_extra;
          if (static_cast<size_t>(dist) > result->size()) {
            return FAILURE;
          }
          for (int i = 0; i < length; i++) {
            result->push_back((*result)[result->size() - dist]);
          }
          blockchoices->lengths.push_back(length);
          blockchoices->dists.push_back(dist);
        } else {
          result->push_back(s);
          blockchoices->lengths.push_back(1);
          blockchoices->dists.push_back(0);
        }
      }
      blockchoices->blocksize = blockchoices->lengths.size();
    }
    if (bfinal) {
      int bits_in_byte = (bitpos & 7);
      if (bits_in_byte != 0) {
        blockchoices->filler_bits = deflated[bitpos >> 3] >> bits_in_byte;
      }
    }
  }
  // Deflate streams that have garbage bytes after the valid stream are not
  // supported.
  if (((bitpos + 7) >> 3) != size) return FAILURE;
  // Larger than 32 bit sizes not yet supported
  if (result->size() > 0xffffffff) return FAILURE;
  return true;
}

bool DeflateEncode(const uint8_t* data, size_t size,
    const DeflateChoices& choices, std::vector<uint8_t>* result) {
  size_t bitpos = 0;  // in result
  size_t pos = 0;  // in uncompressed data
  for (size_t b = 0; b < choices.blocks.size(); b++) {
    const BlockChoices& block = choices.blocks[b];
    const int blocksize = block.blocksize;
    int btype = block.type;
    int bfinal = (b + 1) == choices.blocks.size();
    if (btype < 0 || btype > 2) return FAILURE;

    AppendBits(bfinal, 1, result, &bitpos);
    AppendBits(btype, 2, result, &bitpos);

    if (btype == 0) {
      if (blocksize >= 65536) return FAILURE;
      if (pos + blocksize > size) return FAILURE;

      int bits_in_byte = (bitpos & 7);
      if (bits_in_byte != 0) {
        result->back() |= (block.filler_bits << bits_in_byte);
      }

      result->push_back(blocksize & 255);
      result->push_back((blocksize >> 8) & 255);
      result->push_back((65535 - blocksize) & 255);
      result->push_back(((65535 - blocksize) >> 8) & 255);
      for (int i = 0; i < blocksize; i++) {
        result->push_back(data[pos + i]);
      }
      bitpos = result->size() * 8;
      pos += block.blocksize;
    } else {
      std::vector<int> literals;
      std::vector<int> lengths;
      std::vector<int> distances;
      for (size_t i = 0; i < block.lengths.size(); i++) {
        if (block.lengths[i] > 1) {
          lengths.push_back(block.lengths[i]);
          distances.push_back(block.dists[i]);
          literals.push_back(INT_MAX);
        } else {
          if (pos >= size) return FAILURE;
          lengths.push_back(0);
          distances.push_back(0);
          literals.push_back(data[pos]);
        }
        pos += block.lengths[i];
      }
      if (lengths.size() != (size_t)blocksize) return FAILURE;

      // huffman trees
      std::vector<int> ll_lengths, ll_symbols;
      std::vector<int> dist_lengths, dist_symbols;

      if (btype == 1) {
        MakeFixedCodes(&ll_lengths, &dist_lengths);
      } else {
        // dynamic huffman codes
        if (!DeflateEncodeDynamicHuffmanHeader(block, &bitpos, result)) {
          return FAILURE;
        }
        MakeDynamicCodesFromBlock(block, &ll_lengths, &dist_lengths);
      }
      if (!CodeLengthsToSymbols(ll_lengths, 15, &ll_symbols)) {
        return FAILURE;
      }
      if (!CodeLengthsToSymbols(dist_lengths, 15, &dist_symbols)) {
        return FAILURE;
      }
      for (size_t i = 0; i < distances.size(); i++) {
        if (distances[i]) {
          int lengthsymbol, lengthextra, lengthextranum;
          int distsymbol, distextra, distextranum;
          GetSymbol(lengths[i], distances[i], &lengthsymbol, &lengthextra,
                   &lengthextranum, &distsymbol, &distextra, &distextranum);
          int lengthindex = lengthsymbol + 257;
          if (ll_lengths[lengthindex] == 0) return FAILURE;
          AppendBitsInv(ll_symbols[lengthindex],
                        ll_lengths[lengthindex], result, &bitpos);
          AppendBits(lengthextra, lengthextranum, result, &bitpos);
          if (dist_lengths[distsymbol] == 0) return FAILURE;
          AppendBitsInv(dist_symbols[distsymbol], dist_lengths[distsymbol],
                        result, &bitpos);
          AppendBits(distextra, distextranum, result, &bitpos);
        } else {
          if (ll_lengths[literals[i]] == 0) return FAILURE;
          AppendBitsInv(ll_symbols[literals[i]], ll_lengths[literals[i]],
                        result, &bitpos);
        }
      }
      // end symbol
      if (ll_lengths[256] == 0) return FAILURE;
      AppendBitsInv(ll_symbols[256], ll_lengths[256], result, &bitpos);

      if (bfinal) {
        int bits_in_byte = (bitpos & 7);
        if (bits_in_byte != 0) {
          result->back() |= (block.filler_bits << bits_in_byte);
        }
      }
    }
  }

  return true;
}

////////////////////////////////////////////////////////////////////////////////


// Appends a at the end of result
void AppendTo(const std::vector<uint8_t>& a, std::vector<uint8_t>* result) {
  result->insert(result->end(), a.begin(), a.end());
}

// Written with varint to be future-proof to support 64-bit values later
void Write32Bit(uint32_t value, std::vector<uint8_t>* data) {
  for (;;) {
    uint8_t byte = (value & 127);
    if (value > 127) byte |= 128;
    data->push_back(byte);
    value >>= 7u;
    if (!value) return;
  }
}

bool Read32Bit(const uint8_t* data, size_t size, size_t* pos, uint32_t* value) {
  int num = 0;
  *value = 0;
  for (;;) {
    if (*pos >= size) return FAILURE;
    uint8_t byte = data[(*pos)++];
    if (num > 4 || (num == 4 && byte > 15)) {
      return FAILURE;  // 32-bit overflow
    }
    *value |= (uint32_t)(byte & 127) << (num * 7);
    if (byte < 128) return true;  // success
    num++;
  }
}

bool Read32Bit(const uint8_t* data, size_t size, size_t* pos, int* value) {
  uint32_t value32;
  Read32Bit(data, size, pos, &value32);
  if (value32 > 0x7fffffff) return FAILURE;
  *value = static_cast<int>(value32);
  return true;
}

static const int kMinDeflateLength = 3;
static const int kMaxDeflateLength = 258;
// For speed: in FindLongestMatch, we only need to know if there is one or not.
static const int kMaxLongestLength = 3;
static const int kWindowSize = 32768;
static const int kWindowMask = (kWindowSize - 1);


// hash chain
static const unsigned kHashBits = 15;
static const unsigned kHashNumValues = 1 << kHashBits;
static const unsigned kHashBitMask = kHashNumValues - 1;
static const unsigned kHashShift = 5;

struct HashChain {
  int* head;
  uint16_t* chain;
  int* val;

  int undo_head = 0;
  uint16_t undo_chain = 0;
  int undo_val = 0;

  // Speed up repetitions of zero
  int* headz;
  uint16_t* chainz;
  uint16_t* zeros;
  uint32_t numzeros = 0;

  int undo_headz = 0;
  uint16_t undo_chainz = 0;
  int undo_zeros = 0;
  uint32_t undo_numzeros = 0;

  HashChain() {
    this->head = (int*)malloc(sizeof(int) * kHashNumValues);
    this->val = (int*)malloc(sizeof(int) * kWindowSize);
    this->chain = (uint16_t*)malloc(sizeof(uint16_t) * kWindowSize);

    for (uint32_t i = 0; i < kHashNumValues; ++i) {
      this->head[i] = -1;
    }
    for (uint32_t i = 0; i < kWindowSize; ++i) {
      this->val[i] = -1;
      this->chain[i] = i;  // same value as index indicates uninitialized
    }

    this->zeros = (uint16_t*)malloc(sizeof(uint16_t) * kWindowSize);
    this->headz = (int*)malloc(sizeof(int) * (kWindowSize + 1));
    this->chainz =
        (uint16_t*)malloc(sizeof(uint16_t) * kWindowSize);

    for (uint32_t i = 0; i < kHashNumValues; ++i) {
      this->headz[i] = -1;
    }
    for (uint32_t i = 0; i < kWindowSize; ++i) {
      this->chainz[i] = i;
    }
  }

  ~HashChain() {
    free(this->head);
    free(this->val);
    free(this->chain);

    free(this->headz);
    free(this->zeros);
    free(this->chainz);
  }
};

uint32_t GetHash(const uint8_t* data, size_t size, size_t pos) {
  uint32_t result = 0;
  if (pos + 2 < size) {
    result ^= (uint32_t)(data[pos + 0] << 0u);
    result ^= (uint32_t)(data[pos + 1] << kHashShift);
    result ^= (uint32_t)(data[pos + 2] << (kHashShift * 2));
  } else {
    size_t amount, i;
    if (pos >= size) return 0;
    amount = size - pos;
    for (i = 0; i < amount; ++i) {
      result ^= (uint32_t)(data[pos + i] << (i * kHashShift));
    }
  }
  return result & kHashBitMask;
}

uint32_t CountZeros(const uint8_t* data, size_t size, size_t pos,
                    uint32_t prevzeros) {
  size_t end = pos + kWindowSize;
  if (end > size) end = size;
  if (prevzeros > 0) {
    if (prevzeros >= kWindowMask && data[end - 1] == 0)  {
      return prevzeros;
    } else {
      return prevzeros - 1;
    }
  }
  uint32_t num = 0;
  while (pos + num < end && data[pos + num] == 0) num++;
  return num;
}

// wpos = pos & kWindowMask
void UpdateHashChain(const uint8_t* data, size_t size, size_t pos,
                     HashChain* hash) {
  uint32_t hashval = GetHash(data, size, pos);
  uint32_t wpos = pos & kWindowMask;

  hash->undo_val = hash->val[wpos];
  hash->undo_chain = hash->chain[wpos];
  hash->undo_head = hash->head[hashval];

  hash->val[wpos] = (int)hashval;
  if (hash->head[hashval] != -1) hash->chain[wpos] = hash->head[hashval];
  hash->head[hashval] = wpos;

  uint32_t numzeros = CountZeros(data, size, pos, hash->numzeros);
  hash->undo_zeros = hash->zeros[wpos];
  hash->undo_chainz = hash->chainz[wpos];
  hash->undo_headz = hash->headz[numzeros];
  hash->undo_numzeros = hash->numzeros;

  hash->zeros[wpos] = numzeros;
  if (hash->headz[numzeros] != -1) hash->chainz[wpos] = hash->headz[numzeros];
  hash->headz[numzeros] = wpos;
  hash->numzeros = numzeros;
}


void UndoUpdateHashChain(const uint8_t* data, size_t size, size_t pos,
                         HashChain* hash) {
  uint32_t hashval = GetHash(data, size, pos);
  uint32_t wpos = pos & kWindowMask;

  hash->val[wpos] = hash->undo_val;
  hash->chain[wpos] = hash->undo_chain;
  hash->head[hashval] = hash->undo_head;

  uint32_t numzeros = hash->numzeros;
  hash->zeros[wpos] = hash->undo_zeros;
  hash->chainz[wpos] = hash->undo_chainz;
  hash->headz[numzeros] = hash->undo_headz;
  hash->numzeros = hash->undo_numzeros;
}

////////////////////////////////////////////////////////////////////////////////

// The prediction tries to predict the LZ77 lengths and distances. For the
// length, it predicts the longest lazy match and dynamically adjusts. For the
// distance, it predicts the shortest possible distance for that length.
struct Predictor {
  Predictor(const uint8_t* data, size_t size) : data(data), size(size) {
  }

  bool PredictLength(int* pred_len, int* pred_dist, int* longest_possible) {
    if (byte >= size) return FAILURE;
    UpdateHashChain(data, size, byte, &chain);
    UpdateHashChain(data, size, byte, &chain_longest);
    int dummy_dist = 0;

    if (static_cast<size_t>(lazy_stored) == byte) {
      *pred_len = lazy_len;
      *pred_dist = lazy_dist;
      *longest_possible = lazy_longest_possible;
    } else {
      FindMatch(data, size, byte, kWindowSize,
                kMinDeflateLength, kMaxDeflateLength,
                maxchainlength, &chain, pred_dist, pred_len);
      FindLongestMatch(data, size, byte, kWindowSize,
                       kMinDeflateLength, kMaxLongestLength,
                       &chain_longest, &dummy_dist, longest_possible);
    }

    lazy_stored = 0;
    lazy_prev = false;
    if (*pred_len >= kMinDeflateLength &&
        byte + 1 < size && *pred_len < maxlazymatch) {
      UpdateHashChain(data, size, byte + 1, &chain);
      UpdateHashChain(data, size, byte + 1, &chain_longest);
      int maxchainlen = maxchainlength;
      if (*pred_len > good_length) maxchainlen >>= 2;
      FindMatch(data, size, byte + 1, kWindowSize,
                kMinDeflateLength, kMaxDeflateLength,
                maxchainlen, &chain,
                &lazy_dist, &lazy_len);
      FindLongestMatch(data, size, byte + 1, kWindowSize,
                       kMinDeflateLength, kMaxLongestLength,
                       &chain_longest,
                       &dummy_dist, &lazy_longest_possible);
      UndoUpdateHashChain(data, size, byte + 1, &chain);
      UndoUpdateHashChain(data, size, byte + 1, &chain_longest);
      if (lazy_len > *pred_len) {
        *pred_len = 1;
        *pred_dist = 0;
        lazy_stored = byte + 1;
        lazy_prev = true;
      }
    }

    if (*pred_len == kMinDeflateLength && *pred_dist > toofar) {
      *pred_len = 1;
      *pred_dist = 0;
    }

    if (*pred_len < 0 || *pred_len > kMaxDeflateLength || *pred_dist < 0
        || *pred_dist > kWindowSize || static_cast<size_t>(*pred_dist) > byte) {
      return FAILURE;
    }

    return true;
  }

  // use only if actual_len >= kMinDeflateLength. prev_dist is a previously
  // predicted dist, for predicting a next one
  void PredictDist(int actual_len, int* pred_dist, int prev_dist = 0) {
    FindShortestDistForLength(data, size, byte, kWindowSize,
                              kMinDeflateLength, kMaxDeflateLength, &chain,
                              actual_len, pred_dist, prev_dist);
  }

  void Update(int actual_len, int /*pred_len*/) {
    if (actual_len <= max_insert_length) {
      for (int i = 1; i < actual_len; i++) {
        UpdateHashChain(data, size, byte + i, &chain);
      }
    }
    for (int i = 1; i < actual_len; i++) {
      UpdateHashChain(data, size, byte + i, &chain_longest);
    }

    byte += actual_len;
  }

  // skip uncompresed bytes, but still update the hash chain
  void SkipBytes(int num) {
    if (num < kWindowSize) {
      for (int i = 0; i < num; i++) {
        UpdateHashChain(data, size, byte + i, &chain);
      }
      byte += num;
    } else {
      byte = byte + num - kWindowSize;
      for (int i = 0; i < kWindowSize; i++) {
        UpdateHashChain(data, size, byte + i, &chain);
      }
      byte += kWindowSize;
    }
  }

  void SetZlibLevel(int level) {
    int data[40] = {
      0, 0, 0, 0,
      4, 4, 8, 4,  // 1
      4, 5, 16, 8,  // 2
      4, 6, 32, 32,  // 3
      4, 4, 16, 16,  // 4
      8, 16, 32, 32,  // 5
      8, 16, 128, 128,  // 6
      8, 32, 128, 256,  // 7
      32, 128, 258, 1024,  // 8
      32, 258, 258, 4096  // 9
    };
    if (level >= 1 && level <= 9) {
      good_length = data[level * 4 + 0];
      maxlazymatch = data[level * 4 + 1];
      nice_length = data[level * 4 + 2];
      maxchainlength = data[level * 4 + 3];
      // emulate fast also
      if (level < 4) {
        max_insert_length = maxlazymatch;
        maxlazymatch = 0;
        toofar = 32768;
      }
    }
    // everything at the maximum
    if (level == 10) {
      good_length = kMaxDeflateLength;
      maxlazymatch = kMaxDeflateLength;
      nice_length = kMaxDeflateLength;
      maxchainlength = kWindowSize;
      max_insert_length = kMaxDeflateLength;
      toofar = 4096;
    }
  }

  void FindShortestDistForLength(const uint8_t* data, size_t size, size_t pos,
                               int max_dist, int /*min_len*/, int max_len,
                               HashChain* chain, int actual_len,
                               int* result_dist, int prev_result_dist) {
    size_t pos2 = pos - prev_result_dist;
    uint32_t wpos = pos & kWindowMask;
    uint32_t wpos2 = pos2 & kWindowMask;
    uint32_t hashval = GetHash(data, size, pos);
    uint32_t hashpos = chain->chain[wpos2];

    int prev_dist = prev_result_dist;
    int end = std::min<int>(pos + max_len, size);
    max_dist = std::min<int>(max_dist, pos);
    *result_dist = 0;

    for (;;) {
      int dist = (hashpos <= wpos) ?
          (wpos - hashpos) : (wpos - hashpos + kWindowMask + 1);
      // went completely around the circular buffer
      if (dist < prev_dist) break;
      prev_dist = dist;

      int len = 0;
      if (dist > 0) {
        int i = pos;
        int j = pos - dist;
        if (chain->numzeros > 3) {
          int r = std::min<int>(chain->numzeros, chain->zeros[hashpos]);
          i += r;
          j += r;
          len += r;
        }
        while (i < end && data[i] == data[j]) {
          i++;
          j++;
          len++;
        }
        if (len >= actual_len) {
          *result_dist = dist;
          return;
        }
      }

      if (chain->numzeros >= 3 && len > static_cast<int>(chain->numzeros)) {
        if (hashpos == chain->chainz[hashpos]) break;
        hashpos = chain->chainz[hashpos];
        if (chain->zeros[hashpos] != chain->numzeros) break;
      } else {
        if (hashpos == chain->chain[hashpos]) break;
        hashpos = chain->chain[hashpos];
        if (chain->val[hashpos] != (int)hashval) break;  // outdated hash value
      }
    }
  }

  // Finds longest LZ77 match as length, distance pair. Emulates the concept of
  // max chain length for the particular hash used, but not with speedup in mind
  // but to emulate zlib for better prediction. It will continue searching
  // without max chain length and store the longest theoretically possible
  // length in longest_possible.
  void FindMatch(const uint8_t* data, size_t size, size_t pos, int max_dist,
      int min_len, int max_len, uint32_t maxchainlength, HashChain* chain,
      int* result_dist, int* result_len) {
    uint32_t wpos = pos & kWindowMask;
    uint32_t hashval = GetHash(data, size, pos);
    uint32_t hashpos = chain->chain[wpos];

    int prev_dist = 0;
    int end = std::min<int>(pos + max_len, size);
    max_dist = std::min<int>(max_dist, pos);
    *result_len = 1;
    *result_dist = 0;

    uint32_t chainlength = 0;

    for (;;) {
      int dist = (hashpos <= wpos) ?
          (wpos - hashpos) : (wpos - hashpos + kWindowMask + 1);
      // went completely around the circular buffer
      if (dist < prev_dist) break;
      prev_dist = dist;
      int len = 0;
      if (dist > 0) {
        int i = pos;
        int j = pos - dist;
        if (chain->numzeros > 3) {
          int r = std::min<int>(chain->numzeros, chain->zeros[hashpos]);
          i += r;
          j += r;
          len += r;
        }
        if (len > max_len) len = max_len;
        while (i < end && data[i] == data[j]) {
          i++;
          j++;
          len++;
        }
        if (len >= min_len && len > *result_len) {
          *result_len = len;
          *result_dist = dist;
          if (len >= nice_length) break;
        }
      }

      chainlength++;
      if (chainlength >= maxchainlength) break;

      if (chain->numzeros >= 3 && len > static_cast<int>(chain->numzeros)) {
        if (hashpos == chain->chainz[hashpos]) break;
        hashpos = chain->chainz[hashpos];
        if (chain->zeros[hashpos] != chain->numzeros) break;
      } else {
        if (hashpos == chain->chain[hashpos]) break;
        hashpos = chain->chain[hashpos];
        if (chain->val[hashpos] != (int)hashval) break;  // outdated hash value
      }
    }
  }


  void FindLongestMatch(const uint8_t* data, size_t size, size_t pos,
      int max_dist, int min_len, int max_len, HashChain* chain,
      int* result_dist, int* result_len) {
    uint32_t wpos = pos & kWindowMask;
    uint32_t hashval = GetHash(data, size, pos);
    uint32_t hashpos = chain->chain[wpos];

    int prev_dist = 0;
    int end = std::min<int>(pos + max_len, size);
    max_dist = std::min<int>(max_dist, pos);
    *result_len = 1;
    *result_dist = 0;

    for (;;) {
      int dist = (hashpos <= wpos) ?
          (wpos - hashpos) : (wpos - hashpos + kWindowMask + 1);
      // went completely around the circular buffer
      if (dist < prev_dist) break;
      prev_dist = dist;
      int len = 0;
      if (dist > 0) {
        int i = pos;
        int j = pos - dist;
        if (chain->numzeros > 3) {
          int r = std::min<int>(chain->numzeros, chain->zeros[hashpos]);
          i += r;
          j += r;
          len += r;
        }
        if (len > max_len) len = max_len;
        while (i < end && data[i] == data[j]) {
          i++;
          j++;
          len++;
        }
        if (len >= min_len && len > *result_len) {
          *result_len = len;
          *result_dist = dist;
          if (len >= max_len) break;
        }
      }

      if (chain->numzeros >= 3 && len > static_cast<int>(chain->numzeros)) {
        if (hashpos == chain->chainz[hashpos]) break;
        hashpos = chain->chainz[hashpos];
        if (chain->zeros[hashpos] != chain->numzeros) break;
      } else {
        if (hashpos == chain->chain[hashpos]) break;
        hashpos = chain->chain[hashpos];
        if (chain->val[hashpos] != (int)hashval) break;  // outdated hash value
      }
    }
  }

  size_t byte = 0;  // byte position

  const uint8_t* data;
  size_t size;
  HashChain chain;
  HashChain chain_longest;

  // set to 0 to disable lazy matching, max value is kMaxDeflateLength.
  int maxlazymatch = kMaxDeflateLength;
  // set to kMaxDeflateLength to do regular encoding that finds shortest dist,
  // or less (4, 5 or 6) to do like the fast modes of zlib do (= don't update
  // the hash chain if length longer than this).
  int max_insert_length  = kMaxDeflateLength;
  uint32_t maxchainlength = kWindowSize;  // kWindowSize to allow all
  int good_length = kMaxDeflateLength;
  int nice_length = kMaxDeflateLength;

  int lazy_len = 0;
  int lazy_dist = 0;
  int lazy_longest_possible = 0;
  int lazy_stored = -1;
  bool lazy_prev = false;
  int toofar = 4096;  // see official deflate.c "TOO_FAR"
};

// The encoded choices as separate stream, each with different entropy
struct ChoicesEncoded {
  std::vector<uint8_t> headers;
  std::vector<uint8_t> lencodes;
  std::vector<uint8_t> distcodes;
  std::vector<uint8_t> lenextra;
  std::vector<uint8_t> distextra;
};


std::vector<uint8_t> Combine(const ChoicesEncoded& encoded) {
  std::vector<uint8_t> result;
  Write32Bit(encoded.headers.size(), &result);
  AppendTo(encoded.headers, &result);
  Write32Bit(encoded.lencodes.size(), &result);
  AppendTo(encoded.lencodes, &result);
  Write32Bit(encoded.distcodes.size(), &result);
  AppendTo(encoded.distcodes, &result);
  Write32Bit(encoded.lenextra.size(), &result);
  AppendTo(encoded.lenextra, &result);
  Write32Bit(encoded.distextra.size(), &result);
  AppendTo(encoded.distextra, &result);
  return result;
}

bool Split(const uint8_t* data, size_t size, ChoicesEncoded* result) {
  size_t pos = 0;
  uint32_t subsize;

  if (!Read32Bit(data, size, &pos, &subsize)) return FAILURE;
  if (pos + subsize > size) return FAILURE;
  result->headers.assign(data + pos, data + pos + subsize);
  pos += subsize;

  if (!Read32Bit(data, size, &pos, &subsize)) return FAILURE;
  if (pos + subsize > size) return FAILURE;
  result->lencodes.assign(data + pos, data + pos + subsize);
  pos += subsize;

  if (!Read32Bit(data, size, &pos, &subsize)) return FAILURE;
  if (pos + subsize > size) return FAILURE;
  result->distcodes.assign(data + pos, data + pos + subsize);
  pos += subsize;

  if (!Read32Bit(data, size, &pos, &subsize)) return FAILURE;
  if (pos + subsize > size) return FAILURE;
  result->lenextra.assign(data + pos, data + pos + subsize);
  pos += subsize;

  if (!Read32Bit(data, size, &pos, &subsize)) return FAILURE;
  if (pos + subsize > size) return FAILURE;
  result->distextra.assign(data + pos, data + pos + subsize);
  pos += subsize;

  return true;
}

static const int kMaxDistTries = 16;

int GuessZlibLevel(const uint8_t* data, size_t size,
                   const DeflateChoices& stream) {
  int bestlevel = 1;
  int bestcorrect = 0;
  int maxpredictions = 65536;

  for (int level = 1; level <= 10; level++) {
    int numcorrect = 0;
    int numdone = 0;
    Predictor predictor(data, size);
    predictor.SetZlibLevel(level);
    for (size_t i = 0; i < stream.blocks.size(); i++) {
      const BlockChoices& block = stream.blocks[i];
      if (block.type == 0) continue;
      for (size_t j = 0; j < block.lengths.size(); j++) {
        int actual_dist = block.dists[j];
        int actual_len = block.lengths[j];

        int pred_len, pred_dist, longest_possible;
        if (!predictor.PredictLength(
            &pred_len, &pred_dist, &longest_possible)) {
          return 0;
        }
        predictor.Update(actual_len, pred_len);
        if (pred_len == actual_len && pred_dist == actual_dist) numcorrect++;
        numdone++;
        if (numdone > maxpredictions) break;
      }
      if (numdone > maxpredictions) break;
    }

    if (numcorrect > bestcorrect) {
      bestlevel = level;
      bestcorrect = numcorrect;
    }
  }
  return bestlevel;
}

bool EncodeChoices(const uint8_t* data, size_t size,
                   const DeflateChoices& choices,
                   ChoicesEncoded* encoded) {
  int level = GuessZlibLevel(data, size, choices);
  if (level == 0) return FAILURE;
  encoded->headers.push_back(level);
  Write32Bit(choices.blocks.size(), &encoded->headers);
  if (choices.blocks.empty()) return true;
  Predictor predictor(data, size);
  predictor.SetZlibLevel(level);
  for (size_t i = 0; i < choices.blocks.size(); i++) {
    bool bfinal = (i + 1) == choices.blocks.size();
    const BlockChoices& block = choices.blocks[i];

    encoded->headers.push_back(block.type);
    Write32Bit(block.blocksize, &encoded->headers);
    if (bfinal || block.type == 0) {
      encoded->headers.push_back(block.filler_bits);
    }
    size_t bitpos = encoded->headers.size() * 8;
    if (block.type == 2) {
      if (!DeflateEncodeDynamicHuffmanHeader(
          block, &bitpos, &encoded->headers)) {
        return FAILURE;
      }
    }
    if (block.type == 0) {
      predictor.SkipBytes(block.blocksize);
      continue;
    }
    for (size_t j = 0; j < block.lengths.size(); j++) {
      int actual_dist = block.dists[j];
      int actual_len = block.lengths[j];

      int pred_len, pred_dist, longest_possible;
      if (!predictor.PredictLength(
          &pred_len, &pred_dist, &longest_possible)) {
        return FAILURE;
      }
      int encoded_len = 0;

      if (longest_possible >= kMinDeflateLength) {
        // encoded_len meaning:
        // 0: prediction correct
        // 1: actual len is 1
        // [2..pred_len-1]: actual len is encoded_len
        // [pred_len..254]: actual len is pred_len - encoded_len + 1
        // 255: actual len encoded exactly in lenextra

        if (pred_len == actual_len) {
          encoded_len = 0;
        } else if (actual_len == 1) {
          encoded_len = 1;
        } else {
          encoded_len = (actual_len > pred_len)
            ? actual_len : (pred_len - actual_len + 1);
          if (encoded_len < 2 || encoded_len > 254) encoded_len = 255;
        }

        encoded->lencodes.push_back(encoded_len);
        if (encoded_len == 255) {
          encoded->lenextra.push_back(actual_len - kMinDeflateLength);
        }
      }

      if (actual_len > 1) {
        // only relevant for predicting low qualities, which use unoptimal
        // dists. The higher, the slower. In high qualities, dist always
        // predicted correctly so tries never increments and it's fast.
        if (pred_len != actual_len) {
          predictor.PredictDist(actual_len, &pred_dist, 0);
        }
        int tries = 0;
        while (tries < kMaxDistTries && pred_dist != actual_dist) {
          predictor.PredictDist(actual_len, &pred_dist, pred_dist);
          tries++;
        }
        // encoded_dist meaning:
        // 0: prediction correct
        // [1..kMaxDistTries-1]: hop this many times to next possible dists
        // kMaxDistTries: actual dist encoded in 2 bytes of distextra
        int encoded_dist = (tries < kMaxDistTries) ? tries : kMaxDistTries;
        encoded->distcodes.push_back(encoded_dist);
        if (encoded_dist == kMaxDistTries) {
          encoded->distextra.push_back((actual_dist - 1) & 255);
          encoded->distextra.push_back(((actual_dist - 1) >> 8) & 255);
        }
      }

      predictor.Update(actual_len, pred_len);
    }
  }

  return true;
}

bool DecodeChoices(const uint8_t* data, size_t size,
                   const ChoicesEncoded& encoded,
                   DeflateChoices* result) {
  size_t headerpos = 0;
  size_t lenpos = 0;
  size_t distpos = 0;
  size_t lenextrapos = 0;
  size_t distextrapos = 0;

  if (headerpos >= encoded.headers.size()) return FAILURE;
  int level = encoded.headers[headerpos++];
  if (level < 1 || level > 10) return FAILURE;
  uint32_t numblocks;
  if (!Read32Bit(encoded.headers.data(), encoded.headers.size(),
      &headerpos, &numblocks)) {
    return FAILURE;
  }
  if (numblocks == 0) return true;
  Predictor predictor(data, size);
  predictor.SetZlibLevel(level);

  for (uint32_t ib = 0; ib < numblocks; ib++) {
    bool bfinal = (ib + 1) == numblocks;
    result->blocks.resize(result->blocks.size() + 1);
    BlockChoices* block = &result->blocks.back();
    if (headerpos >= encoded.headers.size()) return FAILURE;
    block->type = encoded.headers[headerpos++];
    if (!Read32Bit(encoded.headers.data(), encoded.headers.size(),
        &headerpos, &block->blocksize)) {
      return FAILURE;
    }
    if (headerpos >= encoded.headers.size()) return FAILURE;
    if (bfinal || block->type == 0) {
      block->filler_bits = encoded.headers[headerpos++];
    }

    size_t bitpos = headerpos * 8;
    if (block->type == 2) {
      if (!DeflateDecodeHuffmanHeader(encoded.headers.data(),
          encoded.headers.size(), &bitpos, block)) {
        return FAILURE;
      }
    }
    headerpos = (bitpos + 7) / 8;
    if (block->type == 0) {
      predictor.SkipBytes(block->blocksize);
      continue;
    }

    for (int ie = 0; ie < block->blocksize; ie++) {
      int pred_len, pred_dist, longest_possible;
      if (!predictor.PredictLength(
          &pred_len, &pred_dist, &longest_possible)) {
        return FAILURE;
      }

      int actual_len = 1;
      int encoded_len = 1;
      if (longest_possible >= kMinDeflateLength) {
        if (lenpos >= encoded.lencodes.size()) return FAILURE;

        encoded_len = encoded.lencodes[lenpos++];

        if (encoded_len == 255) {
          if (lenextrapos >= encoded.lenextra.size()) return FAILURE;
          actual_len = (encoded.lenextra[lenextrapos++] + kMinDeflateLength);
        } else if (encoded_len == 0) {
          actual_len = pred_len;
        } else if (encoded_len == 1) {
          actual_len = 1;
        } else if (encoded_len >= pred_len) {
          actual_len = encoded_len;
        } else {
          actual_len = pred_len - encoded_len + 1;
        }

        if (actual_len < 0 || actual_len > kMaxDeflateLength) {
          return FAILURE;
        }
      }

      int actual_dist = 0;
      if (actual_len > 1) {
        if (distpos >= encoded.distcodes.size()) return FAILURE;
        int encoded_dist = encoded.distcodes[distpos++];

        if (encoded_dist < 0 || encoded_dist > 255) {
          return FAILURE;
        }
        if (pred_len != actual_len) {
          predictor.PredictDist(actual_len, &pred_dist, 0);
        }

        if (encoded_dist == kMaxDistTries) {
          if (distextrapos + 2 > encoded.distextra.size()) {
            return FAILURE;
          }
          actual_dist = encoded.distextra[distextrapos] +
              (encoded.distextra[distextrapos + 1] << 8) + 1;
          distextrapos += 2;
        } else {
          for (int tries = 0; tries < encoded_dist; tries++) {
            predictor.PredictDist(actual_len, &pred_dist, pred_dist);
          }
          actual_dist = pred_dist;
        }
      }

      if (actual_len == 0) return FAILURE;
      if (actual_len == 1 && actual_dist != 0) return FAILURE;
      if (actual_len > 1 && actual_dist == 0) return FAILURE;

      predictor.Update(actual_len, pred_len);

      block->lengths.push_back(actual_len);
      block->dists.push_back(actual_dist);
    }
  }

  return true;
}

bool EncodeChoices(const DeflateChoices& choices,
                   const uint8_t* data, size_t size,
                   std::vector<uint8_t>* result) {
  ChoicesEncoded encoded;
  if (!EncodeChoices(data, size, choices, &encoded)) return FAILURE;
  *result = Combine(encoded);
  return true;
}

bool DecodeChoices(const uint8_t* data, size_t size,
                   const uint8_t* encoded, size_t encoded_size,
                   DeflateChoices* result) {
  if (encoded_size == 0) return FAILURE;
  ChoicesEncoded choices_encoded;
  if (!Split(encoded, encoded_size, &choices_encoded)) return FAILURE;
  return DecodeChoices(data, size, choices_encoded, result);
}

}  // namespace

bool Grittibanzli(const uint8_t* deflated, size_t size,
                  std::vector<uint8_t>* uncompressed,
                  std::vector<uint8_t>* choices_encoded) {
  if (size > 0xffffffff) {
    // Larger than 32 bit sizes not yet supported
    return FAILURE;
  }

  DeflateChoices choices;
  // deflate *de*code is for *en*coding for us
  if (!DeflateDecode(deflated, size, uncompressed, &choices)) {
    return FAILURE;
  }

  // quick verify (not full verification)
  std::vector<uint8_t> test;
  if (!DeflateEncode(uncompressed->data(), uncompressed->size(), choices, &test)
      || test.size() != size || memcmp(test.data(), deflated, size) != 0) {
#ifdef GRITTIBANZLI_CRASH_ON_INTERNAL_ERROR
    std::exit(1);  // for fuzzing, to detect deflate roundtrip mismatch
#endif  // GRITTIBANZLI_CRASH_ON_INTERNAL_ERROR
    return FAILURE;
  }

  if (!EncodeChoices(choices, uncompressed->data(), uncompressed->size(),
      choices_encoded)) {
    return FAILURE;
  }

  return true;
}

bool Ungrittibanzli(const uint8_t* uncompressed, size_t size,
                    const uint8_t* choices_encoded, size_t choices_size,
                    std::vector<uint8_t>* deflated) {
  DeflateChoices choices;
  if (!DecodeChoices(uncompressed, size,
      choices_encoded, choices_size, &choices)) {
    return FAILURE;
  }
  // deflate *en*code is for *de*coding for us
  if (!DeflateEncode(uncompressed, size,
      choices, deflated)) {
    return FAILURE;
  }

  return true;
}

}  // namespace grittibanzli