source upload

2022-01-17 22:16:47 +02:00
parent 12936d065b
commit 098e8c48de
1778 changed files with 1206749 additions and 0 deletions
--- a/contrib/preflate/dllmain.cpp
+++ b/contrib/preflate/dllmain.cpp
@@ -0,0 +1,69 @@
+// dllmain.cpp : Defines the entry point for the DLL application.
+#include "pch.h"
+#include <algorithm>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string>
+#include <string.h>
+#include <cstdint>
+#include <iterator>
+#include <vector>
+
+#include "preflate_decoder.h"
+#include "preflate_reencoder.h"
+
+// DLL entry point. This library performs no per-process or per-thread setup
+// or teardown, so every notification is simply acknowledged.
+//
+// hModule            - unused.
+// ul_reason_for_call - unused; attach and detach notifications are all
+//                      treated the same way.
+// lpReserved         - unused.
+//
+// Always returns TRUE.
+BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved)
+{
+    (void)hModule;
+    (void)ul_reason_for_call;
+    (void)lpReserved;
+    return TRUE;
+}
+
+// Exported C entry point: runs preflate_decode() on a raw deflate stream and
+// hands back the unpacked data and the preflate diff.
+//
+// src / srcSize - raw deflate input (srcSize bytes).
+// dst1          - receives the unpacked data; *dst1Capacity is the buffer size
+//                 on entry and the number of bytes written on success.
+// dst2          - receives the preflate diff; *dst2Capacity is the buffer size
+//                 on entry and the number of bytes written on success. The
+//                 incoming *dst2Capacity is also forwarded unchanged as the
+//                 fourth argument of preflate_decode().
+//
+// Returns true on success. Returns false, leaving both capacities untouched,
+// when an argument is invalid (null pointer, negative size or capacity),
+// decoding fails, a result does not fit into its buffer, or an exception such
+// as std::bad_alloc is raised: exceptions must not escape through the
+// extern "C" boundary.
+extern "C" __declspec(dllexport) bool decode(const unsigned char* src,
+	int srcSize, unsigned char* dst1, int* dst1Capacity, unsigned char* dst2,
+	int* dst2Capacity) {
+	if (srcSize < 0 || (src == nullptr && srcSize > 0)
+		|| dst1Capacity == nullptr || dst2Capacity == nullptr
+		|| *dst1Capacity < 0 || *dst2Capacity < 0) {
+		return false;
+	}
+	try {
+		std::vector<unsigned char> deflate_raw(src, src + srcSize);
+		std::vector<unsigned char> unpacked_output;
+		std::vector<unsigned char> preflate_diff;
+		if (!preflate_decode(unpacked_output, preflate_diff, deflate_raw, *dst2Capacity)) {
+			return false;
+		}
+		// Both capacities are known to be non-negative here, so the comparison
+		// can be done in size_t without a negative value wrapping around.
+		if (unpacked_output.size() > static_cast<size_t>(*dst1Capacity)
+			|| preflate_diff.size() > static_cast<size_t>(*dst2Capacity)) {
+			return false;
+		}
+		// A destination buffer is only required when there is something to copy.
+		if ((!unpacked_output.empty() && dst1 == nullptr)
+			|| (!preflate_diff.empty() && dst2 == nullptr)) {
+			return false;
+		}
+		if (!unpacked_output.empty()) {
+			memcpy(dst1, unpacked_output.data(), unpacked_output.size());
+		}
+		if (!preflate_diff.empty()) {
+			memcpy(dst2, preflate_diff.data(), preflate_diff.size());
+		}
+		*dst1Capacity = static_cast<int>(unpacked_output.size());
+		*dst2Capacity = static_cast<int>(preflate_diff.size());
+		return true;
+	}
+	catch (...) {
+		return false;
+	}
+}
+
+// Exported C entry point: runs preflate_reencode() on unpacked data plus a
+// preflate diff and hands back the resulting raw deflate stream.
+//
+// src1 / src1Size - unpacked data.
+// src2 / src2Size - preflate diff.
+// dst             - receives the deflate stream; *dstCapacity is the buffer
+//                   size on entry and the number of bytes written on success.
+//
+// Returns true on success. Returns false, leaving *dstCapacity untouched, when
+// an argument is invalid (null pointer, negative size or capacity),
+// re-encoding fails, the result does not fit into dst, or an exception such
+// as std::bad_alloc is raised: exceptions must not escape through the
+// extern "C" boundary.
+extern "C" __declspec(dllexport) bool reencode(const unsigned char* src1,
+	int src1Size, const unsigned char* src2,
+	int src2Size, unsigned char* dst, int* dstCapacity) {
+	if (src1Size < 0 || src2Size < 0
+		|| (src1 == nullptr && src1Size > 0)
+		|| (src2 == nullptr && src2Size > 0)
+		|| dstCapacity == nullptr || *dstCapacity < 0) {
+		return false;
+	}
+	try {
+		std::vector<unsigned char> unpacked_input(src1, src1 + src1Size);
+		std::vector<unsigned char> preflate_diff(src2, src2 + src2Size);
+		std::vector<unsigned char> deflate_raw;
+		if (!preflate_reencode(deflate_raw, preflate_diff, unpacked_input)) {
+			return false;
+		}
+		// *dstCapacity is known to be non-negative here, so the comparison can
+		// be done in size_t without a negative value wrapping around.
+		if (deflate_raw.size() > static_cast<size_t>(*dstCapacity)) {
+			return false;
+		}
+		if (!deflate_raw.empty()) {
+			if (dst == nullptr) {
+				return false;
+			}
+			memcpy(dst, deflate_raw.data(), deflate_raw.size());
+		}
+		*dstCapacity = static_cast<int>(deflate_raw.size());
+		return true;
+	}
+	catch (...) {
+		return false;
+	}
+}
+
+
--- a/contrib/preflate/framework.h
+++ b/contrib/preflate/framework.h
@@ -0,0 +1,5 @@
+#pragma once
+
+#define WIN32_LEAN_AND_MEAN             // Exclude rarely-used stuff from Windows headers
+// Windows Header Files
+#include <windows.h>
--- a/contrib/preflate/pch.cpp
+++ b/contrib/preflate/pch.cpp
@@ -0,0 +1,5 @@
+// pch.cpp: source file corresponding to the pre-compiled header
+
+#include "pch.h"
+
+// When you are using pre-compiled headers, this source file is necessary for compilation to succeed.
--- a/contrib/preflate/pch.h
+++ b/contrib/preflate/pch.h
@@ -0,0 +1,13 @@
+// pch.h: This is a precompiled header file.
+// Files listed below are compiled only once, improving build performance for future builds.
+// This also affects IntelliSense performance, including code completion and many code browsing features.
+// However, files listed here are ALL re-compiled if any one of them is updated between builds.
+// Do not add files here that you will be updating frequently as this negates the performance advantage.
+
+#ifndef PCH_H
+#define PCH_H
+
+// add headers that you want to pre-compile here
+#include "framework.h"
+
+#endif //PCH_H
--- a/contrib/preflate/preflate.h
+++ b/contrib/preflate/preflate.h
@@ -0,0 +1,21 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef PREFLATE_H
+#define PREFLATE_H
+
+#include "preflate_decoder.h"
+#include "preflate_reencoder.h"
+
+#endif /* PREFLATE_H */
--- a/contrib/preflate/preflate_block_decoder.cpp
+++ b/contrib/preflate/preflate_block_decoder.cpp
@@ -0,0 +1,192 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+#include "pch.h"
+#include <algorithm>
+#include <string.h>
+#include "preflate_block_decoder.h"
+#include "preflate_block_trees.h"
+#include "support/bit_helper.h"
+
+// Binds the decoder to its bit input and cached output; both streams are held
+// by reference. The error state starts as OK. The active Huffman decoder
+// pointers start out null (they are assigned by _setupStaticTables() or
+// _readDynamicTables() before any symbol is decoded), and the two dynamic
+// decoders are constructed from a null length table with zero symbols.
+PreflateBlockDecoder::PreflateBlockDecoder(
+    BitInputStream& input,
+    OutputCacheStream& output) 
+  : _input(input)
+  , _output(output)
+  , _errorCode(OK)
+  , _litLenDecoder(nullptr)
+  , _distDecoder(nullptr)
+  , _dynamicLitLenDecoder(nullptr, 0, false, 0)
+  , _dynamicDistDecoder(nullptr, 0, false, 0) {
+}
+
+// Stores `code` so that status() can report it, and yields false so a caller
+// can fail with a single `return _error(...)`.
+bool PreflateBlockDecoder::_error(const ErrorCode code) {
+  this->_errorCode = code;
+  const bool succeeded = false;
+  return succeeded;
+}
+
+// Reads one deflate block from the input, writes the bytes it produces to the
+// output cache and records the block's description (type, tokens,
+// stored-block padding, lengths) in `block`.
+//
+// block - filled in with the decoded block description.
+// last  - set to the block's "final block" header bit.
+//
+// Returns true when the block was read completely. Returns false when the
+// input is exhausted, the block type is 3, a length or distance code is out of
+// range, a reference reaches back further than the output produced so far,
+// the dynamic tables cannot be read, fewer stored bytes than announced could
+// be copied, or the two stored-block length fields disagree (only this last
+// case records STORED_BLOCK_LEN_MISMATCH for status()).
+bool PreflateBlockDecoder::readBlock(PreflateTokenBlock &block, bool &last) {
+  block.uncompressedStartPos = _output.cacheEndPos();
+  int32_t earliest_reference = INT32_MAX, curPos = 0;
+
+  if (_input.eof()) {
+    return false;
+  }
+
+  last = _readBit() != 0;
+  unsigned char mode = _readBits(2);
+  switch (mode) {
+  default:
+    return false;
+  case 0: {
+    block.type = PreflateTokenBlock::STORED;
+    // The stored length fields start at the next byte boundary, so the
+    // padding is the number of bits still missing to reach that boundary
+    // (0..7) - the same quantity preflate_checker computes after the last
+    // block. Written without unary minus so it is safe on unsigned types.
+    block.paddingBitCount = (8 - (_input.bitPos() & 7)) & 7;
+    block.paddingBits = _input.get(block.paddingBitCount);
+    size_t len = _readBits(16);
+    size_t ilen = _readBits(16);
+    // The second field must be the one's complement of the first.
+    if ((len ^ ilen) != 0xffff) {
+      return _error(STORED_BLOCK_LEN_MISMATCH);
+    }
+    block.uncompressedLen = len;
+    block.contextLen = 0;
+    return _input.copyBytesTo(_output, len) == len;
+  }
+  case 1:
+  case 2:
+    if (mode == 1) {
+      block.type = PreflateTokenBlock::STATIC_HUFF;
+      _setupStaticTables();
+    } else {
+      block.type = PreflateTokenBlock::DYNAMIC_HUFF;
+      if (!_readDynamicTables(block)) {
+        return false;
+      }
+    }
+    while (true) {
+      if (_input.eof()) {
+        return false;
+      }
+      unsigned litLen = _litLenDecoder->decode(_input);
+      if (litLen < 256) {
+        _writeLiteral(litLen);
+        block.tokens.push_back(PreflateToken(PreflateToken::LITERAL));
+        curPos++;
+      } else if (litLen == 256) {
+        // End-of-block symbol: finalize the block's length bookkeeping.
+        block.uncompressedLen = _output.cacheEndPos() - block.uncompressedStartPos;
+        block.contextLen = -earliest_reference;
+        return true;
+      } else {
+        unsigned lcode = litLen - PreflateConstants::NONLEN_CODE_COUNT;
+        if (lcode >= PreflateConstants::LEN_CODE_COUNT) {
+          return false;
+        }
+        unsigned len = PreflateConstants::MIN_MATCH
+          + PreflateConstants::lengthBaseTable[lcode]
+          + _readBits(PreflateConstants::lengthExtraTable[lcode]);
+        // Length 258 reached through a code other than the last length code
+        // is remembered so the token records how it was coded.
+        bool irregular258 = len == 258 && lcode != PreflateConstants::LEN_CODE_COUNT - 1;
+        unsigned dcode = _distDecoder->decode(_input);
+        if (dcode >= PreflateConstants::DIST_CODE_COUNT) {
+          return false;
+        }
+        unsigned dist = 1
+          + PreflateConstants::distBaseTable[dcode]
+          + _readBits(PreflateConstants::distExtraTable[dcode]);
+        if (dist > _output.cacheEndPos()) {
+          return false;
+        }
+        _writeReference(dist, len);
+        block.tokens.push_back(PreflateToken(PreflateToken::REFERENCE, len, dist, irregular258));
+        // The explicit template argument keeps this independent of the
+        // function-like min() macro that <windows.h> may define.
+        earliest_reference = std::min<int32_t>(earliest_reference, curPos - (int32_t)dist);
+        curPos += len;
+      }
+    }
+  }
+}
+
+// Points both active decoders at the shared static trees supplied by
+// PreflateBlockTrees.
+void PreflateBlockDecoder::_setupStaticTables() {
+  const HuffmanDecoder* const litLen = PreflateBlockTrees::staticLitLenTreeDecoder();
+  _litLenDecoder = litLen;
+  const HuffmanDecoder* const distance = PreflateBlockTrees::staticDistTreeDecoder();
+  _distDecoder = distance;
+}
+
+// Reads the header of a dynamic Huffman block: the three counts, the
+// code-tree bit lengths and the run-length coded literal/length and distance
+// bit lengths. Every value read is appended to block.treecodes (a repeat code
+// is followed by its decoded run length), and on success the two dynamic
+// decoders are rebuilt and made the active ones.
+//
+// Returns false when a count is out of range, a tree cannot be built, an
+// invalid symbol is decoded, a repeat has nothing to repeat or runs past the
+// end of the table, or the end-of-block symbol (256) has no code.
+bool PreflateBlockDecoder::_readDynamicTables(PreflateTokenBlock& block) {
+  block.nlen = PreflateConstants::NONLEN_CODE_COUNT + _readBits(5);
+  block.ndist = 1 + _readBits(5);
+  block.ncode = 4 + _readBits(4);
+  const bool countsValid = block.nlen <= PreflateConstants::LITLEN_CODE_COUNT
+                        && block.ndist <= PreflateConstants::DIST_CODE_COUNT;
+  if (!countsValid) {
+    return false;
+  }
+  block.treecodes.clear();
+  block.treecodes.reserve(block.nlen + block.ndist + block.ncode);
+
+  unsigned char tcBitLengths[PreflateConstants::CODETREE_CODE_COUNT] = {0};
+  unsigned char ldBitLengths[PreflateConstants::LITLENDIST_CODE_COUNT] = {0};
+
+  // Code-tree lengths arrive as 3-bit values in the permuted order given by
+  // treeCodeOrderTable.
+  const unsigned codeTreeCount = block.ncode;
+  for (unsigned k = 0; k < codeTreeCount; ++k) {
+    const unsigned char bits = _readBits(3);
+    block.treecodes.push_back(bits);
+    tcBitLengths[PreflateConstants::treeCodeOrderTable[k]] = bits;
+  }
+  HuffmanDecoder tcTree(tcBitLengths, PreflateConstants::CODETREE_CODE_COUNT, true, 7);
+  if (tcTree.error()) {
+    return false;
+  }
+
+  // Literal/length and distance lengths share one table; `pos` is the next
+  // entry to fill.
+  const unsigned total = block.nlen + block.ndist;
+  unsigned pos = 0;
+  while (pos < total) {
+    const unsigned char symbol = tcTree.decode(_input);
+    if (symbol > 18) {
+      return false;
+    }
+    block.treecodes.push_back(symbol);
+    if (symbol < 16) {
+      // Plain bit length.
+      ldBitLengths[pos] = symbol;
+      ++pos;
+      continue;
+    }
+    // Repeat codes: 16 repeats the previous length, 17 and 18 emit zeros.
+    unsigned char fill = 0;
+    unsigned char run = 0;
+    if (symbol == 16) {
+      if (pos == 0) {
+        return false;
+      }
+      fill = ldBitLengths[pos - 1];
+      run = 3 + _readBits(2);
+    } else if (symbol == 17) {
+      run = 3 + _readBits(3);
+    } else {
+      run = 11 + _readBits(7);
+    }
+    if (pos + run > total) {
+      return false;
+    }
+    block.treecodes.push_back(run);
+    memset(ldBitLengths + pos, fill, run);
+    pos += run;
+  }
+
+  // A block without a code for the end-of-block symbol is rejected.
+  if (ldBitLengths[256] == 0) {
+    return false;
+  }
+
+  _dynamicLitLenDecoder = HuffmanDecoder(ldBitLengths, block.nlen, true, 15);
+  if (_dynamicLitLenDecoder.error()) {
+    return false;
+  }
+  _litLenDecoder = &_dynamicLitLenDecoder;
+
+  _dynamicDistDecoder = HuffmanDecoder(ldBitLengths + block.nlen, block.ndist, true, 15);
+  if (_dynamicDistDecoder.error()) {
+    return false;
+  }
+  _distDecoder = &_dynamicDistDecoder;
+  return true;
+}
--- a/contrib/preflate/preflate_block_decoder.h
+++ b/contrib/preflate/preflate_block_decoder.h
@@ -0,0 +1,83 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef PREFLATE_BLOCK_DECODER_H
+#define PREFLATE_BLOCK_DECODER_H
+
+#include "preflate_constants.h"
+#include "preflate_hash_chain.h"
+#include "preflate_input.h"
+#include "preflate_token.h"
+#include "support/bitstream.h"
+#include "support/huffman_decoder.h"
+#include "support/outputcachestream.h"
+
+// Decodes deflate blocks one at a time from a BitInputStream. The decoded
+// bytes go to an OutputCacheStream, and each block's structure is returned as
+// a PreflateTokenBlock.
+class PreflateBlockDecoder {
+public:
+  // Failure reasons reported by status().
+  enum ErrorCode {
+    OK,
+    STORED_BLOCK_LEN_MISMATCH,
+    STORED_BLOCK_PADDING_MISMATCH,
+    BADLY_CODED_MAX_LENGTH
+  };
+
+  // Both streams are held by reference.
+  PreflateBlockDecoder(BitInputStream& input,
+                       OutputCacheStream& output);
+
+  // Error code recorded by the most recent _error() call, or OK.
+  ErrorCode status() const {
+    return _errorCode;
+  }
+
+  // Reads the next block; `last` receives the block's final-block flag.
+  bool readBlock(PreflateTokenBlock&, bool& last);
+
+private:
+  // Records `code` and returns false.
+  bool _error(const ErrorCode);
+
+  // Thin wrappers around the bit input stream.
+  unsigned char _readBit() {
+    return _input.get(1);
+  }
+  unsigned _readBits(const unsigned bits) {
+    return _input.get(bits);
+  }
+  void _skipToByte() {
+    _input.skipToByte();
+  }
+  bool _checkLastBitsOfByte() {
+    return _input.checkLastBitsOfByteAreZero();
+  }
+
+  // Appends a single literal byte to the output.
+  void _writeLiteral(const unsigned char l) {
+    _output.write(&l, 1);
+  }
+
+  // Appends `len` bytes copied from `dist` bytes before the current end of the
+  // output cache. When the source range overlaps the bytes being written
+  // (len > dist) the copy is done one byte at a time, presumably so bytes
+  // produced earlier in the same copy can be read back.
+  void _writeReference(const size_t dist, const size_t len) {
+    _output.reserve(len);
+    if (len > dist) {
+      const uint8_t* src = _output.cacheEnd() - dist;
+      for (size_t k = 0; k != len; ++k) {
+        _output.write(src + k, 1);
+      }
+      return;
+    }
+    _output.write(_output.cacheEnd() - dist, len);
+  }
+
+  // Select the static trees, or read and build the dynamic ones.
+  void _setupStaticTables();
+  bool _readDynamicTables(PreflateTokenBlock&);
+
+  BitInputStream& _input;
+  OutputCacheStream& _output;
+  ErrorCode _errorCode;
+  // Decoders in use for the current block; they point either at the shared
+  // static trees or at the two dynamic decoders below.
+  const HuffmanDecoder* _litLenDecoder;
+  const HuffmanDecoder* _distDecoder;
+  HuffmanDecoder _dynamicLitLenDecoder;
+  HuffmanDecoder _dynamicDistDecoder;
+};
+
+#endif /* PREFLATE_BLOCK_DECODER_H */
--- a/contrib/preflate/preflate_block_reencoder.cpp
+++ b/contrib/preflate/preflate_block_reencoder.cpp
@@ -0,0 +1,195 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include <algorithm>
+#include <string.h>
+#include "preflate_block_reencoder.h"
+#include "preflate_block_trees.h"
+#include "support/bit_helper.h"
+
+// Binds the re-encoder to its bit output and to the uncompressed data it
+// reads literals and stored bytes from, starting at `uncompressedOffset`.
+// Both the stream and the data vector are held by reference. The error state
+// starts as OK. The active Huffman encoder pointers start out null (they are
+// assigned by _setupStaticTables() or _buildAndWriteDynamicTables() before any
+// token is written), and the two dynamic encoders are constructed from a null
+// length table with zero symbols.
+PreflateBlockReencoder::PreflateBlockReencoder(
+    BitOutputStream& bos,
+    const std::vector<unsigned char>& uncompressedData,
+    const size_t uncompressedOffset)
+  : _output(bos)
+  , _uncompressedData(uncompressedData)
+  , _uncompressedDataPos(uncompressedOffset)
+  , _errorCode(OK)
+  , _litLenEncoder(nullptr)
+  , _distEncoder(nullptr)
+  , _dynamicLitLenEncoder(nullptr, 0, false)
+  , _dynamicDistEncoder(nullptr, 0, false) {
+}
+
+// Stores `code` so that status() can report it, and yields false so a caller
+// can fail with a single `return _error(...)`.
+bool PreflateBlockReencoder::_error(const ErrorCode code) {
+  this->_errorCode = code;
+  const bool succeeded = false;
+  return succeeded;
+}
+
+// Points both active encoders at the shared static trees supplied by
+// PreflateBlockTrees.
+void PreflateBlockReencoder::_setupStaticTables() {
+  const HuffmanEncoder* const litLen = PreflateBlockTrees::staticLitLenTreeEncoder();
+  _litLenEncoder = litLen;
+  const HuffmanEncoder* const distance = PreflateBlockTrees::staticDistTreeEncoder();
+  _distEncoder = distance;
+}
+
+// Writes the tree description of a dynamic Huffman block from block.treecodes
+// and builds the literal/length and distance encoders that the block's tokens
+// are written with.
+//
+// block.treecodes holds block.ncode code-tree bit lengths followed by the
+// coded literal/length and distance bit lengths; each repeat code (16, 17,
+// 18) is followed by one entry giving its run length.
+//
+// Returns true on success. On failure records TREE_OUT_OF_RANGE,
+// BAD_CODE_TREE or BAD_LD_TREE and returns false; the output stream may
+// already contain part of the description at that point.
+bool PreflateBlockReencoder::_buildAndWriteDynamicTables(const PreflateTokenBlock& block) {
+  if (block.ncode < 4 || block.ncode > PreflateConstants::CODETREE_CODE_COUNT
+      || block.treecodes.size() < (size_t)block.ncode
+      || block.nlen < PreflateConstants::NONLEN_CODE_COUNT
+      || block.nlen > PreflateConstants::LITLEN_CODE_COUNT
+      || block.ndist < 1 || block.ndist > PreflateConstants::DIST_CODE_COUNT) {
+    return _error(TREE_OUT_OF_RANGE);
+  }
+  unsigned char tcBitLengths[PreflateConstants::CODETREE_CODE_COUNT];
+  unsigned char ldBitLengths[PreflateConstants::LITLENDIST_CODE_COUNT];
+  memset(tcBitLengths, 0, sizeof(tcBitLengths));
+  memset(ldBitLengths, 0, sizeof(ldBitLengths));
+
+  // Code-tree lengths are written as 3-bit values in the permuted order given
+  // by treeCodeOrderTable.
+  for (unsigned i = 0, n = block.ncode; i < n; ++i) {
+    unsigned char tc = block.treecodes[i];
+    _output.put(tc, 3);
+    tcBitLengths[PreflateConstants::treeCodeOrderTable[i]] = tc;
+  }
+  HuffmanEncoder tcTree(tcBitLengths, PreflateConstants::CODETREE_CODE_COUNT, true);
+  if (tcTree.error()) {
+    return _error(BAD_CODE_TREE);
+  }
+
+  // Extra-bit count and smallest run length for the repeat codes 16, 17, 18.
+  static const unsigned char repExtraBits[3] = {2, 3, 7};
+  static const unsigned char repOffset[3] = {3, 3, 11};
+
+  // unpack tree codes
+  unsigned o = 0, maxo = block.nlen + block.ndist;
+  for (auto i = block.treecodes.begin() + block.ncode, e = block.treecodes.end(); i != e; ++i) {
+    unsigned char code = *i;
+    if (code > 18) {
+      return _error(BAD_LD_TREE);
+    }
+    tcTree.encode(_output, code);
+    if (code < 16) {
+      if (o >= maxo) {
+        return _error(BAD_LD_TREE);
+      }
+      ldBitLengths[o++] = code;
+      continue;
+    }
+    // A repeat code must be followed by its run length.
+    if (i + 1 == e) {
+      return _error(BAD_LD_TREE);
+    }
+    // Code 16 repeats the previous length, so there must be one.
+    if (code == 16 && o == 0) {
+      return _error(BAD_LD_TREE);
+    }
+    const unsigned char len = *++i;
+    const unsigned char tocopy = code == 16 ? ldBitLengths[o - 1] : 0;
+    const unsigned char minLen = repOffset[code - 16];
+    const unsigned char extraBits = repExtraBits[code - 16];
+    // The run length must be representable in the code's extra bits;
+    // otherwise the value written below would be negative or too wide and
+    // would corrupt the neighbouring bits of the stream.
+    if (len < minLen || ((unsigned)(len - minLen) >> extraBits) != 0) {
+      return _error(BAD_LD_TREE);
+    }
+    if (o + len > maxo) {
+      return _error(BAD_LD_TREE);
+    }
+    _output.put(len - minLen, extraBits);
+    memset(ldBitLengths + o, tocopy, len);
+    o += len;
+  }
+  // The description must cover every literal/length and distance entry.
+  if (o != maxo) {
+    return _error(BAD_LD_TREE);
+  }
+  // The end-of-block symbol (256) must have a code.
+  if (!ldBitLengths[256]) {
+    return _error(BAD_LD_TREE);
+  }
+  _dynamicLitLenEncoder = HuffmanEncoder(ldBitLengths, block.nlen, true);
+  if (_dynamicLitLenEncoder.error()) {
+    return _error(BAD_LD_TREE);
+  }
+  _litLenEncoder = &_dynamicLitLenEncoder;
+
+  _dynamicDistEncoder = HuffmanEncoder(ldBitLengths + block.nlen, block.ndist, true);
+  if (_dynamicDistEncoder.error()) {
+    return _error(BAD_LD_TREE);
+  }
+  _distEncoder = &_dynamicDistEncoder;
+  return true;
+}
+
+// Encodes every token of a compressed block with the active encoders and
+// finishes with the end-of-block symbol. Literal values are taken from the
+// uncompressed data at the current position; references only advance that
+// position by their length.
+//
+// Returns false (recording LITERAL_OUT_OF_BOUNDS) when a literal would be read
+// past the end of the uncompressed data, otherwise true.
+bool PreflateBlockReencoder::_writeTokens(const std::vector<PreflateToken>& tokens) {
+  for (const PreflateToken& tok : tokens) {
+    // A token of length 1 is a literal.
+    if (tok.len == 1) {
+      if (_uncompressedDataPos >= _uncompressedData.size()) {
+        return _error(LITERAL_OUT_OF_BOUNDS);
+      }
+      const unsigned char byteValue = _uncompressedData[_uncompressedDataPos];
+      ++_uncompressedDataPos;
+      _litLenEncoder->encode(_output, byteValue);
+      continue;
+    }
+
+    // Reference: length first ...
+    if (!tok.irregular258) {
+      const unsigned lengthCode = PreflateConstants::LCode(tok.len);
+      _litLenEncoder->encode(_output, PreflateConstants::NONLEN_CODE_COUNT + lengthCode);
+      const unsigned lengthExtraBits = PreflateConstants::lengthExtraTable[lengthCode];
+      if (lengthExtraBits != 0) {
+        _output.put(tok.len - PreflateConstants::MIN_MATCH - PreflateConstants::lengthBaseTable[lengthCode], lengthExtraBits);
+      }
+    } else {
+      // Irregular 258: the second-to-last literal/length symbol followed by
+      // five extra bits that are all set.
+      _litLenEncoder->encode(_output, PreflateConstants::LITLEN_CODE_COUNT - 2);
+      _output.put(31, 5);
+    }
+
+    // ... then distance.
+    const unsigned distanceCode = PreflateConstants::DCode(tok.dist);
+    _distEncoder->encode(_output, distanceCode);
+    const unsigned distanceExtraBits = PreflateConstants::distExtraTable[distanceCode];
+    if (distanceExtraBits != 0) {
+      _output.put(tok.dist - 1 - PreflateConstants::distBaseTable[distanceCode], distanceExtraBits);
+    }
+    _uncompressedDataPos += tok.len;
+  }
+  // End-of-block symbol.
+  _litLenEncoder->encode(_output, PreflateConstants::LITERAL_COUNT);
+  return true;
+}
+
+// Writes one complete block: the final-block bit, the 2-bit block type and
+// the body that belongs to that type.
+//
+// block - description of the block to write.
+// last  - value of the final-block header bit.
+//
+// Returns false without writing anything when an earlier error is still
+// recorded, and false when writing the tree description or the tokens fails or
+// a stored block extends past the uncompressed data (LITERAL_OUT_OF_BOUNDS).
+// Returns true otherwise; a block type other than the three handled ones only
+// emits the final-block bit.
+bool PreflateBlockReencoder::writeBlock(const PreflateTokenBlock& block, bool last) {
+  if (status() != OK) {
+    return false;
+  }
+  _output.put(last, 1);
+
+  if (block.type == PreflateTokenBlock::DYNAMIC_HUFF) {
+    _output.put(2, 2);
+    _output.put(block.nlen - PreflateConstants::NONLEN_CODE_COUNT, 5);
+    _output.put(block.ndist - 1, 5);
+    _output.put(block.ncode - 4, 4);
+    // Tokens are only written once the tables were emitted successfully.
+    return _buildAndWriteDynamicTables(block) && _writeTokens(block.tokens);
+  }
+
+  if (block.type == PreflateTokenBlock::STATIC_HUFF) {
+    _output.put(1, 2);
+    _setupStaticTables();
+    return _writeTokens(block.tokens);
+  }
+
+  if (block.type == PreflateTokenBlock::STORED) {
+    _output.put(0, 2);
+    // Emit the recorded padding bits, then fill up to the byte boundary.
+    _output.put(block.paddingBits, block.paddingBitCount);
+    _output.fillByte();
+    // Length followed by its bitwise complement, 16 bits each.
+    _output.put(block.uncompressedLen, 16);
+    _output.put(~block.uncompressedLen, 16);
+    const bool fits =
+      _uncompressedDataPos + block.uncompressedLen <= _uncompressedData.size();
+    if (!fits) {
+      return _error(LITERAL_OUT_OF_BOUNDS);
+    }
+    _output.putBytes(_uncompressedData.data() + _uncompressedDataPos, block.uncompressedLen);
+    _uncompressedDataPos += block.uncompressedLen;
+  }
+  return true;
+}
+// Forwards to flush() on the underlying bit output stream.
+void PreflateBlockReencoder::flush() {
+  BitOutputStream& sink = _output;
+  sink.flush();
+}
--- a/contrib/preflate/preflate_block_reencoder.h
+++ b/contrib/preflate/preflate_block_reencoder.h
@@ -0,0 +1,77 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef PREFLATE_BLOCK_REENCODER_H
+#define PREFLATE_BLOCK_REENCODER_H
+
+#include "preflate_constants.h"
+#include "preflate_token.h"
+#include "support/bitstream.h"
+#include "support/huffman_encoder.h"
+
+// Writes deflate blocks described by PreflateTokenBlock objects to a
+// BitOutputStream. Literal and stored bytes are taken from a vector of
+// uncompressed data, which is consumed sequentially from a starting offset.
+class PreflateBlockReencoder {
+public:
+  // Failure reasons reported by status().
+  enum ErrorCode {
+    OK,
+    LITERAL_OUT_OF_BOUNDS,
+    TREE_OUT_OF_RANGE,
+    BAD_CODE_TREE,
+    BAD_LD_TREE
+  };
+
+  // The stream and the data vector are held by reference;
+  // `uncompressedOffset` is the position of the first byte to consume.
+  PreflateBlockReencoder(BitOutputStream& bos, 
+                         const std::vector<unsigned char>& uncompressedData,
+                         const size_t uncompressedOffset);
+
+  // Error code recorded by the most recent _error() call, or OK.
+  ErrorCode status() const {
+    return _errorCode;
+  }
+
+  // Writes one block; `last` is the value of its final-block header bit.
+  bool writeBlock(const PreflateTokenBlock&, const bool last);
+  // Flushes the underlying bit output stream.
+  void flush();
+
+private:
+  // Records the error code and returns false.
+  bool _error(const ErrorCode);
+
+  // Encode the tokens of a compressed block with the active encoders.
+  bool _writeTokens(const std::vector<PreflateToken>& tokens);
+  // Select the static trees, or write and build the dynamic ones.
+  bool _buildAndWriteDynamicTables(const PreflateTokenBlock&);
+  void _setupStaticTables();
+
+  BitOutputStream& _output;
+  const std::vector<unsigned char>& _uncompressedData;
+  size_t _uncompressedDataPos;
+  ErrorCode _errorCode;
+
+  // Encoders in use for the current block; they point either at the shared
+  // static trees or at the two dynamic encoders below.
+  const HuffmanEncoder* _litLenEncoder;
+  const HuffmanEncoder* _distEncoder;
+  HuffmanEncoder _dynamicLitLenEncoder;
+  HuffmanEncoder _dynamicDistEncoder;
+};
+
+#endif /* PREFLATE_BLOCK_REENCODER_H */
--- a/contrib/preflate/preflate_block_trees.cpp
+++ b/contrib/preflate/preflate_block_trees.cpp
@@ -0,0 +1,66 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include <algorithm>
+#include "preflate_block_trees.h"
+#include "support/bit_helper.h"
+
+// Fills `a` with the bit lengths of the fixed literal/length code:
+// symbols 0-143 use 8 bits, 144-255 use 9, 256-279 use 7, 280-287 use 8.
+static void setLitLenBitLengths(unsigned char(&a)[288]) {
+  std::fill(a +   0, a + 144, 8);
+  std::fill(a + 144, a + 256, 9);
+  std::fill(a + 256, a + 280, 7);
+  std::fill(a + 280, a + 288, 8);
+}
+// Fills `a` with the bit lengths of the fixed distance code: 5 bits each.
+static void setDistBitLengths(unsigned char(&a)[32]) {
+  std::fill(a, a + 32, 5);
+}
+
+// Each accessor below builds its tree on first use and returns the same
+// object on every later call. The object lives in a function-local static, so
+// the language guarantees that concurrent first calls construct it exactly
+// once; the previous "if (!ptr) ptr = new ..." check was a data race. As
+// before, the trees are never destroyed.
+
+// Shared decoder for the fixed literal/length code.
+const HuffmanDecoder* PreflateBlockTrees::staticLitLenTreeDecoder() {
+  static const HuffmanDecoder* const decoder = [] {
+    unsigned char l_lengths[288];
+    setLitLenBitLengths(l_lengths);
+    return new HuffmanDecoder(l_lengths, 288, true, 15);
+  }();
+  return decoder;
+}
+// Shared decoder for the fixed distance code.
+const HuffmanDecoder* PreflateBlockTrees::staticDistTreeDecoder() {
+  static const HuffmanDecoder* const decoder = [] {
+    unsigned char d_lengths[32];
+    setDistBitLengths(d_lengths);
+    return new HuffmanDecoder(d_lengths, 32, true, 15);
+  }();
+  return decoder;
+}
+// Shared encoder for the fixed literal/length code.
+const HuffmanEncoder* PreflateBlockTrees::staticLitLenTreeEncoder() {
+  static const HuffmanEncoder* const encoder = [] {
+    unsigned char l_lengths[288];
+    setLitLenBitLengths(l_lengths);
+    return new HuffmanEncoder(l_lengths, 288, true);
+  }();
+  return encoder;
+}
+// Shared encoder for the fixed distance code.
+const HuffmanEncoder* PreflateBlockTrees::staticDistTreeEncoder() {
+  static const HuffmanEncoder* const encoder = [] {
+    unsigned char d_lengths[32];
+    setDistBitLengths(d_lengths);
+    return new HuffmanEncoder(d_lengths, 32, true);
+  }();
+  return encoder;
+}
--- a/contrib/preflate/preflate_block_trees.h
+++ b/contrib/preflate/preflate_block_trees.h
@@ -0,0 +1,32 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef PREFLATE_BLOCK_TREES_H
+#define PREFLATE_BLOCK_TREES_H
+
+#include "preflate_constants.h"
+#include "preflate_hash_chain.h"
+#include "preflate_input.h"
+#include "preflate_token.h"
+#include "support/huffman_decoder.h"
+#include "support/huffman_encoder.h"
+
+// Access to the Huffman coders for the fixed (static) deflate trees. Each
+// function returns a pointer to a coder that is created on first use and
+// shared by all callers.
+struct PreflateBlockTrees {
+  // Fixed literal/length tree.
+  static const HuffmanDecoder* staticLitLenTreeDecoder();
+  static const HuffmanEncoder* staticLitLenTreeEncoder();
+
+  // Fixed distance tree.
+  static const HuffmanDecoder* staticDistTreeDecoder();
+  static const HuffmanEncoder* staticDistTreeEncoder();
+};
+
+#endif /* PREFLATE_BLOCK_TREES_H */
--- a/contrib/preflate/preflate_checker.cpp
+++ b/contrib/preflate/preflate_checker.cpp
@@ -0,0 +1,289 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include <stdio.h>
+#include <string.h>
+#include "preflate_block_decoder.h"
+#include "preflate_block_reencoder.h"
+#include "preflate_checker.h"
+#include "preflate_parameter_estimator.h"
+#include "preflate_statistical_model.h"
+#include "preflate_token_predictor.h"
+#include "preflate_tree_predictor.h"
+#include "support/bitstream.h"
+#include "support/memstream.h"
+#include "support/outputcachestream.h"
+
+#include <algorithm>
+#include <chrono>
+
+bool preflate_checker(const std::vector<unsigned char>& deflate_raw) {  // Self-test: inflate, predict, encode diff, decode diff, re-deflate, compare.
+  printf("Checking raw deflate file of size %d\n", (int)deflate_raw.size());
+
+  MemStream decIn(deflate_raw);
+  MemStream decUnc;
+  BitInputStream decInBits(decIn);
+  OutputCacheStream decOutCache(decUnc);
+  std::vector<PreflateTokenBlock> blocks;
+
+  auto ts_start = std::chrono::steady_clock::now();
+  PreflateBlockDecoder bdec(decInBits, decOutCache);
+  if (bdec.status() != PreflateBlockDecoder::OK) {
+    return false;
+  }
+  bool last;  // written by readBlock; ends the loop below
+  unsigned i = 0;  // counts decoded blocks; not read after the loop
+  do {
+    PreflateTokenBlock newBlock;
+    bool ok = bdec.readBlock(newBlock, last);
+    if (!ok) {
+      printf("inflating error (preflate)\n");
+      return false;
+    }
+    blocks.push_back(newBlock);
+    ++i;
+  } while (!last);
+  uint8_t remaining_bit_count = (8 - decInBits.bitPos()) & 7;  // bits from bitPos() up to the next multiple of 8
+  uint8_t remaining_bits = decInBits.get(remaining_bit_count);  // trailing padding bits, kept for exact reproduction
+  decOutCache.flush();
+  std::vector<unsigned char> unpacked_output = decUnc.extractData();
+  auto ts_end = std::chrono::steady_clock::now();
+  printf("Unpacked data has size %d\n", (int)unpacked_output.size());
+  printf("Unpacking took %g seconds\n", std::chrono::duration<double>(ts_end - ts_start).count());
+
+  // Encode: estimate parameters, analyze every block with both predictors, then serialize the result.
+  PreflateParameters paramsE = estimatePreflateParameters(unpacked_output, 0, blocks);
+  printf("prediction parameters: w %d, c %d, m %d, zlib %d, farL3M %d, very far M %d, M2S %d, log2CD %d\n",
+         paramsE.windowBits, paramsE.compLevel, paramsE.memLevel,
+         paramsE.zlibCompatible, paramsE.farLen3MatchesDetected,
+         paramsE.veryFarMatchesDetected, paramsE.matchesToStartDetected,
+         paramsE.log2OfMaxChainDepthM1);
+
+
+  ts_start = std::chrono::steady_clock::now();
+  PreflateStatisticsCounter counterE;
+  memset(&counterE, 0, sizeof(counterE));  // all counters start at zero
+  PreflateTokenPredictor tokenPredictorE(paramsE, unpacked_output, 0);
+  PreflateTreePredictor treePredictorE(unpacked_output, 0);
+  for (unsigned i = 0, n = blocks.size(); i < n; ++i) {  // first pass: analyze blocks and gather statistics
+    tokenPredictorE.analyzeBlock(i, blocks[i]);
+    if (tokenPredictorE.predictionFailure) {
+      printf("block %d: compress failed token prediction\n", i);
+      return false;
+    }
+    treePredictorE.analyzeBlock(i, blocks[i]);
+    if (treePredictorE.predictionFailure) {
+      printf("block %d: compress failed tree prediction\n", i);
+      return false;
+    }
+    tokenPredictorE.updateCounters(&counterE, i);
+    treePredictorE.updateCounters(&counterE, i);
+  }
+  counterE.block.incNonZeroPadding(remaining_bits != 0);  // count whether the padding bits were non-zero
+  ts_end = std::chrono::steady_clock::now();
+  printf("Prediction took %g seconds\n", std::chrono::duration<double>(ts_end - ts_start).count());
+
+  counterE.print();
+
+  ts_start = std::chrono::steady_clock::now();
+  PreflateMetaEncoder codecE;
+  if (codecE.error()) {
+    return false;
+  }
+  PreflatePredictionEncoder pcodecE;
+  unsigned modelId = codecE.addModel(counterE, paramsE);
+  if (!codecE.beginMetaBlockWithModel(pcodecE, modelId)) {
+    return false;
+  }
+  for (unsigned i = 0, n = blocks.size(); i < n; ++i) {  // second pass: encode every block into pcodecE
+    tokenPredictorE.encodeBlock(&pcodecE, i);
+    if (tokenPredictorE.predictionFailure) {
+      printf("block %d: compress failed token encoding\n", i);
+      return false;
+    }
+    treePredictorE.encodeBlock(&pcodecE, i);
+    if (treePredictorE.predictionFailure) {
+      printf("block %d: compress failed tree encoding\n", i);
+      return false;
+    }
+    tokenPredictorE.encodeEOF(&pcodecE, i, i + 1 == blocks.size());  // last argument is true only for the final block
+  }
+  pcodecE.encodeNonZeroPadding(remaining_bits != 0);
+  if (remaining_bits != 0) {
+    unsigned bitsToSave = bitLength(remaining_bits);  // bitLength is defined elsewhere; presumably the position of the highest set bit
+    pcodecE.encodeValue(bitsToSave, 3);  // the bit count itself is stored in 3 bits
+    if (bitsToSave > 1) {
+      pcodecE.encodeValue(remaining_bits & ((1 << (bitsToSave - 1)) - 1), bitsToSave - 1);  // top bit is implied; only the lower bits are stored
+    }
+  }
+  if (!codecE.endMetaBlock(pcodecE, unpacked_output.size())) {
+    return false;
+  }
+  std::vector<unsigned char> preflate_diff = codecE.finish();
+  ts_end = std::chrono::steady_clock::now();
+  printf("Prediction diff has size %d\n", (int)preflate_diff.size());
+  printf("Encoding diff took %g seconds\n", std::chrono::duration<double>(ts_end - ts_start).count());
+
+  // Decode: rebuild parameters and token blocks from preflate_diff plus the unpacked data.
+  ts_start = std::chrono::steady_clock::now();
+  PreflateMetaDecoder codecD(preflate_diff, unpacked_output.size());
+  PreflatePredictionDecoder pcodecD;
+  PreflateParameters paramsD;
+
+  if (codecD.error() || codecD.metaBlockCount() != 1) {  // the encoder above wrote exactly one meta block
+    return false;
+  }
+  if (!codecD.beginMetaBlock(pcodecD, paramsD, 0)) {
+    return false;
+  }
+
+  PreflateTokenPredictor tokenPredictorD(paramsD, unpacked_output, 0);
+  PreflateTreePredictor treePredictorD(unpacked_output, 0);
+
+  MemStream mem;
+  BitOutputStream bos(mem);
+
+  std::vector<PreflateTokenBlock> dblocks;
+  unsigned blockno = 0;
+  bool eof = true;
+  do {
+    PreflateTokenBlock block = tokenPredictorD.decodeBlock(&pcodecD);
+    if (tokenPredictorD.predictionFailure) {
+      printf("block %d: token uncompress failed\n", blockno);
+      return false;
+    }
+    if (!treePredictorD.decodeBlock(block, &pcodecD)) {
+      printf("block %d: tree uncompress failed\n", blockno);
+      return false;
+    }
+    if (treePredictorD.predictionFailure) {
+      printf("block %d: tree uncompress failed\n", blockno);
+      return false;
+    }
+    eof = tokenPredictorD.decodeEOF(&pcodecD);
+    dblocks.push_back(block);
+    ++blockno;
+  } while (!eof);
+  ts_end = std::chrono::steady_clock::now();
+  printf("Decoding diff and reprediction took %g seconds\n", std::chrono::duration<double>(ts_end - ts_start).count());
+
+  if (paramsD.windowBits != paramsE.windowBits) {  // decoded parameters must equal the encoder's
+    printf("parameter decoding failed: windowBits mismatch\n");
+    return false;
+  }
+  if (paramsD.memLevel != paramsE.memLevel) {
+    printf("parameter decoding failed: memLevel mismatch\n");
+    return false;
+  }
+  if (paramsD.compLevel != paramsE.compLevel) {
+    printf("parameter decoding failed: compLevel mismatch\n");
+    return false;
+  }
+  if (paramsD.zlibCompatible != paramsE.zlibCompatible) {
+    printf("parameter decoding failed: zlib compatible flag mismatch\n");
+    return false;
+  }
+  if (!paramsD.zlibCompatible && (0  // these flags are only compared for non-zlib-compatible streams
+                                  //      || paramsD.farLen3MatchesDetected != paramsE.farLen3MatchesDetected
+                                  || paramsD.veryFarMatchesDetected != paramsE.veryFarMatchesDetected
+                                  || paramsD.matchesToStartDetected != paramsE.matchesToStartDetected
+                                  || paramsD.log2OfMaxChainDepthM1 != paramsE.log2OfMaxChainDepthM1)) {
+    printf("parameter decoding failed: non-zlib flag mismatch\n");
+    return false;
+  }
+
+  if (!isEqual(pcodecD, pcodecE)) {
+    printf("decoded model differs from original\n");
+    return false;
+  }
+
+  for (size_t blockno = 0, n = min(blocks.size(), dblocks.size()); blockno < n; ++blockno) {  // compares only the blocks present in both lists
+    if (dblocks[blockno].type != blocks[blockno].type) {
+      printf("block %zu: type differs: org %d, new %d\n", blockno, blocks[blockno].type, dblocks[blockno].type);
+      return false;
+    }
+    for (unsigned i = 0, n = min(dblocks[blockno].tokens.size(), blocks[blockno].tokens.size()); i < n; ++i) {  // common token prefix first, count checked afterwards
+      PreflateToken orgToken = blocks[blockno].tokens[i];
+      PreflateToken newToken = dblocks[blockno].tokens[i];
+      if (newToken.len != orgToken.len || newToken.dist != orgToken.dist) {
+        printf("block %zu: generated token %d differs: org(%d,%d), new(%d,%d)\n",
+               blockno, i, orgToken.len, orgToken.dist, newToken.len, newToken.dist);
+        return false;
+      }
+    }
+    if (dblocks[blockno].tokens.size() != blocks[blockno].tokens.size()) {
+      printf("block %zu: differing token count: org %d, new %d\n",
+             blockno, (int)blocks[blockno].tokens.size(), (int)dblocks[blockno].tokens.size());
+      return false;
+    }
+    if (dblocks[blockno].type == PreflateTokenBlock::DYNAMIC_HUFF) {  // tree fields are only compared for dynamic Huffman blocks
+      if (dblocks[blockno].nlen != blocks[blockno].nlen) {
+        printf("block %zu: literal/len count differs: org %d, new %d\n",
+               blockno, blocks[blockno].nlen, dblocks[blockno].nlen);
+        return false;
+      }
+      if (dblocks[blockno].ndist != blocks[blockno].ndist) {
+        printf("block %zu: dist count differs: org %d, new %d\n",
+               blockno, blocks[blockno].ndist, dblocks[blockno].ndist);
+        return false;
+      }
+      if (dblocks[blockno].ncode != blocks[blockno].ncode) {
+        printf("block %zu: tree code count differs: org %d, new %d\n",
+               blockno, blocks[blockno].ncode, dblocks[blockno].ncode);
+        return false;
+      }
+      if (dblocks[blockno].treecodes != blocks[blockno].treecodes) {
+        printf("block %zu: generated tree codes differs\n", blockno);
+        return false;
+      }
+    }
+  }
+
+  ts_start = std::chrono::steady_clock::now();
+  PreflateBlockReencoder deflater(bos, unpacked_output, 0);
+  for (size_t i = 0; i < dblocks.size(); ++i) {
+    deflater.writeBlock(dblocks[i], i + 1 == dblocks.size());  // second argument is true only for the final block
+  }
+  bool non_zero_bits = pcodecD.decodeNonZeroPadding();
+  if (non_zero_bits) {
+    unsigned bitsToLoad = pcodecD.decodeValue(3);
+    unsigned padding = 0;
+    if (bitsToLoad > 0) {
+      padding = (1 << (bitsToLoad - 1)) + pcodecD.decodeValue(bitsToLoad - 1);  // re-add the implied top bit to the stored lower bits
+    }
+    bos.put(padding, bitsToLoad);
+  }
+  if (!codecD.endMetaBlock(pcodecD)) {
+    return false;
+  }
+  deflater.flush();
+  std::vector<unsigned char> deflate_raw_out = mem.extractData();
+  ts_end = std::chrono::steady_clock::now();
+  printf("Reencoding deflate stream took %g seconds\n", std::chrono::duration<double>(ts_end - ts_start).count());
+
+  for (unsigned i = 0, n = min(deflate_raw.size(), deflate_raw_out.size()); i < n; ++i) {  // byte compare over the common length, sizes checked afterwards
+    if (deflate_raw[i] != deflate_raw_out[i]) {
+      printf("created deflate stream differs at offset %d\n", i);
+      return false;
+    }
+  }
+  if (deflate_raw.size() != deflate_raw_out.size()) {
+    printf("created deflate streams differs in size: org %d, new %d\n", 
+           (int)deflate_raw.size(), (int)deflate_raw_out.size());
+    return false;
+  }
+  printf("Success\n");
+  return true;
+}
--- a/contrib/preflate/preflate_checker.h
+++ b/contrib/preflate/preflate_checker.h
@@ -0,0 +1,22 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef PREFLATE_CHECKER_H
+#define PREFLATE_CHECKER_H
+
+#include <vector>
+
+bool preflate_checker(const std::vector<unsigned char>& deflate_raw);  // Round-trips a raw deflate stream; true when it is reproduced byte for byte.
+
+#endif /* PREFLATE_CHECKER_H */
--- a/contrib/preflate/preflate_complevel_estimator.cpp
+++ b/contrib/preflate/preflate_complevel_estimator.cpp
@@ -0,0 +1,222 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include <algorithm>
+#include <string.h>
+#include "preflate_complevel_estimator.h"
+#include "preflate_constants.h"
+
+PreflateCompLevelEstimatorState::PreflateCompLevelEstimatorState(
+    const int wbits,
+    const int mbits,
+    const std::vector<unsigned char>& unpacked_output_,
+    const size_t off0_,
+    const std::vector<PreflateTokenBlock>& blocks_)
+  : slowHash(unpacked_output_, mbits), fastL1Hash(unpacked_output_, mbits)
+  , fastL2Hash(unpacked_output_, mbits), fastL3Hash(unpacked_output_, mbits)
+  , blocks(blocks_), wsize(1 << wbits), off0(off0_)
+{
+  // Start from an all-zero result record.
+  memset(&info, 0, sizeof(info));
+  // Bits 1..9 set: every level from 1 to 9 is a candidate until a match rules it out.
+  const unsigned allLevels = (1u << 10) - (1u << 1);
+  info.possibleCompressionLevels = allLevels;
+  // Advance every hash chain by off0 before any token is examined.
+  updateHash(off0);
+}
+
+void PreflateCompLevelEstimatorState::updateHash(const unsigned len) {
+  // A fast chain is only advanced while its level (bit 1, 2 or 3 of
+  // possibleCompressionLevels) is still a candidate; slowHash always is.
+  PreflateHashChainExt* const fastChains[3] = {&fastL1Hash, &fastL2Hash, &fastL3Hash};
+  for (unsigned level = 1; level <= 3; ++level) {
+    const bool stillPossible = (info.possibleCompressionLevels & (1u << level)) != 0;
+    if (stillPossible) {
+      fastChains[level - 1]->updateHash(len);
+    }
+  }
+  slowHash.updateHash(len);
+}
+void PreflateCompLevelEstimatorState::updateOrSkipSingleFastHash(
+    PreflateHashChainExt& hash,
+    const unsigned len,
+    const PreflateParserConfig& config) {
+  // Lengths above config.max_lazy go through skipHash(), all others through
+  // updateHash().
+  const bool aboveLazyLimit = len > config.max_lazy;
+  if (aboveLazyLimit) { hash.skipHash(len); return; }
+  hash.updateHash(len);
+}
+
+void PreflateCompLevelEstimatorState::updateOrSkipHash(const unsigned len) {
+  // Each fast level (1-3) that is still a candidate advances its own chain with
+  // that level's parser settings; slowHash is always updated in full.
+  PreflateHashChainExt* const fastChains[3] = {&fastL1Hash, &fastL2Hash, &fastL3Hash};
+  for (unsigned idx = 0; idx < 3; ++idx) {
+    if ((info.possibleCompressionLevels & (1u << (idx + 1))) == 0) {
+      continue;
+    }
+    updateOrSkipSingleFastHash(*fastChains[idx], len, fastPreflateParserSettings[idx]);
+  }
+  slowHash.updateHash(len);
+}
+
+unsigned short PreflateCompLevelEstimatorState::matchDepth(
+  const unsigned hashHead,
+  const PreflateToken& targetReference,
+  const PreflateHashChainExt& hash) {
+  // Returns the chain depth of hashHead minus that of the referenced position,
+  // capped at 0xffff; 0xffff also marks a reference out of range or not found.
+  const unsigned curPos = hash.input().pos();
+  const unsigned curMaxDist = min(curPos, windowSize());
+  if (targetReference.dist > curMaxDist) return 0xffffu;  // checked first so curPos - dist below cannot wrap
+  PreflateHashIterator chainIt = hash.iterateFromPos(curPos - targetReference.dist, curPos, curMaxDist);
+  if (!chainIt.curPos) return 0xffffu;
+  const unsigned startDepth = hash.getNodeDepth(hashHead);
+  const unsigned endDepth = chainIt.depth();
+  return min(startDepth - endDepth, 0xffffu);
+}
+
+
+bool PreflateCompLevelEstimatorState::checkMatchSingleFastHash(
+    const PreflateToken& token,
+    const PreflateHashChainExt& hash,
+    const PreflateParserConfig& config,
+    const unsigned hashHead) {
+  // True when the match depth measured from this chain's head for hashHead
+  // does not exceed config.max_chain.
+  const unsigned hopsNeeded = matchDepth(hash.getHead(hashHead), token, hash);
+  const bool withinLimit = !(hopsNeeded > config.max_chain);
+  return withinLimit;
+}
+void PreflateCompLevelEstimatorState::checkMatch(const PreflateToken& token) {
+  // Checks one match token against every compression level that is still a
+  // candidate, clearing the level's bit in info.possibleCompressionLevels when
+  // the referenced position lies deeper in the hash chain than that level's
+  // max_chain allows, and updates the distance/depth statistics in info.
+  const unsigned hashHead = slowHash.curHash();
+  const auto curPos = slowHash.input().pos();
+
+  // Fast levels 1-3: each has its own chain and its own entry in
+  // fastPreflateParserSettings. Only checked when the reference stays at or
+  // after off0.
+  if (curPos >= token.dist + off0) {
+    const PreflateHashChainExt* const fastChains[3] = {&fastL1Hash, &fastL2Hash, &fastL3Hash};
+    for (unsigned k = 0; k < 3; ++k) {
+      const unsigned levelBit = 1u << (k + 1);
+      if ((info.possibleCompressionLevels & levelBit)
+          && !checkMatchSingleFastHash(token, *fastChains[k], fastPreflateParserSettings[k], hashHead)) {
+        info.possibleCompressionLevels &= ~levelBit;
+      }
+    }
+  }
+
+  // Everything below only applies when the distance does not exceed the position.
+  if (curPos < token.dist) {
+    return;
+  }
+  ++info.referenceCount;
+
+  const unsigned short mdepth = matchDepth(slowHash.getHead(hashHead), token, slowHash);
+  if (mdepth >= 0x8001) {
+    ++info.unfoundReferences;
+  } else if (mdepth > info.maxChainDepth) {
+    info.maxChainDepth = mdepth;
+  }
+  if (token.dist == curPos) {
+    info.matchToStart = true;
+  }
+  // Farthest distance is tracked separately for depth 0 and for any other depth.
+  unsigned short& farthest = (mdepth == 0) ? info.longestDistAtHop0 : info.longestDistAtHop1Plus;
+  farthest = max(farthest, token.dist);
+  if (token.len == 3) {
+    info.longestLen3Dist = max(info.longestLen3Dist, token.dist);
+  }
+
+  // Slow levels 4-9 share slowHash; bit 4 + i pairs with slowPreflateParserSettings[i].
+  for (unsigned i = 0; i < 6; ++i) {
+    const unsigned levelBit = 1u << (4 + i);
+    if ((info.possibleCompressionLevels & levelBit)
+        && mdepth > slowPreflateParserSettings[i].max_chain) {
+      info.possibleCompressionLevels &= ~levelBit;
+    }
+  }
+}
+
+void PreflateCompLevelEstimatorState::checkDump(bool early_out) {
+  // Replays every block in order: stored blocks and literals only advance the
+  // hash chains, while matches are first checked against the candidate levels.
+  // With early_out set, scanning stops once at most one level bit remains.
+  for (const PreflateTokenBlock& block : blocks) {
+    if (block.type == PreflateTokenBlock::STORED) {
+      updateHash(block.uncompressedLen);
+      continue;
+    }
+    for (const PreflateToken& tok : block.tokens) {
+      if (tok.len != 1) {
+        checkMatch(tok);
+        updateOrSkipHash(tok.len);
+      } else {
+        updateHash(1);
+      }
+      const unsigned remaining = info.possibleCompressionLevels;
+      if (early_out && (remaining & (remaining - 1)) == 0) return;
+    }
+  }
+}
+void PreflateCompLevelEstimatorState::recommend() {
+  // Turns the collected statistics into the summary flags and a recommended
+  // level; 9 stays in place when none of the rules below selects another one.
+  info.recommendedCompressionLevel = 9;
+  const unsigned farLimit = windowSize() - PreflateConstants::MIN_LOOKAHEAD;
+  const bool hop0Near = info.longestDistAtHop0 <= farLimit;
+  const bool hop1PlusNear = info.longestDistAtHop1Plus < farLimit;
+  info.veryFarMatches = !(hop0Near && hop1PlusNear);
+  info.farLen3Matches = info.longestLen3Dist > 4096;
+
+  const bool levelLeft = info.possibleCompressionLevels > 1;
+  const bool fastLevelLeft = (info.possibleCompressionLevels & 0xe) != 0;
+  info.zlibCompatible = levelLeft && !info.matchToStart && !info.veryFarMatches
+                        && (!info.farLen3Matches || fastLevelLeft);
+  if (info.unfoundReferences) return;
+
+  if (info.zlibCompatible) {  // implies possibleCompressionLevels > 1
+    // Recommend the lowest level whose bit is still set.
+    unsigned level = 1;
+    while ((info.possibleCompressionLevels & (1u << level)) == 0) ++level;
+    info.recommendedCompressionLevel = level;
+    return;
+  }
+  // Otherwise take the first slow level (4..9) whose max_chain covers the deepest match.
+  for (int i = 0; i < 6; ++i) {
+    if (info.maxChainDepth <= slowPreflateParserSettings[i].max_chain) {
+      info.recommendedCompressionLevel = 4 + i;
+      return;
+    }
+  }
+}
+
+PreflateCompLevelInfo estimatePreflateCompLevel(
+    const int wbits, const int mbits,
+    const std::vector<unsigned char>& unpacked_output,
+    const size_t off0,
+    const std::vector<PreflateTokenBlock>& blocks,
+    const bool early_out) {
+  // Scan all tokens, derive the recommendation, and hand back the result record.
+  PreflateCompLevelEstimatorState estimator(wbits, mbits, unpacked_output, off0, blocks);
+  estimator.checkDump(early_out);
+  estimator.recommend();
+  return estimator.info;
+}
--- a/contrib/preflate/preflate_complevel_estimator.h
+++ b/contrib/preflate/preflate_complevel_estimator.h
@@ -0,0 +1,76 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef PREFLATE_COMPLEVEL_ESTIMATOR_H
+#define PREFLATE_COMPLEVEL_ESTIMATOR_H
+
+#include "preflate_predictor_state.h"
+#include "preflate_token.h"
+
+struct PreflateCompLevelInfo {  // Result record filled in by PreflateCompLevelEstimatorState.
+  unsigned possibleCompressionLevels;  // bitmask: bit k is set while level k (1..9) is still a candidate
+  unsigned recommendedCompressionLevel;  // chosen by recommend(); 9 unless a rule selects another level
+  bool zlibCompatible;  // computed by recommend() from the flags below and the remaining levels
+
+  unsigned referenceCount;  // matches examined whose distance did not exceed the current position
+  unsigned unfoundReferences;  // examined matches whose chain depth came back as 0x8001 or more
+  unsigned short maxChainDepth;  // largest chain depth among the matches that were found
+  unsigned short longestLen3Dist;  // largest distance seen on a match of length 3
+  unsigned short longestDistAtHop0;  // largest distance seen on a match with chain depth 0
+  unsigned short longestDistAtHop1Plus;  // largest distance seen on a match with any other depth
+  bool matchToStart;  // some match had a distance equal to the current position
+  bool veryFarMatches;  // a distance reached beyond window size minus MIN_LOOKAHEAD
+  bool farLen3Matches;  // longestLen3Dist is greater than 4096
+};
+
+struct PreflateCompLevelEstimatorState {  // Working state for estimating the compression level from token blocks.
+  PreflateHashChainExt slowHash;  // chain checked against slowPreflateParserSettings (level bits 4-9)
+  PreflateHashChainExt fastL1Hash;  // chain kept for level 1 while bit 1 is still set
+  PreflateHashChainExt fastL2Hash;  // chain kept for level 2 while bit 2 is still set
+  PreflateHashChainExt fastL3Hash;  // chain kept for level 3 while bit 3 is still set
+  const std::vector<PreflateTokenBlock>& blocks;  // token blocks to scan; held by reference
+  PreflateCompLevelInfo info;  // statistics gathered so far and the final result
+  uint16_t wsize;  // window size, initialised to 1 << wbits
+  size_t off0;  // offset passed to the constructor; the chains are advanced by it up front
+
+  PreflateCompLevelEstimatorState(const int wbits, const int mbits,
+                                  const std::vector<unsigned char>& unpacked_output,
+                                  const size_t off0,
+                                  const std::vector<PreflateTokenBlock>& blocks);
+  void updateHash(const unsigned len);  // advance the chains of all still-possible levels by len
+  void updateOrSkipHash(const unsigned len);  // as updateHash, but fast chains call skipHash above max_lazy
+  void checkMatch(const PreflateToken& token);  // clear level bits this match rules out and update statistics
+  void checkDump(bool early_out);  // scan all blocks; with early_out, stop once at most one level remains
+  void recommend();  // fill in the flags and recommendedCompressionLevel of info
+
+private:
+  void updateOrSkipSingleFastHash(PreflateHashChainExt&, const unsigned len, const PreflateParserConfig&);
+  bool checkMatchSingleFastHash(const PreflateToken& token, const PreflateHashChainExt&, const PreflateParserConfig&,
+                                const unsigned hashHead);
+  uint16_t matchDepth(const unsigned hashHead, const PreflateToken& targetReference,  // 0xffff means out of range or not found
+                      const PreflateHashChainExt& hash);
+  unsigned windowSize() const {
+    return wsize;
+  }
+};
+
+PreflateCompLevelInfo estimatePreflateCompLevel(  // Scans the blocks and returns the statistics plus a recommended level.
+    const int wbits,  // window size is 1 << wbits
+    const int mbits,  // forwarded to each hash chain
+    const std::vector<unsigned char>& unpacked_output,  // uncompressed data the tokens refer to
+    const size_t off0,  // the hash chains are advanced by this amount before scanning
+    const std::vector<PreflateTokenBlock>& blocks,  // token blocks to scan
+    const bool early_out);  // stop scanning once at most one candidate level remains
+
+#endif /* PREFLATE_COMPLEVEL_ESTIMATOR_H */
--- a/contrib/preflate/preflate_constants.cpp
+++ b/contrib/preflate/preflate_constants.cpp
@@ -0,0 +1,88 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include <algorithm>
+#include "preflate_block_reencoder.h"
+#include "support/bit_helper.h"
+
+/* tables taken from zlib */
+
+const unsigned char PreflateConstants::distCodeTable[512] = {  // indexed by dist - 1 up to 256, else by 256 + ((dist - 1) >> 7); see DCode
+  0,  1,  2,  3,  4,  4,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,  8,  8,  8,  8,
+  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,  0,  0, 16, 17,
+  18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22,
+  23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+  26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+  26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27,
+  27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+  27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+  28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+  28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+  28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+  29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+  29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+  29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29
+};
+
+const unsigned char PreflateConstants::lengthCodeTable[MAX_MATCH - MIN_MATCH + 1 ] = {  // indexed by match length minus MIN_MATCH; see LCode
+  0,  1,  2,  3,  4,  5,  6,  7,  8,  8,  9,  9, 10, 10, 11, 11, 12, 12, 12, 12,
+  13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
+  17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
+  19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
+  22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
+  23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+  25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
+  26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+  26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+  27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28
+};
+
+const unsigned char PreflateConstants::lengthBaseTable[LEN_CODE_COUNT] = {  // per length code: lowest (length - MIN_MATCH) mapped to it; last entry is 255
+  0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
+  64, 80, 96, 112, 128, 160, 192, 224, 255
+};
+
+const unsigned short PreflateConstants::distBaseTable[DIST_CODE_COUNT] = {  // per distance code: lowest (dist - 1) mapped to it
+  0,     1,     2,     3,     4,     6,     8,    12,    16,    24,
+  32,    48,    64,    96,   128,   192,   256,   384,   512,   768,
+  1024,  1536,  2048,  3072,  4096,  6144,  8192, 12288, 16384, 24576
+};
+
+const unsigned char PreflateConstants::lengthExtraTable[LEN_CODE_COUNT] = { /* number of extra bits for each length code */
+  0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0
+};
+
+const unsigned char PreflateConstants::distExtraTable[DIST_CODE_COUNT] = { /* number of extra bits for each distance code */
+  0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13
+};
+
+const unsigned char PreflateConstants::treeCodeOrderTable[CODETREE_CODE_COUNT] = {  // presumably the order in which code-length code lengths are stored (zlib bl_order) - TODO confirm
+  16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15
+};
--- a/contrib/preflate/preflate_constants.h
+++ b/contrib/preflate/preflate_constants.h
@@ -0,0 +1,53 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef PREFLATE_CONSTANTS_H
+#define PREFLATE_CONSTANTS_H
+
+struct PreflateConstants {
+  enum {
+    LITERAL_COUNT = 256,
+    NONLEN_CODE_COUNT = LITERAL_COUNT + 1, // literals plus EOB
+    LEN_CODE_COUNT = 29,
+    LITLEN_CODE_COUNT = NONLEN_CODE_COUNT + LEN_CODE_COUNT,
+    DIST_CODE_COUNT = 30,
+    LITLENDIST_CODE_COUNT = LITLEN_CODE_COUNT + DIST_CODE_COUNT,
+    CODETREE_CODE_COUNT = 19,
+    MIN_MATCH = 3,
+    MAX_MATCH = 258,
+    MAX_BITS = 15,
+    MIN_LOOKAHEAD = MAX_MATCH + MIN_MATCH + 1,
+  };
+
+  // Lookup tables; their values are defined in preflate_constants.cpp.
+  static const unsigned char distCodeTable[512];
+  static const unsigned char lengthCodeTable[MAX_MATCH - MIN_MATCH + 1];
+  static const unsigned char lengthBaseTable[LEN_CODE_COUNT];
+  static const unsigned short distBaseTable[DIST_CODE_COUNT];
+  static const unsigned char lengthExtraTable[LEN_CODE_COUNT];
+  static const unsigned char distExtraTable[DIST_CODE_COUNT];
+  static const unsigned char treeCodeOrderTable[CODETREE_CODE_COUNT];
+
+  static inline unsigned DCode(const unsigned dist) {  // distance -> distance code
+    const unsigned zeroBased = dist - 1;
+    if (dist <= 256) return distCodeTable[zeroBased];
+    return distCodeTable[256 + (zeroBased >> 7)];  // upper half of the table, 128 distances per entry
+  }
+  static inline unsigned LCode(const unsigned len) {  // match length -> length code
+    const unsigned zeroBased = len - MIN_MATCH;
+    return lengthCodeTable[zeroBased];
+  }
+};
+
+#endif /* PREFLATE_CONSTANTS_H */
--- a/contrib/preflate/preflate_decoder.cpp
+++ b/contrib/preflate/preflate_decoder.cpp
@@ -0,0 +1,278 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include <string.h>
+#include <functional>
+#include "preflate_block_decoder.h"
+#include "preflate_decoder.h"
+#include "preflate_parameter_estimator.h"
+#include "preflate_statistical_model.h"
+#include "preflate_token_predictor.h"
+#include "preflate_tree_predictor.h"
+#include "support/bitstream.h"
+#include "support/memstream.h"
+#include "support/outputcachestream.h"
+
+// Handler used by preflate_decode(): forwards the model and meta block
+// calls of PreflateDecoderTask to a single PreflateMetaEncoder and relays
+// progress notifications to the caller supplied callback.
+class PreflateDecoderHandler : public PreflateDecoderTask::Handler {
+public:
+  // The callback is taken by value and moved into the member, so the caller's
+  // std::function is copied at most once.
+  explicit PreflateDecoderHandler(std::function<void(void)> progressCallback_)
+    : progressCallback(std::move(progressCallback_)) {}
+
+  // Finishes the meta encoder and stores its output in reconstructionData.
+  // Returns false if the encoder reports an error afterwards.
+  bool finish(std::vector<uint8_t>& reconstructionData) {
+    reconstructionData = encoder.finish();
+    return !encoder.error();
+  }
+  // True if the meta encoder is in an error state.
+  bool error() const {
+    return encoder.error();
+  }
+
+  // Registers a model with the meta encoder and returns the id it hands back.
+  uint32_t setModel(const PreflateStatisticsCounter& counters, const PreflateParameters& parameters) override {
+    return encoder.addModel(counters, parameters);
+  }
+  // Starts a meta block for the given model; metaBlockId is not used here.
+  bool beginEncoding(const uint32_t metaBlockId, PreflatePredictionEncoder& codec, const uint32_t modelId) override {
+    return encoder.beginMetaBlockWithModel(codec, modelId);
+  }
+  // Ends the current meta block; metaBlockId is not used here.
+  bool endEncoding(const uint32_t metaBlockId, PreflatePredictionEncoder& codec, const size_t uncompressedSize) override {
+    return encoder.endMetaBlock(codec, uncompressedSize);
+  }
+  // Invokes the progress callback while holding the mutex, so calls to the
+  // callback through this handler never overlap.
+  void markProgress() override {
+    std::lock_guard<std::mutex> lock(this->_mutex);
+    progressCallback();
+  }
+
+private:
+  PreflateMetaEncoder encoder;
+  std::function<void(void)> progressCallback;
+  std::mutex _mutex;
+};
+
+// Builds a task for one meta block.
+// tokenData_ and uncompressedData_ are rvalue references; inside the
+// constructor they are named lvalues, so they must be passed through
+// std::move to actually transfer the buffers instead of deep-copying them.
+PreflateDecoderTask::PreflateDecoderTask(PreflateDecoderTask::Handler& handler_,
+                                         const uint32_t metaBlockId_,
+                                         std::vector<PreflateTokenBlock>&& tokenData_,
+                                         std::vector<uint8_t>&& uncompressedData_,
+                                         const size_t uncompressedOffset_,
+                                         const bool lastMetaBlock_,
+                                         const uint32_t paddingBits_)
+  : handler(handler_)
+  , metaBlockId(metaBlockId_)
+  , tokenData(std::move(tokenData_))
+  , uncompressedData(std::move(uncompressedData_))
+  , uncompressedOffset(uncompressedOffset_)
+  , lastMetaBlock(lastMetaBlock_)
+  , paddingBits(paddingBits_) {
+}
+
+// First pass over the meta block: estimates the parameters, creates the two
+// predictors and lets them analyze every token block while gathering the
+// statistics in counter. Returns false as soon as a predictor flags a
+// prediction failure.
+bool PreflateDecoderTask::analyze() {
+  params = estimatePreflateParameters(uncompressedData, uncompressedOffset, tokenData);
+  memset(&counter, 0, sizeof(counter));
+  tokenPredictor.reset(new PreflateTokenPredictor(params, uncompressedData, uncompressedOffset));
+  treePredictor.reset(new PreflateTreePredictor(uncompressedData, uncompressedOffset));
+
+  const unsigned blockTotal = tokenData.size();
+  for (unsigned blockIdx = 0; blockIdx < blockTotal; ++blockIdx) {
+    auto& block = tokenData[blockIdx];
+    tokenPredictor->analyzeBlock(blockIdx, block);
+    treePredictor->analyzeBlock(blockIdx, block);
+
+    const bool predictionFailed =
+        tokenPredictor->predictionFailure || treePredictor->predictionFailure;
+    if (predictionFailed) {
+      return false;
+    }
+
+    tokenPredictor->updateCounters(&counter, blockIdx);
+    treePredictor->updateCounters(&counter, blockIdx);
+    // One progress notification per analyzed block.
+    handler.markProgress();
+  }
+
+  const bool hasPadding = paddingBits != 0;
+  counter.block.incNonZeroPadding(hasPadding);
+  return true;
+}
+
+// Second pass over the meta block: registers the statistics gathered by
+// analyze() as a model and encodes every token block with it. For the last
+// meta block the EOF markers and the padding bits are encoded as well.
+// Returns false if the handler refuses the meta block or a predictor flags a
+// prediction failure.
+bool PreflateDecoderTask::encode() {
+  PreflatePredictionEncoder pcodec;
+  unsigned modelId = handler.setModel(counter, params);
+  const bool started = handler.beginEncoding(metaBlockId, pcodec, modelId);
+  if (!started) {
+    return false;
+  }
+
+  const unsigned blockTotal = tokenData.size();
+  for (unsigned blockIdx = 0; blockIdx < blockTotal; ++blockIdx) {
+    tokenPredictor->encodeBlock(&pcodec, blockIdx);
+    treePredictor->encodeBlock(&pcodec, blockIdx);
+    if (tokenPredictor->predictionFailure || treePredictor->predictionFailure) {
+      return false;
+    }
+    if (!lastMetaBlock) {
+      continue;
+    }
+    // Only the last meta block carries EOF information per block.
+    const bool isFinalBlock = blockIdx + 1 == tokenData.size();
+    tokenPredictor->encodeEOF(&pcodec, blockIdx, isFinalBlock);
+  }
+
+  if (lastMetaBlock) {
+    const bool hasPadding = paddingBits != 0;
+    pcodec.encodeNonZeroPadding(hasPadding);
+    if (hasPadding) {
+      // Store the bit length in 3 bits, then the bits below the top one.
+      unsigned bitsToSave = bitLength(paddingBits);
+      pcodec.encodeValue(bitsToSave, 3);
+      if (bitsToSave > 1) {
+        const unsigned lowBitCount = bitsToSave - 1;
+        pcodec.encodeValue(paddingBits & ((1 << lowBitCount) - 1), lowBitCount);
+      }
+    }
+  }
+
+  const size_t payloadSize = uncompressedData.size() - uncompressedOffset;
+  return handler.endEncoding(metaBlockId, pcodec, payloadSize);
+}
+
+// Decodes a raw deflate stream block by block, writes the uncompressed bytes
+// to unpacked_output (through an OutputCacheStream) and stores the
+// reconstruction data produced by the meta encoder in preflate_diff.
+//
+// unpacked_output   receives the uncompressed data
+// preflate_diff     receives the reconstruction data on success
+// deflate_size      set to the number of input bytes consumed (bits rounded
+//                   up to whole bytes); reset to 0 on entry
+// deflate_raw       the deflate input
+// block_callback    invoked once per deflate block read, and once per block
+//                   analyzed by a PreflateDecoderTask
+// min_deflate_size  the call fails if fewer bytes than this were consumed
+// metaBlockSize     requested meta block size, clamped to [2^18, 2^31 - 1]
+//
+// Returns true only if every block could be read, analyzed and encoded, the
+// consumed size reaches min_deflate_size and the encoder finishes cleanly.
+bool preflate_decode(OutputStream& unpacked_output,
+                     std::vector<unsigned char>& preflate_diff,
+                     uint64_t& deflate_size,
+                     InputStream& deflate_raw,
+                     std::function<void(void)> block_callback,
+                     const size_t min_deflate_size,
+                     const size_t metaBlockSize) {
+  deflate_size = 0;
+  uint64_t deflate_bits = 0;
+  size_t prevBitPos = 0;
+  BitInputStream decInBits(deflate_raw);
+  OutputCacheStream decOutCache(unpacked_output);
+  PreflateBlockDecoder bdec(decInBits, decOutCache);
+  if (bdec.status() != PreflateBlockDecoder::OK) {
+    return false;
+  }
+  bool last = false;
+  std::vector<PreflateTokenBlock> blocks;
+  std::vector<uint32_t> blockSizes;
+  uint64_t sumBlockSizes = 0;
+  uint64_t lastEndPos = 0;
+  uint64_t uncompressedMetaStart = 0;
+  size_t MBSize = std::min<size_t>(std::max<size_t>(metaBlockSize, 1u << 18), (1u << 31) - 1);
+  // 1.5 * MBSize. Written as a sum because MBSize * 3 overflows a 32-bit
+  // size_t when MBSize is at its upper bound of 2^31 - 1.
+  size_t MBThreshold = MBSize + (MBSize >> 1);
+  PreflateDecoderHandler encoder(block_callback);
+  size_t MBcount = 0;
+
+  // Nothing in this function enqueues futures; the queue is only drained
+  // after the loop.
+  std::queue<std::future<std::shared_ptr<PreflateDecoderTask>>> futureQueue;
+  size_t queueLimit = 0;
+  bool fail = false;
+
+  do {
+    PreflateTokenBlock newBlock;
+
+    bool ok = bdec.readBlock(newBlock, last);
+    if (!ok) {
+      fail = true;
+      break;
+    }
+
+    uint64_t blockSize = decOutCache.cacheEndPos() - lastEndPos;
+    lastEndPos = decOutCache.cacheEndPos();
+    // The limit must be an unsigned 64-bit value: a plain (1 << 31) is a
+    // negative int that converts to a huge uint64_t, which would disable
+    // this check and let the uint32_t block size below truncate.
+    if (blockSize >= (static_cast<uint64_t>(1) << 31)) {
+      // No mega blocks
+      fail = true;
+      break;
+    }
+
+    // newBlock is not used again in this iteration, so hand it over.
+    blocks.push_back(std::move(newBlock));
+    blockSizes.push_back(static_cast<uint32_t>(blockSize));
+    block_callback();
+
+    deflate_bits += decInBits.bitPos() - prevBitPos;
+    prevBitPos = decInBits.bitPos();
+
+    sumBlockSizes += blockSize;
+    if (last || sumBlockSizes >= MBThreshold) {
+      // Decide how many of the pending blocks form the next meta block:
+      // all of them at the end of the stream, otherwise the shortest prefix
+      // whose uncompressed size reaches MBSize.
+      size_t blockCount, blockSizeSum;
+      if (last) {
+        blockCount = blockSizes.size();
+        blockSizeSum = sumBlockSizes;
+      } else {
+        blockCount = 0;
+        blockSizeSum = 0;
+        for (const auto bs : blockSizes) {
+          blockSizeSum += bs;
+          ++blockCount;
+          if (blockSizeSum >= MBSize) {
+            break;
+          }
+        }
+      }
+      std::vector<PreflateTokenBlock> blocksForMeta;
+      blocksForMeta.reserve(blockCount);
+      for (size_t j = 0; j < blockCount; ++j) {
+        blocksForMeta.push_back(std::move(blocks[j]));
+      }
+      blocks.erase(blocks.begin(), blocks.begin() + blockCount);
+      blockSizes.erase(blockSizes.begin(), blockSizes.begin() + blockCount);
+      sumBlockSizes -= blockSizeSum;
+
+      // Every meta block but the first is handed the 1 << 15 bytes that
+      // precede it in addition to its own data.
+      size_t uncompressedOffset = MBcount == 0 ? 0 : 1 << 15;
+
+      std::vector<uint8_t> uncompressedDataForMeta(
+        decOutCache.cacheData(uncompressedMetaStart - uncompressedOffset),
+        decOutCache.cacheData(uncompressedMetaStart - uncompressedOffset) + blockSizeSum + uncompressedOffset);
+      uncompressedMetaStart += blockSizeSum;
+
+      size_t paddingBits = 0;
+      if (last) {
+        // Read the bits that pad the stream up to the next byte boundary.
+        uint8_t remaining_bit_count = (8 - deflate_bits) & 7;
+        paddingBits = decInBits.get(remaining_bit_count);
+
+        deflate_bits += decInBits.bitPos() - prevBitPos;
+        prevBitPos = decInBits.bitPos();
+      }
+      if (futureQueue.empty() && (queueLimit == 0 || last)) {
+        PreflateDecoderTask task(encoder, MBcount,
+                                 std::move(blocksForMeta),
+                                 std::move(uncompressedDataForMeta),
+                                 uncompressedOffset,
+                                 last, paddingBits);
+        if (!task.analyze() || !task.encode()) {
+          fail = true;
+          break;
+        }
+      }
+      if (!last) {
+        // Keep the last 1 << 15 bytes cached for the next meta block.
+        decOutCache.flushUpTo(uncompressedMetaStart - (1 << 15));
+      }
+      MBcount++;
+    }
+  } while (!fail && !last);
+  while (!futureQueue.empty()) {
+    std::future<std::shared_ptr<PreflateDecoderTask>> first = std::move(futureQueue.front());
+    futureQueue.pop();
+    std::shared_ptr<PreflateDecoderTask> data = first.get();
+    if (fail || !data || !data->encode()) {
+      fail = true;
+    }
+  }
+  decOutCache.flush();
+  deflate_size = (deflate_bits + 7) >> 3;
+  if (deflate_size < min_deflate_size) {
+    return false;
+  }
+  return !fail && encoder.finish(preflate_diff);
+}
+
+// Convenience overload: decodes into an in-memory stream and hands the
+// collected bytes to unpacked_output. The output vector is assigned whether
+// or not decoding succeeded; the return value is that of the stream based
+// overload.
+bool preflate_decode(std::vector<unsigned char>& unpacked_output,
+                     std::vector<unsigned char>& preflate_diff,
+                     uint64_t& deflate_size,
+                     InputStream& deflate_raw,
+                     std::function<void(void)> block_callback,
+                     const size_t min_deflate_size,
+                     const size_t metaBlockSize) {
+  MemStream sink;
+  const bool decoded = preflate_decode(sink,
+                                       preflate_diff,
+                                       deflate_size,
+                                       deflate_raw,
+                                       block_callback,
+                                       min_deflate_size,
+                                       metaBlockSize);
+  unpacked_output = sink.extractData();
+  return decoded;
+}
+
+// Convenience overload for fully buffered input: decodes deflate_raw without
+// a progress callback and without a minimum size. Succeeds only if decoding
+// succeeds and the number of consumed bytes equals deflate_raw.size().
+bool preflate_decode(std::vector<unsigned char>& unpacked_output,
+                     std::vector<unsigned char>& preflate_diff,
+                     const std::vector<unsigned char>& deflate_raw,
+                     const size_t metaBlockSize) {
+  MemStream input(deflate_raw);
+  uint64_t consumedBytes;
+  const bool decoded = preflate_decode(unpacked_output, preflate_diff,
+                                       consumedBytes, input, [] {}, 0, metaBlockSize);
+  if (!decoded) {
+    return false;
+  }
+  return consumedBytes == deflate_raw.size();
+}
--- a/contrib/preflate/preflate_decoder.h
+++ b/contrib/preflate/preflate_decoder.h
@@ -0,0 +1,90 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef PREFLATE_DECODER_H
+#define PREFLATE_DECODER_H
+
+#include <functional>
+#include <queue>
+#include <vector>
+#include "preflate_statistical_codec.h"
+#include "preflate_token.h"
+#include "support/stream.h"
+#include "support/task_pool.h"
+
+struct PreflateTokenPredictor;
+struct PreflateTreePredictor;
+
+// One unit of work of the decoder: holds the token blocks and uncompressed
+// data of a single meta block and processes them in two steps, analyze()
+// followed by encode().
+class PreflateDecoderTask {
+public:
+  // Interface through which a task reports its model, its encoded meta block
+  // and its progress.
+  class Handler {
+  public:
+    virtual ~Handler() {}
+    // Registers the statistics and parameters of a task; returns a model id.
+    virtual uint32_t setModel(const PreflateStatisticsCounter&, const PreflateParameters&) = 0;
+    // Called before a task encodes its blocks; false aborts the task.
+    virtual bool beginEncoding(const uint32_t metaBlockId, PreflatePredictionEncoder&, const uint32_t modelId) = 0;
+    // Called after a task encoded its blocks; the result becomes the result
+    // of PreflateDecoderTask::encode().
+    virtual bool endEncoding(const uint32_t metaBlockId, PreflatePredictionEncoder&, const size_t uncompressedSize) = 0;
+    // Called once per analyzed token block.
+    virtual void markProgress() = 0;
+  };
+
+  // handler must outlive the task (it is stored by reference).
+  // uncompressedOffset is the number of leading bytes of uncompressedData
+  // that precede the meta block's own data.
+  PreflateDecoderTask(Handler& handler,
+                      const uint32_t metaBlockId,
+                      std::vector<PreflateTokenBlock>&& tokenData,
+                      std::vector<uint8_t>&& uncompressedData,
+                      const size_t uncompressedOffset,
+                      const bool lastMetaBlock,
+                      const uint32_t paddingBits);
+
+  // Creates the predictors and gathers statistics; must run before encode().
+  bool analyze();
+  // Encodes the meta block through the handler.
+  bool encode();
+  // Id of the meta block this task was created for.
+  uint32_t id() const {
+    return metaBlockId;
+  }
+
+private:
+  Handler& handler;
+  uint32_t metaBlockId;
+  std::vector<PreflateTokenBlock> tokenData;
+  std::vector<uint8_t> uncompressedData;
+  size_t uncompressedOffset;
+  bool lastMetaBlock;
+  uint32_t paddingBits;
+
+  // Filled in by analyze(), consumed by encode().
+  PreflateParameters params;
+  PreflateStatisticsCounter counter;
+  std::unique_ptr<PreflateTokenPredictor> tokenPredictor;
+  std::unique_ptr<PreflateTreePredictor> treePredictor;
+};
+
+// Decodes the deflate stream deflate_raw, writing the uncompressed data to
+// unpacked_output and the reconstruction data to preflate_diff.
+// deflate_size receives the number of input bytes consumed; the call fails
+// if that is less than min_deflate_size. block_callback is invoked as blocks
+// are read and analyzed. metaBlockSize is clamped by the implementation to
+// the range [2^18, 2^31 - 1].
+bool preflate_decode(OutputStream& unpacked_output,
+                     std::vector<unsigned char>& preflate_diff,
+                     uint64_t& deflate_size,
+                     InputStream& deflate_raw,
+                     std::function<void(void)> block_callback,
+                     const size_t min_deflate_size,
+                     const size_t metaBlockSize = INT32_MAX);
+
+// Variant for fully buffered input and output: no progress callback, no
+// minimum size. Succeeds only if the whole of deflate_raw was consumed.
+bool preflate_decode(std::vector<unsigned char>& unpacked_output,
+                     std::vector<unsigned char>& preflate_diff,
+                     const std::vector<unsigned char>& deflate_raw,
+                     const size_t metaBlockSize = INT32_MAX);
+
+// Same as the OutputStream variant, but collects the uncompressed data in
+// memory and assigns it to unpacked_output (also when decoding fails).
+bool preflate_decode(std::vector<unsigned char>& unpacked_output,
+                     std::vector<unsigned char>& preflate_diff,
+                     uint64_t& deflate_size,
+                     InputStream& deflate_raw,
+                     std::function<void (void)> block_callback,
+                     const size_t min_deflate_size,
+                     const size_t metaBlockSize = INT32_MAX);
+
+#endif /* PREFLATE_DECODER_H */
--- a/contrib/preflate/preflate_dll.sln
+++ b/contrib/preflate/preflate_dll.sln
@@ -0,0 +1,25 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 16
+VisualStudioVersion = 16.0.30204.135
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "preflate_dll", "preflate_dll.vcxproj", "{C4097C5B-2BFC-499A-BEB4-4B709B576722}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Release|x64 = Release|x64
+		Release|x86 = Release|x86
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{C4097C5B-2BFC-499A-BEB4-4B709B576722}.Release|x64.ActiveCfg = Release|x64
+		{C4097C5B-2BFC-499A-BEB4-4B709B576722}.Release|x64.Build.0 = Release|x64
+		{C4097C5B-2BFC-499A-BEB4-4B709B576722}.Release|x86.ActiveCfg = Release|Win32
+		{C4097C5B-2BFC-499A-BEB4-4B709B576722}.Release|x86.Build.0 = Release|Win32
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+	GlobalSection(ExtensibilityGlobals) = postSolution
+		SolutionGuid = {6A2147EA-B1D0-47D9-8DB6-5B335F718795}
+	EndGlobalSection
+EndGlobal
--- a/contrib/preflate/preflate_dll.vcxproj
+++ b/contrib/preflate/preflate_dll.vcxproj
@@ -0,0 +1,179 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <VCProjectVersion>16.0</VCProjectVersion>
+    <Keyword>Win32Proj</Keyword>
+    <ProjectGuid>{c4097c5b-2bfc-499a-beb4-4b709b576722}</ProjectGuid>
+    <RootNamespace>preflatedll</RootNamespace>
+    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>WIN32;NDEBUG;PREFLATEDLL_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>true</ConformanceMode>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <Optimization>Full</Optimization>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>false</GenerateDebugInformation>
+      <EnableUAC>false</EnableUAC>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>NDEBUG;PREFLATEDLL_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>true</ConformanceMode>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <Optimization>Full</Optimization>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>false</GenerateDebugInformation>
+      <EnableUAC>false</EnableUAC>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClInclude Include="framework.h" />
+    <ClInclude Include="pch.h" />
+    <ClInclude Include="preflate.h" />
+    <ClInclude Include="preflate_block_decoder.h" />
+    <ClInclude Include="preflate_block_reencoder.h" />
+    <ClInclude Include="preflate_block_trees.h" />
+    <ClInclude Include="preflate_checker.h" />
+    <ClInclude Include="preflate_complevel_estimator.h" />
+    <ClInclude Include="preflate_constants.h" />
+    <ClInclude Include="preflate_decoder.h" />
+    <ClInclude Include="preflate_hash_chain.h" />
+    <ClInclude Include="preflate_info.h" />
+    <ClInclude Include="preflate_input.h" />
+    <ClInclude Include="preflate_parameter_estimator.h" />
+    <ClInclude Include="preflate_parser_config.h" />
+    <ClInclude Include="preflate_predictor_state.h" />
+    <ClInclude Include="preflate_reencoder.h" />
+    <ClInclude Include="preflate_seq_chain.h" />
+    <ClInclude Include="preflate_statistical_codec.h" />
+    <ClInclude Include="preflate_statistical_model.h" />
+    <ClInclude Include="preflate_token.h" />
+    <ClInclude Include="preflate_token_predictor.h" />
+    <ClInclude Include="preflate_tree_predictor.h" />
+    <ClInclude Include="support\arithmetic_coder.h" />
+    <ClInclude Include="support\array_helper.h" />
+    <ClInclude Include="support\bitstream.h" />
+    <ClInclude Include="support\bit_helper.h" />
+    <ClInclude Include="support\const_division.h" />
+    <ClInclude Include="support\filestream.h" />
+    <ClInclude Include="support\huffman_decoder.h" />
+    <ClInclude Include="support\huffman_encoder.h" />
+    <ClInclude Include="support\huffman_helper.h" />
+    <ClInclude Include="support\memstream.h" />
+    <ClInclude Include="support\outputcachestream.h" />
+    <ClInclude Include="support\stream.h" />
+    <ClInclude Include="support\support_tests.h" />
+    <ClInclude Include="support\task_pool.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="dllmain.cpp" />
+    <ClCompile Include="pch.cpp">
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
+    </ClCompile>
+    <ClCompile Include="preflate_block_decoder.cpp" />
+    <ClCompile Include="preflate_block_reencoder.cpp" />
+    <ClCompile Include="preflate_block_trees.cpp" />
+    <ClCompile Include="preflate_checker.cpp" />
+    <ClCompile Include="preflate_complevel_estimator.cpp" />
+    <ClCompile Include="preflate_constants.cpp" />
+    <ClCompile Include="preflate_decoder.cpp" />
+    <ClCompile Include="preflate_hash_chain.cpp" />
+    <ClCompile Include="preflate_info.cpp" />
+    <ClCompile Include="preflate_parameter_estimator.cpp" />
+    <ClCompile Include="preflate_parser_config.cpp" />
+    <ClCompile Include="preflate_predictor_state.cpp" />
+    <ClCompile Include="preflate_reencoder.cpp" />
+    <ClCompile Include="preflate_seq_chain.cpp" />
+    <ClCompile Include="preflate_statistical_codec.cpp" />
+    <ClCompile Include="preflate_statistical_debug.cpp" />
+    <ClCompile Include="preflate_statistical_model.cpp" />
+    <ClCompile Include="preflate_token.cpp" />
+    <ClCompile Include="preflate_token_predictor.cpp" />
+    <ClCompile Include="preflate_tree_predictor.cpp" />
+    <ClCompile Include="support\arithmetic_coder.cpp" />
+    <ClCompile Include="support\array_helper.cpp" />
+    <ClCompile Include="support\bitstream.cpp" />
+    <ClCompile Include="support\bit_helper.cpp" />
+    <ClCompile Include="support\const_division.cpp" />
+    <ClCompile Include="support\filestream.cpp" />
+    <ClCompile Include="support\huffman_decoder.cpp" />
+    <ClCompile Include="support\huffman_encoder.cpp" />
+    <ClCompile Include="support\huffman_helper.cpp" />
+    <ClCompile Include="support\memstream.cpp" />
+    <ClCompile Include="support\outputcachestream.cpp" />
+    <ClCompile Include="support\support_tests.cpp" />
+    <ClCompile Include="support\task_pool.cpp" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
--- a/contrib/preflate/preflate_dll.vcxproj.filters
+++ b/contrib/preflate/preflate_dll.vcxproj.filters
@@ -0,0 +1,237 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="framework.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="pch.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="preflate.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="preflate_block_decoder.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="preflate_block_reencoder.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="preflate_block_trees.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="preflate_checker.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="preflate_complevel_estimator.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="preflate_constants.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="preflate_decoder.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="preflate_hash_chain.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="preflate_info.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="preflate_input.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="preflate_parameter_estimator.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="preflate_parser_config.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="preflate_predictor_state.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="preflate_reencoder.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="preflate_seq_chain.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="preflate_statistical_codec.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="preflate_statistical_model.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="preflate_token.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="preflate_token_predictor.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="preflate_tree_predictor.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="support\arithmetic_coder.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="support\array_helper.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="support\bit_helper.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="support\bitstream.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="support\const_division.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="support\filestream.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="support\huffman_decoder.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="support\huffman_encoder.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="support\huffman_helper.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="support\memstream.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="support\outputcachestream.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="support\stream.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="support\support_tests.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="support\task_pool.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="dllmain.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="pch.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="preflate_block_decoder.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="preflate_block_reencoder.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="preflate_block_trees.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="preflate_checker.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="preflate_complevel_estimator.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="preflate_constants.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="preflate_decoder.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="preflate_hash_chain.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="preflate_info.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="preflate_parameter_estimator.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="preflate_parser_config.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="preflate_predictor_state.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="preflate_reencoder.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="preflate_seq_chain.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="preflate_statistical_codec.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="preflate_statistical_debug.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="preflate_statistical_model.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="preflate_token.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="preflate_token_predictor.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="preflate_tree_predictor.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="support\arithmetic_coder.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="support\array_helper.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="support\bit_helper.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="support\bitstream.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="support\const_division.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="support\filestream.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="support\huffman_decoder.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="support\huffman_encoder.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="support\huffman_helper.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="support\memstream.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="support\outputcachestream.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="support\support_tests.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="support\task_pool.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
--- a/contrib/preflate/preflate_dll.vcxproj.user
+++ b/contrib/preflate/preflate_dll.vcxproj.user
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <PropertyGroup />
+</Project>
--- a/contrib/preflate/preflate_hash_chain.cpp
+++ b/contrib/preflate/preflate_hash_chain.cpp
@@ -0,0 +1,116 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include <algorithm>
+#include <string.h>
+#include "preflate_constants.h"
+#include "preflate_hash_chain.h"
+
+// Sets up the hash tables for `input_` and primes the running hash.
+// memLevel selects the hash width (hashBits = memLevel + 7).
+PreflateHashChainExt::PreflateHashChainExt(
+    const std::vector<unsigned char>& input_,
+    const unsigned char memLevel)
+  : _input(input_)
+  , totalShift(-8) {  // wraps on purpose: node index = input position + 8
+  hashBits = memLevel + 7;
+  hashMask = (1 << hashBits) - 1;
+  hashShift = (hashBits + PreflateConstants::MIN_MATCH - 1) / PreflateConstants::MIN_MATCH;
+
+  // Value-initialised arrays: every table starts out zero-filled.
+  const unsigned headEntries = hashMask + 1u;
+  const unsigned nodeEntries = 1u << 16;
+  head = new unsigned short[headEntries]();
+  prev = new unsigned short[nodeEntries]();
+  chainDepth = new unsigned[nodeEntries]();
+
+  // Mix the first two input bytes into the running hash, so that adding the
+  // third byte later completes the hash for position 0.
+  runningHash = 0;
+  if (_input.remaining() > 2) {
+    for (int k = 0; k < 2; ++k) {
+      updateRunningHash(_input.curChar(k));
+    }
+  }
+}
+// Frees the three tables allocated by the constructor.
+PreflateHashChainExt::~PreflateHashChainExt() {
+  delete[] chainDepth;
+  delete[] prev;
+  delete[] head;
+}
+
+// Inserts the next `l` input positions into the hash chains and advances the
+// input by `l`. Requests above 0x180 positions are split into chunks of at
+// most 0x180; every chunk passes through the reshift check below.
+void PreflateHashChainExt::updateHash(const unsigned l) {
+  const unsigned kMaxChunk = 0x180u;
+  if (l > kMaxChunk) {
+    for (unsigned left = l; left > 0; ) {
+      const unsigned chunk = min(left, kMaxChunk);
+      updateHash(chunk);
+      left -= chunk;
+    }
+    return;
+  }
+
+  const unsigned char* const bytes = _input.curChars();
+  const unsigned startPos = _input.pos();
+  // Slide the tables down once the node index of the current position
+  // reaches 0xfe08 (the node tables hold 1 << 16 entries).
+  if (startPos - totalShift >= 0xfe08) {
+    reshift();
+  }
+
+  // Mixing in bytes[i] completes the hash of the position i - 2 bytes past
+  // startPos; that position becomes the new head of its chain.
+  const unsigned end = min(l + 2, _input.remaining());
+  for (unsigned i = 2; i < end; ++i) {
+    updateRunningHash(bytes[i]);
+    const unsigned bucket = runningHash & hashMask;
+    const unsigned node = (startPos + i - 2) - totalShift;
+    const unsigned short oldHead = head[bucket];
+    chainDepth[node] = chainDepth[oldHead] + 1;
+    prev[node] = oldHead;
+    head[bucket] = node;
+  }
+  _input.advance(l);
+}
+// Advances the input by `l` positions, inserting only the first of them into
+// the hash chains. The other l - 1 positions are not linked into any chain;
+// their chainDepth entries are set to the marker value 0xffff8000.
+// Nothing is inserted when fewer than 3 input bytes remain.
+void PreflateHashChainExt::skipHash(const unsigned l) {
+  const unsigned char* b = _input.curChars();
+  unsigned pos = _input.pos();
+  // Same reshift trigger as in updateHash().
+  if (pos - totalShift >= 0xfe08) {
+    reshift();
+  }
+  unsigned remaining = _input.remaining();
+  if (remaining > 2) {
+    // Complete the hash of the current position with its third byte and
+    // make the position the new head of its chain.
+    updateRunningHash(b[2]);
+    unsigned h = runningHash & hashMask;
+    unsigned p = (pos) - totalShift;
+    chainDepth[p] = chainDepth[head[h]] + 1;
+    prev[p] = head[h];
+    head[h] = p;
+
+    // Skipped data is not inserted into the hash chain,
+    // but we must still update the chainDepth, to avoid
+    // bad analysis results
+    // --------------------
+    for (unsigned i = 1; i < l; ++i) {
+      unsigned p = (pos + i)-totalShift;
+      chainDepth[p] = 0xffff8000;
+    }
+    // l must be at least 3
+    // Feed the first two bytes at the new input position (b[l], b[l + 1])
+    // into the running hash, as far as they exist.
+    if (remaining > l) {
+      updateRunningHash(b[l]);
+      if (remaining > l + 1) {
+        updateRunningHash(b[l + 1]);
+      }
+    }
+  }
+  _input.advance(l);
+}
+// Slides all tables down by delta = 0x7e00 node indices, so that node
+// indices (input position - totalShift) keep fitting into the 16-bit tables.
+void PreflateHashChainExt::reshift() {
+  const unsigned short delta = 0x7e00;
+  // Rebase every chain head; heads below delta collapse to 0.
+  for (unsigned i = 0, n = hashMask + 1; i < n; ++i) {
+    head[i] = max(head[i], delta) - delta;
+  }
+  // Move the links of the nodes from delta + 8 upwards down by delta and
+  // rebase the stored predecessor index the same way (clamped at 0).
+  for (unsigned i = delta + 8, n = 1 << 16; i < n; ++i) {
+    prev[i - delta] = max(prev[i], delta) - delta;
+  }
+  // Depth counters are moved along unchanged; entries 0..7 stay in place.
+  memmove(chainDepth + 8, chainDepth + 8 + delta, (0x10000 - 8 - delta) * sizeof(chainDepth[0]));
+  totalShift += delta;
+}
--- a/contrib/preflate/preflate_hash_chain.h
+++ b/contrib/preflate/preflate_hash_chain.h
@@ -0,0 +1,125 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef PREFLATE_HASH_CHAIN_H
+#define PREFLATE_HASH_CHAIN_H
+
+#include <algorithm>
+#include "preflate_input.h"
+
+// Walks a hash chain from a start node towards older nodes.
+// All positions are node indices into `chain`/`chainDepth`; the factory
+// functions of PreflateHashChainExt pass refPos already reduced by totalShift.
+struct PreflateHashIterator {
+  const unsigned short* chain;       // predecessor links, indexed by node
+  const unsigned * chainDepth;       // per-node depth counters
+  const unsigned refPos;             // node index distances are measured from
+  const unsigned maxDist;            // largest distance still reported as valid
+  unsigned curPos, curDist;          // current node and its distance to refPos
+  bool isValid;
+
+  // Starts at node `startPos_`. The start node is valid if its distance does
+  // not exceed maxDist_.
+  // NOTE(review): unlike next(), this does not reject startPos_ == 0.
+  PreflateHashIterator(
+      const unsigned short* chain_,
+      const unsigned * depth_,
+      const unsigned refPos_,
+      const unsigned maxDist_,
+      unsigned startPos_)
+    : chain(chain_)
+    , chainDepth(depth_)
+    , refPos(refPos_)
+    , maxDist(maxDist_)
+    , curPos(startPos_)
+    , curDist(dist(refPos_, startPos_)) {
+    isValid = curDist <= maxDist;
+  }
+
+  inline bool valid() const {
+    return isValid;
+  }
+  inline bool operator !() const {
+    return !isValid;
+  }
+  // Distance from p2 back to p1 (unsigned subtraction, wraps if p2 > p1).
+  static inline unsigned dist(const unsigned p1, const unsigned p2) {
+    return p1 - p2;
+  }
+  // Distance of the current node from refPos.
+  inline unsigned dist() const {
+    return curDist;
+  }
+  // Depth counter stored for the current node.
+  inline unsigned depth() const {
+    return chainDepth[curPos];
+  }
+  // Steps to the predecessor of the current node. Returns false when the
+  // chain ends (node index 0) or the distance exceeds maxDist.
+  inline bool next() {
+    curPos = chain[curPos];
+    curDist = dist(refPos, curPos);
+    isValid = curPos > 0 && curDist <= maxDist;
+    return isValid;
+  }
+};
+
+// Hash-chain index over an input buffer.
+// `head` maps a masked hash to the most recently inserted node, `prev` links
+// a node to the node that was head of the same bucket before it, and
+// `chainDepth` holds a per-node depth counter. Node indices are input
+// positions minus `totalShift`.
+// NOTE(review): the struct owns three raw arrays that its destructor frees,
+// but no copy constructor/assignment is declared here; copying an instance
+// would presumably free them twice. Confirm that no caller copies it.
+struct PreflateHashChainExt {
+  PreflateInput _input;
+  unsigned short* head;
+  unsigned * chainDepth;
+  unsigned short* prev;
+  unsigned char hashBits, hashShift;
+  unsigned short runningHash, hashMask;
+  unsigned totalShift;
+
+  PreflateHashChainExt(const std::vector<unsigned char>& input_, const unsigned char memLevel);
+  ~PreflateHashChainExt();
+
+  // Running hash after mixing in one more byte (state is not modified).
+  unsigned nextHash(const unsigned char b) const {
+    return ((runningHash << hashShift) ^ b);
+  }
+  // Running hash after mixing in two more bytes (state is not modified).
+  unsigned nextHash(const unsigned char b1, const unsigned char b2) const {
+    return ((((runningHash << hashShift) ^ b1) << hashShift) ^ b2);
+  }
+  // Mixes `b` into the running hash; the result is truncated to 16 bits.
+  void updateRunningHash(const unsigned char b) {
+    runningHash = (runningHash << hashShift) ^ b;
+  }
+  void reshift();
+  // Node currently at the head of the bucket for `hash`.
+  unsigned getHead(const unsigned hash) const {
+    return head[hash & hashMask];
+  }
+  unsigned getNodeDepth(const unsigned node) const {
+    return chainDepth[node];
+  }
+  // Depth difference between node `head` and the node of input position `refPos`.
+  unsigned getRelPosDepth(const unsigned refPos, const unsigned head) const {
+    return chainDepth[head] - chainDepth[refPos - totalShift];
+  }
+
+  // The iterate* functions take `refPos` (and `pos`) as input positions and
+  // convert them to node indices; `node` is already a node index.
+  PreflateHashIterator iterateFromHead(const unsigned hash, const unsigned refPos, const unsigned maxDist) const {
+    return PreflateHashIterator(prev, chainDepth, refPos - totalShift, maxDist, head[hash & hashMask]);
+  }
+  PreflateHashIterator iterateFromNode(const unsigned node, const unsigned refPos, const unsigned maxDist) const {
+    return PreflateHashIterator(prev, chainDepth, refPos - totalShift, maxDist, node);
+  }
+  PreflateHashIterator iterateFromPos(const unsigned pos, const unsigned refPos, const unsigned maxDist) const {
+    return PreflateHashIterator(prev, chainDepth, refPos - totalShift, maxDist, pos - totalShift);
+  }
+  const PreflateInput& input() const {
+    return _input;
+  }
+  // Hash of the current input position (running hash plus the third byte).
+  unsigned curHash() const {
+    return nextHash(_input.curChar(2));
+  }
+  // Value of the running hash after mixing in the bytes at offsets 2 and 3.
+  unsigned curPlus1Hash() const {
+    return nextHash(_input.curChar(2), _input.curChar(3));
+  }
+  void updateHash(const unsigned l);
+  // NOTE(review): preflate_hash_chain.cpp contains no definition of
+  // updateHashLong(); verify that it is unused or defined elsewhere.
+  void updateHashLong(const unsigned l);
+  void skipHash(const unsigned l);
+
+private:
+  // NOTE(review): likewise not defined in preflate_hash_chain.cpp.
+  void _updateHashSimple(const unsigned l);
+};
+
+#endif /* PREFLATE_HASH_CHAIN_H */
--- a/contrib/preflate/preflate_info.cpp
+++ b/contrib/preflate/preflate_info.cpp
@@ -0,0 +1,56 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include <algorithm>
+#include <string.h>
+#include "preflate_info.h"
+
+// -----------------------------------------
+
+// Gathers block and token statistics over a parsed deflate stream.
+// Stored blocks are only counted; all other blocks contribute their tokens
+// and are classified by the largest reference distance they contain.
+PreflateStreamInfo extractPreflateInfo(const std::vector<PreflateTokenBlock>& blocks) {
+  PreflateStreamInfo stats = PreflateStreamInfo();  // all counters start at zero
+  const unsigned blockCount = blocks.size();
+  stats.countBlocks = blockCount;
+
+  for (unsigned blockIdx = 0; blockIdx != blockCount; ++blockIdx) {
+    const PreflateTokenBlock& block = blocks[blockIdx];
+    if (block.type == PreflateTokenBlock::STORED) {
+      ++stats.countStoredBlocks;
+      continue;
+    }
+    if (block.type == PreflateTokenBlock::STATIC_HUFF) {
+      ++stats.countStaticHuffTreeBlocks;
+    }
+
+    const unsigned tokensInBlock = block.tokens.size();
+    stats.tokenCount += tokensInBlock;
+    if (tokensInBlock > stats.maxTokensPerBlock) {
+      stats.maxTokensPerBlock = tokensInBlock;
+    }
+
+    // A token of length 1 is a literal, everything else is a reference.
+    unsigned farthest = 0;
+    for (unsigned tokenIdx = 0; tokenIdx != tokensInBlock; ++tokenIdx) {
+      const PreflateToken& token = block.tokens[tokenIdx];
+      if (token.len == 1) {
+        ++stats.literalCount;
+        continue;
+      }
+      ++stats.referenceCount;
+      const unsigned distance = static_cast<unsigned>(token.dist);
+      if (distance > farthest) {
+        farthest = distance;
+      }
+    }
+
+    if (farthest > stats.maxDist) {
+      stats.maxDist = farthest;
+    }
+    // No reference at all -> Huffman-only block; only distance 1 -> RLE block.
+    switch (farthest) {
+    case 0:
+      ++stats.countHuffBlocks;
+      break;
+    case 1:
+      ++stats.countRLEBlocks;
+      break;
+    default:
+      break;
+    }
+  }
+  return stats;
+}
+
--- a/contrib/preflate/preflate_info.h
+++ b/contrib/preflate/preflate_info.h
@@ -0,0 +1,35 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef PREFLATE_INFO_H
+#define PREFLATE_INFO_H
+
+#include "preflate_token.h"
+
+// Statistics about a parsed deflate stream, as filled in by
+// extractPreflateInfo(). Token and distance figures cover non-stored blocks only.
+struct PreflateStreamInfo {
+  unsigned tokenCount;                 // tokens in all non-stored blocks
+  unsigned literalCount;               // tokens with len == 1
+  unsigned referenceCount;             // tokens with len != 1
+  unsigned maxDist;                    // largest reference distance seen
+  unsigned maxTokensPerBlock;          // token count of the largest non-stored block
+  unsigned countBlocks;                // all blocks, stored ones included
+  unsigned countStoredBlocks;          // blocks of type STORED
+  unsigned countHuffBlocks;            // non-stored blocks without any reference
+  unsigned countRLEBlocks;             // non-stored blocks whose largest distance is 1
+  unsigned countStaticHuffTreeBlocks;  // blocks of type STATIC_HUFF
+};
+
+PreflateStreamInfo extractPreflateInfo(const std::vector<PreflateTokenBlock>& blocks);
+
+#endif /* PREFLATE_INFO_H */
--- a/contrib/preflate/preflate_input.h
+++ b/contrib/preflate/preflate_input.h
@@ -0,0 +1,53 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef PREFLATE_INPUT_H
+#define PREFLATE_INPUT_H
+
+#include <vector>
+
+// Read cursor over a byte buffer that is owned by the caller.
+// Only the data pointer and the size of the vector are kept, so the vector
+// handed to the constructor has to outlive this object and must not be
+// resized meanwhile. curChars()/curChar() perform no bounds checking.
+class PreflateInput {
+public:
+  PreflateInput(const std::vector<unsigned char>& v)
+    : _data(v.empty() ? nullptr : v.data()), _size(v.size()), _pos(0) {}
+
+  // Current read position, counted in bytes from the start of the buffer.
+  unsigned pos() const {
+    return _pos;
+  }
+
+  // Total number of bytes in the buffer.
+  unsigned size() const {
+    return _size;
+  }
+
+  // Pointer to the byte `offset` bytes away from the current position.
+  const unsigned char* curChars(int offset = 0) const {
+    return _data + _pos + offset;
+  }
+  // Byte `offset` bytes away from the current position.
+  unsigned char curChar(int offset = 0) const {
+    return _data[_pos + offset];
+  }
+  // Moves the read position forward by `l` bytes (not clamped to the size).
+  void advance(const unsigned l) {
+    _pos += l;
+  }
+  // Number of bytes between the current position and the end of the buffer.
+  // Yields 0 once the position has moved past the end; a plain unsigned
+  // subtraction would wrap around to a huge value there.
+  unsigned remaining() const {
+    return _pos < _size ? _size - _pos : 0;
+  }
+
+private:
+  const unsigned char* _data;
+  unsigned _size;
+  unsigned _pos;
+};
+
+
+#endif /* PREFLATE_INPUT_H */
--- a/contrib/preflate/preflate_parameter_estimator.cpp
+++ b/contrib/preflate/preflate_parameter_estimator.cpp
@@ -0,0 +1,83 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include <algorithm>
+#include "preflate_complevel_estimator.h"
+#include "preflate_constants.h"
+#include "preflate_info.h"
+#include "preflate_parameter_estimator.h"
+#include "preflate_token_predictor.h"
+#include "support/bit_helper.h"
+
+// Derives a mem level from the largest token count seen in one block:
+// the bit length of that count, clamped to 7..15, minus 6 (result 1..9).
+unsigned char estimatePreflateMemLevel(const unsigned maxBlockSize_) {
+  unsigned bits = 0;
+  for (unsigned rest = maxBlockSize_; rest != 0; rest >>= 1) {
+    ++bits;
+  }
+  if (bits < 7u) {
+    bits = 7u;
+  } else if (bits > 15u) {
+    bits = 15u;
+  }
+  return bits - 6;
+}
+
+// Derives the window bits from the largest reference distance plus
+// MIN_LOOKAHEAD; the result is clamped to 9..15.
+unsigned char estimatePreflateWindowBits(const unsigned maxDist_) {
+  unsigned reach = maxDist_;
+  reach += PreflateConstants::MIN_LOOKAHEAD;
+  unsigned bits = bitLength(reach - 1);
+  if (bits < 9u) {
+    bits = 9u;
+  } else if (bits > 15u) {
+    bits = 15u;
+  }
+  return bits;
+}
+
+// Picks the strategy whose block kind accounts for every block of the
+// stream; checked in the order stored, Huffman-only, RLE-only.
+PreflateStrategy estimatePreflateStrategy(const PreflateStreamInfo& info) {
+  const unsigned total = info.countBlocks;
+  return info.countStoredBlocks == total ? PREFLATE_STORE
+       : info.countHuffBlocks == total   ? PREFLATE_HUFF_ONLY
+       : info.countRLEBlocks == total    ? PREFLATE_RLE_ONLY
+       :                                   PREFLATE_DEFAULT;
+}
+
+// Classifies the stream by its share of static-Huffman blocks:
+// all blocks -> static, none -> dynamic, anything in between -> mixed.
+PreflateHuffStrategy estimatePreflateHuffStrategy(const PreflateStreamInfo& info) {
+  const unsigned staticBlocks = info.countStaticHuffTreeBlocks;
+  // The "all blocks" test comes first, so an empty stream counts as static.
+  if (staticBlocks == info.countBlocks) {
+    return PREFLATE_HUFF_STATIC;
+  }
+  return staticBlocks == 0 ? PREFLATE_HUFF_DYNAMIC : PREFLATE_HUFF_MIXED;
+}
+
+// Estimates the complete parameter set for a parsed deflate stream.
+// Strategy, window bits and mem level follow from the block statistics;
+// the remaining fields are taken from the compression level estimation.
+PreflateParameters estimatePreflateParameters(const std::vector<unsigned char>& unpacked_output,
+                                              const size_t off0,
+                                              const std::vector<PreflateTokenBlock>& blocks) {
+  const PreflateStreamInfo stats = extractPreflateInfo(blocks);
+
+  PreflateParameters params;
+  params.strategy     = estimatePreflateStrategy(stats);
+  params.huffStrategy = estimatePreflateHuffStrategy(stats);
+  params.windowBits   = estimatePreflateWindowBits(stats.maxDist);
+  params.memLevel     = estimatePreflateMemLevel(stats.maxTokensPerBlock);
+
+  // The level estimation needs the window bits and mem level found above.
+  const PreflateCompLevelInfo level = estimatePreflateCompLevel(
+      params.windowBits, params.memLevel, unpacked_output, off0, blocks, false);
+  params.compLevel              = level.recommendedCompressionLevel;
+  params.zlibCompatible         = level.zlibCompatible;
+  params.farLen3MatchesDetected = level.farLen3Matches;
+  params.veryFarMatchesDetected = level.veryFarMatches;
+  params.matchesToStartDetected = level.matchToStart;
+  if (level.maxChainDepth == 0) {
+    params.log2OfMaxChainDepthM1 = 0;
+  } else {
+    params.log2OfMaxChainDepthM1 = bitLength(level.maxChainDepth - 1);
+  }
+  return params;
+}
--- a/contrib/preflate/preflate_parameter_estimator.h
+++ b/contrib/preflate/preflate_parameter_estimator.h
@@ -0,0 +1,110 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef PREFLATE_PARAMETER_ESTIMATOR_H
+#define PREFLATE_PARAMETER_ESTIMATOR_H
+
+/* deflate has four parameters:
+* - strategy: the strategy can usually be guessed by looking at the given deflate stream
+*             (e.g. only stored blocks -> stored,
+*                   max distance = 0 -> huffman-only,
+*                   max distance = 1 -> rle,
+*                   only fixed huffman trees -> fixed-huffman-tree,
+*                   otherwise default)
+* - window bits: known by max distance, less window bits would be impossible, more window
+*                bits would be pointless
+* - mem level: used for hash calculation and number of tokens per block
+*              the latter can be used to put a lower limit on mem level
+* - compression level: parameters for the reference finder
+*
+* When reencoding a deflate stream, the predictor has to make a token proposal (either to
+* encode a literal or a (dist, len) pair). A correction data stream will either accept the
+* proposal, or change it to the correct values. The corrected values are then fed to the
+* deflate encoder, and to the predictor.
+*
+* The main problem is to find the missing deflate parameters (compression level and
+* mem level) to minimize the number and complexity of required corrections.
+* Data streams that were encoded with zlib should get perfect recognition,
+* requiring only the detected deflate parameters to be encoded for perfect reconstruction.
+* Data streams from other encoders (7zip, kzip, ...) should be reconstructible with minimal
+* corrective instructions, similar to reflate.
+*
+* kzip does not limit block size to < 64k tokens, while zlib enforces it for various reasons
+* (and defaults to max 16k tokens).
+* Prediction for end-of-block is therefore independent of literal/reference prediction.
+*
+* Mixing or interpolating the prediction from different parameter packs is
+* possible, but not planned right now.
+*/
+
+#include "preflate_info.h"
+#include "preflate_parser_config.h"
+#include "preflate_token.h"
+
+// Match strategy of the original encoder, as guessed by
+// estimatePreflateStrategy() from the block statistics.
+enum PreflateStrategy {
+  PREFLATE_DEFAULT,    // none of the special cases below applies
+  PREFLATE_RLE_ONLY,   // every block counts as RLE block (largest distance is 1)
+  PREFLATE_HUFF_ONLY,  // every block counts as Huffman-only block (no references)
+  PREFLATE_STORE       // every block is a stored block
+};
+// Huffman tree usage of the original encoder, as guessed by
+// estimatePreflateHuffStrategy() from the number of static-tree blocks.
+enum PreflateHuffStrategy {
+  PREFLATE_HUFF_DYNAMIC,  // no block uses the static tree
+  PREFLATE_HUFF_MIXED,    // some, but not all, blocks use the static tree
+  PREFLATE_HUFF_STATIC,   // every block uses the static tree
+};
+
+// Deflate encoder parameters estimated for an existing stream
+// (see estimatePreflateParameters()).
+struct PreflateParameters {
+  PreflateStrategy strategy;
+  PreflateHuffStrategy huffStrategy;
+  bool zlibCompatible;
+  unsigned char windowBits;
+  unsigned char memLevel;
+  unsigned char compLevel;
+  // Set when matches of length 3 with a distance > 4096 are allowed
+  // (zlib level 4+ disallows them).
+  bool farLen3MatchesDetected;
+  // Set when matches with a distance >= 32768 - (MAX_MATCH + MIN_MATCH + 1)
+  // are allowed, or > 32768 - (MAX_MATCH + MIN_MATCH + 1) for the first node
+  // of the hash chain (zlib disallows them).
+  bool veryFarMatchesDetected;
+  // Set when matches reaching back to the start of the stream are allowed
+  // (zlib disallows them).
+  bool matchesToStartDetected;
+  // log2 of the maximal found chain depth - 1; per the original note,
+  // depths 9 to 16 have value 3.
+  // NOTE(review): the estimator stores bitLength(maxChainDepth - 1); check
+  // that this agrees with the example above.
+  unsigned char log2OfMaxChainDepthM1;
+
+
+  // Levels 1..3 use the fast parser settings.
+  bool isFastCompressor() const {
+    return 1 <= compLevel && compLevel <= 3;
+  }
+  // Levels 4..9 use the slow (lazy matching) parser settings.
+  bool isSlowCompressor() const {
+    return 4 <= compLevel && compLevel <= 9;
+  }
+  // Parser settings for compLevel; any level outside 1..9 falls back to the
+  // last entry of the slow table.
+  const PreflateParserConfig& config() const {
+    if (isFastCompressor()) {
+      return fastPreflateParserSettings[compLevel - 1];
+    }
+    if (isSlowCompressor()) {
+      return slowPreflateParserSettings[compLevel - 4];
+    }
+    return slowPreflateParserSettings[5];
+  }
+};
+
+unsigned char estimatePreflateMemLevel(const unsigned maxBlockSize);
+PreflateStrategy estimatePreflateStrategy(const PreflateStreamInfo&);
+PreflateHuffStrategy estimatePreflateHuffStrategy(const PreflateStreamInfo&);
+unsigned char estimatePreflateWindowBits(const unsigned maxDist);
+
+PreflateParameters estimatePreflateParameters(const std::vector<unsigned char>& unpacked_output,
+                                              const size_t off0,
+                                              const std::vector<PreflateTokenBlock>& blocks);
+
+#endif /* PREFLATE_PARAMETER_ESTIMATOR_H */
--- a/contrib/preflate/preflate_parser_config.cpp
+++ b/contrib/preflate/preflate_parser_config.cpp
@@ -0,0 +1,35 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include "preflate_parser_config.h"
+
+#include <algorithm>
+
+// -----------------------------------------
+
+/*      good lazy nice chain */
+// Parser settings for compression levels 1 to 3; PreflateParameters::config()
+// indexes this table with compLevel - 1.
+// Columns: good_length, max_lazy, nice_length, max_chain.
+const PreflateParserConfig fastPreflateParserSettings[3] = {
+  /* 1 */ {4,    4,   8,    4}, /* max speed, no lazy matches */
+  /* 2 */ {4,    5,  16,    8},
+  /* 3 */ {4,    6,  32,   32},
+};
+// Parser settings for compression levels 4 to 9; PreflateParameters::config()
+// indexes this table with compLevel - 4 and uses the last entry for any
+// level outside 1..9.
+// Columns: good_length, max_lazy, nice_length, max_chain.
+const PreflateParserConfig slowPreflateParserSettings[6] = {
+  /* 4 */ {4,    4,  16,   16},  /* lazy matches */
+  /* 5 */ {8,   16,  32,   32},
+  /* 6 */ {8,   16, 128,  128},
+  /* 7 */ {8,   32, 128,  256},
+  /* 8 */ {32, 128, 258, 1024},
+  /* 9 */ {32, 258, 258, 4096}, /* max compression */
+};
--- a/contrib/preflate/preflate_parser_config.h
+++ b/contrib/preflate/preflate_parser_config.h
@@ -0,0 +1,35 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef PREFLATE_PARSER_CONFIG
+#define PREFLATE_PARSER_CONFIG
+
+/* Values for max_lazy_match, good_match and max_chain_length, depending on
+* the desired pack level (0..9). The values given below have been tuned to
+* exclude worst case performance for pathological files. Better values may be
+* found for specific files.
+*/
+// One row of match-finder tuning values; the per-level rows live in
+// fastPreflateParserSettings and slowPreflateParserSettings.
+struct PreflateParserConfig {
+  unsigned char good_length; /* reduce lazy search above this match length */
+  unsigned short max_lazy;    /* do not perform lazy search above this match length */
+  unsigned short nice_length; /* quit search above this match length */
+  unsigned short max_chain;   /* presumably the maximal hash chain length to follow -- TODO confirm */
+};
+
+extern const PreflateParserConfig fastPreflateParserSettings[3];
+extern const PreflateParserConfig slowPreflateParserSettings[6];
+
+
+#endif 
+/* PREFLATE_PARSER_CONFIG */
--- a/contrib/preflate/preflate_predictor_state.cpp
+++ b/contrib/preflate/preflate_predictor_state.cpp
@@ -0,0 +1,449 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include "preflate_constants.h"
+#include "preflate_predictor_state.h"
+#include <algorithm>
+
+// Binds the predictor state to a hash chain, a sequence chain and a parser
+// configuration. wbits fixes the window size (1 << wbits bytes), mbits the
+// maximal token count ((1 << (6 + mbits)) - 1).
+PreflatePredictorState::PreflatePredictorState(
+    const PreflateHashChainExt& hash_,
+    const PreflateSeqChain& seq_,
+    const PreflateParserConfig& config_,
+    const int wbits, 
+    const int mbits) 
+  : hash(hash_)
+  , seq(seq_)
+  , windowBytes(1 << wbits)
+  , maxTokenCount((1 << (6 + mbits)) - 1)
+  , config(config_) {
+}
+
+/* deflate has four parameters:
+ * - strategy: the strategy can usually be guessed by looking at the given deflate stream
+ *             (e.g. only stored blocks -> stored, 
+ *                   max distance = 0 -> huffman-only,
+ *                   max distance = 1 -> rle, 
+ *                   only fixed huffman trees -> fixed-huffman-tree,
+ *                   otherwise default)
+ * - window bits: known by max distance, less window bits would be impossible, more window
+ *                bits would be pointless
+ * - mem level: used for hash calculation and number of tokens per block
+ *              the latter can be used to put a lower limit on mem level
+ * - compression level: parameters for the reference finder
+ *
+ * When reencoding a deflate stream, the predictor has to make a token proposal (either to
+ * encode a literal or a (dist, len) pair). A correction data stream will either accept the
+ * proposal, or change it to the correct values. The corrected values are then fed to the
+ * deflate encoder, and to the predictor.
+ *
+ * The main problem is to find the missing deflate parameters (compression level and
+ * mem level) to minimize the number and complexity of required corrections.
+ * Data streams that were encoded with zlib should get perfect recognition,
+ * requiring only the detected deflate parameters to be encoded for perfect reconstruction.
+ * Data streams from other encoders (7zip, kzip, ...) should be reconstructible with minimal
+ * corrective instructions, similar to reflate.
+ *
+ * kzip does not limit block size to < 64k tokens, while zlib enforces it for various reasons
+ * (and defaults to max 16k tokens).
+ * Prediction for end-of-block is therefore independent of literal/reference prediction.
+ *
+ * Mixing or interpolating the prediction from different parameter packs is
+ * possible, but not planned right now.
+ */
+
+// Length of the common prefix of s1 and s2, limited to maxLen bytes.
+// Returns 0 early when the candidate cannot beat bestLen (the bytes at index
+// bestLen differ) or when the first three bytes do not all match.
+unsigned PreflatePredictorState::prefixCompare(
+    const unsigned char* s1,
+    const unsigned char* s2,
+    const unsigned bestLen,
+    const unsigned maxLen) {
+  // Cheap rejection: a longer match has to agree at index bestLen.
+  if (s1[bestLen] != s2[bestLen]) {
+    return 0;
+  }
+  for (unsigned k = 0; k < 3; ++k) {
+    if (s1[k] != s2[k]) {
+      return 0;
+    }
+  }
+
+  // The first three bytes are known to match; extend byte by byte.
+  unsigned len = 3;
+  while (len < maxLen && s1[len] == s2[len]) {
+    ++len;
+  }
+  return len;
+}
+
+// Counts matching bytes of s1 and s2 from index 0 on, up to maxLen.
+// Returns 0 early when the bytes at index bestLen differ. In contrast to
+// prefixCompare(), no minimum of three matching bytes is required.
+unsigned PreflatePredictorState::suffixCompare(
+    const unsigned char* s1,
+    const unsigned char* s2,
+    const unsigned bestLen,
+    const unsigned maxLen) {
+  if (s1[bestLen] != s2[bestLen]) {
+    return 0;
+  }
+  unsigned matched = 0;
+  do {
+    if (s1[matched] != s2[matched]) {
+      break;
+    }
+  } while (++matched < maxLen);
+  return matched;
+}
+
+// Computes the search limits for a match starting at input position startPos.
+//   prevLen        length that a new match has to exceed
+//   veryFarMatches allow distances up to the full window size
+//   matchesToStart allow matches that reach back to position 0
+//   maxDepth       if > 0, overrides the configured chain length and
+//                  disables the nice-length cutoff
+// Returns false when no match longer than prevLen (and at least MIN_MATCH
+// long) fits into the remaining input; `helper` is then only partly filled.
+bool PreflatePredictorState::createMatchHelper(
+    MatchHelper& helper,
+    const unsigned prevLen,
+    const unsigned startPos,
+    const bool veryFarMatches,
+    const bool matchesToStart,
+    const unsigned maxDepth) {
+  helper.maxLen = min(totalInputSize() - startPos, (unsigned)PreflateConstants::MAX_MATCH);
+  if (helper.maxLen < std::max<uint32_t>(prevLen + 1, PreflateConstants::MIN_MATCH)) {
+    return false;
+  }
+  helper.startPos = startPos;
+  // Largest distance that stays inside the input. Without matchesToStart the
+  // very first byte is excluded as well. At startPos == 0 there is nothing to
+  // refer back to, so the limit stays 0 instead of wrapping around to
+  // UINT_MAX, which would let the never-written node 0 pass the distance check.
+  unsigned maxDistToStart = startPos;
+  if (!matchesToStart && maxDistToStart > 0) {
+    --maxDistToStart;
+  }
+  if (veryFarMatches) {
+    helper.curMaxDistHop1Plus
+      = helper.curMaxDistHop0
+      = min(maxDistToStart, windowSize());
+  } else {
+    // The first chain entry may be one byte farther away than all later ones.
+    unsigned maxDist = windowSize() - PreflateConstants::MIN_LOOKAHEAD;
+    helper.curMaxDistHop0 = min(maxDistToStart, maxDist);
+    helper.curMaxDistHop1Plus = min(maxDistToStart, maxDist - 1);
+  }
+  if (maxDepth > 0) {
+    helper.maxChain = maxDepth;
+    helper.niceLen = helper.maxLen;
+  } else {
+    helper.maxChain = maxChainLength();/* max hash chain length */
+    helper.niceLen = min(niceMatchLength(), helper.maxLen);
+
+    // A good match is already known: search only a quarter of the chain.
+    if (prevLen >= goodMatchLength()) {
+      helper.maxChain >>= 2;
+    }
+  }
+  return true;
+}
+
+// Searches the hash chain starting at node hashHead for the longest match at
+// the current input position plus `offset`.
+// Returns a REFERENCE token for the best match longer than prevLen, or a
+// NONE token when there is no such match. The flags and maxDepth are passed
+// on to createMatchHelper().
+PreflateToken PreflatePredictorState::match(
+    const unsigned hashHead,
+    const unsigned prevLen,
+    const unsigned offset,
+    const bool veryFarMatches,
+    const bool matchesToStart,
+    const unsigned maxDepth) {
+  PreflateToken bestMatch(PreflateToken::NONE);
+  MatchHelper h;
+  if (!createMatchHelper(h, prevLen, currentInputPos() + offset,
+                         veryFarMatches, matchesToStart, maxDepth)) {
+    return bestMatch;
+  }
+  PreflateHashIterator chainIt = iterateFromNode(hashHead, h.startPos, h.curMaxDistHop1Plus);
+  // Handle ZLIB quirk: the very first entry in the hash chain can have a larger
+  // distance than all following entries
+  if (chainIt.dist() > h.curMaxDistHop0) {
+    return bestMatch;
+  }
+  const unsigned char* input = inputCursor() + offset;
+  unsigned bestLen = prevLen;
+  do {
+    const unsigned char* match = input - chainIt.dist();
+
+    // prefixCompare() returns 0 for candidates that cannot beat bestLen.
+    unsigned matchLength = prefixCompare(match, input, bestLen, h.maxLen);
+    if (matchLength > bestLen) {
+      bestLen = matchLength;
+      bestMatch = PreflateToken(PreflateToken::REFERENCE, matchLength, chainIt.dist());
+      // Long enough: stop searching.
+      if (bestLen >= h.niceLen) {
+        break;
+      }
+    }
+    // Continue while the chain has another node in range and the chain
+    // budget (h.maxChain candidates in total) is not used up.
+  } while (chainIt.next() && h.maxChain-- > 1);
+  return bestMatch;
+}
+PreflateToken PreflatePredictorState::seqMatch(  // Searches for the best back-reference at startPos by walking the run chain `seq`.
+  const unsigned startPos,  // input position the match is searched for
+  const unsigned hashHead,  // hash-chain node handed to the depth queries below
+  const unsigned prevLen,  // length already achieved; the search looks for longer matches
+  const bool veryFarMatches,  // forwarded to createMatchHelper
+  const bool matchesToStart,  // forwarded to createMatchHelper
+  const unsigned maxDepth) {  // forwarded to createMatchHelper
+  PreflateToken bestMatch(PreflateToken::NONE);  // stays NONE unless a candidate is accepted
+  MatchHelper h;
+  if (!createMatchHelper(h, prevLen, startPos,
+                         veryFarMatches, matchesToStart, maxDepth)) {
+    return bestMatch;
+  }
+
+  PreflateSeqIterator chainIt = seq.iterateFromPos(startPos);
+  if (!chainIt) {
+    return bestMatch;  // startPos has no usable run-chain link
+  }
+  unsigned curSeqLen = std::min<uint32_t>(seq.len(startPos), h.maxLen);  // length stored for startPos, capped at maxLen
+  unsigned curMaxDist = h.curMaxDistHop1Plus;
+  unsigned bestLen = prevLen;
+  if (curSeqLen < PreflateConstants::MIN_MATCH) {
+    // startPos is part of a bigger sequence,
+    // and the ZLIB quirk does not apply, yeah!
+    curSeqLen = min(chainIt.len() - chainIt.dist(), h.maxLen);  // length stored at the linked entry minus the distance to it
+    if (curSeqLen > prevLen && 1 <= h.curMaxDistHop0) {
+      bestLen = curSeqLen;
+      bestMatch = PreflateToken(PreflateToken::REFERENCE, curSeqLen, 1);  // distance 1: reference to the preceding byte
+    }
+    if (bestLen >= h.niceLen || !chainIt.next()) {
+      return bestMatch;
+    }
+    if (chainIt.dist() > h.curMaxDistHop1Plus + chainIt.len() - PreflateConstants::MIN_MATCH) {
+      return bestMatch;  // even the nearest MIN_MATCH-long part of that run is out of range
+    }
+  } else {
+    unsigned minDistOff = chainIt.len() - PreflateConstants::MIN_MATCH;  // how far into the candidate run a MIN_MATCH-long match can start
+    if (chainIt.dist() > h.curMaxDistHop1Plus + minDistOff) {
+      if (chainIt.dist() > h.curMaxDistHop0 + minDistOff) {
+        return bestMatch;
+      }
+      // Handle ZLIB quirk: the very first entry in the hash chain can have a larger
+      // distance than all following entries
+      unsigned latestPos = h.startPos - chainIt.dist() + minDistOff;
+      unsigned depth = hash.getRelPosDepth(latestPos, hashHead);
+      if (depth == 0) {
+        curMaxDist = h.curMaxDistHop0;  // the wider hop-0 limit is used for the first candidate
+      }
+    }
+  }
+  const unsigned char* input = inputCursor() + startPos - currentInputPos();  // input bytes at startPos
+  unsigned bestSeqLen = min(curSeqLen, bestLen);
+
+  do {
+    if (chainIt.len() < bestSeqLen) {
+      // If we do not even meet the already matched number of sequence bytes,
+      // we can just skip this
+      continue;
+    }
+
+    unsigned oldBestSeqLen = bestSeqLen;
+    bestSeqLen = std::min<uint32_t>(std::min<uint32_t>(curSeqLen, chainIt.len()), h.niceLen);
+    unsigned bestDist = chainIt.dist() - chainIt.len() + bestSeqLen;  // distance to the point in the candidate run where bestSeqLen bytes of it remain
+    unsigned error = 0;  // accumulated amount by which the candidate exceeds the distance/depth limits
+    if (bestDist > curMaxDist) {
+      // best subsequence is already beyond the search range
+      error = bestDist - curMaxDist;
+      if (error > chainIt.len() - PreflateConstants::MIN_MATCH) {
+        break;
+      }
+    }
+    unsigned bestChainDepth = hash.getRelPosDepth(h.startPos - bestDist + error, hashHead);
+    if (bestChainDepth >= h.maxChain) {
+      // best subsequence is already beyond the search range
+      error += bestChainDepth - h.maxChain + 1;
+      if (error > chainIt.len() - PreflateConstants::MIN_MATCH) {
+        break;
+      }
+    }
+    if (error) {
+      if (bestSeqLen > std::max<uint32_t>(oldBestSeqLen, PreflateConstants::MIN_MATCH - 1) + error) {
+        bestMatch = PreflateToken(PreflateToken::REFERENCE, bestSeqLen - error, bestDist - error);  // length and distance are both reduced by error
+      }
+      // Since we had to correct the length down, we know that
+      // the comparer cannot find a better match
+      break;
+    }
+    if (bestSeqLen == h.maxLen) {
+      bestMatch = PreflateToken(PreflateToken::REFERENCE, bestSeqLen, bestDist);  // already at the maximum length, nothing left to compare
+      break;
+    } else {
+      const unsigned char* match = input - bestDist;
+
+      unsigned matchLength = bestSeqLen + suffixCompare(match + bestSeqLen, input + bestSeqLen, max(bestLen, bestSeqLen) - bestSeqLen, h.maxLen - bestSeqLen);  // run part plus whatever suffixCompare reports for the bytes after it
+      if (matchLength > bestLen) {
+        bestLen = matchLength;
+        bestMatch = PreflateToken(PreflateToken::REFERENCE, matchLength, bestDist);
+        if (bestLen >= h.niceLen) {
+          break;
+        }
+      }
+    }
+    curMaxDist = h.curMaxDistHop1Plus;  // after a fully evaluated candidate only the hop-1+ limit applies
+  } while (chainIt.next());
+  return bestMatch;
+}
+
+// Continues down the given hash chain from the entry that targetReference.dist
+// refers to and reports the next entry matching more than targetReference.len
+// bytes. The result keeps its "none" values (0xffff / 0 / 0xffff) otherwise.
+PreflateNextMatchInfo PreflatePredictorState::nextMatchInfo(
+  const unsigned hashHead,
+  const PreflateToken& targetReference,
+  const PreflateHashChainExt& hash) {
+  PreflateNextMatchInfo result;
+  result.nextChainDepth = (unsigned short)~0u;
+  result.nextLen = 0;
+  result.nextDist = 0xffff;
+  const unsigned maxLen = min(availableInputSize(), (unsigned)PreflateConstants::MAX_MATCH);
+  if (maxLen < (unsigned)PreflateConstants::MIN_MATCH) {
+    return result;
+  }
+
+  // Distance limits: an entry that is the chain head may be one byte farther away.
+  const unsigned maxDist = windowSize() - PreflateConstants::MIN_LOOKAHEAD - 1;
+  const unsigned curPos = currentInputPos();
+  const unsigned limitRegular = min(curPos - 1, maxDist);
+  const unsigned limitAtHead = min(curPos - 1, windowSize() - PreflateConstants::MIN_LOOKAHEAD);
+
+  const unsigned char* const cursor = inputCursor();
+  const unsigned headDepth = hash.getNodeDepth(hashHead);
+  const unsigned chainBudget = maxChainLength();
+  PreflateHashIterator chainIt = hash.iterateFromPos(curPos - targetReference.dist, curPos, limitRegular);
+  if (!chainIt.curPos) {
+    return result;
+  }
+  const unsigned distLimit = (hashHead == chainIt.curPos) ? limitAtHead : limitRegular;
+  if (chainIt.dist() > distLimit) {
+    return result;
+  }
+
+  // Hops between the chain head and the target entry are charged to the budget.
+  const unsigned targetLen = targetReference.len;
+  unsigned hopsLeft = chainBudget - min(headDepth - chainIt.depth(), 0xffffu);
+  for (; hopsLeft > 0 && chainIt.next(); --hopsLeft) {
+    const unsigned len = prefixCompare(cursor - chainIt.dist(), cursor, targetLen, maxLen);
+    if (len > targetLen) {
+      result.nextLen = len;
+      result.nextChainDepth = chainBudget - hopsLeft;
+      result.nextDist = chainIt.dist();
+      break;
+    }
+  }
+  return result;
+}
+
+// Walks the hash chain from hashHead until targetReference.dist is reached or
+// passed. Reports the hop count of the first entry for which prefixCompare
+// yields at least targetReference.len, the number of such entries
+// (condensedHops) and the hop count of the entry exactly at the target distance.
+PreflateRematchInfo PreflatePredictorState::rematchInfo(
+    const unsigned hashHead,
+    const PreflateToken& targetReference) {
+  PreflateRematchInfo result;
+  result.firstMatchDepth = 0xffff;
+  // firstMatchDist is not computed here; give it a defined "none" value so the
+  // returned struct never carries an indeterminate member.
+  result.firstMatchDist = 0xffff;
+  result.requestedMatchDepth = 0xffff;
+  result.condensedHops = 0;
+  unsigned maxLen = min(availableInputSize(), (unsigned)PreflateConstants::MAX_MATCH);
+  if (maxLen < targetReference.len) {
+    return result;
+  }
+
+  unsigned curPos = currentInputPos();
+  unsigned curMaxDist = min(curPos, windowSize());
+  PreflateHashIterator chainIt = hash.iterateFromNode(hashHead, curPos, curMaxDist);
+  if (!chainIt) {
+    return result;
+  }
+  const unsigned char* input = inputCursor();
+  unsigned maxChainOrg = 0xffff;/* max hash chain length */
+  unsigned maxChain = maxChainOrg;/* hops still allowed */
+  unsigned bestLen = targetReference.len;
+  do {
+    const unsigned char* match = input - chainIt.dist();
+    unsigned matchLength = prefixCompare(match, input, bestLen - 1, bestLen);
+    if (matchLength >= bestLen) {
+      result.firstMatchDepth = min((unsigned)result.firstMatchDepth, maxChainOrg - maxChain);
+      result.condensedHops++;
+    }
+    if (chainIt.dist() >= targetReference.dist) {
+      if (chainIt.dist() == targetReference.dist) {
+        result.requestedMatchDepth = maxChainOrg - maxChain;
+      }
+      return result;
+    }
+    chainIt.next();
+  } while (!!chainIt && maxChain-- > 1);
+  return result;
+}
+// Scans the hash chain of the current input position and returns the distance
+// of the first entry for which prefixCompare reports at least len matching
+// bytes. Returns 0 when the chain is empty, no entry qualifies, or fewer than
+// max(len, MIN_MATCH) bytes of input remain.
+unsigned PreflatePredictorState::firstMatch(const unsigned len) {
+  const unsigned usable = min(availableInputSize(), (unsigned)PreflateConstants::MAX_MATCH);
+  const unsigned wanted = max(len, (unsigned)PreflateConstants::MIN_MATCH);
+  if (usable < wanted) {
+    return 0;
+  }
+
+  const unsigned here = currentInputPos();
+  const unsigned reach = min(here, windowSize());
+  PreflateHashIterator chainIt = iterateFromHead(calculateHash(), here, reach);
+  if (!chainIt) {
+    return 0;
+  }
+
+  const unsigned char* const cursor = inputCursor();
+  for (bool more = true; more; more = chainIt.next()) {
+    const unsigned char* const candidate = cursor - chainIt.dist();
+    if (prefixCompare(candidate, cursor, len - 1, len) >= len) {
+      return chainIt.dist();
+    }
+  }
+  return 0;
+}
+
+// Starting at the chain entry for targetReference.dist, moves further down the
+// hash chain and returns the distance of the hops-th entry for which
+// prefixCompare against the target's bytes reports at least targetReference.len.
+// With hops == 0 the target distance itself is returned; 0 means "not found".
+unsigned PreflatePredictorState::hopMatch(const PreflateToken& targetReference, const unsigned hops) {
+  if (hops == 0) {
+    return targetReference.dist;
+  }
+
+  const unsigned notFound = 0;
+  const unsigned here = currentInputPos();
+  const unsigned usable = min(availableInputSize(), (unsigned)PreflateConstants::MAX_MATCH);
+  if (usable < targetReference.len) {
+    return notFound;
+  }
+
+  const unsigned reach = min(here, windowSize());
+  PreflateHashIterator chainIt = iterateFromDist(targetReference.dist, here, reach);
+  if (!chainIt) {
+    return notFound;
+  }
+
+  const unsigned char* const cursor = inputCursor();
+  // Candidates are compared with the bytes at the target, not with the cursor.
+  const unsigned char* const targetBytes = cursor - targetReference.dist;
+  const unsigned wantedLen = targetReference.len;
+  unsigned remaining = hops;
+  while (chainIt.next()) {
+    const unsigned char* const candidate = cursor - chainIt.dist();
+    if (prefixCompare(candidate, targetBytes, wantedLen - 1, wantedLen) < wantedLen) {
+      continue;
+    }
+    if (--remaining == 0) {
+      return chainIt.dist();
+    }
+  }
+  return notFound;
+}
--- a/contrib/preflate/preflate_predictor_state.h
+++ b/contrib/preflate/preflate_predictor_state.h
@@ -0,0 +1,176 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef PREFLATE_PREDICTOR_STATE_H
+#define PREFLATE_PREDICTOR_STATE_H
+
+#include <vector>
+
+#include "preflate_input.h"
+#include "preflate_hash_chain.h"
+#include "preflate_parser_config.h"
+#include "preflate_seq_chain.h"
+#include "preflate_token.h"
+
+struct PreflatePreviousMatchInfo {  // fixed-size table of 256 match tokens
+  PreflateToken previousMatches[256];
+};
+
+struct PreflateNextMatchInfo {  // result of PreflatePredictorState::nextMatchInfo()
+  unsigned short nextChainDepth;  // chain-budget hops used when the longer match was found; 0xffff if none was found
+  unsigned short nextLen;  // length of that match; 0 if none was found
+  unsigned short nextDist;  // distance of that match; 0xffff if none was found
+};
+
+struct PreflateRematchInfo {  // result of PreflatePredictorState::rematchInfo()
+  unsigned short firstMatchDepth;  // hop count of the first chain entry matching the target length; 0xffff if none
+  unsigned short firstMatchDist;  // NOTE(review): rematchInfo() does not compute this value
+  unsigned short requestedMatchDepth;  // hop count of the entry at the target distance; 0xffff if it was not reached
+  unsigned short condensedHops;  // number of visited chain entries that matched the target length
+};
+
+struct PreflatePredictorState {  // Match-search helper built on a hash chain, a run ("sequence") chain and a parser configuration.
+  const PreflateHashChainExt&    hash;  // hash chain; also gives access to the input via hash.input()
+  const PreflateSeqChain&    seq;  // run chain used by seqMatch
+  unsigned short windowBytes;  // value reported by windowSize()
+  unsigned maxTokenCount;
+  const PreflateParserConfig& config;  // source of the max_chain / nice_length / good_length / max_lazy limits below
+
+  PreflatePredictorState(const PreflateHashChainExt&,
+                         const PreflateSeqChain&,
+                         const PreflateParserConfig&,
+                         const int wbits, 
+                         const int mbits);
+
+  unsigned currentInputPos() const {  // position of the hash chain's input
+    return hash.input().pos();
+  }
+  const unsigned char* inputCursor() const {  // pointer to the input bytes at the current position
+    return hash.input().curChars();
+  }
+  unsigned windowSize() const {
+    return windowBytes;
+  }
+  unsigned totalInputSize() const {
+    return hash.input().size();
+  }
+  unsigned availableInputSize() const {  // input bytes remaining from the current position
+    return hash.input().remaining();
+  }
+  unsigned maxChainLength() const {
+    return config.max_chain;
+  }
+  unsigned niceMatchLength() const {
+    return config.nice_length;
+  }
+  unsigned goodMatchLength() const {
+    return config.good_length;
+  }
+  unsigned lazyMatchLength() const {
+    return config.max_lazy;
+  }
+  unsigned calculateHash() const {  // hash value at the current position
+    return hash.curHash();
+  }
+  unsigned calculateHashNext() const {  // hash value at the position after the current one
+    return hash.curPlus1Hash();
+  }
+  unsigned getCurrentHashHead(const unsigned hashNext) const {
+    return hash.getHead(hashNext);
+  }
+
+  PreflateHashIterator iterateFromHead(const unsigned hash_, const unsigned refPos, const unsigned maxDist) const {  // forwards to the hash chain
+    return hash.iterateFromHead(hash_, refPos, maxDist);
+  }
+  PreflateHashIterator iterateFromNode(const unsigned node_, const unsigned refPos, const unsigned maxDist) const {  // forwards to the hash chain
+    return hash.iterateFromNode(node_, refPos, maxDist);
+  }
+  PreflateHashIterator iterateFromDist(const unsigned dist_, const unsigned refPos, const unsigned maxDist) const {  // starts at position refPos - dist_
+    return hash.iterateFromPos(refPos - dist_, refPos, maxDist);
+  }
+
+  static unsigned prefixCompare(  // byte comparison helper; callers treat the result as a match length
+      const unsigned char* s1,
+      const unsigned char* s2,
+      const unsigned bestLen,
+      const unsigned maxLen);
+
+  static unsigned suffixCompare(  // byte comparison helper; seqMatch adds the result to an already matched run length
+    const unsigned char* s1,
+    const unsigned char* s2,
+    const unsigned bestLen,
+    const unsigned maxLen);
+
+  bool betterMatchPossible(
+    const unsigned prevLen,
+    const unsigned startPos);
+
+  PreflateToken matchHop0MaxDist(
+    const unsigned hashHead,
+    const unsigned prevLen,
+    const unsigned offset,
+    const bool veryFarMatches,
+    const bool matchesToStart);
+
+  PreflateToken match(  // match search along the hash chain
+      const unsigned hashHead, 
+      const unsigned prevLen, 
+      const unsigned offset, 
+      const bool veryFarMatches,
+      const bool matchesToStart,
+      const unsigned maxDepth);
+
+  PreflateToken seqMatch(  // match search along the run chain `seq`
+    const unsigned startPos,
+    const unsigned hashHead,
+    const unsigned prevLen,
+    const bool veryFarMatches,
+    const bool matchesToStart,
+    const unsigned maxDepth);
+
+  /*
+  unsigned short matchDepth(const unsigned hashHead, const PreflateToken& targetReference,
+                      const PreflateHashChainExt&);*/
+  PreflateNextMatchInfo nextMatchInfo(const unsigned hashHead, const PreflateToken& targetReference,  // next longer match after the target on the given chain
+                              const PreflateHashChainExt&);
+  PreflateRematchInfo rematchInfo(const unsigned hashHead, const PreflateToken& targetReference);  // hop counts of the matches up to the target distance
+  unsigned firstMatch(const unsigned len);  // distance of the first chain entry matching len bytes, 0 if none
+  unsigned hopMatch(const PreflateToken& token, const unsigned hops);  // distance reached `hops` matching entries after the token's, 0 if none
+
+private:
+  struct MatchHelper {  // per-search limits filled in by createMatchHelper
+    unsigned startPos;
+    unsigned maxLen;
+    unsigned curMaxDistHop0;  // distance limit checked by validHop0Dist
+    unsigned curMaxDistHop1Plus;  // distance limit checked by validHop1PlusDist
+    unsigned maxChain;
+    unsigned niceLen;
+
+    bool validHop0Dist(const unsigned d) const {
+      return d <= curMaxDistHop0;
+    }
+    bool validHop1PlusDist(const unsigned d) const {
+      return d <= curMaxDistHop1Plus;
+    }
+  };
+  bool createMatchHelper(  // fills `helper`; seqMatch gives up when this returns false
+    MatchHelper& helper,
+    const unsigned prevLen,
+    const unsigned startPos,
+    const bool veryFarMatches,
+    const bool matchesToStart,
+    const unsigned maxDepth);
+};
+
+#endif /* PREFLATE_PREDICTOR_STATE_H */
--- a/contrib/preflate/preflate_reencoder.cpp
+++ b/contrib/preflate/preflate_reencoder.cpp
@@ -0,0 +1,205 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include <functional>
+#include "preflate_block_reencoder.h"
+#include "preflate_reencoder.h"
+#include "preflate_statistical_codec.h"
+#include "preflate_token_predictor.h"
+#include "preflate_tree_predictor.h"
+#include "support/bitstream.h"
+#include "support/memstream.h"
+
+class PreflateReencoderHandler : public PreflateReencoderTask::Handler {  // Handler behind preflate_reencode(): decodes meta-block side data and writes the re-encoded blocks to `bos`.
+public:
+  PreflateReencoderHandler(BitOutputStream& bos_,
+                           const std::vector<uint8_t>& reconData,
+                           const size_t uncompressedSize,
+                           std::function<void(void)> progressCallback_)
+    : decoder(reconData, uncompressedSize)
+    , progressCallback(std::move(progressCallback_))  // taken by value: move instead of copying again
+    , bos(bos_) {}
+
+  size_t metaBlockCount() const {
+    return decoder.metaBlockCount();
+  }
+  size_t metaBlockUncompressedSize(const size_t metaBlockId) const {
+    return decoder.metaBlockUncompressedSize(metaBlockId);
+  }
+  bool error() const {
+    return decoder.error();
+  }
+
+  bool finish() {
+    decoder.finish();
+    return !decoder.error();
+  }
+
+  bool beginDecoding(const uint32_t metaBlockId,  // `override` lets the compiler check the signature against Handler
+                     PreflatePredictionDecoder& codec, PreflateParameters& params) override {
+    return decoder.beginMetaBlock(codec, params, metaBlockId);
+  }
+  // Finishes the meta block in the decoder, then writes its token blocks and padding bits.
+  bool endDecoding(const uint32_t metaBlockId, PreflatePredictionDecoder& codec,
+                   std::vector<PreflateTokenBlock>&& tokenData,
+                   std::vector<uint8_t>&& uncompressedData,
+                   const size_t uncompressedOffset,
+                   const size_t paddingBitCount,
+                   const size_t paddingValue) override {
+    if (!decoder.endMetaBlock(codec)) {
+      return false;
+    }
+    PreflateBlockReencoder deflater(bos, uncompressedData, uncompressedOffset);
+    for (size_t j = 0, n = tokenData.size(); j < n; ++j) {
+      deflater.writeBlock(tokenData[j],
+                          metaBlockId + 1 == decoder.metaBlockCount() && j + 1 == n);  // true only for the last block of the last meta block
+      markProgress();
+    }
+    bos.put(paddingValue, paddingBitCount);
+    return true;
+  }
+
+  void markProgress() override {
+    std::lock_guard<std::mutex> lock(_mutex);  // one callback invocation at a time
+    progressCallback();
+  }
+
+private:
+  PreflateMetaDecoder decoder;
+  std::function<void(void)> progressCallback;
+  BitOutputStream& bos;
+  std::mutex _mutex;
+};
+
+PreflateReencoderTask::PreflateReencoderTask(PreflateReencoderHandler::Handler& handler_,  // keeps a reference to handler_ and takes over the data buffer
+                                             const uint32_t metaBlockId_,
+                                             std::vector<uint8_t>&& uncompressedData_,
+                                             const size_t uncompressedOffset_,
+                                             const bool lastMetaBlock_)
+  : handler(handler_)
+  , metaBlockId(metaBlockId_)
+  , uncompressedData(std::move(uncompressedData_))  // a named rvalue reference is an lvalue; without std::move the buffer was copied
+  , uncompressedOffset(uncompressedOffset_)
+  , lastMetaBlock(lastMetaBlock_) {}
+
+// Decodes this meta block's token blocks from the prediction stream into
+// tokenData and, for the last meta block, the trailing padding bits.
+// Returns false if decoding cannot be started or a predictor reports failure.
+bool PreflateReencoderTask::decodeAndRepredict() {
+  PreflateParameters params;
+  if (!handler.beginDecoding(metaBlockId, pcodec, params)) {
+    return false;
+  }
+
+  PreflateTokenPredictor tokenPredictor(params, uncompressedData, uncompressedOffset);
+  PreflateTreePredictor treePredictor(uncompressedData, uncompressedOffset);
+
+  for (bool finished = false; !finished; ) {
+    PreflateTokenBlock block = tokenPredictor.decodeBlock(&pcodec);
+    const bool treeOk = treePredictor.decodeBlock(block, &pcodec);
+    if (!treeOk || tokenPredictor.predictionFailure || treePredictor.predictionFailure) {
+      return false;
+    }
+    tokenData.push_back(std::move(block));
+    // The last meta block reads its end flag from the codec; the others ask
+    // the token predictor whether its input is exhausted.
+    finished = lastMetaBlock ? tokenPredictor.decodeEOF(&pcodec)
+                             : tokenPredictor.inputEOF();
+    handler.markProgress();
+  }
+
+  paddingBitCount = 0;
+  paddingBits = 0;
+  if (lastMetaBlock && pcodec.decodeNonZeroPadding()) {
+    paddingBitCount = pcodec.decodeValue(3);
+    if (paddingBitCount > 0) {
+      // The top padding bit is always set here; only the lower
+      // paddingBitCount - 1 bits are read from the codec.
+      paddingBits = (1 << (paddingBitCount - 1)) + pcodec.decodeValue(paddingBitCount - 1);
+    }
+  }
+  return true;
+}
+bool PreflateReencoderTask::reencode() {  // forwards the decoded meta block and padding to the handler
+  const bool accepted = handler.endDecoding(metaBlockId, pcodec, std::move(tokenData), std::move(uncompressedData),
+                                            uncompressedOffset, paddingBitCount, paddingBits);
+  return accepted;
+}
+
+// Re-creates the raw deflate stream on `os` from the uncompressed data read from `is` and the
+// reconstruction data in preflate_diff. Returns false on a short read or a decode/re-encode failure.
+bool preflate_reencode(OutputStream& os,
+                       const std::vector<unsigned char>& preflate_diff,
+                       InputStream& is,
+                       const uint64_t unpacked_size,
+                       std::function<void(void)> block_callback) {
+  BitOutputStream bos(os);
+  PreflateReencoderHandler decoder(bos, preflate_diff, unpacked_size, block_callback);
+  if (decoder.error()) {
+    return false;
+  }
+  // Number of trailing bytes kept as history for the next meta block.
+  const size_t historySize = size_t(1) << 15;
+  std::vector<uint8_t> uncompressedData;
+  // NOTE(review): nothing here ever pushes to futureQueue, so every meta block is processed synchronously.
+  std::queue<std::future<std::shared_ptr<PreflateReencoderTask>>> futureQueue;
+  size_t queueLimit = 0;
+  bool fail = false;
+  for (size_t j = 0, n = decoder.metaBlockCount(); j < n; ++j) {
+    size_t curUncSize = uncompressedData.size();
+    size_t newSize = decoder.metaBlockUncompressedSize(j);
+    uncompressedData.resize(curUncSize + newSize);
+    if (is.read(uncompressedData.data() + curUncSize, newSize) != newSize) {
+      return false;
+    }
+    if (futureQueue.empty() && (queueLimit == 0 || j + 1 == n)) {
+      PreflateReencoderTask task(decoder, j, std::vector<uint8_t>(uncompressedData), curUncSize, j + 1 == n);
+      if (!task.decodeAndRepredict() || !task.reencode()) {
+        return false;
+      }
+    }
+    // Keep only the history; the size check ensures no iterator outside [begin, end] is ever formed.
+    if (j + 1 < n && uncompressedData.size() > historySize) {
+      uncompressedData.erase(uncompressedData.begin(),
+                             uncompressedData.begin() + (uncompressedData.size() - historySize));
+    }
+  }
+  while (!futureQueue.empty()) {
+    std::future<std::shared_ptr<PreflateReencoderTask>> first = std::move(futureQueue.front());
+    futureQueue.pop();
+    std::shared_ptr<PreflateReencoderTask> data = first.get();
+    if (fail || !data || !data->reencode()) {
+      fail = true;
+    }
+  }
+  bos.flush();
+  return !fail && !decoder.error();
+}
+
+// Convenience overload: serves the uncompressed data from a vector held in memory.
+bool preflate_reencode(OutputStream& os, const std::vector<unsigned char>& preflate_diff,
+                       const std::vector<unsigned char>& unpacked_input, std::function<void(void)> block_callback) {
+  MemStream source(unpacked_input);
+  const uint64_t totalBytes = unpacked_input.size();
+  return preflate_reencode(os, preflate_diff, source, totalBytes, block_callback);
+}
+// Convenience overload: re-encodes in memory; deflate_raw receives whatever was written, even on failure.
+bool preflate_reencode(std::vector<unsigned char>& deflate_raw, const std::vector<unsigned char>& preflate_diff,
+                       const std::vector<unsigned char>& unpacked_input) {
+  MemStream sink;
+  const bool ok = preflate_reencode(sink, preflate_diff, unpacked_input, [] {});
+  deflate_raw = sink.extractData();
+  return ok;
+}
--- a/contrib/preflate/preflate_reencoder.h
+++ b/contrib/preflate/preflate_reencoder.h
@@ -0,0 +1,79 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef PREFLATE_REENCODER_H
+#define PREFLATE_REENCODER_H
+
+#include <vector>
+#include "preflate_statistical_codec.h"
+#include "support/stream.h"
+#include "support/task_pool.h"
+
+class PreflateReencoderTask {  // Decodes one meta block (decodeAndRepredict) and then re-encodes it (reencode) through a Handler.
+public:
+  class Handler {  // callbacks through which the task starts/finishes decoding and reports progress
+  public:
+    virtual ~Handler() {}
+    virtual bool beginDecoding(const uint32_t metaBlockId, 
+                               PreflatePredictionDecoder&, PreflateParameters&) = 0;
+    virtual bool endDecoding(const uint32_t metaBlockId, PreflatePredictionDecoder&,
+                             std::vector<PreflateTokenBlock>&& tokenData,
+                             std::vector<uint8_t>&& uncompressedData, 
+                             const size_t uncompressedOffset,
+                             const size_t paddingBitCount,
+                             const size_t paddingValue) = 0;
+    virtual void markProgress() = 0;
+  };
+
+  PreflateReencoderTask(Handler& handler,
+                        const uint32_t metaBlockId,
+                        std::vector<uint8_t>&& uncompressedData,
+                        const size_t uncompressedOffset,
+                        const bool lastMetaBlock);
+
+  bool decodeAndRepredict();  // fills tokenData and the padding fields; false on failure
+  bool reencode();  // passes the decoded data to Handler::endDecoding
+
+  uint32_t id() const {  // pure accessor, so it is callable on const tasks as well
+    return metaBlockId;
+  }
+
+private:
+  Handler& handler;
+  uint32_t metaBlockId;
+  std::vector<uint8_t> uncompressedData;
+  size_t uncompressedOffset;
+  bool lastMetaBlock;
+  std::vector<PreflateTokenBlock> tokenData;
+  PreflatePredictionDecoder pcodec;
+  size_t paddingBitCount = 0;  // defined even if decodeAndRepredict() failed or was never called
+  size_t paddingBits = 0;
+};
+
+bool preflate_reencode(std::vector<unsigned char>& deflate_raw,  // in-memory variant: the re-created deflate stream is stored in deflate_raw
+                       const std::vector<unsigned char>& preflate_diff,
+                       const std::vector<unsigned char>& unpacked_input);
+
+bool preflate_reencode(OutputStream& os,  // streaming variant: uncompressed data is read from unpacked_input, output goes to os
+                       const std::vector<unsigned char>& preflate_diff,
+                       InputStream& unpacked_input,
+                       const uint64_t unpacked_size,
+                       std::function<void(void)> block_callback);  // invoked as a progress notification while blocks are processed
+
+bool preflate_reencode(OutputStream& os,  // like the streaming variant, with the uncompressed data supplied as a vector
+                       const std::vector<unsigned char>& preflate_diff,
+                       const std::vector<unsigned char>& unpacked_input,
+                       std::function<void(void)> block_callback);
+
+#endif /* PREFLATE_REENCODER_H */
--- a/contrib/preflate/preflate_seq_chain.cpp
+++ b/contrib/preflate/preflate_seq_chain.cpp
@@ -0,0 +1,140 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include <algorithm>
+#include <string.h>
+#include "preflate_constants.h"
+#include "preflate_seq_chain.h"
+
+PreflateSeqChain::PreflateSeqChain(const std::vector<unsigned char>& input_)
+  : _input(input_)
+  , prev(new SeqChainEntry[1 << 16])  // entries are left uninitialised by this allocation
+  , totalShift(-8)  // wraps around: prev[refPos - totalShift] is prev[refPos + 8]
+  , curPos(0) {
+  std::fill(heads, heads + sizeof(heads) / sizeof(heads[0]), uint16_t(0));
+  const uint32_t firstBatch = std::min<uint32_t>((1 << 16) - 8, _input.remaining());
+  _build(8, firstBatch);
+}
+PreflateSeqChain::~PreflateSeqChain() {  // frees the entry table allocated in the constructor
+  delete[] prev;
+}
+
+void PreflateSeqChain::_reshift() {  // Slides the table down by `delta` entries and indexes further input into the freed tail.
+  const unsigned short delta = 0x7e00;  // number of entries dropped from the front
+  unsigned remaining = (1 << 16) - (delta + 8);  // entries that survive the shift
+  // If the head of large sequence is shifted out,
+  // but the tail remains in the cache, 
+  // we need to adapt the head and all pointers to it,
+  // that is all members, the next non-member pointing to it
+  // or heads
+  if (prev[delta + 8].distToNext != 0xffff && prev[delta + 8].length < PreflateConstants::MIN_MATCH) {
+    unsigned d = prev[delta + 8].distToNext;  // distance from the first surviving entry back to its run head
+    prev[delta + 8].distToNext = 0xffff;
+    prev[delta + 8].length = prev[delta + 8 - d].length - d;  // first surviving entry becomes the head of the remaining run
+    for (unsigned i = 3; i < prev[delta + 8].length; ++i) {
+      prev[delta + 8 + i - 2].distToNext -= d;  // re-point the following run members at the new head
+    }
+    uint8_t c = *_input.curChars(-(int)remaining);  // byte value of that run
+    if (heads[c] == delta + 8 - d) { 
+      heads[c] += d;
+    } else {
+      for (unsigned i = prev[delta + 8].length; i < remaining;  ++i) {
+        if (prev[delta + 8 + i].distToNext == i + d) {  // later entry that linked to the old head
+          prev[delta + 8 + i].distToNext -= d;
+          break;
+        }
+      }
+    }
+  }
+  for (unsigned i = 0; i < 256; ++i) {
+    heads[i] = max(heads[i], delta) - delta;  // rebase; heads below delta clamp to 0
+  }
+  memmove(prev + 8, prev + (delta + 8), sizeof(SeqChainEntry) * remaining);
+  totalShift += delta;
+  _build(8 + remaining, std::min<uint32_t>(delta, _input.remaining()));  // index up to delta more bytes behind the surviving entries
+}
+void PreflateSeqChain::_build(const unsigned off0, const unsigned size) {  // Indexes the next `size` input bytes into prev[off0 ...] and advances _input past them.
+  if (!size) {
+    return;
+  }
+  const unsigned char* b = _input.curChars();
+  uint8_t curChar = b[0];
+  SeqChainEntry startOfSeq = {0xffff, 0x0}, *ptrToFirstOfSeq;  // template for a position that starts a new (potential) run: no link, length 0
+  unsigned startOff = off0;  // table index of the head of the run currently being extended
+  prev[off0] = startOfSeq;
+  if (off0 > 8 && curChar == b[-1]) {  // not the first block, and the first byte repeats the byte before it
+    --startOff;
+    // new block continues the old
+    if (curChar == b[-2]) {
+      --startOff;
+      // this is definitely a sequence
+      if (curChar == b[-3]) {
+        // This was already a sequence in the previous block,
+        // just append
+        startOff = heads[curChar];
+        prev[off0 - 2].distToNext = off0 - startOff - 2;
+        prev[off0 - 1].distToNext = off0 - startOff - 1;
+        prev[off0].distToNext = off0 - startOff;
+        prev[off0].length = 1;
+      } else {
+        // Otherwise enter the sequence in the books
+        prev[startOff].distToNext = startOff - heads[curChar];
+        prev[startOff + 1].distToNext = 1;
+        prev[startOff + 2].distToNext = 2;
+        prev[startOff + 2].length = 1;
+        heads[curChar] = startOff;
+      }
+    } else {
+      prev[startOff + 1].distToNext = 1;
+      prev[startOff + 1].length = 1;
+    }
+  }
+  ptrToFirstOfSeq = &prev[startOff];
+  ++ptrToFirstOfSeq->length;  // count the first byte of this block into its run
+
+  uint8_t prevChar = curChar;
+  for (unsigned i = 1; i < size; ++i) {
+    curChar = b[i];
+    if (prevChar == curChar) {
+      if (++ptrToFirstOfSeq->length == 3) {  // the run has just reached length 3: link its head into the chain for this byte value
+        prev[startOff].distToNext = startOff - heads[prevChar];
+        heads[prevChar] = startOff;
+      }
+      prev[off0 + i].distToNext = off0 + i - startOff;  // run member: links back to its run head
+      prev[off0 + i].length = 1;
+    } else {
+      // Last two of a sequence are not a sequence themselves
+      if (ptrToFirstOfSeq->length >= 2) {
+        if (ptrToFirstOfSeq->length >= 3) {
+          prev[off0 + i - 2].distToNext = 0xffff;
+        }
+        prev[off0 + i - 1].distToNext = 0xffff;
+      }
+      prev[off0 + i] = startOfSeq;
+      startOff = off0 + i;
+      ptrToFirstOfSeq = &prev[startOff];
+      ++ptrToFirstOfSeq->length;
+    }
+    prevChar = curChar;
+  }
+  // Last two of a sequence are not a sequence themselves
+  if (ptrToFirstOfSeq->length >= 2) {
+    if (ptrToFirstOfSeq->length >= 3) {
+      prev[off0 + size - 2].distToNext = 0xffff;
+    }
+    prev[off0 + size - 1].distToNext = 0xffff;
+  }
+  _input.advance(size);
+}
--- a/contrib/preflate/preflate_seq_chain.h
+++ b/contrib/preflate/preflate_seq_chain.h
@@ -0,0 +1,88 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef PREFLATE_SEQ_CHAIN_H
+#define PREFLATE_SEQ_CHAIN_H
+
+#include <algorithm>
+#include "preflate_input.h"
+
+struct SeqChainEntry {  // one table slot per input position, filled by PreflateSeqChain::_build
+  uint16_t distToNext;  // distance back to the linked entry; 0xffff means no link
+  uint16_t length;  // run length at a run head; _build stores 1 for the other bytes of a run
+};
+
+// Cursor over the run chain: starts at the entry linked from refPos and follows
+// distToNext links backwards. dist() is the accumulated distance from refPos.
+struct PreflateSeqIterator {
+  const SeqChainEntry* chain;
+  const unsigned refPos;
+  unsigned curDist;
+
+  PreflateSeqIterator(const SeqChainEntry* chain_, const unsigned refPos_)
+    : chain(chain_), refPos(refPos_), curDist(chain_[refPos_].distToNext) {}
+
+  // True while the accumulated distance does not exceed refPos - 8.
+  bool valid() const {
+    return refPos - 8 >= curDist;
+  }
+  bool operator !() const {
+    return refPos - 8 < curDist;
+  }
+  unsigned dist() const {
+    return curDist;
+  }
+  uint16_t len() const {
+    const SeqChainEntry& entry = chain[refPos - curDist];
+    return entry.length;
+  }
+  // Follows the current entry's link; returns whether the new position is valid.
+  bool next() {
+    curDist += chain[refPos - curDist].distToNext;
+    return valid();
+  }
+};
+
+struct PreflateSeqChain {  // Table of byte runs ("sequences") over a sliding part of the input.
+  PreflateInput _input;  // read cursor used by _build to index further input
+  SeqChainEntry* prev;  // entry table, allocated in the constructor and freed in the destructor
+  unsigned totalShift;  // input position refPos is stored at prev[refPos - totalShift]
+  unsigned curPos;  // position advanced by updateSeq
+  uint16_t heads[256];  // per byte value: table index of the most recent run head linked into the chain
+
+  PreflateSeqChain(const std::vector<unsigned char>& input_);
+  ~PreflateSeqChain();
+
+  bool valid(const unsigned refPos) const {  // true if the entry for refPos has a link
+    return prev[refPos - totalShift].distToNext != 0xffff;
+  }
+  uint16_t len(const unsigned refPos) const {  // length stored for refPos
+    return prev[refPos - totalShift].length;
+  }
+  PreflateSeqIterator iterateFromPos(const unsigned refPos) const {  // iterator starting at the entry linked from refPos
+    return PreflateSeqIterator(prev, refPos - totalShift);
+  }
+  void updateSeq(const unsigned l) {  // advances curPos by l and shifts the table while curPos's index is 0xfe08 or more
+    curPos += l;
+    while (curPos - totalShift >= 0xfe08) {
+      _reshift();
+    }
+  }
+
+private:
+  void _reshift();
+  void _build(const unsigned off0, const unsigned size);
+};
+
+#endif /* PREFLATE_SEQ_CHAIN_H */
--- a/contrib/preflate/preflate_statistical_codec.cpp
+++ b/contrib/preflate/preflate_statistical_codec.cpp
@@ -0,0 +1,795 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include <algorithm>
+#include <string.h>
+#include "preflate_parameter_estimator.h"
+#include "preflate_statistical_codec.h"
+#include "preflate_statistical_model.h"
+#include "support/array_helper.h"
+#include "support/bit_helper.h"
+#include <stdint.h>
+
+// Builds the model from per-symbol occurrence counts.
+//   arr    - N counts, indexed by symbol id
+//   defval - symbol treated as the default when deciding isDefault
+//   prec   - not used by this implementation
+// Symbols are ordered by ascending count (ties broken by id); ids[] maps
+// rank -> symbol and rids[] maps symbol -> rank. bounds[] then receives the
+// cumulative bounds scaled to 1 << 16. Each step is truncated to its top
+// 5 significant bits, and every rank with a non-zero count gets a step of
+// at least 1.
+template <unsigned N>
+void PreflateSubModel<N>::build_impl(const unsigned* arr, const unsigned defval, const uint8_t prec) {
+  if (N == 0) {
+    isDefault = true;
+    return;
+  }
+  for (unsigned i = 0; i < N; ++i) {
+    ids[i] = i;
+  }
+  std::sort(ids, ids + N, [=](unsigned i1, unsigned i2) {
+    if (arr[i1] != arr[i2]) {
+      return arr[i1] < arr[i2];
+    }
+    return i1 < i2;
+  });
+  for (unsigned i = 0; i < N; ++i) {
+    bounds[i] = arr[ids[i]];
+    rids[ids[i]] = i;
+  }
+  // The loop below peeks at bounds[i + 1] before overwriting it. For
+  // i == N - 1 that is bounds[N], which nothing has written yet; give it a
+  // defined value so no indeterminate value is read.
+  bounds[N] = 0;
+  unsigned sum = sumArray(bounds, N), acc, prev;
+  prev = bounds[0];
+  bounds[0] = acc = 0;
+  for (unsigned i = 0; i < N; ++i) {
+    if (prev) {
+      acc += prev;
+      prev = bounds[i + 1];
+      // Ideal step from the previous bound to the exact scaled cumulative sum.
+      int diff = (((uint64_t)acc) << 16) / sum - bounds[i];
+      unsigned diff_bits = bitLength(diff);
+      const unsigned k = 5;
+      if (diff > 0 && diff_bits > k) {
+        // Keep only the k most significant bits of the step.
+        diff = diff & (((1 << k) - 1) << (diff_bits - k));
+      }
+      //        bounds[i + 1] = (((uint64_t)acc) << 16) / sum;
+      bounds[i + 1] = bounds[i] + diff;
+      if (bounds[i + 1] <= bounds[i]) {
+        // A rank with a non-zero count must keep a non-empty interval.
+        bounds[i + 1] = bounds[i] + 1;
+      }
+    } else {
+      // Zero count: the rank gets an empty interval.
+      prev = bounds[i + 1];
+      bounds[i + 1] = bounds[i];
+    }
+  }
+  if (bounds[N] > 0) {
+    bounds[N] = 1 << 16;
+  }
+  isDefault = N == 0 || bounds[N] == 0 || (bounds[N - 1] == 0 && ids[N - 1] == defval);
+
+  build_scale_down();
+}
+// Sets up the model in which the whole range [0, 0x10000) belongs to the
+// last rank, and that rank is mapped to defval.
+template <unsigned N>
+void PreflateSubModel<N>::buildDefault(const unsigned defval) {
+  isDefault = true;
+  if (N == 0) {
+    return;
+  }
+  // All bounds below the top one are zero, in both tables.
+  for (unsigned i = 0; i < N; ++i) {
+    bounds[i] = 0;
+    scaledDownBounds[i] = 0;
+  }
+  bounds[N] = 0x10000;
+  const unsigned lastRank = N - 1;
+  ids[lastRank] = defval;
+  rids[defval] = lastRank;
+  build_scale_down();
+}
+// Derives scaledDownBounds/scaleDownBits from bounds by dropping the low
+// bits that are zero in every bound, and updates isFixed.
+template <unsigned N>
+void PreflateSubModel<N>::build_scale_down() {
+  // Bits 16 and above are pre-set, so the trailing-zero count is at most 16.
+  unsigned combined = ~0xFFFFu;
+  for (unsigned i = 0; i <= N; ++i) {
+    combined |= bounds[i];
+  }
+  const unsigned commonZeroBits = bitTrailingZeroes(combined);
+  for (unsigned i = 0; i <= N; ++i) {
+    scaledDownBounds[i] = bounds[i] >> commonZeroBits;
+  }
+  scaleDownBits = (16 - commonZeroBits);
+
+  // The model is fixed when everything below the last rank is empty.
+  isFixed = (bounds[N - 1] == 0);
+}
+
+
+// Writes val as a 4-bit (bit length - 1) followed by the bits below the
+// leading one: at most 4 of them, so larger values lose their low bits.
+static void encodeProb(ArithmeticEncoder& codec, const unsigned val) {
+  const unsigned width = bitLength(val);
+  // shift
+  codec.encodeBits(width - 1, 4);
+  // precision
+  if (width < 5) {
+    codec.encodeBits(val & ~(1 << (width - 1)), width - 1);
+    return;
+  }
+  codec.encodeBits((val >> (width - 5)) & 0xf, 4);
+}
+// Writes id using bitLength(count - 1) bits.
+static void encodeId(ArithmeticEncoder& codec,
+                     const unsigned id, const unsigned count) {
+  codec.encodeBits(id, bitLength(count - 1));
+}
+// Counterpart of encodeProb(): reads the 4-bit shift and the precision bits
+// and rebuilds the value with its leading one restored.
+static unsigned decodeProb(ArithmeticDecoder& codec) {
+  // shift
+  const unsigned width = codec.decodeBits(4) + 1;
+  // precision
+  if (width < 5) {
+    return codec.decodeBits(width - 1) | (1 << (width - 1));
+  }
+  return (codec.decodeBits(4) | 0x10) << (width - 5);
+}
+// Counterpart of encodeId(): reads an id of bitLength(count - 1) bits.
+static unsigned decodeId(ArithmeticDecoder& codec, const unsigned count) {
+  return codec.decodeBits(bitLength(count - 1));
+}
+
+// Serializes the model: the number of leading zero bounds (counted from
+// index 1), then the bound differences for the remaining ranks, then the
+// symbol ids of the ranks starting at that count.
+template <unsigned N>
+void PreflateSubModel<N>::write(ArithmeticEncoder& codec, const uint8_t) const {
+  unsigned zeros = 0;
+  while (zeros + 1 < N && !bounds[zeros + 1]) {
+    ++zeros;
+  }
+  codec.encodeBits(zeros, bitLength(N - 1));
+  // Transmit values
+  for (unsigned rank = zeros + 1; rank < N; ++rank) {
+    const unsigned step = bounds[rank] - bounds[rank - 1];
+    encodeProb(codec, step);
+  }
+  // Transmit ids
+  for (unsigned rank = zeros; rank < N; ++rank) {
+    encodeId(codec, ids[rank], N);
+  }
+}
+// Deserializes a model written by write(): the count of leading zero
+// bounds, the bound differences, then the symbol ids per rank.
+// The stream is untrusted, so decoded ids are forced into [0, N).
+template <unsigned N>
+void PreflateSubModel<N>::read(ArithmeticDecoder& codec, const uint8_t) {
+  unsigned zeros = codec.decodeBits(bitLength(N - 1));
+  memset(bounds, 0, sizeof(bounds));
+  // Receive values
+  for (unsigned i = 1 + zeros; i < N; ++i) {
+    bounds[i] = decodeProb(codec) + bounds[i - 1];
+  }
+  bounds[N] = 1 << 16;
+  // Receive ids
+  for (unsigned i = zeros; i < N; ++i) {
+    // decodeId() reads bitLength(N - 1) bits, so when N is not a power of
+    // two a corrupt stream can deliver an id >= N. Clamp it: the id is used
+    // as an index into rids[] right below.
+    const unsigned id = decodeId(codec, N);
+    ids[i] = id < N ? id : N - 1;
+    rids[ids[i]] = i;
+  }
+  build_scale_down();
+}
+
+// Builds the three sub-models of a signed correction from one count array.
+// arr holds NEG counts, then the count at index NEG, then POS counts.
+// The sign model gets {arr[NEG], sum of the POS part, sum of the NEG part},
+// matching sign ids 0 (defval == 0), 1 (defval > 0) and 2 (defval < 0).
+template <unsigned NEG, unsigned POS>
+void PreflateCorrectionSubModel<NEG, POS>::build_impl(const unsigned* arr, const int defval, const uint8_t prec) {
+  unsigned signArr[3] = {arr[NEG], sumArray(arr + NEG + 1, POS), sumArray(arr, NEG)};
+  const unsigned signDefault = defval == 0 ? 0 : (defval > 0 ? 1 : 2);
+  sign.build_impl(signArr, signDefault, prec);
+
+  // Positive part, in array order.
+  unsigned posArr[POS + 1];
+  for (unsigned k = 0; k < POS; ++k) {
+    posArr[k] = arr[NEG + 1 + k];
+  }
+  const unsigned posDefault = defval > 0 && defval <= POS ? defval - 1 : POS - 1;
+  pos.build_impl(posArr, posDefault, prec);
+
+  // Negative part, mirrored so that index 0 is the entry next to arr[NEG].
+  unsigned negArr[NEG + 1];
+  for (unsigned k = 0; k < NEG; ++k) {
+    negArr[k] = arr[NEG - 1 - k];
+  }
+  const unsigned negDefault = -defval > 0 && -defval <= NEG ? -defval - 1 : NEG - 1;
+  neg.build_impl(negArr, negDefault, prec);
+
+  isDefault = sign.isDefault && pos.isDefault && neg.isDefault;
+}
+
+// Builds the default sign/pos/neg sub-models for the default correction
+// value defval.
+// The parameter is unsigned, but build_impl() takes a signed default. A
+// negative default therefore arrives here wrapped around. It is interpreted
+// as signed so that both functions pick the same sub-model defaults.
+// Behaviour for defval == 0 and for positive values is unchanged.
+template <unsigned NEG, unsigned POS>
+void PreflateCorrectionSubModel<NEG, POS>::buildDefault(const unsigned defval) {
+  const int signedDef = static_cast<int>(defval);
+  // Magnitude computed in unsigned arithmetic to avoid negating INT_MIN.
+  const unsigned magnitude = signedDef < 0 ? 0u - defval : defval;
+  sign.buildDefault(signedDef == 0 ? 0 : (signedDef > 0 ? 1 : 2));
+  pos.buildDefault(signedDef > 0 && magnitude <= POS ? magnitude - 1 : POS - 1);
+  neg.buildDefault(signedDef < 0 && magnitude <= NEG ? magnitude - 1 : NEG - 1);
+  isDefault = sign.isDefault && pos.isDefault && neg.isDefault;
+}
+// Serializes the sign model, then the pos and neg models when their size is
+// non-zero.
+template <unsigned NEG, unsigned POS>
+void PreflateCorrectionSubModel<NEG, POS>::write(ArithmeticEncoder& codec, const uint8_t prec) const {
+  sign.write(codec, prec);
+  if (POS != 0) {
+    pos.write(codec, prec);
+  }
+  if (NEG != 0) {
+    neg.write(codec, prec);
+  }
+}
+// Deserializes the sign model, then the pos and neg models when their size
+// is non-zero (same order as write()).
+template <unsigned NEG, unsigned POS>
+void PreflateCorrectionSubModel<NEG, POS>::read(ArithmeticDecoder& codec, const uint8_t prec) {
+  sign.read(codec, prec);
+  if (POS != 0) {
+    pos.read(codec, prec);
+  }
+  if (NEG != 0) {
+    neg.read(codec, prec);
+  }
+}
+
+// -------------------------------------
+
+// Starts with neither an encoder nor a decoder stream attached.
+PreflateBaseModel::PreflateBaseModel() 
+  : encoder(nullptr), decoder(nullptr) {}
+
+// Stores the encoder pointer used by writeSubModel(); the pointer is not
+// deleted anywhere in this file by the model.
+void PreflateBaseModel::setEncoderStream(ArithmeticEncoder* codec_) {
+  encoder = codec_;
+}
+// Stores the decoder pointer used by readSubModel(); the pointer is not
+// deleted anywhere in this file by the model.
+void PreflateBaseModel::setDecoderStream(ArithmeticDecoder* codec_) {
+  decoder = codec_;
+}
+
+// Restores one sub-model from the decoder stream. When the whole group is
+// default nothing is read; otherwise a flag decoded with cc.nonDefaultValue
+// selects between the default model (0) and an explicitly stored one.
+template <unsigned N>
+void PreflateBaseModel::readSubModel(PreflateSubModel<N>& sm, const bool isFullDef, const PreflateModelCodec& cc,
+                  const unsigned defVal, const uint8_t prec) {
+  // Short-circuit keeps the flag from being read for a fully default group.
+  const bool useDefault = isFullDef || cc.nonDefaultValue.decode(*decoder) == 0;
+  if (!useDefault) {
+    sm.read(*decoder, prec);
+    return;
+  }
+  sm.buildDefault(defVal);
+}
+
+// Correction-model overload: restores one sub-model from the decoder
+// stream. When the whole group is default nothing is read; otherwise a flag
+// decoded with cc.nonDefaultValue selects between the default model (0) and
+// an explicitly stored one.
+template <unsigned N, unsigned M>
+void PreflateBaseModel::readSubModel(PreflateCorrectionSubModel<N, M>& sm, const bool isFullDef, const PreflateModelCodec& cc,
+                  const unsigned defVal, const uint8_t prec) {
+  // Short-circuit keeps the flag from being read for a fully default group.
+  const bool useDefault = isFullDef || cc.nonDefaultValue.decode(*decoder) == 0;
+  if (!useDefault) {
+    sm.read(*decoder, prec);
+    return;
+  }
+  sm.buildDefault(defVal);
+}
+
+// Stores one sub-model in the encoder stream. Nothing is written for a
+// fully default group; otherwise a "non-default" flag is written, followed
+// by the model itself when the flag is set. defVal is not used here.
+template <unsigned N>
+void PreflateBaseModel::writeSubModel(const PreflateSubModel<N>& sm, const bool isFullDef, const PreflateModelCodec& cc,
+                   const unsigned defVal, const uint8_t prec) {
+  if (!isFullDef) {
+    const bool nonDefault = !sm.isDefault;
+    cc.nonDefaultValue.encode(*encoder, nonDefault);
+    if (nonDefault) {
+      sm.write(*encoder, prec);
+    }
+  }
+}
+
+// Correction-model overload: stores one sub-model in the encoder stream.
+// Nothing is written for a fully default group; otherwise a "non-default"
+// flag is written, followed by the model itself when the flag is set.
+// defVal is not used here.
+template <unsigned N, unsigned M>
+void PreflateBaseModel::writeSubModel(const PreflateCorrectionSubModel<N, M>& sm, const bool isFullDef, const PreflateModelCodec& cc,
+                   const unsigned defVal, const uint8_t prec) {
+  if (!isFullDef) {
+    const bool nonDefault = !sm.isDefault;
+    cc.nonDefaultValue.encode(*encoder, nonDefault);
+    if (nonDefault) {
+      sm.write(*encoder, prec);
+    }
+  }
+}
+
+// Builds the block-level sub-models from collected statistics.
+void PreflateBlockPredictionModel::read(const PreflateStatisticsCounter::BlockPrediction& blockModel, const PreflateModelCodec& cc) {
+  const auto prec = cc.MBprecision;
+  const auto precP1 = cc.MBprecisionP1;
+  blockType.build(blockModel.blockType, PreflateTokenBlock::DYNAMIC_HUFF, prec);
+  EOBMisprediction.build(blockModel.EOBMisprediction, 0, prec);
+  nonZeroPadding.build(blockModel.nonZeroPadding, 0, precP1);
+}
+// Restores the block-level sub-models from the attached decoder stream, in
+// the order writeToStream() stores them.
+void PreflateBlockPredictionModel::readFromStream(const PreflateModelCodec& cc) {
+  const bool allDefault = cc.blockFullDefault;
+  const auto prec = cc.MBprecision;
+  const auto precP1 = cc.MBprecisionP1;
+  readSubModel(blockType, allDefault, cc, PreflateTokenBlock::DYNAMIC_HUFF, prec);
+  readSubModel(EOBMisprediction, allDefault, cc, 0, prec);
+  readSubModel(nonZeroPadding, allDefault, cc, 0, precP1);
+}
+// Stores the block-level sub-models in the attached encoder stream.
+void PreflateBlockPredictionModel::writeToStream(const PreflateModelCodec& cc) {
+  const bool allDefault = cc.blockFullDefault;
+  const auto prec = cc.MBprecision;
+  const auto precP1 = cc.MBprecisionP1;
+  writeSubModel(blockType, allDefault, cc, PreflateTokenBlock::DYNAMIC_HUFF, prec);
+  writeSubModel(EOBMisprediction, allDefault, cc, 0, prec);
+  writeSubModel(nonZeroPadding, allDefault, cc, 0, precP1);
+}
+
+// Builds the tree-code sub-models from collected statistics and then
+// derives the per-type replacement models.
+void PreflateTreeCodePredictionModel::read(const PreflateStatisticsCounter::TreeCodePrediction& treecodeModel, const PreflateModelCodec& cc) {
+  const auto prec = cc.MBprecision;
+  TCCountMisprediction.build(treecodeModel.TCCountMisprediction, 0, prec);
+  LCountMisprediction.build(treecodeModel.LCountMisprediction, 0, prec);
+  DCountMisprediction.build(treecodeModel.DCountMisprediction, 0, prec);
+  for (unsigned type = 0; type != 4; ++type) {
+    LDTypeMisprediction[type].build(treecodeModel.LDTypeMisprediction[type], 0);
+  }
+  LDTypeReplacementBase.build(treecodeModel.LDTypeReplacement, 0);
+  TCBitlengthCorrection.build(treecodeModel.TCBitlengthCorrection, 0);
+  LDBitlengthCorrection.build(treecodeModel.LDBitlengthCorrection, 0);
+  LDRepeatCountCorrection.build(treecodeModel.LDRepeatCountCorrection, 0);
+
+  deriveLDTypeReplacement();
+}
+// Restores the tree-code sub-models from the attached decoder stream, in
+// the order writeToStream() stores them, then derives the per-type
+// replacement models.
+void PreflateTreeCodePredictionModel::readFromStream(const PreflateModelCodec& cc) {
+  const bool allDefault = cc.treecodeFullDefault;
+  const auto prec = cc.MBprecision;
+  readSubModel(TCCountMisprediction, allDefault, cc, 0, prec);
+  readSubModel(LCountMisprediction, allDefault, cc, 0, prec);
+  readSubModel(DCountMisprediction, allDefault, cc, 0, prec);
+  for (unsigned type = 0; type != 4; ++type) {
+    readSubModel(LDTypeMisprediction[type], allDefault, cc, 0);
+  }
+  readSubModel(LDTypeReplacementBase, allDefault, cc, 0);
+  readSubModel(TCBitlengthCorrection, allDefault, cc, 0);
+  readSubModel(LDBitlengthCorrection, allDefault, cc, 0);
+  readSubModel(LDRepeatCountCorrection, allDefault, cc, 0);
+
+  deriveLDTypeReplacement();
+}
+// Stores the tree-code sub-models in the attached encoder stream.
+void PreflateTreeCodePredictionModel::writeToStream(const PreflateModelCodec& cc) {
+  const bool allDefault = cc.treecodeFullDefault;
+  const auto prec = cc.MBprecision;
+  writeSubModel(TCCountMisprediction, allDefault, cc, 0, prec);
+  writeSubModel(LCountMisprediction, allDefault, cc, 0, prec);
+  writeSubModel(DCountMisprediction, allDefault, cc, 0, prec);
+  for (unsigned type = 0; type != 4; ++type) {
+    writeSubModel(LDTypeMisprediction[type], allDefault, cc, 0);
+  }
+  writeSubModel(LDTypeReplacementBase, allDefault, cc, 0);
+  writeSubModel(TCBitlengthCorrection, allDefault, cc, 0);
+  writeSubModel(LDBitlengthCorrection, allDefault, cc, 0);
+  writeSubModel(LDRepeatCountCorrection, allDefault, cc, 0);
+}
+// Rebuilds DerivedLDTypeReplacement[0..3] from LDTypeReplacementBase and the
+// four LDTypeMisprediction models. For type i the result is the default
+// model for i when the second misprediction weight is zero; otherwise it is
+// built from the base weights combined with the two misprediction weights.
+// NOTE(review): arr_mp[0]/arr_mp[1] presumably are the "predicted correctly"
+// and "mispredicted" weights, as the hit/miss names suggest - confirm.
+void PreflateTreeCodePredictionModel::deriveLDTypeReplacement() {
+  unsigned arr[4], arr_mp[2], miss[4], hit[4], sumhit;
+  LDTypeReplacementBase.extract(arr);
+  for (unsigned i = 0; i < 4; ++i) {
+    LDTypeMisprediction[i].extract(arr_mp);
+    if (arr_mp[1] == 0) {
+      DerivedLDTypeReplacement[i].buildDefault(i);
+    } else {
+      // With a zero first weight only the ratios between the miss entries
+      // matter, so the second weight is reduced to 1.
+      if (arr_mp[0] == 0) {
+        arr_mp[1] = 1;
+      }
+      sumhit = 0;
+      for (unsigned j = 0; j < 4; ++j) {
+        hit[j] = arr[j] * arr_mp[0];
+        miss[j] = arr[j] * arr_mp[1];
+        sumhit += hit[j];
+      }
+      // The slot of type i itself receives the hit weights of all other types.
+      miss[i] = sumhit - hit[i];
+      // Avoid the sum of all entries to exceed 32bit
+      // (divide by 16, but never let a non-zero entry drop to zero)
+      for (unsigned j = 0; j < 4; ++j) {
+        if (miss[j] > 0 && miss[j] < 16) {
+          miss[j] = 1;
+        } else {
+          miss[j] >>= 4;
+        }
+      }
+      DerivedLDTypeReplacement[i].build(miss, i);
+    }
+  }
+}
+
+
+// Builds the token-level sub-models from collected statistics. cc is not
+// used here.
+void PreflateTokenPredictionModel::read(const PreflateStatisticsCounter::TokenPrediction& tokenModel, const PreflateModelCodec& cc) {
+  const PreflateStatisticsCounter::TokenPrediction& stats = tokenModel;
+  LITMisprediction.build(stats.LITMisprediction, 0);
+  REFMisprediction.build(stats.REFMisprediction, 0);
+  LENCorrection.build(stats.LENCorrection, 0);
+  DISTAfterLenCorrection.build(stats.DISTAfterLenCorrection, 0);
+  DISTOnlyCorrection.build(stats.DISTOnlyCorrection, 0);
+  IrregularLen258Encoding.build(stats.LEN258IrregularEncoding, 0);
+}
+// Restores the token-level sub-models from the attached decoder stream, in
+// the order writeToStream() stores them.
+void PreflateTokenPredictionModel::readFromStream(const PreflateModelCodec& cc) {
+  const bool allDefault = cc.tokenFullDefault;
+  readSubModel(LITMisprediction, allDefault, cc, 0);
+  readSubModel(REFMisprediction, allDefault, cc, 0);
+  readSubModel(LENCorrection, allDefault, cc, 0);
+  readSubModel(DISTAfterLenCorrection, allDefault, cc, 0);
+  readSubModel(DISTOnlyCorrection, allDefault, cc, 0);
+  readSubModel(IrregularLen258Encoding, allDefault, cc, 0);
+}
+// Stores the token-level sub-models in the attached encoder stream.
+void PreflateTokenPredictionModel::writeToStream(const PreflateModelCodec& cc) {
+  const bool allDefault = cc.tokenFullDefault;
+  writeSubModel(LITMisprediction, allDefault, cc, 0);
+  writeSubModel(REFMisprediction, allDefault, cc, 0);
+  writeSubModel(LENCorrection, allDefault, cc, 0);
+  writeSubModel(DISTAfterLenCorrection, allDefault, cc, 0);
+  writeSubModel(DISTOnlyCorrection, allDefault, cc, 0);
+  writeSubModel(IrregularLen258Encoding, allDefault, cc, 0);
+}
+
+
+// Does nothing itself; the members are default-constructed.
+PreflatePredictionModel::PreflatePredictionModel() {}
+// Nothing to release here.
+PreflatePredictionModel::~PreflatePredictionModel() {}
+
+// Builds the block, treecode and token models from the matching parts of
+// the collected statistics.
+void PreflatePredictionModel::read(const PreflateStatisticsCounter& model, const PreflateModelCodec& cc) {
+  block.read(model.block, cc);
+  treecode.read(model.treecode, cc);
+  token.read(model.token, cc);
+}
+// Attaches the same encoder to all three sub-models (nullptr detaches).
+void PreflatePredictionModel::setEncoderStream(ArithmeticEncoder* codec) {
+  block.setEncoderStream(codec);
+  treecode.setEncoderStream(codec);
+  token.setEncoderStream(codec);
+}
+// Attaches the same decoder to all three sub-models (nullptr detaches).
+void PreflatePredictionModel::setDecoderStream(ArithmeticDecoder* codec) {
+  block.setDecoderStream(codec);
+  treecode.setDecoderStream(codec);
+  token.setDecoderStream(codec);
+}
+// Restores the models from the attached decoder in the fixed order
+// block, treecode, token - the same order writeToStream() uses.
+void PreflatePredictionModel::readFromStream(const PreflateModelCodec& cc) {
+  block.readFromStream(cc);
+  treecode.readFromStream(cc);
+  token.readFromStream(cc);
+}
+// Stores the models in the attached encoder in the fixed order
+// block, treecode, token - the same order readFromStream() uses.
+void PreflatePredictionModel::writeToStream(const PreflateModelCodec& cc) {
+  block.writeToStream(cc);
+  treecode.writeToStream(cc);
+  token.writeToStream(cc);
+}
+
+// ------------------------------------
+
+// Sets no member; the fields are assigned by initDefault(), read() or
+// readFromStream().
+PreflateModelCodec::PreflateModelCodec() {}
+// Configures the codec for the case where every model group is default:
+// all three "full default" flags set, no models counted, and a
+// nonDefaultValue model built from the counts {1, 0}.
+void PreflateModelCodec::initDefault() {
+  MBprecision = 16;
+  MBprecisionP1 = 16;
+
+  totalModels = 0;
+  defaultingModels = 0;
+  blockFullDefault = true;
+  treecodeFullDefault = true;
+  tokenFullDefault = true;
+
+  unsigned defaultOnly[2] = {1, 0};
+  nonDefaultValue.build(defaultOnly, 0);
+}
+
+// Derives the codec settings from collected statistics. A group (block,
+// treecode, token) counts as "full default" when all of its models are
+// default; only the other groups contribute to totalModels and
+// defaultingModels, from which the nonDefaultValue model is built.
+void PreflateModelCodec::read(const PreflateStatisticsCounter& m) {
+  totalModels = 0;
+  defaultingModels = 0;
+
+  // Records one group and reports whether it is fully default.
+  auto account = [this](const unsigned total, const unsigned defaulting) {
+    const bool fullDefault = total == defaulting;
+    if (!fullDefault) {
+      totalModels += total;
+      defaultingModels += defaulting;
+    }
+    return fullDefault;
+  };
+
+  const unsigned blockTotal = m.block.totalModels();
+  const unsigned blockDefaulting = m.block.checkDefaultModels();
+  blockFullDefault = account(blockTotal, blockDefaulting);
+
+  const unsigned treeTotal = m.treecode.totalModels();
+  const unsigned treeDefaulting = m.treecode.checkDefaultModels();
+  treecodeFullDefault = account(treeTotal, treeDefaulting);
+
+  const unsigned tokenTotal = m.token.totalModels();
+  const unsigned tokenDefaulting = m.token.checkDefaultModels();
+  tokenFullDefault = account(tokenTotal, tokenDefaulting);
+
+  if (totalModels > 0) {
+    unsigned counts[2] = {defaultingModels, totalModels - defaultingModels};
+    nonDefaultValue.build(counts, 0);
+  }
+  MBprecision   = 16;
+  MBprecisionP1 = 16;
+}
+
+// Restores the codec settings written by writeToStream(): the three
+// "full default" flags followed by the number of defaulting models.
+// totalModels is recomputed from the flags rather than read.
+void PreflateModelCodec::readFromStream(ArithmeticDecoder& codec) {
+  blockFullDefault = codec.decodeBits(1); 
+  treecodeFullDefault = codec.decodeBits(1);
+  tokenFullDefault = codec.decodeBits(1);
+  totalModels = 0;
+  if (!blockFullDefault) {
+    totalModels += PreflateStatisticsCounter::BlockPrediction::totalModels();
+  }
+  if (!treecodeFullDefault) {
+    totalModels += PreflateStatisticsCounter::TreeCodePrediction::totalModels();
+  }
+  if (!tokenFullDefault) {
+    totalModels += PreflateStatisticsCounter::TokenPrediction::totalModels();
+  }
+  defaultingModels = PreflateBaseModel::decodeValue(codec, bitLength(totalModels));
+  // The field is bitLength(totalModels) bits wide, so a corrupt stream can
+  // carry a value above totalModels. Clamp it, otherwise the subtraction
+  // below wraps around.
+  if (defaultingModels > totalModels) {
+    defaultingModels = totalModels;
+  }
+
+  if (totalModels) {
+    unsigned arr[2] = {defaultingModels, totalModels - defaultingModels};
+    nonDefaultValue.build(arr, 0);
+  }
+  MBprecision = 16;
+  MBprecisionP1 = 16;
+}
+// Writes the three "full default" flags (one bit each) followed by
+// defaultingModels in bitLength(totalModels) bits. totalModels itself is
+// not written; readFromStream() recomputes it from the flags.
+void PreflateModelCodec::writeToStream(ArithmeticEncoder& codec) {
+  codec.encodeBits(blockFullDefault, 1);
+  codec.encodeBits(treecodeFullDefault, 1);
+  codec.encodeBits(tokenFullDefault, 1);
+  codec.encodeBits(defaultingModels, bitLength(totalModels));
+}
+
+// ------------------------------------
+
+// Starts without any stream objects; start() allocates them.
+PreflatePredictionEncoder::PreflatePredictionEncoder() 
+  : storage(nullptr)
+  , bos(nullptr)
+  , encoder(nullptr)
+{}
+
+// Begins encoding one meta block: copies the model and parameters, records
+// the model id and allocates the memory stream, bit stream and arithmetic
+// encoder, which end() deletes.
+void PreflatePredictionEncoder::start(const PreflatePredictionModel& model_, const PreflateParameters& params_,
+                                      const unsigned modelId_) {
+  PreflatePredictionModel::operator =(model_);
+  params = params_;
+  modelid = modelId_;
+
+  // Each stream wraps the previous one, so the creation order matters.
+  storage = new MemStream;
+  bos = new BitOutputStream(*storage);
+  encoder = new ArithmeticEncoder(*bos);
+  // Must follow the model copy above, which also copies the stream pointers.
+  setEncoderStream(encoder);
+}
+// Finishes the meta block started by start(): flushes and deletes the
+// arithmetic encoder and the bit stream, then returns the bytes collected
+// in the memory stream.
+// All three pointers are reset to nullptr, so a second end() or an end()
+// without a preceding start() returns an empty vector instead of touching
+// freed or null objects.
+std::vector<uint8_t> PreflatePredictionEncoder::end() {
+  setEncoderStream(nullptr);
+
+  std::vector<uint8_t> result;
+  // Tear down in reverse order of construction: each stream writes into
+  // the one below it when flushed.
+  if (encoder) {
+    encoder->flush();
+    delete encoder;
+    encoder = nullptr;
+  }
+  if (bos) {
+    bos->flush();
+    delete bos;
+    bos = nullptr;
+  }
+  if (storage) {
+    result = storage->extractData();
+    delete storage;
+    storage = nullptr;
+  }
+  return result;
+}
+
+// Starts without any stream objects; start() allocates them.
+PreflatePredictionDecoder::PreflatePredictionDecoder()
+  : storage(nullptr)
+  , bis(nullptr)
+  , decoder(nullptr) {}
+
+// Begins decoding one meta block: copies the model and parameters and
+// creates the memory stream over storage_[off0, off0 + size), the bit
+// stream and the arithmetic decoder, which end() deletes.
+void PreflatePredictionDecoder::start(const PreflatePredictionModel& model_, const PreflateParameters& params_,
+                                      const std::vector<uint8_t>& storage_, size_t off0, size_t size) {
+  // Release streams left over from a start() that was never matched by
+  // end(); they would otherwise leak. The pointers are nullptr after
+  // construction and after end(), so this does nothing in the normal case.
+  delete decoder;
+  delete bis;
+  delete storage;
+  decoder = nullptr;
+  bis = nullptr;
+  storage = nullptr;
+
+  PreflatePredictionModel::operator =(model_);
+  params = params_;
+  // Each stream wraps the previous one, so the creation order matters.
+  storage = new MemStream(storage_, off0, size);
+  bis = new BitInputStream(*storage);
+  decoder = new ArithmeticDecoder(*bis);
+  // Must follow the model copy above, which also copies the stream pointers.
+  setDecoderStream(decoder);
+}
+// Finishes the meta block: detaches the decoder from the models, then
+// deletes decoder, bit stream and memory stream and resets the pointers.
+void PreflatePredictionDecoder::end() {
+  setDecoderStream(nullptr);
+
+  delete decoder;
+  decoder = nullptr;
+
+  delete bis;
+  bis = nullptr;
+
+  delete storage;
+  storage = nullptr;
+}
+
+// ------------------------------------
+
+// Starts with the error flag cleared.
+PreflateMetaEncoder::PreflateMetaEncoder()
+  : inError(false) {
+}
+// Nothing to release here.
+PreflateMetaEncoder::~PreflateMetaEncoder() {}
+
+// Registers a model built from the given statistics and parameters and
+// returns its id, which is its index in modelList.
+unsigned PreflateMetaEncoder::addModel(const PreflateStatisticsCounter& counter, const PreflateParameters& params) {
+  const unsigned newId = modelList.size();
+
+  modelType entry;
+  entry.params = params;
+  entry.writtenId = 0;
+  entry.counter = counter;
+  // The codec settings must exist before the model is built from them.
+  entry.mcodec.read(counter);
+  entry.model.read(counter, entry.mcodec);
+
+  modelList.push_back(entry);
+  return newId;
+}
+
+// Starts the encoder with the registered model modelId.
+// Returns false, without touching the encoder, for an unknown id.
+bool PreflateMetaEncoder::beginMetaBlockWithModel(PreflatePredictionEncoder& encoder, const unsigned modelId) {
+  if (modelId < modelList.size()) {
+    const auto& entry = modelList[modelId];
+    encoder.start(entry.model, entry.params, modelId);
+    return true;
+  }
+  return false;
+}
+// Ends the encoder's current meta block, records its model id and sizes in
+// blockList and appends its bytes to reconData.
+// Returns false, without ending the encoder, when its model id is unknown.
+bool PreflateMetaEncoder::endMetaBlock(PreflatePredictionEncoder& encoder, const size_t uncompressed) {
+  if (!(encoder.modelId() < modelList.size())) {
+    return false;
+  }
+  const std::vector<uint8_t> bytes = encoder.end();
+
+  metaBlockInfo info;
+  info.modelId = encoder.modelId();
+  info.reconSize = bytes.size();
+  info.uncompressedSize = uncompressed;
+  blockList.push_back(info);
+
+  reconData.insert(reconData.end(), bytes.begin(), bytes.end());
+  return true;
+}
+// Produces the final reconstruction stream: a bit-packed header describing
+// every meta block and its model, followed by the reconstruction bytes
+// collected by endMetaBlock(). The PreflateMetaDecoder constructor parses
+// this header, so both must agree on the field order.
+std::vector<unsigned char> PreflateMetaEncoder::finish() {
+  MemStream mem;
+  BitOutputStream bos(mem);
+  bos.put(0, 1); // no extension used
+  bos.put(blockList.size() > 1, 1); // 1 or more meta blocks
+  if (blockList.size() > 1) {
+    // The count is stored minus 2, since at least two blocks exist here.
+    bos.putVLI(blockList.size() - 2);
+  }
+  enum Mode {
+    CREATE_NEW_MODEL /*, REUSE_LAST_MODEL, REUSE_PREVIOUS_MODEL*/
+  };
+  for (unsigned i = 0, n = blockList.size(); i < n; ++i) {
+    const metaBlockInfo& mb = blockList[i];
+    Mode mode = CREATE_NEW_MODEL;
+    
+    // The first block has no mode field; every later block gets one.
+    if (i > 0) {
+      bos.put(3, 2); // create new model
+    }
+
+    switch (mode) {
+    case CREATE_NEW_MODEL:
+    {
+      modelType& mt = modelList[mb.modelId];
+      bool perfectZLIB = mt.mcodec.blockFullDefault && mt.mcodec.treecodeFullDefault && mt.mcodec.tokenFullDefault
+        && mt.params.zlibCompatible;
+      bos.put(!perfectZLIB, 1); // 0 = perfect zlib model, 1 = explicit model data follows
+      bos.put(mt.params.compLevel, 4);
+      bos.put(mt.params.memLevel, 4);
+      bos.put(mt.params.windowBits - 8, 3);
+      if (!perfectZLIB) {
+        bos.put(mt.params.zlibCompatible, 1);
+        if (!mt.params.zlibCompatible) {
+          bos.put(mt.params.veryFarMatchesDetected, 1);
+          bos.put(mt.params.matchesToStartDetected, 1);
+        }
+        bos.put(mt.params.log2OfMaxChainDepthM1, 4);
+        // The model is arithmetic-coded into a temporary buffer first,
+        // because its byte length has to precede it in the header.
+        MemStream tmp_data;
+        {
+          BitOutputStream tmp_bos(tmp_data);
+          ArithmeticEncoder tmp_codec(tmp_bos);
+          mt.mcodec.writeToStream(tmp_codec);
+          mt.model.setEncoderStream(&tmp_codec);
+          mt.model.writeToStream(mt.mcodec);
+          mt.model.setEncoderStream(nullptr);
+          tmp_codec.flush();
+          tmp_bos.flush();
+        }
+        std::vector<uint8_t> tmp_res = tmp_data.extractData();
+        // write length (vli) and model data
+        bos.putVLI(tmp_res.size());
+        bos.putBytes(tmp_res.data(), tmp_res.size());
+      }
+      break;
+    }
+    }
+    // for the last block, the size of the reconstruction data and processed uncompressed data
+    // is implicitly going to end of stream
+    // -------------------
+    if (i != n - 1) {
+      bos.putVLI(mb.reconSize);
+      bos.putVLI(mb.uncompressedSize);
+    }
+  }
+  bos.flush();
+  // Header first, then the reconstruction data of all blocks.
+  std::vector<uint8_t> result = mem.extractData();
+  result.insert(result.end(), reconData.begin(), reconData.end());
+  return result;
+}
+
+// Parses the header produced by PreflateMetaEncoder::finish() and fills
+// modelList and blockList, including the start offset and size of every
+// block within reconData and within the uncompressed data.
+// On any problem inError is set and the constructor returns early; the
+// lists may then be only partially filled.
+PreflateMetaDecoder::PreflateMetaDecoder(const std::vector<uint8_t>& reconData_, const uint64_t uncompressedSize_)
+  : inError(false)
+  , reconData(reconData_)
+  , uncompressedSize(uncompressedSize_) {
+  if (reconData.size() == 0) {
+    inError = true;
+    return;
+  }
+  MemStream mem(reconData);
+  BitInputStream bis(mem);
+  // An extension bit of 1 is not supported.
+  bool extension = bis.get(1);
+  if (extension) {
+    inError = true;
+    return;
+  }
+  bool singleBlock = bis.get(1) == 0;
+  size_t blockCount;
+  if (singleBlock) {
+    blockCount = 1;
+  } else {
+    // The encoder stores the count minus 2.
+    blockCount = 2 + bis.getVLI();
+  }
+  enum Mode {
+    CREATE_NEW_MODEL /*, REUSE_LAST_MODEL, REUSE_PREVIOUS_MODEL*/
+  };
+  for (size_t i = 0; i < blockCount; ++i) {
+    metaBlockInfo mb;
+    Mode mode = CREATE_NEW_MODEL;
+
+    // Only blocks after the first carry a mode field.
+    if (i > 0) {
+      if (bis.get(2) != 3) { // must create new model for the moment
+        inError = true;
+        return;
+      }
+    }
+
+    switch (mode) {
+    case CREATE_NEW_MODEL:
+    {
+      modelType mt;
+      // NOTE(review): zero-filling with memset is only well defined if
+      // modelType is trivially copyable - confirm against its declaration.
+      memset(&mt, 0, sizeof(mt));
+      // 0 = perfect zlib model, 1 = explicit model data follows
+      bool perfectZLIB = bis.get(1) == 0;
+      mt.params.compLevel = bis.get(4);
+      mt.params.memLevel = bis.get(4);
+      mt.params.windowBits = bis.get(3) + 8;
+      if (perfectZLIB) {
+        mt.params.zlibCompatible = true;
+        mt.mcodec.blockFullDefault = true;
+        mt.mcodec.treecodeFullDefault = true;
+        mt.mcodec.tokenFullDefault = true;
+        mt.model.readFromStream(mt.mcodec); // initialize with default model
+      } else {
+        mt.params.zlibCompatible = bis.get(1);
+        if (!mt.params.zlibCompatible) {
+          mt.params.veryFarMatchesDetected = bis.get(1);
+          mt.params.matchesToStartDetected = bis.get(1);
+        }
+        mt.params.log2OfMaxChainDepthM1 = bis.get(4);
+        // read length (vli) and model data
+        size_t res_size = bis.getVLI();
+        // interpret model data
+        {
+          // The model bytes are copied out and decoded through their own
+          // bit stream and arithmetic decoder.
+          MemStream tmp_mem;
+          bis.copyBytesTo(tmp_mem, res_size);
+          tmp_mem.seek(0);
+          BitInputStream tmp_bis(tmp_mem);
+          ArithmeticDecoder tmp_codec(tmp_bis);
+          mt.mcodec.readFromStream(tmp_codec);
+          mt.model.setDecoderStream(&tmp_codec);
+          mt.model.readFromStream(mt.mcodec);
+          // Detach before tmp_codec goes out of scope.
+          mt.model.setDecoderStream(nullptr);
+        }
+      }
+      mb.modelId = modelList.size();
+      modelList.push_back(mt);
+      break;
+    }
+    }
+    // for the last block, the size of the reconstruction data and processed uncompressed data
+    // is implicitly going to end of stream
+    // -------------------
+    if (i != blockCount - 1) {
+      mb.reconSize = bis.getVLI();
+      mb.uncompressedSize = bis.getVLI();
+    }
+    blockList.push_back(mb);
+  }
+  bis.skipToByte();
+
+  // Second pass: turn the per-block sizes into start offsets. The
+  // reconstruction data begins at the first byte after the header.
+  size_t reconStart = bis.bitPos() >> 3;
+  uint64_t uncStart = 0;
+  for (size_t i = 0; i < blockCount; ++i) {
+    blockList[i].reconStartOfs = reconStart;
+    blockList[i].uncompressedStartOfs = uncStart;
+    if (i != blockCount - 1) {
+      reconStart += blockList[i].reconSize;
+      uncStart += blockList[i].uncompressedSize;
+      // Sizes from the stream must stay within the available data.
+      if (reconStart > reconData.size() || uncStart > uncompressedSize) {
+        inError = true;
+        return;
+      }
+    } else {
+      // The last block takes whatever remains.
+      blockList[i].reconSize = reconData.size() - blockList[i].reconStartOfs;
+      blockList[i].uncompressedSize = uncompressedSize - blockList[i].uncompressedStartOfs;
+    }
+  }
+}
+// Nothing to release here.
+PreflateMetaDecoder::~PreflateMetaDecoder() {}
+
+// Starts the decoder on meta block `index` and copies that block's model
+// parameters into params.
+// Returns false, leaving decoder and params untouched, when the block index
+// or the block's model id is out of range.
+bool PreflateMetaDecoder::beginMetaBlock(PreflatePredictionDecoder& decoder, PreflateParameters& params, const size_t index) {
+  const bool knownBlock = index < blockList.size();
+  if (!knownBlock) {
+    return false;
+  }
+  const auto& blockInfo = blockList[index];
+  const bool knownModel = blockInfo.modelId < modelList.size();
+  if (!knownModel) {
+    return false;
+  }
+  const auto& entry = modelList[blockInfo.modelId];
+  params = entry.params;
+  decoder.start(entry.model, entry.params, reconData, blockInfo.reconStartOfs, blockInfo.reconSize);
+  return true;
+}
+// Ends the decoder's current meta block; always returns true.
+bool PreflateMetaDecoder::endMetaBlock(PreflatePredictionDecoder& decoder) {
+  decoder.end();
+  return true;
+}
+// Intentionally empty.
+void PreflateMetaDecoder::finish() {}
--- a/contrib/preflate/preflate_statistical_codec.h
+++ b/contrib/preflate/preflate_statistical_codec.h
@@ -0,0 +1,628 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef PREFLATE_STATISTICAL_CODEC_H
+#define PREFLATE_STATISTICAL_CODEC_H
+
+#include <vector>
+#include "support/arithmetic_coder.h"
+#include "support/bit_helper.h"
+#include "support/bitstream.h"
+#include "support/memstream.h"
+#include "preflate_parameter_estimator.h"
+#include "preflate_statistical_model.h"
+
+// Symbol model over N items, used together with the arithmetic coder.
+//
+// Table layout, as used by the inline methods below:
+//  - rids[item] is the slot an item occupies in the bounds tables and
+//    ids[slot] is the item stored at a slot;
+//  - bounds[slot] .. bounds[slot + 1] is the range assigned to a slot
+//    (extract() reports the width of that range for every item);
+//  - scaledDownBounds together with scaleDownBits is what is handed to the
+//    arithmetic coder.
+// When isFixed is set nothing is written or read: encode() does nothing and
+// decode() returns ids[N - 1] without touching the codec.
+template <unsigned N>
+struct PreflateSubModel {
+  static const unsigned L = N;
+  PreflateSubModel() {}
+
+  // Builds the model from N counts; forwards to build_impl().
+  void build(const unsigned(&arr)[N], const unsigned defval, const uint8_t prec = 16) {
+    build_impl(arr, defval, prec);
+  }
+  void buildDefault(const unsigned defval);
+  // Writes, for every item, the width of its range in `bounds` into arr.
+  void extract(unsigned(&arr)[N]) {
+    for (unsigned item = 0; item < N; ++item) {
+      const unsigned slot = rids[item];
+      arr[item] = bounds[slot + 1] - bounds[slot];
+    }
+  }
+  void read(ArithmeticDecoder&, const uint8_t);
+  void write(ArithmeticEncoder&, const uint8_t) const;
+  // Encodes `item` through its scaled-down range; emits nothing for a fixed model.
+  void encode(ArithmeticEncoder& codec, const unsigned item) const {
+    if (isFixed) {
+      return;
+    }
+    const size_t slot = rids[item];
+    codec.encodeShiftScale(scaleDownBits, scaledDownBounds[slot], scaledDownBounds[slot + 1]);
+  }
+  // Decodes one item; a fixed model yields ids[N - 1] without reading input.
+  unsigned decode(ArithmeticDecoder& codec) const {
+    if (!isFixed) {
+      const unsigned slot = codec.decodeShiftScale(scaleDownBits, scaledDownBounds, N);
+      return ids[slot];
+    }
+    return ids[N - 1];
+  }
+  bool isEqualTo(const PreflateSubModel<N>& m) const;
+
+  unsigned bounds[N + 1];
+  unsigned scaledDownBounds[N + 1];
+  unsigned short ids[N + 1], rids[N + 1];
+  uint8_t scaleDownBits;
+  bool isDefault, isFixed;
+
+private:
+  void build_impl(const unsigned* arr, const unsigned defval, const uint8_t prec);
+  void build_scale_down();
+  template <unsigned NEG, unsigned POS>
+  friend struct PreflateCorrectionSubModel;
+};
+
+// Specialization for a model with zero items: every operation is a no-op,
+// decode() always yields 0 and isEqualTo() always yields true.
+// It lets PreflateCorrectionSubModel be instantiated with NEG == 0 or POS == 0.
+template <>
+struct PreflateSubModel<0u> {
+  static const unsigned L = 0u;
+  PreflateSubModel() {}
+
+//  void build(const unsigned(&arr)[1], const unsigned defval, const uint8_t prec = 16) {}
+  void buildDefault(const unsigned defval) {}
+  void read(ArithmeticDecoder&, const uint8_t) {}
+  void write(ArithmeticEncoder&, const uint8_t) const {}
+  void encode(ArithmeticEncoder& codec, const unsigned item) const {}
+  unsigned decode(ArithmeticDecoder& codec) const { return 0; }
+  bool isEqualTo(const PreflateSubModel<0u>& m) const { return true; }
+
+  // Compile-time constants here (data members in the primary template).
+  enum { isDefault = 1, isFixed = 1 };
+
+private:
+  void build_impl(const unsigned* arr, const unsigned defval, const uint8_t prec) {}
+  template <unsigned NEG, unsigned POS>
+  friend struct PreflateCorrectionSubModel;
+};
+
+// Model for the difference between an actual value and a reference value.
+//
+// The difference is coded in up to three steps:
+//  1. `sign`: symbol 0 = no difference, 1 = actual > reference,
+//     2 = actual < reference;
+//  2. `pos` / `neg`: the magnitude minus one, as long as the magnitude is
+//     below POS (resp. NEG);
+//  3. for larger magnitudes the last symbol (POS - 1 resp. NEG - 1) acts as an
+//     escape and the remainder (magnitude - POS resp. magnitude - NEG) follows
+//     as plain bits, with the bit count derived from minvalue/maxvalue.
+template <unsigned NEG, unsigned POS>
+struct PreflateCorrectionSubModel {
+  static const unsigned LNEG = NEG;
+  static const unsigned LPOS = POS;
+  PreflateCorrectionSubModel() {}
+  // Builds the three sub-models from NEG + 1 + POS counts; forwards to build_impl().
+  void build(const unsigned(&arr)[NEG + 1 + POS], const int defval, const uint8_t prec = 16) {
+    build_impl(arr, defval, prec);
+  }
+  void buildDefault(const unsigned defval);
+  void read(ArithmeticDecoder&, const uint8_t);
+  void write(ArithmeticEncoder&, const uint8_t) const;
+  // Encodes actvalue relative to refvalue. minvalue/maxvalue only determine
+  // how many raw bits are used on the escape path.
+  void encode(ArithmeticEncoder& codec, const unsigned actvalue,
+              const unsigned refvalue,
+              const unsigned minvalue,
+              const unsigned maxvalue) {
+    const int delta = actvalue - refvalue;
+    if (delta == 0) {
+      sign.encode(codec, 0);
+    } else if (delta > 0) {
+      sign.encode(codec, 1);
+      if (delta < (int)POS) {
+        pos.encode(codec, delta - 1);
+      } else {
+        // Escape symbol followed by the remainder as plain bits.
+        pos.encode(codec, POS - 1);
+        codec.encodeBits(delta - POS, bitLength(maxvalue - POS - refvalue));
+      }
+    } else {
+      sign.encode(codec, 2);
+      const int magnitude = -delta;
+      if (magnitude < (int)NEG) {
+        neg.encode(codec, magnitude - 1);
+      } else {
+        // Escape symbol followed by the remainder as plain bits.
+        neg.encode(codec, NEG - 1);
+        codec.encodeBits(magnitude - NEG, bitLength(refvalue - NEG - minvalue));
+      }
+    }
+  }
+  // Inverse of encode(): returns the value that was encoded relative to
+  // refvalue; minvalue/maxvalue must match the ones used when encoding.
+  unsigned decode(ArithmeticDecoder& codec,
+             const unsigned refvalue,
+             const unsigned minvalue,
+             const unsigned maxvalue) {
+    switch (sign.decode(codec)) {
+    case 0:
+      return refvalue;
+    case 1: {
+      const int step = pos.decode(codec);
+      if (step < (int)(POS - 1)) {
+        return refvalue + step + 1;
+      }
+      return refvalue + codec.decodeBits(bitLength(maxvalue - POS - refvalue)) + POS;
+    }
+    default: {
+      const int step = neg.decode(codec);
+      if (step < (int)(NEG - 1)) {
+        return refvalue - step - 1;
+      }
+      return refvalue - codec.decodeBits(bitLength(refvalue - NEG - minvalue)) - NEG;
+    }
+    }
+  }
+  bool isEqualTo(const PreflateCorrectionSubModel<NEG, POS>& m) const;
+
+  PreflateSubModel<3> sign;
+  PreflateSubModel<POS> pos;
+  PreflateSubModel<NEG> neg;
+  bool isDefault;
+
+private:
+  void build_impl(const unsigned* arr, const int defval, const uint8_t prec);
+};
+
+// Settings that accompany a set of prediction models: it can be filled from a
+// statistics counter (read) or from/to an arithmetic-coded stream, and is
+// handed to the models' read/readFromStream/writeToStream methods.
+struct PreflateModelCodec {
+  PreflateSubModel<2> nonDefaultValue;
+  // NOTE(review): presumably the precisions used when sub-models are
+  // stored -- confirm against the implementation file.
+  uint8_t MBprecision;
+  uint8_t MBprecisionP1;
+  // NOTE(review): presumably set when every sub-model of the respective group
+  // (block / tree code / token) is a default model -- confirm.
+  bool blockFullDefault;
+  bool treecodeFullDefault;
+  bool tokenFullDefault;
+  unsigned totalModels, defaultingModels;
+
+  PreflateModelCodec();
+  void initDefault();
+  void read(const PreflateStatisticsCounter&);
+  void readFromStream(ArithmeticDecoder&);
+  void writeToStream(ArithmeticEncoder&);
+};
+
+// Common base of the block / tree code / token prediction models.
+//
+// Stores the arithmetic encoder and decoder pointers that the derived models'
+// encode*/decode* helpers dereference (without a null check), and offers
+// helpers for writing and reading plain fixed-width values.
+struct PreflateBaseModel {
+public:
+  PreflateBaseModel();
+  void setEncoderStream(ArithmeticEncoder*);
+  void setDecoderStream(ArithmeticDecoder*);
+
+  // Writes the low `maxBits` bits of `value` to `codec`.
+  static void encodeValue(ArithmeticEncoder& codec, const unsigned value, const unsigned maxBits) {
+#ifdef _DEBUG
+    // The check uses an unsigned shift and is skipped for widths of 32 bits
+    // or more: shifting by >= the type width is undefined, and every unsigned
+    // value fits into such a width anyway.
+    _ASSERT(maxBits >= 32 || value < (1u << maxBits));
+#endif
+    codec.encodeBits(value, maxBits);
+  }
+  // Same as above, using the encoder set via setEncoderStream().
+  void encodeValue(const unsigned value, const unsigned maxBits) {
+    encodeValue(*encoder, value, maxBits);
+  }
+  // Reads a `maxBits` wide value from `codec`.
+  static unsigned decodeValue(ArithmeticDecoder& codec, const unsigned maxBits) {
+    return codec.decodeBits(maxBits);
+  }
+  // Same as above, using the decoder set via setDecoderStream().
+  unsigned decodeValue(const unsigned maxBits) {
+    return decodeValue(*decoder, maxBits);
+  }
+
+protected:
+  template <unsigned N>
+  void readSubModel(PreflateSubModel<N>& sm, const bool isFullDef, const PreflateModelCodec& cc,
+                    const unsigned defVal, const uint8_t prec = 16);
+
+  template <unsigned N, unsigned M>
+  void readSubModel(PreflateCorrectionSubModel<N, M>& sm, const bool isFullDef, const PreflateModelCodec& cc,
+                    const unsigned defVal, const uint8_t prec = 16);
+
+  template <unsigned N>
+  void writeSubModel(const PreflateSubModel<N>& sm, const bool isFullDef, const PreflateModelCodec& cc,
+                     const unsigned defVal, const uint8_t prec = 16);
+
+  template <unsigned N, unsigned M>
+  void writeSubModel(const PreflateCorrectionSubModel<N, M>& sm, const bool isFullDef, const PreflateModelCodec& cc,
+                     const unsigned defVal, const uint8_t prec = 16);
+
+  ArithmeticEncoder* encoder;
+  ArithmeticDecoder* decoder;
+};
+
+// Models for block-level predictions: the block type and two yes/no flags
+// (end-of-block misprediction, non-zero padding).
+// The decode*/encode* helpers use the decoder/encoder stored in the base class.
+struct PreflateBlockPredictionModel : public PreflateBaseModel {
+public:
+  void read(const PreflateStatisticsCounter::BlockPrediction&, const PreflateModelCodec&);
+  void readFromStream(const PreflateModelCodec&);
+  void writeToStream(const PreflateModelCodec&);
+
+  unsigned decodeBlockType() {
+    return blockType.decode(*decoder);
+  }
+  // The flag decoders return true when the decoded symbol is non-zero.
+  bool decodeEOBMisprediction() {
+    return EOBMisprediction.decode(*decoder);
+  }
+  bool decodeNonZeroPadding() {
+    return nonZeroPadding.decode(*decoder);
+  }
+
+  void encodeBlockType(const unsigned type) {
+    blockType.encode(*encoder, type);
+  }
+  void encodeEOBMisprediction(const bool misprediction) {
+    EOBMisprediction.encode(*encoder, misprediction);
+  }
+  void encodeNonZeroPadding(const bool nonzeropadding) {
+    nonZeroPadding.encode(*encoder, nonzeropadding);
+  }
+
+  bool isEqualTo(const PreflateBlockPredictionModel& m) const;
+
+private:
+  PreflateSubModel<3> blockType;
+  PreflateSubModel<2> EOBMisprediction;
+  PreflateSubModel<2> nonZeroPadding;
+  // NOTE(review): not referenced anywhere in this header -- check whether the
+  // implementation file still uses it.
+  unsigned precision;
+};
+
+// Models for tree code predictions: count mispredictions, code type
+// replacements and corrections of bit lengths / repeat counts.
+// The decode*/encode* helpers use the decoder/encoder stored in the base class.
+struct PreflateTreeCodePredictionModel : public PreflateBaseModel {
+public:
+  void read(const PreflateStatisticsCounter::TreeCodePrediction&, const PreflateModelCodec& cc);
+  void readFromStream(const PreflateModelCodec& cc);
+  void writeToStream(const PreflateModelCodec& cc);
+
+  // The flag decoders return true when the decoded symbol is non-zero.
+  bool decodeTreeCodeCountMisprediction() {
+    return TCCountMisprediction.decode(*decoder);
+  }
+  bool decodeLiteralCountMisprediction() {
+    return LCountMisprediction.decode(*decoder);
+  }
+  bool decodeDistanceCountMisprediction() {
+    return DCountMisprediction.decode(*decoder);
+  }
+  // Tree code bit lengths are corrected within the range 0..7.
+  int decodeTreeCodeBitLengthCorrection(unsigned predval) {
+    return TCBitlengthCorrection.decode(*decoder, predval, 0, 7);
+  }
+  // predtype selects one of the four derived replacement models.
+  unsigned decodeLDTypeCorrection(unsigned predtype) {
+    return DerivedLDTypeReplacement[predtype].decode(*decoder);
+  }
+  // ldtype (0..3) selects the value range used for the repeat count.
+  unsigned decodeRepeatCountCorrection(const unsigned predval, const unsigned ldtype) {
+    return LDRepeatCountCorrection.decode(*decoder, predval,
+                                          repeatCountLowerBound(ldtype),
+                                          repeatCountUpperBound(ldtype));
+  }
+  // Literal/distance bit lengths are corrected within the range 0..15.
+  int decodeLDBitLengthCorrection(unsigned predval) {
+    return LDBitlengthCorrection.decode(*decoder, predval, 0, 15);
+  }
+
+  void encodeTreeCodeCountMisprediction(const bool misprediction) {
+    TCCountMisprediction.encode(*encoder, misprediction);
+  }
+  void encodeLiteralCountMisprediction(const bool misprediction) {
+    LCountMisprediction.encode(*encoder, misprediction);
+  }
+  void encodeDistanceCountMisprediction(const bool misprediction) {
+    DCountMisprediction.encode(*encoder, misprediction);
+  }
+  void encodeTreeCodeBitLengthCorrection(const unsigned predval, const unsigned actval) {
+    TCBitlengthCorrection.encode(*encoder, actval, predval, 0, 7);
+  }
+  void encodeLDTypeCorrection(const unsigned predval, const unsigned actval) {
+    DerivedLDTypeReplacement[predval].encode(*encoder, actval);
+  }
+  // Uses the same per-type value range as decodeRepeatCountCorrection().
+  void encodeRepeatCountCorrection(const unsigned predval, const unsigned actval, unsigned ldtype) {
+    LDRepeatCountCorrection.encode(*encoder, actval, predval,
+                                   repeatCountLowerBound(ldtype),
+                                   repeatCountUpperBound(ldtype));
+  }
+  void encodeLDBitLengthCorrection(const unsigned predval, const unsigned actval) {
+    LDBitlengthCorrection.encode(*encoder, actval, predval, 0, 15);
+  }
+
+  bool isEqualTo(const PreflateTreeCodePredictionModel& m) const;
+
+private:
+  // Lower end of the repeat count range for a code type (0..3).
+  static unsigned repeatCountLowerBound(const unsigned ldtype) {
+    static const uint8_t minVal[4] = {0, 3, 3, 11};
+    return minVal[ldtype];
+  }
+  // Upper end of the repeat count range for a code type (0..3):
+  // the lower end plus the per-type span.
+  static unsigned repeatCountUpperBound(const unsigned ldtype) {
+    static const uint8_t lenVal[4] = {0, 3, 7, 127};
+    return repeatCountLowerBound(ldtype) + lenVal[ldtype];
+  }
+
+  void deriveLDTypeReplacement();
+
+  PreflateSubModel<2> TCCountMisprediction;
+  PreflateSubModel<2> LCountMisprediction;
+  PreflateSubModel<2> DCountMisprediction;
+  PreflateSubModel<2> LDTypeMisprediction[4];
+  PreflateSubModel<4> LDTypeReplacementBase;
+  PreflateCorrectionSubModel<1, 1> LDRepeatCountCorrection;
+  PreflateCorrectionSubModel<3, 3> TCBitlengthCorrection;
+  PreflateCorrectionSubModel<4, 4> LDBitlengthCorrection;
+  PreflateSubModel<4> DerivedLDTypeReplacement[4];
+};
+
+// Models for token predictions: literal/reference mispredictions, length and
+// distance corrections and the "irregular length 258" flag.
+// The decode*/encode* helpers use the decoder/encoder stored in the base class.
+struct PreflateTokenPredictionModel : public PreflateBaseModel {
+public:
+  void read(const PreflateStatisticsCounter::TokenPrediction&, const PreflateModelCodec& cc);
+  void readFromStream(const PreflateModelCodec& cc);
+  void writeToStream(const PreflateModelCodec& cc);
+
+  // The flag decoders return true when the decoded symbol is non-zero.
+  bool decodeLiteralPredictionWrong() {
+    return LITMisprediction.decode(*decoder);
+  }
+  bool decodeReferencePredictionWrong() {
+    return REFMisprediction.decode(*decoder);
+  }
+  // Lengths are corrected relative to predval within the range 3..258.
+  int decodeLenCorrection(const unsigned predval) {
+    return LENCorrection.decode(*decoder, predval, 3, 258);
+  }
+  // Distance corrections are coded relative to a reference of 0 within the
+  // range 0..32767, i.e. the decoded value is the correction itself.
+  unsigned decodeDistOnlyCorrection() {
+    return DISTOnlyCorrection.decode(*decoder, 0, 0, 32767);
+  }
+  unsigned decodeDistAfterLenCorrection() {
+    return DISTAfterLenCorrection.decode(*decoder, 0, 0, 32767);
+  }
+  bool decodeIrregularLen258() {
+    return IrregularLen258Encoding.decode(*decoder);
+  }
+
+  void encodeLiteralPredictionWrong(const bool misprediction) {
+    LITMisprediction.encode(*encoder, misprediction);
+  }
+  void encodeReferencePredictionWrong(const bool misprediction) {
+    REFMisprediction.encode(*encoder, misprediction);
+  }
+  void encodeLenCorrection(const unsigned predval, const unsigned actval) {
+    LENCorrection.encode(*encoder, actval, predval, 3, 258);
+  }
+  // `hops` is encoded as the actual value against a reference of 0.
+  void encodeDistOnlyCorrection(const unsigned hops) {
+    DISTOnlyCorrection.encode(*encoder, hops, 0, 0, 32767);
+  }
+  void encodeDistAfterLenCorrection(const unsigned hops) {
+    DISTAfterLenCorrection.encode(*encoder, hops, 0, 0, 32767);
+  }
+  void encodeIrregularLen258(const bool irregular) {
+    IrregularLen258Encoding.encode(*encoder, irregular);
+  }
+
+  bool isEqualTo(const PreflateTokenPredictionModel& m) const;
+
+private:
+  PreflateSubModel<2> LITMisprediction;
+  PreflateSubModel<2> REFMisprediction;
+  PreflateCorrectionSubModel<6, 6> LENCorrection;
+  // NEG == 0: these two models have no sub-model for negative differences.
+  PreflateCorrectionSubModel<0, 3> DISTAfterLenCorrection;
+  PreflateCorrectionSubModel<0, 3> DISTOnlyCorrection;
+  PreflateSubModel<2> IrregularLen258Encoding;
+};
+
+// Bundles the three prediction model groups (block, tree code, token).
+// PreflatePredictionEncoder and PreflatePredictionDecoder derive from it and
+// forward their encode*/decode* calls to the respective group.
+struct PreflatePredictionModel {
+  PreflatePredictionModel();
+  ~PreflatePredictionModel();
+
+  void read(const PreflateStatisticsCounter& model, const PreflateModelCodec& cc);
+  void setEncoderStream(ArithmeticEncoder* codec);
+  void setDecoderStream(ArithmeticDecoder* codec);
+  void readFromStream(const PreflateModelCodec& cc);
+  void writeToStream(const PreflateModelCodec& cc);
+
+  // True when all three model groups compare equal (see the isEqualTo
+  // implementations of the individual groups).
+  bool isEqualTo(const PreflatePredictionModel& m) const;
+
+protected:
+  // Blocks
+  PreflateBlockPredictionModel block;
+  // Tree codes
+  PreflateTreeCodePredictionModel treecode;
+  // Tokens
+  PreflateTokenPredictionModel token;
+};
+
+// Encoding front end: every encode* method forwards to the matching method of
+// the block / treecode / token model group inherited from
+// PreflatePredictionModel. start() begins an encoding run for a model and
+// end() returns the produced bytes.
+struct PreflatePredictionEncoder : public PreflatePredictionModel {
+  PreflatePredictionEncoder();
+
+  void start(const PreflatePredictionModel&, const PreflateParameters&, const unsigned modelId);
+  std::vector<uint8_t> end();
+
+  // Writes the low `maxBits` bits of `value` directly through the encoder.
+  void encodeValue(const unsigned value, const unsigned maxBits) {
+    encoder->encodeBits(value, maxBits);
+  }
+
+  // Block
+  void encodeBlockType(const unsigned type) {
+    block.encodeBlockType(type);
+  }
+  void encodeEOBMisprediction(const bool misprediction) {
+    block.encodeEOBMisprediction(misprediction);
+  }
+  void encodeNonZeroPadding(const bool nonzeropadding) {
+    block.encodeNonZeroPadding(nonzeropadding);
+  }
+  // Tree codes
+  void encodeTreeCodeCountMisprediction(const bool misprediction) {
+    treecode.encodeTreeCodeCountMisprediction(misprediction);
+  }
+  void encodeLiteralCountMisprediction(const bool misprediction) {
+    treecode.encodeLiteralCountMisprediction(misprediction);
+  }
+  void encodeDistanceCountMisprediction(const bool misprediction) {
+    treecode.encodeDistanceCountMisprediction(misprediction);
+  }
+  void encodeTreeCodeBitLengthCorrection(const unsigned predval, const unsigned actval) {
+    treecode.encodeTreeCodeBitLengthCorrection(predval, actval);
+  }
+  void encodeLDTypeCorrection(const unsigned predval, const unsigned actval) {
+    treecode.encodeLDTypeCorrection(predval, actval);
+  }
+  void encodeRepeatCountCorrection(const unsigned predval, const unsigned actval, unsigned ldtype) {
+    treecode.encodeRepeatCountCorrection(predval, actval, ldtype);
+  }
+  void encodeLDBitLengthCorrection(const unsigned predval, const unsigned actval) {
+    treecode.encodeLDBitLengthCorrection(predval, actval);
+  }
+  // Token
+  void encodeLiteralPredictionWrong(const bool misprediction) {
+    token.encodeLiteralPredictionWrong(misprediction);
+  }
+  void encodeReferencePredictionWrong(const bool misprediction) {
+    token.encodeReferencePredictionWrong(misprediction);
+  }
+  void encodeLenCorrection(const unsigned predval, const unsigned actval) {
+    token.encodeLenCorrection(predval, actval);
+  }
+  void encodeDistOnlyCorrection(const unsigned hops) {
+    token.encodeDistOnlyCorrection(hops);
+  }
+  void encodeDistAfterLenCorrection(const unsigned hops) {
+    token.encodeDistAfterLenCorrection(hops);
+  }
+  void encodeIrregularLen258(const bool irregular) {
+    token.encodeIrregularLen258(irregular);
+  }
+
+  const PreflateParameters& parameters() const {
+    return params;
+  }
+
+  unsigned modelId() const {
+    return modelid;
+  }
+
+private:
+  PreflateParameters  params;
+  unsigned modelid;
+  // NOTE(review): raw pointers and no destructor declared in this header --
+  // presumably created in start() and released in end(); confirm in the
+  // implementation file.
+  MemStream* storage;
+  BitOutputStream* bos;
+  ArithmeticEncoder* encoder;
+};
+
+// Decoding front end: every decode* method forwards to the matching method of
+// the block / treecode / token model group inherited from
+// PreflatePredictionModel. start() begins decoding a byte range
+// (off0, size) of the given vector; end() finishes it.
+struct PreflatePredictionDecoder : public PreflatePredictionModel {
+  PreflatePredictionDecoder();
+  void start(const PreflatePredictionModel&, const PreflateParameters&, 
+             const std::vector<uint8_t>&, size_t off0, size_t size);
+  void end();
+
+  // Reads a `maxBits` wide value directly through the decoder.
+  unsigned decodeValue(const unsigned maxBits) {
+    return decoder->decodeBits(maxBits);
+  }
+  // Block
+  unsigned decodeBlockType() {
+    return block.decodeBlockType();
+  }
+  bool decodeEOBMisprediction() {
+    return block.decodeEOBMisprediction();
+  }
+  bool decodeNonZeroPadding() {
+    return block.decodeNonZeroPadding();
+  }
+  // Tree codes
+  bool decodeTreeCodeCountMisprediction() {
+    return treecode.decodeTreeCodeCountMisprediction();
+  }
+  bool decodeLiteralCountMisprediction() {
+    return treecode.decodeLiteralCountMisprediction();
+  }
+  bool decodeDistanceCountMisprediction() {
+    return treecode.decodeDistanceCountMisprediction();
+  }
+  int decodeTreeCodeBitLengthCorrection(unsigned predval) {
+    return treecode.decodeTreeCodeBitLengthCorrection(predval);
+  }
+  unsigned decodeLDTypeCorrection(unsigned predtype) {
+    return treecode.decodeLDTypeCorrection(predtype);
+  }
+  unsigned decodeRepeatCountCorrection(const unsigned predval, const unsigned ldtype) {
+    return treecode.decodeRepeatCountCorrection(predval, ldtype);
+  }
+  // Returns unsigned here although the forwarded-to method returns int.
+  unsigned decodeLDBitLengthCorrection(unsigned predval) {
+    return treecode.decodeLDBitLengthCorrection(predval);
+  }
+  // Token
+  bool decodeLiteralPredictionWrong() {
+    return token.decodeLiteralPredictionWrong();
+  }
+  bool decodeReferencePredictionWrong() {
+    return token.decodeReferencePredictionWrong();
+  }
+  int decodeLenCorrection(const unsigned predval) {
+    return token.decodeLenCorrection(predval);
+  }
+  unsigned decodeDistOnlyCorrection() {
+    return token.decodeDistOnlyCorrection();
+  }
+  unsigned decodeDistAfterLenCorrection() {
+    return token.decodeDistAfterLenCorrection();
+  }
+  bool decodeIrregularLen258() {
+    return token.decodeIrregularLen258();
+  }
+
+private:
+  PreflateParameters  params;
+  // NOTE(review): raw pointers and no destructor declared in this header --
+  // presumably created in start() and released in end(); confirm in the
+  // implementation file.
+  MemStream* storage;
+  BitInputStream* bis;
+  ArithmeticDecoder* decoder;
+};
+
+// Collects models and meta blocks on the encoding side and produces the final
+// byte vector in finish().
+struct PreflateMetaEncoder {
+  PreflateMetaEncoder();
+  ~PreflateMetaEncoder();
+
+  // Returns the error flag.
+  bool error() const {
+    return inError;
+  }
+  // Registers a model; the returned value is presumably the id to pass to
+  // beginMetaBlockWithModel() -- confirm in the implementation file.
+  unsigned addModel(const PreflateStatisticsCounter&, const PreflateParameters&);
+
+  bool beginMetaBlockWithModel(PreflatePredictionEncoder&, const unsigned modelId);
+  bool endMetaBlock(PreflatePredictionEncoder&, const size_t uncompressed);
+  std::vector<unsigned char> finish();
+
+private:
+  // Everything stored per registered model.
+  struct modelType {
+    unsigned writtenId;
+    PreflateStatisticsCounter counter;
+    PreflatePredictionModel model;
+    PreflateParameters params;
+    PreflateModelCodec mcodec;
+  };
+  // Per meta block: the model used and the sizes of its reconstruction data
+  // and of the uncompressed data it covers.
+  struct metaBlockInfo {
+    unsigned modelId;
+    size_t reconSize;
+    size_t uncompressedSize;
+  };
+
+  bool inError;
+  std::vector<modelType> modelList;
+  std::vector<metaBlockInfo> blockList;
+  std::vector<uint8_t> reconData;
+};
+
+// Decoding-side counterpart of PreflateMetaEncoder: exposes the list of meta
+// blocks found in the reconstruction data and starts/ends decoding of
+// individual blocks.
+struct PreflateMetaDecoder {
+  PreflateMetaDecoder(const std::vector<uint8_t>& reconData, const uint64_t uncompressedSize);
+  ~PreflateMetaDecoder();
+
+  // Returns the error flag; check it before using the other accessors.
+  bool error() const {
+    return inError;
+  }
+  size_t metaBlockCount() const {
+    return blockList.size();
+  }
+  // The two accessors below index blockList without a bounds check;
+  // metaBlockId must be < metaBlockCount().
+  uint64_t metaBlockUncompressedStartOfs(const size_t metaBlockId) const {
+    return blockList[metaBlockId].uncompressedStartOfs;
+  }
+  // NOTE(review): the stored size is uint64_t but is returned as size_t, so
+  // it is narrowed on targets where size_t is 32 bits wide.
+  size_t metaBlockUncompressedSize(const size_t metaBlockId) const {
+    return blockList[metaBlockId].uncompressedSize;
+  }
+
+  bool beginMetaBlock(PreflatePredictionDecoder&, PreflateParameters&, const size_t index);
+  bool endMetaBlock(PreflatePredictionDecoder&);
+  void finish();
+
+private:
+  // Everything stored per model.
+  struct modelType {
+    PreflatePredictionModel model;
+    PreflateParameters params;
+    PreflateModelCodec mcodec;
+  };
+  // Per meta block: the model used plus start offset and size, both within
+  // the reconstruction data and within the uncompressed data.
+  struct metaBlockInfo {
+    unsigned modelId;
+    size_t reconStartOfs;
+    size_t reconSize;
+    uint64_t uncompressedStartOfs;
+    uint64_t uncompressedSize;
+  };
+
+  bool inError;
+
+  // Held by reference: presumably bound to the constructor argument, which
+  // then has to outlive this object -- confirm in the implementation file.
+  const std::vector<uint8_t>& reconData;
+  const uint64_t uncompressedSize;
+  std::vector<modelType> modelList;
+  std::vector<metaBlockInfo> blockList;
+};
+
+// Free-function form of PreflatePredictionModel::isEqualTo().
+bool isEqual(const PreflatePredictionModel&, const PreflatePredictionModel&);
+
+#endif /* PREFLATE_STATISTICAL_CODEC_H */
--- a/contrib/preflate/preflate_statistical_debug.cpp
+++ b/contrib/preflate/preflate_statistical_debug.cpp
@@ -0,0 +1,187 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include <algorithm>
+#include <string.h>
+#include "preflate_statistical_codec.h"
+#include "preflate_statistical_model.h"
+#include "support/array_helper.h"
+#include "support/bit_helper.h"
+#include <stdint.h>
+
+// Compares this model with m.
+// A model m whose last bound is 0 (or a model without items) is treated as
+// equal to anything. Otherwise all bounds must match, and the item ids must
+// match at every slot whose upper bound in this model is non-zero.
+template <unsigned N>
+bool PreflateSubModel<N>::isEqualTo(const PreflateSubModel<N>& m) const {
+  if (N == 0 || m.bounds[N] == 0) {
+    return true;
+  }
+  for (unsigned slot = 0; slot < N; ++slot) {
+    const bool boundDiffers = bounds[slot] != m.bounds[slot];
+    const bool idDiffers = bounds[slot + 1] > 0 && ids[slot] != m.ids[slot];
+    if (boundDiffers || idDiffers) {
+      return false;
+    }
+  }
+  return bounds[N] == m.bounds[N];
+}
+
+// Equal when the sign, positive and negative sub-models are all equal.
+template <unsigned NEG, unsigned POS>
+bool PreflateCorrectionSubModel<NEG, POS>::isEqualTo(const PreflateCorrectionSubModel<NEG, POS>& m) const {
+  if (!sign.isEqualTo(m.sign)) {
+    return false;
+  }
+  if (!pos.isEqualTo(m.pos)) {
+    return false;
+  }
+  return neg.isEqualTo(m.neg);
+}
+
+
+// Equal when the block type, EOB misprediction and padding models are equal.
+bool PreflateBlockPredictionModel::isEqualTo(const PreflateBlockPredictionModel& m) const {
+  if (!blockType.isEqualTo(m.blockType)) {
+    return false;
+  }
+  if (!EOBMisprediction.isEqualTo(m.EOBMisprediction)) {
+    return false;
+  }
+  return nonZeroPadding.isEqualTo(m.nonZeroPadding);
+}
+// Equal when all stored sub-models are equal. DerivedLDTypeReplacement is not
+// part of the comparison.
+bool PreflateTreeCodePredictionModel::isEqualTo(const PreflateTreeCodePredictionModel& m) const {
+  if (!TCBitlengthCorrection.isEqualTo(m.TCBitlengthCorrection)
+      || !TCCountMisprediction.isEqualTo(m.TCCountMisprediction)
+      || !LCountMisprediction.isEqualTo(m.LCountMisprediction)
+      || !DCountMisprediction.isEqualTo(m.DCountMisprediction)) {
+    return false;
+  }
+  for (unsigned type = 0; type < 4; ++type) {
+    if (!LDTypeMisprediction[type].isEqualTo(m.LDTypeMisprediction[type])) {
+      return false;
+    }
+  }
+  return LDTypeReplacementBase.isEqualTo(m.LDTypeReplacementBase)
+    && LDRepeatCountCorrection.isEqualTo(m.LDRepeatCountCorrection)
+    && LDBitlengthCorrection.isEqualTo(m.LDBitlengthCorrection);
+}
+// Equal when all six token sub-models are equal.
+bool PreflateTokenPredictionModel::isEqualTo(const PreflateTokenPredictionModel& m) const {
+  if (!LITMisprediction.isEqualTo(m.LITMisprediction)
+      || !REFMisprediction.isEqualTo(m.REFMisprediction)) {
+    return false;
+  }
+  if (!LENCorrection.isEqualTo(m.LENCorrection)) {
+    return false;
+  }
+  if (!DISTAfterLenCorrection.isEqualTo(m.DISTAfterLenCorrection)
+      || !DISTOnlyCorrection.isEqualTo(m.DISTOnlyCorrection)) {
+    return false;
+  }
+  return IrregularLen258Encoding.isEqualTo(m.IrregularLen258Encoding);
+}
+
+// Equal when the block, tree code and token model groups are all equal.
+bool PreflatePredictionModel::isEqualTo(const PreflatePredictionModel& m) const {
+  if (!block.isEqualTo(m.block)) {
+    return false;
+  }
+  if (!treecode.isEqualTo(m.treecode)) {
+    return false;
+  }
+  return token.isEqualTo(m.token);
+}
+
+// Free-function wrapper around m1.isEqualTo(m2).
+bool isEqual(const PreflatePredictionModel& m1, const PreflatePredictionModel& m2) {
+  return m1.isEqualTo(m2);
+}
+
+// ----------------------------
+
+// Prints the share of "yes" samples of a two-bucket counter (flag[0] = no,
+// flag[1] = yes) as "<txt> <percent>% (<total>)".
+// Nothing is printed when the "yes" bucket is empty.
+void printFlagStatistics(const char *txt, unsigned(&flag)[2]) {
+  if (flag[1]) {
+    const unsigned total = flag[0] + flag[1];
+    // The total is unsigned, so it is printed with %u.
+    printf("%s %g%% (%u)", txt, flag[1] * 100.0 / total, total);
+  }
+}
+// Prints a correction histogram as "<txt>: <entry> <entry> ... (<sum>)".
+//
+//   data   - `size` buckets; bucket `offset` stands for "no correction"
+//   sum    - total number of samples, used as the 100% reference
+//   offset - index of the "no correction" bucket
+//
+// Nothing is printed when all samples are in the "no correction" bucket.
+// Empty buckets are skipped. The first and last bucket (unless they are the
+// "no correction" bucket) are printed as "-x" / "+x"; every other bucket is
+// printed with its signed distance from `offset`.
+void printCorrectionStatistics(const char *txt,
+                               unsigned data[], unsigned size, unsigned sum, unsigned offset) {
+  if (data[offset] == sum) {
+    return;
+  }
+  bool on = false;
+  for (unsigned i = 0; i < size; ++i) {
+    if (!data[i]) {
+      continue;
+    }
+    if (!on) {
+      // The label is emitted once, in front of the first non-empty bucket.
+      printf("%s:", txt);
+      on = true;
+    }
+    const double percent = data[i] * 100.0 / sum;
+    if (i != offset && (i == 0 || i + 1 == size)) {
+      printf(" %sx %g%%", i == 0 ? "-" : "+", percent);
+    } else {
+      const char *prefix = i == offset ? "" : (i < offset ? "-" : "+");
+      // Distance computed directly on unsigned values, so no wrapped
+      // subtraction or signed cast is involved.
+      const unsigned distance = i < offset ? offset - i : i - offset;
+      printf(" %s%u %g%%", prefix, distance, percent);
+    }
+  }
+  if (on) {
+    // The sum is unsigned, so it is printed with %u.
+    printf(" (%u)", sum);
+  }
+}
+// Convenience overload for fixed-size arrays: deduces the bucket count N from
+// the array type and forwards to the pointer/size overload.
+template <unsigned N>
+void printCorrectionStatistics(const char *txt, unsigned (&data)[N], unsigned sum, int offset) {
+  printCorrectionStatistics(txt, data, N, sum, offset);
+}
+
+// ----------------------------
+
+// Prints the block statistics: the share of each block type, followed by the
+// EOB misprediction and non-zero padding flags. The dynamic Huffman share is
+// omitted when every block is of that type.
+void PreflateStatisticsCounter::BlockPrediction::print() {
+  const unsigned total = sumArray(blockType);
+  const auto share = [total](const unsigned count) { return count * 100.0 / total; };
+  if (blockType[0] != 0) {
+    printf(" ->STORE %g%%", share(blockType[0]));
+  }
+  if (blockType[1] != 0 && blockType[1] != total) {
+    printf(" ->DYNHUF %g%%", share(blockType[1]));
+  }
+  if (blockType[2] != 0) {
+    printf(" ->STATHUF %g%%", share(blockType[2]));
+  }
+  printFlagStatistics(", EOB MP", EOBMisprediction);
+  printFlagStatistics(", PAD!=0", nonZeroPadding);
+}
+
+// Prints the tree code statistics: the misprediction flags, the distribution
+// of code type replacements and the three correction histograms.
+void PreflateStatisticsCounter::TreeCodePrediction::print() {
+  printFlagStatistics(", !CT SZ MP", TCCountMisprediction);
+  printFlagStatistics(", !L SZ MP", LCountMisprediction);
+  printFlagStatistics(", !D SZ MP", DCountMisprediction);
+  // One flag counter per code type, printed in index order.
+  static const char* const typeLabels[4] = {", !T B MP", ", !T R MP", ", !T 0s MP", ", !T 0l MP"};
+  for (unsigned type = 0; type < 4; ++type) {
+    printFlagStatistics(typeLabels[type], LDTypeMisprediction[type]);
+  }
+
+  const unsigned replacements = sumArray(LDTypeReplacement);
+  const auto share = [replacements](const unsigned count) { return count * 100.0 / replacements; };
+  if (LDTypeReplacement[0] != 0) {
+    printf(" ->T B %g", share(LDTypeReplacement[0]));
+  }
+  if (LDTypeReplacement[1] != 0) {
+    printf(" ->T R %g", share(LDTypeReplacement[1]));
+  }
+  if (LDTypeReplacement[2] != 0) {
+    printf(" ->T 0s %g", share(LDTypeReplacement[2]));
+  }
+  if (LDTypeReplacement[3] != 0) {
+    printf(" ->T 0l %g", share(LDTypeReplacement[3]));
+  }
+
+  // The last argument is the index of the "no correction" bucket.
+  const unsigned tcTotal = sumArray(TCBitlengthCorrection);
+  printCorrectionStatistics(", C BL", TCBitlengthCorrection, tcTotal, 3);
+  const unsigned repeatTotal = sumArray(LDRepeatCountCorrection);
+  printCorrectionStatistics(" LD RP", LDRepeatCountCorrection, repeatTotal, 1);
+  const unsigned ldTotal = sumArray(LDBitlengthCorrection);
+  printCorrectionStatistics(", LD BL", LDBitlengthCorrection, ldTotal, 4);
+}
+
+// Prints the token statistics: literal/reference misprediction flags, the
+// length and distance correction histograms and the irregular length 258 flag.
+void PreflateStatisticsCounter::TokenPrediction::print() {
+  printFlagStatistics(", !LIT MP", LITMisprediction);
+  printFlagStatistics(", !REF MP", REFMisprediction);
+
+  // The last argument is the index of the "no correction" bucket.
+  const unsigned lenTotal = sumArray(LENCorrection);
+  printCorrectionStatistics(" L", LENCorrection, lenTotal, 6);
+  const unsigned distAfterLenTotal = sumArray(DISTAfterLenCorrection);
+  printCorrectionStatistics(" L->D+", DISTAfterLenCorrection, distAfterLenTotal, 0);
+  const unsigned distOnlyTotal = sumArray(DISTOnlyCorrection);
+  printCorrectionStatistics(" ->D+", DISTOnlyCorrection, distOnlyTotal, 0);
+
+  printFlagStatistics(", !L258 MP", LEN258IrregularEncoding);
+}
+
+
+// Prints the block, tree code and token statistics on one line, terminated by
+// a newline.
+void PreflateStatisticsCounter::print() {
+  block.print();
+  treecode.print();
+  token.print();
+  printf("\n");
+}
--- a/contrib/preflate/preflate_statistical_model.cpp
+++ b/contrib/preflate/preflate_statistical_model.cpp
@@ -0,0 +1,53 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include "preflate_statistical_model.h"
+#include "preflate_token.h"
+#include "support/array_helper.h"
+#include <stdio.h>
+
+// Counts how many of the block counters have all of their samples in a single
+// designated bucket: dynamic Huffman for the block type, "no" (index 0) for
+// the two flags.
+unsigned PreflateStatisticsCounter::BlockPrediction::checkDefaultModels() const {
+  unsigned defaults = 0;
+  if (sumArray(blockType) == blockType[PreflateTokenBlock::DYNAMIC_HUFF]) {
+    ++defaults;
+  }
+  if (sumArray(EOBMisprediction) == EOBMisprediction[0]) {
+    ++defaults;
+  }
+  if (sumArray(nonZeroPadding) == nonZeroPadding[0]) {
+    ++defaults;
+  }
+  return defaults;
+}
+
+// Counts how many of the tree code counters have all of their samples in a
+// single designated bucket: index 0 for the flags, the "no correction" index
+// for the correction histograms. The replacement histogram counts when it is
+// completely empty.
+unsigned PreflateStatisticsCounter::TreeCodePrediction::checkDefaultModels() const {
+  unsigned defaults = 0;
+  if (sumArray(TCCountMisprediction) == TCCountMisprediction[0]) {
+    ++defaults;
+  }
+  if (sumArray(TCBitlengthCorrection) == TCBitlengthCorrection[3]) {
+    ++defaults;
+  }
+  if (sumArray(LCountMisprediction) == LCountMisprediction[0]) {
+    ++defaults;
+  }
+  if (sumArray(DCountMisprediction) == DCountMisprediction[0]) {
+    ++defaults;
+  }
+  for (unsigned type = 0; type < 4; ++type) {
+    if (sumArray(LDTypeMisprediction[type]) == LDTypeMisprediction[type][0]) {
+      ++defaults;
+    }
+  }
+  if (sumArray(LDTypeReplacement) == 0) {
+    ++defaults;
+  }
+  if (sumArray(LDRepeatCountCorrection) == LDRepeatCountCorrection[1]) {
+    ++defaults;
+  }
+  if (sumArray(LDBitlengthCorrection) == LDBitlengthCorrection[4]) {
+    ++defaults;
+  }
+  return defaults;
+}
+
+// Counts how many of the token counters have all of their samples in a single
+// designated bucket: index 0 for the flags and distance corrections, index 6
+// for the length correction.
+unsigned PreflateStatisticsCounter::TokenPrediction::checkDefaultModels() const {
+  unsigned defaults = 0;
+  if (sumArray(LITMisprediction) == LITMisprediction[0]) {
+    ++defaults;
+  }
+  if (sumArray(REFMisprediction) == REFMisprediction[0]) {
+    ++defaults;
+  }
+  if (sumArray(LENCorrection) == LENCorrection[6]) {
+    ++defaults;
+  }
+  if (sumArray(DISTAfterLenCorrection) == DISTAfterLenCorrection[0]) {
+    ++defaults;
+  }
+  if (sumArray(DISTOnlyCorrection) == DISTOnlyCorrection[0]) {
+    ++defaults;
+  }
+  if (sumArray(LEN258IrregularEncoding) == LEN258IrregularEncoding[0]) {
+    ++defaults;
+  }
+  return defaults;
+}
--- a/contrib/preflate/preflate_statistical_model.h
+++ b/contrib/preflate/preflate_statistical_model.h
@@ -0,0 +1,143 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef PREFLATE_STATISTICS_COUNTER_H
+#define PREFLATE_STATISTICS_COUNTER_H
+
+#include <algorithm>
+
+// Histograms of prediction outcomes, grouped into block-level, tree-code and
+// token-level statistics. Every inc* method bumps one slot of one histogram.
+//
+// All counters start at zero. Clamping uses (std::min)/(std::max) written with
+// parentheses so the calls compile both with and without the min/max macros
+// that <windows.h> defines.
+struct PreflateStatisticsCounter {
+  // Block-level events: block type, end-of-block prediction, padding bits.
+  struct BlockPrediction {
+  public:
+    // bt is used directly as an index into a 3-entry array
+    // (stored, dynamic huff, static huff); it is not range-checked here.
+    void incBlockType(const unsigned bt) {
+      blockType[bt]++;
+    }
+    void incEOBPredictionWrong(const bool mispredicted) {
+      EOBMisprediction[mispredicted]++;
+    }
+    void incNonZeroPadding(const bool nonzeropadding) {
+      nonZeroPadding[nonzeropadding]++;
+    }
+
+    // Number of histograms in this group.
+    static unsigned totalModels() {
+      return 3;
+    }
+    // Number of histograms whose events all fall in the default slot.
+    unsigned checkDefaultModels() const;
+
+    void print();
+
+  private:
+    unsigned blockType[3] = {}; // stored, dynamic huff, static huff
+    unsigned EOBMisprediction[2] = {}; // no, yes
+    unsigned nonZeroPadding[2] = {}; // no, yes
+
+    friend struct PreflateBlockPredictionModel;
+  };
+
+  // Events recorded while predicting the Huffman tree description.
+  struct TreeCodePrediction {
+  public:
+    void incTreeCodeCountPredictionWrong(const bool mispredicted) {
+      TCCountMisprediction[mispredicted]++;
+    }
+    // len_diff is clamped to [-3, +3] and shifted so that 0 maps to slot 3.
+    void incTreeCodeLengthDiffToPrediction(const int len_diff) {
+      TCBitlengthCorrection[(std::max)((std::min)(len_diff, 3), -3) + 3]++;
+    }
+    void incLiteralCountPredictionWrong(const bool mispredicted) {
+      LCountMisprediction[mispredicted]++;
+    }
+    void incDistanceCountPredictionWrong(const bool mispredicted) {
+      DCountMisprediction[mispredicted]++;
+    }
+    // codetype indexes a 4-entry array (BL, REP, REPZS, REPZL) unchecked.
+    void incLDCodeTypePredictionWrong(const unsigned codetype, const bool mispredicted) {
+      LDTypeMisprediction[codetype][mispredicted]++;
+    }
+    // replacement_codetype indexes a 4-entry array unchecked.
+    void incLDCodeTypeReplacement(const unsigned replacement_codetype) {
+      LDTypeReplacement[replacement_codetype]++;
+    }
+    // len_diff is clamped to [-1, +1] and shifted so that 0 maps to slot 1.
+    void incLDCodeRepeatDiffToPrediction(const int len_diff) {
+      LDRepeatCountCorrection[(std::max)((std::min)(len_diff, 1), -1) + 1]++;
+    }
+    // len_diff is clamped to [-4, +4] and shifted so that 0 maps to slot 4.
+    void incLDCodeLengthDiffToPrediction(const int len_diff) {
+      LDBitlengthCorrection[(std::max)((std::min)(len_diff, 4), -4) + 4]++;
+    }
+
+    // Number of histograms in this group (LDTypeMisprediction counts as 4).
+    static unsigned totalModels() {
+      return 11;
+    }
+    // Number of histograms whose events all fall in the default slot.
+    unsigned checkDefaultModels() const;
+
+    void print();
+
+  private:
+    unsigned TCCountMisprediction[2] = {}; // no, yes
+    unsigned TCBitlengthCorrection[7] = {}; // -x, -2, -1, 0, +1, +2, +x
+    unsigned LCountMisprediction[2] = {}; // no, yes
+    unsigned DCountMisprediction[2] = {}; // no, yes
+    unsigned LDTypeMisprediction[4][2] = {}; // types: BL,REP,REPZS,REPZL; no, yes
+    unsigned LDTypeReplacement[4] = {};      // replacement type: BL,REP,REPZS,REPZL
+    unsigned LDRepeatCountCorrection[3] = {}; // -x, 0, +x
+    unsigned LDBitlengthCorrection[9] = {}; // -x, -3, -2, -1, 0, +1, +2, +3, +x
+
+    friend struct PreflateTreeCodePredictionModel;
+  };
+
+  // Events recorded while predicting individual literal/reference tokens.
+  struct TokenPrediction {
+  public:
+    void incLiteralPredictionWrong(const bool mispredicted) {
+      LITMisprediction[mispredicted]++;
+    }
+    void incReferencePredictionWrong(const bool mispredicted) {
+      REFMisprediction[mispredicted]++;
+    }
+    // len_diff is clamped to [-6, +6] and shifted so that 0 maps to slot 6.
+    void incLengthDiffToPrediction(const int len_diff) {
+      LENCorrection[(std::max)((std::min)(len_diff, 6), -6) + 6]++;
+    }
+    void incIrregularLength258Encoding(const bool irregular) {
+      LEN258IrregularEncoding[irregular]++;
+    }
+    // The argument is a hop count (see the array comment below), clamped to
+    // [0, 3]. The lower bound keeps a negative value from indexing before the
+    // start of the array.
+    void incDistanceDiffToPredictionAfterIncorrectLengthPrediction(const int len_diff) {
+      DISTAfterLenCorrection[(std::max)((std::min)(len_diff, 3), 0)]++;
+    }
+    // Same clamping as above, for the case where the length was predicted
+    // correctly and only the distance needed a correction.
+    void incDistanceDiffToPredictionAfterCorrectLengthPrediction(const int len_diff) {
+      DISTOnlyCorrection[(std::max)((std::min)(len_diff, 3), 0)]++;
+    }
+
+    // Number of histograms in this group.
+    static unsigned totalModels() {
+      return 6;
+    }
+    // Number of histograms whose events all fall in the default slot.
+    unsigned checkDefaultModels() const;
+
+    void print();
+
+  private:
+    unsigned LITMisprediction[2] = {}; // no, yes
+    unsigned REFMisprediction[2] = {}; // no, yes
+    unsigned LENCorrection[13] = {};   // -x, -5, -4, -3, -2, -1, 0, +1, +2, +3, +4, +5, +x (bytes)
+    unsigned LEN258IrregularEncoding[2] = {}; // no, yes
+    unsigned DISTAfterLenCorrection[4] = {}; // +0, +1, +2, +x (hops)
+    unsigned DISTOnlyCorrection[4] = {}; // +0, +1, +2, +x (hops)
+
+    friend struct PreflateTokenPredictionModel;
+  };
+
+public:
+  PreflateStatisticsCounter() {}
+
+  BlockPrediction block;
+  TreeCodePrediction treecode;
+  TokenPrediction token;
+
+  void print();
+};
+
+#endif /* PREFLATE_STATISTICS_COUNTER_H */
--- a/contrib/preflate/preflate_token.cpp
+++ b/contrib/preflate/preflate_token.cpp
@@ -0,0 +1,44 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include "preflate_token.h"
+
+// Compares two token blocks for equality of their encoded content.
+//  - Blocks of different type are never equal.
+//  - STORED blocks of the same type always compare equal; the
+//    uncompressedLen comparison is disabled in this function.
+//  - DYNAMIC_HUFF blocks must additionally agree on ncode/nlen/ndist and on
+//    their tree codes.
+//  - Huffman blocks must hold the same token sequence; tokens are compared by
+//    len and dist only.
+bool isEqual(const PreflateTokenBlock& b1, const PreflateTokenBlock& b2) {
+  if (b1.type != b2.type) {
+    return false;
+  }
+  if (b1.type == PreflateTokenBlock::STORED) {
+    return true;
+  }
+
+  if (b1.type == PreflateTokenBlock::DYNAMIC_HUFF) {
+    const bool sameCounts =
+        b1.ncode == b2.ncode && b1.nlen == b2.nlen && b1.ndist == b2.ndist;
+    if (!sameCounts) {
+      return false;
+    }
+    if (b1.treecodes != b2.treecodes) {
+      return false;
+    }
+  }
+
+  if (b1.tokens.size() != b2.tokens.size()) {
+    return false;
+  }
+  const unsigned count = b1.tokens.size();
+  unsigned pos = 0;
+  while (pos < count) {
+    const PreflateToken& lhs = b1.tokens[pos];
+    const PreflateToken& rhs = b2.tokens[pos];
+    if (lhs.len != rhs.len || lhs.dist != rhs.dist) {
+      return false;
+    }
+    ++pos;
+  }
+  return true;
+}
--- a/contrib/preflate/preflate_token.h
+++ b/contrib/preflate/preflate_token.h
@@ -0,0 +1,87 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef PREFLATE_TOKEN_H
+#define PREFLATE_TOKEN_H
+
+#include <stdint.h>
+#include <vector>
+
+/* A single token of a deflate block.
+   len is 1 for a literal and >= 3 for a reference; the NONE constructor
+   yields len 0. */
+struct PreflateToken {
+  /* Tag enums: each one selects a constructor overload. */
+  enum typeLit { LITERAL };
+  enum typeRef { REFERENCE };
+  enum typeNon { NONE };
+
+  unsigned short len : 9;          /* 9 bits hold the maximum match length 258 */
+  unsigned short irregular258 : 1; /* flag stored alongside len; see the REFERENCE constructor */
+  unsigned short dist;             /* 0 for LITERAL and NONE tokens */
+
+  /* "No token" marker: every field is zero. */
+  PreflateToken(typeNon)
+    : len(0), irregular258(0), dist(0) {}
+
+  /* Literal: len 1, no distance. */
+  PreflateToken(typeLit)
+    : len(1), irregular258(0), dist(0) {}
+
+  /* Reference of length l at distance d, optionally flagged irregular. */
+  PreflateToken(typeRef, unsigned short l, unsigned short d, bool irregular258_ = false)
+    : len(l), irregular258(irregular258_), dist(d) {}
+};
+
+/* One deflate block: its type, Huffman tree description (dynamic blocks only)
+   and token sequence, plus bookkeeping about the uncompressed range covered.
+
+   Every constructor initializes all scalar members, so a freshly built block
+   can be copied or compared without reading indeterminate values. */
+struct PreflateTokenBlock {
+  enum Type {
+    STORED, DYNAMIC_HUFF, STATIC_HUFF
+  };
+  /* Tag enums used to select a constructor overload. */
+  enum StoredBlockType {
+    STORED_X
+  };
+  enum HuffBlockType {
+    DYNAMIC_HUFF_X, STATIC_HUFF_X
+  };
+
+  Type type;
+  uint64_t uncompressedStartPos;
+  uint64_t uncompressedLen;
+  int32_t contextLen; // prefix size required to handle all references
+  unsigned short nlen, ndist, ncode;
+  uint8_t paddingBitCount, paddingBits;
+  std::vector<unsigned char> treecodes;
+  std::vector<PreflateToken> tokens;
+
+  /* Empty STORED block. */
+  PreflateTokenBlock()
+    : type(STORED)
+    , uncompressedStartPos(0)
+    , uncompressedLen(0)
+    , contextLen(0)
+    , nlen(0), ndist(0), ncode(0)
+    , paddingBitCount(0), paddingBits(0) {}
+  /* STORED block covering len_ uncompressed bytes. */
+  PreflateTokenBlock(StoredBlockType, int len_)
+    : type(STORED)
+    , uncompressedStartPos(0)
+    , uncompressedLen(len_)
+    , contextLen(0)
+    , nlen(0), ndist(0), ncode(0)
+    , paddingBitCount(0), paddingBits(0) {}
+  /* Empty Huffman block; t selects dynamic or static trees. */
+  PreflateTokenBlock(HuffBlockType t)
+    : type(t == DYNAMIC_HUFF_X ? DYNAMIC_HUFF : STATIC_HUFF)
+    , uncompressedStartPos(0)
+    , uncompressedLen(0)
+    , contextLen(0)
+    , nlen(0), ndist(0), ncode(0)
+    , paddingBitCount(0), paddingBits(0) {}
+  /* Stores the three code counts of a dynamic Huffman header. */
+  void setHuffLengths(int nlen_, int ndist_, int ncode_) {
+    nlen = nlen_;
+    ndist = ndist_;
+    ncode = ncode_;
+  }
+  /* Appends one tree code; the value is narrowed to unsigned char. */
+  void addTreeCode(int code) {
+    treecodes.push_back(code);
+  }
+  /* Appends one token to the block. */
+  void addToken(const PreflateToken& token) {
+    tokens.push_back(token);
+  }
+};
+
+/* Content comparison of two blocks; defined in preflate_token.cpp. */
+bool isEqual(const PreflateTokenBlock&, const PreflateTokenBlock&);
+
+
+#endif /* PREFLATE_TOKEN_H */
--- a/contrib/preflate/preflate_token_predictor.cpp
+++ b/contrib/preflate/preflate_token_predictor.cpp
@@ -0,0 +1,510 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include <algorithm>
+#include "preflate_constants.h"
+#include "preflate_statistical_model.h"
+#include "preflate_token_predictor.h"
+#include "support/bit_helper.h"
+
+// Builds a predictor over the uncompressed data in `dump`.
+//
+// `state` is handed references to `hash` and `seq`, which are constructed
+// after it; the member order in the class declaration fixes that sequence.
+// After construction the hash and sequence chains are advanced by `offset`
+// bytes. currentTokenCount starts at 0 so that predictEOB() never reads an
+// indeterminate value when it is called before analyzeBlock()/decodeBlock().
+PreflateTokenPredictor::PreflateTokenPredictor(
+    const PreflateParameters& params_,
+    const std::vector<unsigned char>& dump,
+    const size_t offset)
+  : state(hash, seq, params_.config(), params_.windowBits, params_.memLevel)
+  , hash(dump, params_.memLevel)
+  , seq(dump)
+  , params(params_)
+  , predictionFailure(false)
+  , fast(params_.isFastCompressor())
+  , prevLen(0)
+  , pendingToken(PreflateToken::NONE)
+  , currentTokenCount(0)
+  , emptyBlockAtEnd(false) {
+
+  // Feed the first two input bytes into the running hash (only when at least
+  // two bytes are available) and tell the sequence chain about them.
+  if (state.availableInputSize() >= 2) {
+    hash.updateRunningHash(state.inputCursor()[0]);
+    hash.updateRunningHash(state.inputCursor()[1]);
+    seq.updateSeq(2);
+  }
+  // Advance both chains over the first `offset` bytes.
+  hash.updateHash(offset);
+  seq.updateSeq(offset);
+}
+
+// Predicts that the current block ends here: either no input is left, or the
+// block already holds state.maxTokenCount tokens.
+bool PreflateTokenPredictor::predictEOB() {
+  if (state.availableInputSize() == 0) {
+    return true;
+  }
+  return currentTokenCount == state.maxTokenCount;
+}
+// Advances the hash and sequence chains past token t. In fast mode, tokens
+// longer than state.lazyMatchLength() go through skipHash() instead of
+// updateHash().
+void PreflateTokenPredictor::commitToken(const PreflateToken& t) {
+  const bool skipInsertion = fast && t.len > state.lazyMatchLength();
+  if (skipInsertion) {
+    hash.skipHash(t.len);
+  } else {
+    hash.updateHash(t.len);
+  }
+  seq.updateSeq(t.len);
+}
+#  define TOO_FAR 4096
+/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
+
+/* Predicts the token at the current input position.
+ *
+ * Returns a LITERAL token when
+ *   - the position is 0 or fewer than MIN_MATCH bytes remain,
+ *   - no match of at least MIN_MATCH bytes is found,
+ *   - (non-fast mode) the match has length 3 and a distance above TOO_FAR, or
+ *   - (non-fast mode) lazy evaluation finds a longer match one byte later.
+ * Otherwise returns the match as a reference token.
+ *
+ * Side effects: prevLen and pendingToken are reset on every call that gets
+ * past the first check, and are set again only when a lazy match is deferred
+ * in zlib-compatible mode.
+ */
+PreflateToken PreflateTokenPredictor::predictToken() {
+  if (state.currentInputPos() == 0 || state.availableInputSize() < PreflateConstants::MIN_MATCH) {
+    return PreflateToken(PreflateToken::LITERAL);
+  }
+  PreflateToken match(PreflateToken::NONE);
+  // Named curHash so that it does not shadow the `hash` member.
+  const unsigned curHash = state.calculateHash();
+  if (pendingToken.len > 1) {
+    // A match deferred by the previous call is used as-is.
+    match = pendingToken;
+  } else {
+    unsigned head = state.getCurrentHashHead(curHash);
+    if (!fast && seq.valid(state.currentInputPos())) {
+      match = state.seqMatch(state.currentInputPos(), head, prevLen,
+                             params.veryFarMatchesDetected,
+                             params.matchesToStartDetected,
+                             params.zlibCompatible ? 0 : (1 << params.log2OfMaxChainDepthM1));
+    } else {
+      match = state.match(head, prevLen, 0,
+                          params.veryFarMatchesDetected,
+                          params.matchesToStartDetected,
+                          params.zlibCompatible ? 0 : (1 << params.log2OfMaxChainDepthM1));
+    }
+  }
+  prevLen = 0;
+  pendingToken = PreflateToken(PreflateToken::NONE);
+  if (match.len < PreflateConstants::MIN_MATCH) {
+    return PreflateToken(PreflateToken::LITERAL);
+  }
+  if (fast) {
+    return match;
+  }
+  // From here on `fast` is known to be false.
+  if (match.len == 3 && match.dist > TOO_FAR) {
+    return PreflateToken(PreflateToken::LITERAL);
+  }
+
+  // Lazy evaluation: check whether the next position offers a longer match.
+  if (match.len < state.lazyMatchLength() && state.availableInputSize() >= (unsigned)match.len + 2) {
+    PreflateToken matchNext(PreflateToken::NONE);
+    unsigned hashNext = state.calculateHashNext();
+    unsigned headNext = state.getCurrentHashHead(hashNext);
+    if (seq.valid(state.currentInputPos() + 1)) {
+      matchNext = state.seqMatch(state.currentInputPos() + 1, headNext, match.len,
+                                 params.veryFarMatchesDetected,
+                                 params.matchesToStartDetected,
+                                 params.zlibCompatible ? 0 : (2 << params.log2OfMaxChainDepthM1));
+    } else {
+      matchNext = state.match(headNext, match.len, 1,
+                              params.veryFarMatchesDetected,
+                              params.matchesToStartDetected,
+                              params.zlibCompatible ? 0 : (2 << params.log2OfMaxChainDepthM1));
+
+      // Same masked hash at both positions: count how many bytes after the
+      // current one repeat it, and prefer that run (distance 1) when it beats
+      // both candidates.
+      if (((hashNext ^ curHash) & hash.hashMask) == 0) {
+        // (std::min<unsigned>) is parenthesized so that a min() macro from
+        // <windows.h> cannot expand here.
+        unsigned maxSize = (std::min<unsigned>)(state.availableInputSize() - 1, (unsigned)PreflateConstants::MAX_MATCH);
+        unsigned rle = 0;
+        const unsigned char *c = state.inputCursor();
+        unsigned char b = c[0];
+        while (rle < maxSize && c[1 + rle] == b) {
+          ++rle;
+        }
+        if (rle > match.len && rle >= matchNext.len) {
+          matchNext.len = rle;
+          matchNext.dist = 1;
+        }
+      }
+    }
+    if (matchNext.len > match.len) {
+      // Emit a literal now; only zlib-compatible mode carries the better
+      // match over to the next call.
+      prevLen = match.len;
+      pendingToken = matchNext;
+      if (!params.zlibCompatible) {
+        prevLen = 0;
+        pendingToken = PreflateToken(PreflateToken::NONE);
+      }
+      return PreflateToken(PreflateToken::LITERAL);
+    }
+  }
+
+  return match;
+}
+// Second attempt at finding a reference after a literal was predicted but the
+// actual token is a reference. Searches with prevLen 0 and a chain-depth
+// argument of (2 << log2OfMaxChainDepthM1). On success stores the match in
+// `token` and returns true; returns false when the position is 0, too little
+// input remains, or the match is shorter than MIN_MATCH. Whenever a search is
+// performed, prevLen and pendingToken are reset.
+bool PreflateTokenPredictor::repredictReference(PreflateToken& token) {
+  const bool atStart = state.currentInputPos() == 0;
+  if (atStart || state.availableInputSize() < PreflateConstants::MIN_MATCH) {
+    return false;
+  }
+
+  const unsigned hashValue = state.calculateHash();
+  const unsigned chainHead = state.getCurrentHashHead(hashValue);
+  const PreflateToken candidate = state.match(chainHead, /*prevLen*/0, 0,
+                                              params.veryFarMatchesDetected,
+                                              params.matchesToStartDetected,
+                                              (2 << params.log2OfMaxChainDepthM1));
+  prevLen = 0;
+  pendingToken = PreflateToken(PreflateToken::NONE);
+
+  if (candidate.len >= PreflateConstants::MIN_MATCH) {
+    token = candidate;
+    return true;
+  }
+  return false;
+}
+// Asks the predictor state for rematch information about `token`, starting
+// from the hash head of the current position.
+PreflateRematchInfo PreflateTokenPredictor::repredictMatch(const PreflateToken& token) {
+  const unsigned hashValue = state.calculateHash();
+  const unsigned chainHead = state.getCurrentHashHead(hashValue);
+  return state.rematchInfo(chainHead, token);
+}
+// Forwards to state.hopMatch() to obtain the distance for `token` after
+// `hops` hops.
+unsigned PreflateTokenPredictor::recalculateDistance(const PreflateToken& token, const unsigned hops) {
+  const unsigned distance = state.hopMatch(token, hops);
+  return distance;
+}
+
+// Replays one block against the predictor and records, per token, how the
+// prediction differed from the actual token. The result is appended to
+// analysisResults at index blockno.
+//
+// Nothing is recorded when blockno is not the next index to be appended or
+// when an earlier prediction failure was flagged. On a failure inside the
+// token loop, predictionFailure is set and the function returns with the
+// partially filled result left in analysisResults.
+//
+// tokenInfo encoding, one byte per token:
+//   bits 0-1: 0 = LIT predicted as LIT, 2 = LIT predicted as REF,
+//             1 = REF predicted as REF, 3 = REF predicted as LIT
+//   +4:  length mismatch (three values appended to correctives)
+//   +8:  distance-only mismatch (one value appended to correctives)
+//   +16: actual length is 258
+//   +32: ... and the token's irregular258 flag is set
+void PreflateTokenPredictor::analyzeBlock(
+    const unsigned blockno, 
+    const PreflateTokenBlock& block) {
+  // Per-block predictor state always restarts, even if the block is skipped.
+  currentTokenCount = 0;
+  prevLen = 0;
+  pendingToken = PreflateToken(PreflateToken::NONE);
+  if (blockno != analysisResults.size() || predictionFailure) {
+    return;
+  }
+  analysisResults.push_back(BlockAnalysisResult());
+  BlockAnalysisResult& analysis = analysisResults[blockno];
+
+  analysis.type = block.type;
+  analysis.tokenCount = block.tokens.size();
+  analysis.tokenInfo.resize(analysis.tokenCount);
+  analysis.blockSizePredicted = true;
+  analysis.inputEOF = false;
+
+  // Stored blocks carry no tokens: tokenCount is reused for the byte length
+  // and the chains are simply advanced over the stored bytes.
+  if (analysis.type == PreflateTokenBlock::STORED) {
+    analysis.tokenCount = block.uncompressedLen;
+    hash.updateHash(block.uncompressedLen);
+    seq.updateSeq(block.uncompressedLen);
+    analysis.inputEOF = state.availableInputSize() == 0;
+    analysis.paddingBits = block.paddingBits;
+    analysis.paddingCounts = block.paddingBitCount;
+    return;
+  }
+
+  for (unsigned i = 0, n = block.tokens.size(); i < n; ++i) {
+    PreflateToken targetToken = block.tokens[i];
+
+    //if (blockno == 0 && i == 0x6dd) {
+    //  puts("hi");
+    //}
+
+    // The block continues although end-of-block was predicted here.
+    if (predictEOB()) {
+      analysis.blockSizePredicted = false;
+    }
+    PreflateToken predictedToken = predictToken();
+#ifdef _DEBUG
+//    printf("B%dT%d: TGT(%d,%d) -> PRD(%d,%d)\n", blockno, i, targetToken.len, targetToken.dist, predictedToken.len, predictedToken.dist);
+#endif
+
+    if (targetToken.len == 1) {
+      if (predictedToken.len > 1) {
+        analysis.tokenInfo[currentTokenCount] = 2; // badly predicted LIT
+      } else {
+        analysis.tokenInfo[currentTokenCount] = 0; // perfectly predicted LIT
+      }
+    } else {
+      if (predictedToken.len == 1) {
+        analysis.tokenInfo[currentTokenCount] = 3; // badly predicted REF
+        // Replace the literal prediction by the best reference found now, so
+        // that length and distance can be compared below.
+        if (!repredictReference(predictedToken)) {
+          predictionFailure = true;
+          return;
+        }
+      } else {
+        analysis.tokenInfo[currentTokenCount] = 1; // well predicted REF
+      }
+      PreflateRematchInfo rematch;
+      if (predictedToken.len != targetToken.len) {
+        // bad LEN prediction: stores the predicted length, the length
+        // difference and (below) the hop count as correctives
+        analysis.tokenInfo[currentTokenCount] += 4;
+        analysis.correctives.push_back(predictedToken.len);
+        analysis.correctives.push_back(targetToken.len - predictedToken.len);
+        rematch = repredictMatch(targetToken);
+
+        // A requested depth of 0xffff or more is treated as failure.
+        if (rematch.requestedMatchDepth >= 0xffff) {
+          predictionFailure = true;
+          return;
+        }
+        analysis.correctives.push_back(rematch.condensedHops - 1);
+      } else {
+        if (targetToken.dist != predictedToken.dist) {
+          analysis.tokenInfo[currentTokenCount] += 8; // bad DIST ONLY prediction, adds one corrective action
+          rematch = repredictMatch(targetToken);
+
+          // A requested depth of 0xffff or more is treated as failure.
+          if (rematch.requestedMatchDepth >= 0xffff) {
+            predictionFailure = true;
+            return;
+          }
+          analysis.correctives.push_back(rematch.condensedHops - 1);
+        }
+      }
+    }
+    // Length 258 gets its own flag, plus a second one for the irregular form.
+    if (targetToken.len == 258) {
+      analysis.tokenInfo[currentTokenCount] += 16;
+      if (targetToken.irregular258) {
+        analysis.tokenInfo[currentTokenCount] += 32;
+      }
+    }
+    // Always advance with the actual token, not the predicted one.
+    commitToken(targetToken);
+    ++currentTokenCount;
+  }
+  // The block ended although end-of-block was not predicted here.
+  if (!predictEOB()) {
+    analysis.blockSizePredicted = false;
+  }
+  analysis.inputEOF = state.availableInputSize() == 0;
+}
+
+// Writes the analysis result of block `blockno` to `codec`.
+//
+// Stored blocks: block type, 16-bit length, a padding flag and, when the
+// padding bits are non-zero, their bit length (3 bits) followed by the bits
+// below the top set bit.
+// Huffman blocks: block type, the EOB misprediction flag (plus the token
+// count when mispredicted), then for every token its prediction flags and
+// any length/distance corrections, consuming `correctives` in order.
+void PreflateTokenPredictor::encodeBlock(
+    PreflatePredictionEncoder* codec,
+    const unsigned blockno) {
+  BlockAnalysisResult& result = analysisResults[blockno];
+
+  codec->encodeBlockType(result.type);
+
+  if (result.type == PreflateTokenBlock::STORED) {
+    codec->encodeValue(result.tokenCount, 16);
+    const bool hasPadding = result.paddingBits != 0;
+    codec->encodeNonZeroPadding(hasPadding);
+    if (!hasPadding) {
+      return;
+    }
+    const unsigned usedBits = bitLength(result.paddingBits);
+    codec->encodeValue(usedBits, 3);
+    if (usedBits > 1) {
+      // The top bit is implied by usedBits; only the bits below it are sent.
+      codec->encodeValue(result.paddingBits & ((1 << (usedBits - 1)) - 1), usedBits - 1);
+    }
+    return;
+  }
+
+  const bool sizeMispredicted = !result.blockSizePredicted;
+  codec->encodeEOBMisprediction(sizeMispredicted);
+  if (sizeMispredicted) {
+    const unsigned sizeBits = bitLength(result.tokenCount);
+    codec->encodeValue(sizeBits, 5);
+    // Counts 0 and 1 are fully described by their bit length.
+    if (sizeBits >= 2) {
+      codec->encodeValue(result.tokenCount, sizeBits);
+    }
+  }
+
+  unsigned nextCorrective = 0;
+  const unsigned total = result.tokenCount;
+  for (unsigned idx = 0; idx < total; ++idx) {
+    const unsigned char flags = result.tokenInfo[idx];
+    const unsigned outcome = flags & 3;
+
+    if (outcome == 0) { // well predicted LIT
+      codec->encodeLiteralPredictionWrong(false);
+      continue;
+    }
+    if (outcome == 2) { // badly predicted LIT
+      codec->encodeReferencePredictionWrong(true);
+      continue;
+    }
+    if (outcome == 1) { // well predicted REF
+      codec->encodeReferencePredictionWrong(false);
+    } else {            // badly predicted REF
+      codec->encodeLiteralPredictionWrong(true);
+    }
+
+    if (flags & 4) {
+      // Length correction: predicted length, difference, then hop count.
+      const int predictedLen = result.correctives[nextCorrective++];
+      const int lenDiff = result.correctives[nextCorrective++];
+      const int hopCount = result.correctives[nextCorrective++];
+      codec->encodeLenCorrection(predictedLen, predictedLen + lenDiff);
+      codec->encodeDistAfterLenCorrection(hopCount);
+    } else {
+      // Length was right: encode a zero correction.
+      codec->encodeLenCorrection(3, 3);
+      if (flags & 8) {
+        const int hopCount = result.correctives[nextCorrective++];
+        codec->encodeDistOnlyCorrection(hopCount);
+      } else {
+        codec->encodeDistOnlyCorrection(0);
+      }
+    }
+
+    if (flags & 16) {
+      codec->encodeIrregularLen258((flags & 32) != 0);
+    }
+  }
+}
+// Encodes the end-of-stream marker for block `blockno`. When the analysis saw
+// the input exhausted after this block, one bit tells whether more blocks
+// follow. When input was left over, nothing is written; being the last block
+// in that situation is flagged as a prediction failure.
+void PreflateTokenPredictor::encodeEOF(
+    PreflatePredictionEncoder* codec,
+    const unsigned blockno,
+    const bool lastBlock) {
+  BlockAnalysisResult& result = analysisResults[blockno];
+
+  if (!result.inputEOF) {
+    // If we still have input left, this shouldn't be the last block
+    if (lastBlock) {
+      predictionFailure = true;
+    }
+    return;
+  }
+  codec->encodeValue(!lastBlock, 1);
+}
+
+// Feeds the analysis result of block `blockno` into the statistics counters.
+// Walks tokenInfo and correctives in the same order as encodeBlock(), but
+// increments histogram slots instead of emitting bits.
+void PreflateTokenPredictor::updateCounters(
+  PreflateStatisticsCounter* model,
+  const unsigned blockno) {
+  BlockAnalysisResult& result = analysisResults[blockno];
+
+  model->block.incBlockType(result.type);
+
+  if (result.type == PreflateTokenBlock::STORED) {
+    model->block.incNonZeroPadding(result.paddingBits != 0);
+    return;
+  }
+
+  model->block.incEOBPredictionWrong(!result.blockSizePredicted);
+
+  unsigned nextCorrective = 0;
+  const unsigned total = result.tokenCount;
+  for (unsigned idx = 0; idx < total; ++idx) {
+    const unsigned char flags = result.tokenInfo[idx];
+    const unsigned outcome = flags & 3;
+
+    if (outcome == 0) { // well predicted LIT
+      model->token.incLiteralPredictionWrong(false);
+      continue;
+    }
+    if (outcome == 2) { // badly predicted LIT
+      model->token.incReferencePredictionWrong(true);
+      continue;
+    }
+    if (outcome == 1) { // well predicted REF
+      model->token.incReferencePredictionWrong(false);
+    } else {            // badly predicted REF
+      model->token.incLiteralPredictionWrong(true);
+    }
+
+    if (flags & 4) {
+      // The first corrective (the predicted length) is not needed here.
+      ++nextCorrective;
+      const int lenDiff = result.correctives[nextCorrective++];
+      const int hopCount = result.correctives[nextCorrective++];
+      model->token.incLengthDiffToPrediction(lenDiff);
+      model->token.incDistanceDiffToPredictionAfterIncorrectLengthPrediction(hopCount);
+    } else {
+      model->token.incLengthDiffToPrediction(0);
+      if (flags & 8) {
+        const int hopCount = result.correctives[nextCorrective++];
+        model->token.incDistanceDiffToPredictionAfterCorrectLengthPrediction(hopCount);
+      } else {
+        model->token.incDistanceDiffToPredictionAfterCorrectLengthPrediction(0);
+      }
+    }
+
+    if (flags & 16) {
+      model->token.incIrregularLength258Encoding((flags & 32) != 0);
+    }
+  }
+}
+
+// Reconstructs one token block by running the predictor and applying the
+// corrections read from `codec`; the read order mirrors encodeBlock().
+//
+// Returns the reconstructed block. On an unrecoverable mismatch
+// predictionFailure is set and a default-constructed block is returned.
+PreflateTokenBlock PreflateTokenPredictor::decodeBlock(
+    PreflatePredictionDecoder* codec) {
+  PreflateTokenBlock block;
+  // Per-block predictor state restarts with every block.
+  currentTokenCount = 0;
+  prevLen = 0;
+  pendingToken = PreflateToken(PreflateToken::NONE);
+  unsigned blocksize = 0;
+  bool checkEOB = true;
+  unsigned bt = codec->decodeBlockType();
+  // NOTE(review): there is no default case; a value outside the three block
+  // types would leave block.type at its constructor default and continue
+  // with the Huffman path below.
+  switch (bt) {
+  case PreflateTokenBlock::STORED:
+    block.type = PreflateTokenBlock::STORED;
+    block.uncompressedLen = codec->decodeValue(16);
+    block.paddingBits = 0;
+    block.paddingBitCount = 0;
+    if (codec->decodeNonZeroPadding()) {
+      // The 3-bit value is the bit length of the padding bits; the top bit
+      // is implied and only the bits below it were transmitted.
+      block.paddingBitCount = codec->decodeValue(3);
+      if (block.paddingBitCount > 0) {
+        block.paddingBits = (1 << (block.paddingBitCount - 1)) + codec->decodeValue(block.paddingBitCount - 1);
+      } else {
+        block.paddingBits = 0;
+      }
+    }
+    // Stored bytes carry no tokens; just advance the chains over them.
+    hash.updateHash(block.uncompressedLen);
+    seq.updateSeq(block.uncompressedLen);
+    return block;
+  case PreflateTokenBlock::STATIC_HUFF:
+    block.type = PreflateTokenBlock::STATIC_HUFF;
+    break;
+  case PreflateTokenBlock::DYNAMIC_HUFF:
+    block.type = PreflateTokenBlock::DYNAMIC_HUFF;
+    break;
+  }
+
+  if (codec->decodeEOBMisprediction()) {
+    // Explicit token count: 5 bits of bit length, then the count itself
+    // (counts 0 and 1 are given by the bit length alone).
+    unsigned blocksizeBits = codec->decodeValue(5);
+    if (blocksizeBits >= 2) {
+      blocksize = codec->decodeValue(blocksizeBits);
+    } else {
+      blocksize = blocksizeBits;
+    }
+    block.tokens.reserve(blocksize);
+    checkEOB = false;
+  } else {
+    // The block ends where predictEOB() says so; reserve a size derived
+    // from memLevel.
+    block.tokens.reserve(1 << (6 + params.memLevel));
+  }
+  while ((checkEOB && !predictEOB())
+         || (!checkEOB && currentTokenCount < blocksize)) {
+    PreflateToken predictedToken = predictToken();
+//    printf("P(%d,%d)\n", predictedToken.len, predictedToken.dist);
+    if (predictedToken.len == 1) {
+      // Literal predicted: either confirmed, or the real token is a
+      // reference and a reference must be re-predicted.
+      unsigned notok = codec->decodeLiteralPredictionWrong();
+      if (!notok) {
+        block.tokens.push_back(predictedToken);
+        commitToken(predictedToken);
+        ++currentTokenCount;
+        continue;
+      }
+      if (!repredictReference(predictedToken)) {
+        predictionFailure = true;
+        return PreflateTokenBlock();
+      }
+    } else {
+      // Reference predicted: if wrong, the real token is a literal.
+      unsigned notok = codec->decodeReferencePredictionWrong();
+      if (notok) {
+        predictedToken.len = 1;
+        predictedToken.dist = 0;
+        block.tokens.push_back(predictedToken);
+        commitToken(predictedToken);
+        ++currentTokenCount;
+        continue;
+      }
+    }
+    // From here on the token is a reference; apply length and distance
+    // corrections.
+    unsigned newLen = codec->decodeLenCorrection(predictedToken.len);
+    if (newLen != predictedToken.len) {
+      // Length changed: restart from the first match of the new length and
+      // apply the decoded number of hops.
+      unsigned hops = codec->decodeDistAfterLenCorrection();
+      predictedToken.len = newLen;
+      predictedToken.dist = state.firstMatch(predictedToken.len);
+      if (hops) {
+        predictedToken.dist = recalculateDistance(predictedToken, hops);
+      }
+      if (predictedToken.len < 3 || predictedToken.len > 258
+          || predictedToken.dist == 0) {
+        predictionFailure = true;
+        return PreflateTokenBlock();
+      }
+    } else {
+      // Length was right: only the distance may need hops.
+      unsigned hops = codec->decodeDistOnlyCorrection();
+      if (hops) {
+        predictedToken.dist = recalculateDistance(predictedToken, hops);
+        if (predictedToken.dist == 0) {
+          predictionFailure = true;
+          return PreflateTokenBlock();
+        }
+      }
+    }
+    // Length 258 carries one extra flag for its irregular form.
+    if (predictedToken.len == 258) {
+      predictedToken.irregular258 = codec->decodeIrregularLen258();
+    }
+    block.tokens.push_back(predictedToken);
+    commitToken(predictedToken);
+    ++currentTokenCount;
+  }
+  return block;
+}
+// Reads the end-of-stream marker written by encodeEOF(). While input is still
+// available no bit is read and the result is false; otherwise one bit is read
+// and the result is true when that bit is 0.
+bool PreflateTokenPredictor::decodeEOF(PreflatePredictionDecoder* codec) {
+  if (state.availableInputSize() != 0) {
+    return false;
+  }
+  return codec->decodeValue(1) == 0;
+}
+// True when the predictor state reports no remaining input bytes.
+bool PreflateTokenPredictor::inputEOF() {
+  const bool exhausted = state.availableInputSize() == 0;
+  return exhausted;
+}
--- a/contrib/preflate/preflate_token_predictor.h
+++ b/contrib/preflate/preflate_token_predictor.h
@@ -0,0 +1,76 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef PREFLATE_TOKEN_PREDICTOR_H
+#define PREFLATE_TOKEN_PREDICTOR_H
+
+#include <vector>
+
+#include "preflate_parameter_estimator.h"
+#include "preflate_predictor_state.h"
+#include "preflate_statistical_codec.h"
+
+struct PreflateStatisticalModel;
+struct PreflateStatisticalCodec;
+
+// Predicts the token stream of deflate blocks and records or applies the
+// differences between prediction and reality.
+//
+// Encoding side: analyzeBlock() per block, then updateCounters(),
+// encodeBlock() and encodeEOF() on the stored results.
+// Decoding side: decodeBlock() and decodeEOF().
+//
+// Member order matters: the constructor passes `hash` and `seq` to `state`
+// by reference and initializes the members in declaration order.
+struct PreflateTokenPredictor {
+  PreflatePredictorState state;
+  PreflateHashChainExt     hash;
+  PreflateSeqChain     seq;
+  PreflateParameters     params;
+  bool predictionFailure;    // set once prediction cannot be reconciled with the data
+  bool                  fast; // params.isFastCompressor() at construction time
+  unsigned prevLen;          // length of a match deferred by lazy evaluation, else 0
+  PreflateToken pendingToken; // match deferred to the next predictToken() call, else NONE
+  unsigned currentTokenCount; // tokens processed so far in the current block
+  bool emptyBlockAtEnd;      // initialized to false by the constructor
+
+  // Outcome of analyzeBlock() for one block.
+  struct BlockAnalysisResult {
+    PreflateTokenBlock::Type type;
+    unsigned tokenCount;      // token count; for STORED blocks the uncompressed length
+    bool blockSizePredicted;  // false when end-of-block was mispredicted
+    bool inputEOF;            // no input was left after this block
+    bool lastBlock;           // not written by analyzeBlock()
+    uint8_t paddingBits, paddingCounts; // copied from STORED blocks only
+    std::vector<unsigned char> tokenInfo; // per-token flag byte, see analyzeBlock()
+    std::vector<signed> correctives;      // correction values in token order
+  };
+  std::vector<BlockAnalysisResult> analysisResults; // indexed by block number
+
+  PreflateTokenPredictor(const PreflateParameters& params,
+                        const std::vector<unsigned char>& uncompressed,
+                        const size_t offset);
+  // Encoding side. blockno indexes analysisResults.
+  void analyzeBlock(const unsigned blockno, 
+                    const PreflateTokenBlock& block);
+  void updateCounters(PreflateStatisticsCounter*,
+                   const unsigned blockno);
+  void encodeBlock(PreflatePredictionEncoder*,
+                   const unsigned blockno);
+  void encodeEOF(PreflatePredictionEncoder*,
+                 const unsigned blockno,
+                 const bool lastBlock);
+
+  // Decoding side.
+  PreflateTokenBlock decodeBlock(PreflatePredictionDecoder*);
+  bool decodeEOF(PreflatePredictionDecoder*);
+  bool inputEOF();
+
+  // Prediction primitives shared by both sides.
+  bool predictEOB();
+  PreflateToken predictToken();
+  bool repredictReference(PreflateToken& token);
+  PreflateRematchInfo repredictMatch(const PreflateToken&);
+  unsigned recalculateDistance(const PreflateToken&, const unsigned hops);
+  void commitToken(const PreflateToken&);
+};
+
+#endif /* PREFLATE_TOKEN_PREDICTOR_H */
--- a/contrib/preflate/preflate_tree_predictor.cpp
+++ b/contrib/preflate/preflate_tree_predictor.cpp
@@ -0,0 +1,647 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include <algorithm>
+#include <string.h>
+#include "preflate_constants.h"
+#include "preflate_statistical_codec.h"
+#include "preflate_statistical_model.h"
+#include "preflate_tree_predictor.h"
+
+// Creates a tree predictor reading from `dump`: `input` is constructed from
+// `dump` and then advanced by `off`, and `predictionFailure` starts as false.
+PreflateTreePredictor::PreflateTreePredictor(
+    const std::vector<unsigned char>& dump,
+    const size_t off)
+  : input(dump)
+  , predictionFailure(false) {
+  input.advance(off);
+}
+
+// Priority-queue entry used while building a Huffman tree: a frequency and
+// the index of the node (symbol or merged subtree) that carries it.
+struct FreqIdxPair {
+  unsigned freq;  // accumulated frequency of the node
+  unsigned idx;   // node index; also used to look up the node's depth
+};
+// Child-to-parent link recorded each time two subtrees are merged.
+struct TreeNode {
+  unsigned parent;  // index of the merged (parent) node
+  unsigned idx;     // index of the child node
+};
+
+/* ===========================================================================
+* Orders two subtrees by frequency. When both have the same frequency, the
+* subtree whose depth is smaller or equal is treated as the smaller one; this
+* tie breaker minimizes the worst case length.
+*/
+bool pq_smaller(const FreqIdxPair& p1, const FreqIdxPair& p2, const unsigned char* nodeDepth) {
+  if (p1.freq != p2.freq) {
+    return p1.freq < p2.freq;
+  }
+  return nodeDepth[p1.idx] <= nodeDepth[p2.idx];
+}
+
+/* ===========================================================================
+* Sifts the entry at position `index` down the binary heap `ptr[0..len)`:
+* the entry is swapped with the smaller of its two sons for as long as it is
+* not itself smaller than both, which re-establishes the heap property
+* (each father smaller than its two sons).
+*/
+void pq_downheap(FreqIdxPair* ptr, const unsigned index, const unsigned len, const unsigned char* depth) {
+  const FreqIdxPair moving = ptr[index];
+  unsigned hole = index;
+  /* `son` starts as the left son of the hole */
+  for (unsigned son = 2 * hole + 1; son < len; son = 2 * hole + 1) {
+    /* Prefer the right son when it exists and is the smaller one */
+    const unsigned rightSon = son + 1;
+    if (rightSon < len && pq_smaller(ptr[rightSon], ptr[son], depth)) {
+      son = rightSon;
+    }
+    /* Done as soon as the moving entry is smaller than both sons */
+    if (pq_smaller(moving, ptr[son], depth)) {
+      break;
+    }
+    /* Pull the smaller son up and continue one level further down */
+    ptr[hole] = ptr[son];
+    hole = son;
+  }
+  ptr[hole] = moving;
+}
+
+/* Turns ptr[0..len) into a heap ordered by pq_smaller.
+ * Only positions below len / 2 have sons, so sifting starts there and walks
+ * back to the root. Starting at len / 2 also makes an empty range (len == 0)
+ * a no-op; the former start value (len - 1) / 2 + 1 wrapped around for
+ * len == 0 and would have sifted far outside the array.
+ */
+void pq_makeheap(FreqIdxPair* ptr, const unsigned len, const unsigned char* depth) {
+  for (unsigned n = len / 2; n > 0; n--) {
+    pq_downheap(ptr, n - 1, len, depth);
+  }
+}
+
+/* Removes and returns the smallest entry (the heap root). `len` is
+ * decremented, the former last entry takes the root position and is sifted
+ * down. The caller must pass len >= 1.
+ */
+FreqIdxPair pq_remove(FreqIdxPair* ptr, unsigned& len, const unsigned char* depth) {
+  const FreqIdxPair smallest = ptr[0];
+  len -= 1;
+  ptr[0] = ptr[len];
+  pq_downheap(ptr, 0, len, depth);
+  return smallest;
+}
+
+/* Computes Huffman code lengths, limited to maxBits, for one alphabet.
+ *
+ * symBitLen  - receives symCount code lengths (0 for symbols with frequency 0)
+ * symFreq    - symCount symbol frequencies
+ * symCount   - alphabet size; the local work arrays are sized for
+ *              LITLEN_CODE_COUNT symbols, so it must not exceed that
+ * maxBits    - longest allowed code length; bl_count has 16 slots, so it must
+ *              be at most 15
+ * minMaxCode - lower bound for the return value
+ *
+ * Returns max(minMaxCode, highest used symbol index + 1).
+ * NOTE(review): the structure closely resembles zlib's build_tree/gen_bitlen;
+ * `max` is used unqualified, presumably a macro supplied via pch.h — confirm.
+ */
+unsigned PreflateTreePredictor::calcBitLengths(
+    unsigned char* symBitLen,
+    const unsigned* symFreq,
+    const unsigned symCount,
+    const unsigned maxBits,
+    const unsigned minMaxCode) {
+  FreqIdxPair toSort[PreflateConstants::LITLEN_CODE_COUNT];
+  TreeNode nodes[PreflateConstants::LITLEN_CODE_COUNT * 2 + 1];
+  unsigned char nodeBitLen[PreflateConstants::LITLEN_CODE_COUNT * 2 + 1];
+  unsigned char nodeDepth[PreflateConstants::LITLEN_CODE_COUNT * 2 + 1];
+  memset(nodeBitLen, 0, sizeof(nodeBitLen));
+  memset(nodeDepth, 0, sizeof(nodeDepth));
+  // Internal (merged) nodes get ids starting at symCount, so any id below
+  // symCount denotes a leaf.
+  unsigned maxCode = 0, len = 0, nodeCount = 0, nodeId = symCount;
+  // Collect all used symbols; maxCode ends up as the highest used index.
+  for (unsigned i = 0; i < symCount; ++i) {
+    if (symFreq[i]) {
+      toSort[len++] = FreqIdxPair {symFreq[i], maxCode = i};
+    }
+  }
+  // Fewer than two used symbols: no tree is built. Two symbols are given
+  // length 1 instead: maxCode, plus maxCode + 1 when maxCode < 2, else symbol 0.
+  if (len < 2) {
+    memset(symBitLen, 0, symCount);
+    symBitLen[maxCode] = 1;
+    symBitLen[maxCode < 2 ? ++maxCode : 0] = 1;
+    return max(minMaxCode, maxCode + 1);
+  }
+
+  // Repeatedly merge the two least frequent subtrees. Each merge records two
+  // child->parent links, so parents always appear later in `nodes` than
+  // their children.
+  pq_makeheap(toSort, len, nodeDepth);
+  while (len > 1) {
+    FreqIdxPair least1 = pq_remove(toSort, len, nodeDepth);
+    FreqIdxPair least2 = toSort[0];
+    toSort[0] = FreqIdxPair {least1.freq + least2.freq, nodeId};
+    nodes[nodeCount++] = TreeNode {nodeId, least1.idx};
+    nodes[nodeCount++] = TreeNode {nodeId, least2.idx};
+    nodeDepth[nodeId] = max(nodeDepth[least1.idx], nodeDepth[least2.idx]) + 1;
+    // note? original code put new entry at top of heap, and moved it downwards
+    // while push_heap pushes it upwards
+    pq_downheap(toSort, 0, len, nodeDepth);
+    nodeId++;
+  }
+  // Walk the links backwards (parents before children) and assign
+  // length = parent length + 1, clamped to maxBits. The root keeps length 0
+  // from the memset. bl_count counts leaves per length; `overflow` counts
+  // every clamped node.
+  unsigned overflow = 0;
+  unsigned bl_count[16];
+  memset(bl_count, 0, sizeof(bl_count));
+  unsigned orgNodeCount = nodeCount;
+  while (nodeCount-- > 0) {
+    unsigned char newLen = nodeBitLen[nodes[nodeCount].parent] + 1;
+    if (newLen > maxBits) {
+      newLen = maxBits;
+      ++overflow;
+    }
+    unsigned idx = nodes[nodeCount].idx;
+    nodeBitLen[idx] = newLen;
+    if (idx < symCount) {
+      bl_count[newLen]++;
+    }
+  }
+
+  if (overflow) {
+    // Rebalance the per-length leaf counts, two overflow items per round.
+    // NOTE(review): `overflow` is unsigned and reduced by 2 per round, so the
+    // loop only terminates as intended when the count is even — confirm that
+    // clamped nodes always come in sibling pairs.
+    unsigned bits;
+    do {
+      for (bits = maxBits - 1; bl_count[bits] == 0; bits--) {
+      }
+      bl_count[bits]--;      /* move one leaf down the tree */
+      bl_count[bits + 1] += 2; /* move one overflow item as its brother */
+      bl_count[maxBits]--;
+      /* The brother of the overflow item also moves one step up,
+      * but this does not affect bl_count[max_length]
+      */
+      overflow -= 2;
+    } while (overflow > 0);
+
+    // Hand the rebalanced lengths back to the leaves, longest lengths first,
+    // visiting the leaves in the same backwards link order as above.
+    for (bits = maxBits, nodeCount = orgNodeCount; nodeCount > 0; ) {
+      --nodeCount;
+      unsigned idx = nodes[nodeCount].idx;
+      if (idx >= symCount) {
+        continue;
+      }
+      while (bl_count[bits] == 0) {
+        bits--;
+      }
+      nodeBitLen[idx] = bits;
+      bl_count[bits]--;
+    }
+  }
+  // The first symCount entries of nodeBitLen are the leaf lengths.
+  memcpy(symBitLen, nodeBitLen, symCount);
+  return max(minMaxCode, maxCode + 1);
+}
+
+/* Predicts how the code length at symBitLen[0] will be encoded.
+ *
+ * A zero length that starts a run of at least 11 zeros (within symCount)
+ * yields TCT_REPZL, a run of at least 3 zeros yields TCT_REPZS. A non-zero
+ * length that equals its predecessor symBitLen[-1] and is followed by two
+ * more equal lengths yields TCT_REP. Everything else is TCT_BITS.
+ * symBitLen[-1] is only read when `first` is false.
+ */
+TreeCodeType PreflateTreePredictor::predictCodeType(const unsigned char* symBitLen,
+                                                   const unsigned symCount,
+                                                   const bool first) {
+  const unsigned char head = symBitLen[0];
+  if (head != 0) {
+    // A repeat code needs a predecessor with the same length.
+    if (first || head != symBitLen[-1]) {
+      return TCT_BITS;
+    }
+    const unsigned char runLimit = min(symCount, 3u);
+    unsigned char run = 1;
+    for (; run < runLimit; ++run) {
+      if (symBitLen[run] != head) {
+        break;
+      }
+    }
+    return run >= 3 ? TCT_REP : TCT_BITS;
+  }
+
+  // Zero length: measure the run of zeros, looking at up to 11 entries.
+  const unsigned char zeroLimit = min(symCount, 11u);
+  unsigned char zeroRun = 1;
+  for (; zeroRun < zeroLimit; ++zeroRun) {
+    if (symBitLen[zeroRun] != 0) {
+      break;
+    }
+  }
+  if (zeroRun >= 11) {
+    return TCT_REPZL;
+  }
+  return zeroRun >= 3 ? TCT_REPZS : TCT_BITS;
+}
+/* Predicts the payload that goes with a code of the given type.
+ *
+ * TCT_BITS (and any unknown type): the code length symBitLen[0] itself.
+ * TCT_REP:   length of the run of entries equal to symBitLen[0], counted
+ *            from 3 and capped at min(symCount, 6).
+ * TCT_REPZS: length of the run of zero entries, counted from 3 and capped at
+ *            min(symCount, 10).
+ * TCT_REPZL: same, counted from 11 and capped at min(symCount, 138).
+ * The `first` parameter is not used.
+ */
+unsigned char PreflateTreePredictor::predictCodeData(const unsigned char* symBitLen,
+                                                    const TreeCodeType type,
+                                                    const unsigned symCount,
+                                                    const bool first) {
+  const unsigned char head = symBitLen[0];
+  const bool isRepeat = type == TCT_REP;
+  const bool isShortZeroRun = type == TCT_REPZS;
+  const bool isLongZeroRun = type == TCT_REPZL;
+  if (!isRepeat && !isShortZeroRun && !isLongZeroRun) {
+    return head;
+  }
+
+  // Value the run consists of, its minimum length and its maximum length.
+  const unsigned char wanted = isRepeat ? head : 0;
+  unsigned char run = isLongZeroRun ? 11 : 3;
+  const unsigned cap = isRepeat ? 6u : (isShortZeroRun ? 10u : 138u);
+  const unsigned char runLimit = min(symCount, cap);
+  for (; run < runLimit; ++run) {
+    if (symBitLen[run] != wanted) {
+      break;
+    }
+  }
+  return run;
+}
+
+
+/* Encoder side: walks the actual code-length sequence (targetCodes) of the
+ * literal/length and distance trees and records, for every code, how it
+ * differs from the prediction made from symBitLen.
+ *
+ * analysis       - receives one tokenInfo byte per code plus correctives
+ * frequencies    - CODETREE_CODE_COUNT counters, zeroed here, incremented
+ *                  once per target code
+ * symBitLen      - predicted bit lengths, symLCount literal/length entries
+ *                  followed by symDCount distance entries
+ * targetCodes    - actual codes; codes 16/17/18 are followed by one data byte
+ * targetCodeSize - number of bytes in targetCodes
+ *
+ * tokenInfo byte layout: bits 0-1 predicted type, bit 2 (4) type
+ * mispredicted, bit 3 (8) repeat count mispredicted. A 0xff byte terminates
+ * the list. Sets predictionFailure when the target codes do not cover exactly
+ * symLCount + symDCount lengths or a repeat code lacks its data byte.
+ */
+void PreflateTreePredictor::predictLDTrees(
+    BlockAnalysisResult& analysis,
+    unsigned* frequencies,
+    const unsigned char* symBitLen,
+    const unsigned symLCount,
+    const unsigned symDCount,
+    const unsigned char* targetCodes,
+    const unsigned targetCodeSize) {
+  memset(frequencies, 0, sizeof(unsigned) * PreflateConstants::CODETREE_CODE_COUNT);
+  const unsigned char* ptr = symBitLen;
+  const unsigned char* code = targetCodes;
+  unsigned codeSize = targetCodeSize;
+  // count1: lengths left in the tree currently being walked,
+  // count2: lengths of the tree still to come (0 once it has been merged in).
+  unsigned count1 = symLCount;
+  unsigned count2 = symDCount;
+  bool first = true;
+  while (codeSize > 0) {
+    TreeCodeType targetTreeCodeType;
+    switch (code[0]) {
+    case 16: targetTreeCodeType = TCT_REP; break;
+    case 17: targetTreeCodeType = TCT_REPZS; break;
+    case 18: targetTreeCodeType = TCT_REPZL; break;
+    default: targetTreeCodeType = TCT_BITS; break;
+    }
+    // Repeat codes need a second byte carrying the repeat count.
+    if (codeSize < 2 && targetTreeCodeType != TCT_BITS) {
+      predictionFailure = true;
+      return;
+    }
+    TreeCodeType predictedTreeCodeType = predictCodeType(ptr, count1, first);
+    unsigned char info = predictedTreeCodeType | ((targetTreeCodeType != predictedTreeCodeType) << 2);
+    if (targetTreeCodeType != predictedTreeCodeType) {
+      analysis.correctives.push_back(targetTreeCodeType);
+    }
+    // Data is the code itself for TCT_BITS, otherwise the byte after it.
+    unsigned char targetTreeCodeData = code[targetTreeCodeType != TCT_BITS];
+    unsigned l = 1 + (targetTreeCodeType != TCT_BITS);
+    code += l;
+    codeSize -= l;
+    // The data prediction is made for the actual (target) type.
+    unsigned char predictedTreeCodeData = predictCodeData(ptr, targetTreeCodeType, count1, first);
+    first = false;
+    if (targetTreeCodeType != TCT_BITS) {
+      // Repeat: predicted count, plus the actual count only on a mismatch.
+      analysis.correctives.push_back(predictedTreeCodeData);
+      if (targetTreeCodeData != predictedTreeCodeData) {
+        info |= 8;
+        analysis.correctives.push_back(targetTreeCodeData);
+      }
+    } else {
+      // Plain length: predicted length followed by the signed difference.
+      analysis.correctives.push_back(predictedTreeCodeData);
+      analysis.correctives.push_back(targetTreeCodeData - predictedTreeCodeData);
+    }
+    // l becomes the number of bit lengths this code covers.
+    if (targetTreeCodeType != TCT_BITS) {
+      frequencies[targetTreeCodeType + 15]++;
+      l = targetTreeCodeData;
+    } else {
+      frequencies[targetTreeCodeData]++;
+      l = 1;
+    }
+    ptr += l;
+    if (count1 > l) {
+      count1 -= l;
+    } else {
+      // Current tree is used up (possibly by a run that continues into the
+      // next tree): merge in the remaining tree and restart with first = true.
+      count1 += count2;
+      count2 = 0;
+      first = true;
+      if (count1 >= l) {
+        count1 -= l;
+      } else {
+        predictionFailure = true;
+        return;
+      }
+    }
+    analysis.tokenInfo.push_back(info);
+  }
+  analysis.tokenInfo.push_back(0xff);
+  // All lengths of both trees must have been covered.
+  if (count1 + count2 != 0) {
+    predictionFailure = true;
+  }
+}
+
+/* Counts how often each literal/length code and each distance code occurs in
+ * the tokens of `block`, advancing `input` over the bytes the tokens cover.
+ *
+ * Lcodes / Dcodes are zeroed first (LITLEN_CODE_COUNT / DIST_CODE_COUNT
+ * entries). Lcount receives the number of tokens, Dcount the number of
+ * tokens whose len is not 1. Lcodes[256] is always set to 1 at the end.
+ */
+void PreflateTreePredictor::collectTokenStatistics(
+    unsigned Lcodes[],
+    unsigned Dcodes[],
+    unsigned& Lcount,
+    unsigned& Dcount,
+    const PreflateTokenBlock& block) {
+  memset(Lcodes, 0, sizeof(unsigned) * PreflateConstants::LITLEN_CODE_COUNT);
+  memset(Dcodes, 0, sizeof(unsigned) * PreflateConstants::DIST_CODE_COUNT);
+  Lcount = 0;
+  Dcount = 0;
+  const unsigned tokenTotal = block.tokens.size();
+  for (unsigned pos = 0; pos != tokenTotal; ++pos) {
+    PreflateToken token = block.tokens[pos];
+    // Every token contributes one literal/length code.
+    ++Lcount;
+    if (token.len != 1) {
+      // Tokens with len != 1 add a length code and a distance code.
+      Lcodes[PreflateConstants::NONLEN_CODE_COUNT + PreflateConstants::LCode(token.len)]++;
+      Dcodes[PreflateConstants::DCode(token.dist)]++;
+      ++Dcount;
+      input.advance(token.len);
+      continue;
+    }
+    // len == 1: the code is the current input byte.
+    Lcodes[input.curChar()]++;
+    input.advance(1);
+  }
+  Lcodes[256] = 1;
+}
+// Computes the literal/length tree bit lengths from the Lcodes frequencies:
+// calcBitLengths over LITLEN_CODE_COUNT symbols, at most 15 bits per code,
+// with the returned size being at least NONLEN_CODE_COUNT.
+unsigned PreflateTreePredictor::buildLBitlenghs(
+    unsigned char bitLengths[],
+    unsigned Lcodes[]) {
+  return calcBitLengths(bitLengths, Lcodes, PreflateConstants::LITLEN_CODE_COUNT, 15, PreflateConstants::NONLEN_CODE_COUNT);
+}
+// Computes the distance tree bit lengths from the Dcodes frequencies:
+// calcBitLengths over DIST_CODE_COUNT symbols, at most 15 bits per code, with
+// no lower bound on the returned size. bitLengths must have room for
+// DIST_CODE_COUNT entries.
+unsigned PreflateTreePredictor::buildDBitlenghs(
+  unsigned char bitLengths[],
+  unsigned Dcodes[]) {
+  return calcBitLengths(bitLengths, Dcodes, PreflateConstants::DIST_CODE_COUNT, 15, 0);
+}
+/* Computes the bit lengths of the code-length ("tree code") alphabet from
+ * BLfreqs (at most 7 bits per code) into simpleCodeTree, and returns the
+ * predicted number of tree codes: trailing entries, taken in
+ * treeCodeOrderTable order, whose bit length is 0 are dropped, but the
+ * result never goes below 4.
+ */
+unsigned PreflateTreePredictor::buildTCBitlengths(
+    unsigned char (&simpleCodeTree)[PreflateConstants::CODETREE_CODE_COUNT],
+    unsigned (&BLfreqs)[PreflateConstants::CODETREE_CODE_COUNT]) {
+  memset(simpleCodeTree, 0, sizeof(simpleCodeTree));
+  calcBitLengths(simpleCodeTree, BLfreqs, PreflateConstants::CODETREE_CODE_COUNT, 7, 0);
+
+  unsigned treeSize = PreflateConstants::CODETREE_CODE_COUNT;
+  for (; treeSize > 4; --treeSize) {
+    const unsigned lastSymbol = PreflateConstants::treeCodeOrderTable[treeSize - 1];
+    if (simpleCodeTree[lastSymbol] != 0) {
+      break;
+    }
+  }
+  return treeSize;
+}
+
+/* Encoder side: analyzes the tree description of block number `blockno` and
+ * appends the result to analysisResults.
+ *
+ * Does nothing when blocks are not analyzed in order (blockno must equal the
+ * number of results so far) or after a prediction failure. For blocks other
+ * than DYNAMIC_HUFF only the block type is recorded.
+ *
+ * Layout written for a DYNAMIC_HUFF block (consumed by encodeBlock and
+ * updateCounters in the same order):
+ *   tokenInfo:   [nlen mispredicted] [ndist mispredicted]
+ *                [one byte per tree code ...] 0xff
+ *                [ncode] [ncode mispredicted]
+ *   correctives: [nlen if mispredicted] [ndist if mispredicted]
+ *                [per-code values from predictLDTrees ...]
+ *                [predicted length, difference] for each of the ncode entries
+ *
+ * NOTE(review): block.treecodes is assumed to hold at least block.ncode
+ * entries, and block.nlen / block.ndist / block.ncode are assumed to fit the
+ * local arrays; none of this is checked here — presumably guaranteed by the
+ * code that produced the block, confirm.
+ */
+void PreflateTreePredictor::analyzeBlock(
+    const unsigned blockno,
+    const PreflateTokenBlock& block) {
+  if (blockno != analysisResults.size() || predictionFailure) {
+    return;
+  }
+  analysisResults.push_back(BlockAnalysisResult());
+  BlockAnalysisResult& analysis = analysisResults[blockno];
+  analysis.blockType = block.type;
+  if (analysis.blockType != PreflateTokenBlock::DYNAMIC_HUFF) {
+    return;
+  }
+
+  // Token statistics drive all tree predictions; this also advances `input`.
+  unsigned Lcodes[PreflateConstants::LITLEN_CODE_COUNT], Dcodes[PreflateConstants::DIST_CODE_COUNT];
+  unsigned Lcount = 0, Dcount = 0;
+  collectTokenStatistics(Lcodes, Dcodes, Lcount, Dcount, block);
+
+  // Literal/length tree: predict its size, then continue with the real one.
+  unsigned char bitLengths[PreflateConstants::LITLENDIST_CODE_COUNT];
+  memset(bitLengths, 0, sizeof(bitLengths));
+  unsigned predictedLTreeSize = buildLBitlenghs(bitLengths, Lcodes);
+  analysis.tokenInfo.push_back(predictedLTreeSize != block.nlen);
+  if (predictedLTreeSize != block.nlen) {
+    analysis.correctives.push_back(block.nlen);
+  }
+  predictedLTreeSize = block.nlen;
+
+  // Distance tree lengths are stored directly behind the nlen L entries.
+  unsigned predictedDTreeSize = buildDBitlenghs(bitLengths + predictedLTreeSize, Dcodes);
+  analysis.tokenInfo.push_back(predictedDTreeSize != block.ndist);
+  if (predictedDTreeSize != block.ndist) {
+    analysis.correctives.push_back(block.ndist);
+  }
+  predictedDTreeSize = block.ndist;
+
+  // treecodes = ncode tree-code bit lengths followed by the coded L/D lengths.
+  unsigned BLfreqs[PreflateConstants::CODETREE_CODE_COUNT];
+  const unsigned char* targetCodes = &block.treecodes[0];
+  unsigned targetCodeSize = block.treecodes.size();
+  predictLDTrees(analysis, BLfreqs, bitLengths, predictedLTreeSize, predictedDTreeSize, targetCodes + block.ncode, targetCodeSize - block.ncode);
+
+  // Tree-code tree: record the actual count, whether the prediction missed,
+  // and per entry the predicted length plus the difference to the real one.
+  unsigned char simpleCodeTree[PreflateConstants::CODETREE_CODE_COUNT];
+  unsigned predictedCTreeSize = buildTCBitlengths(simpleCodeTree, BLfreqs);
+  analysis.tokenInfo.push_back(block.ncode);
+  analysis.tokenInfo.push_back(predictedCTreeSize != block.ncode);
+  predictedCTreeSize = block.ncode;
+  for (unsigned i = 0; i < predictedCTreeSize; ++i) {
+    unsigned predictedBL = simpleCodeTree[PreflateConstants::treeCodeOrderTable[i]];
+    analysis.correctives.push_back(predictedBL);
+    analysis.correctives.push_back(targetCodes[i] - predictedBL);
+  }
+}
+/* Writes the analysis of block `blockno` to `codec`. Blocks that are not
+ * DYNAMIC_HUFF produce no output. tokenInfo and correctives are consumed
+ * front to back in exactly the order analyzeBlock stored them.
+ * `blockno` must refer to an existing entry of analysisResults; it is not
+ * range-checked here.
+ */
+void PreflateTreePredictor::encodeBlock(
+    PreflatePredictionEncoder* codec,
+    const unsigned blockno) {
+  BlockAnalysisResult& analysis = analysisResults[blockno];
+  if (analysis.blockType != PreflateTokenBlock::DYNAMIC_HUFF) {
+    return;
+  }
+
+  unsigned infoPos = 0, correctivePos = 0;
+  // Literal/length tree size: flag, and on a miss the real size relative to
+  // NONLEN_CODE_COUNT as a 5-bit value.
+  unsigned char info = analysis.tokenInfo[infoPos++];
+  codec->encodeLiteralCountMisprediction(info);
+  if (info) {
+    codec->encodeValue(analysis.correctives[correctivePos++] - PreflateConstants::NONLEN_CODE_COUNT, 5);
+  }
+  // Distance tree size: flag, and on a miss the real size as a 5-bit value.
+  info = analysis.tokenInfo[infoPos++];
+  codec->encodeDistanceCountMisprediction(info);
+  if (info) {
+    codec->encodeValue(analysis.correctives[correctivePos++], 5);
+  }
+
+  // One entry per tree code until the 0xff terminator.
+  while ((info = analysis.tokenInfo[infoPos++]) != 0xff) {
+    unsigned type = (info & 3);
+    if (info & 4) {
+      // Type was mispredicted: the real type is the next corrective.
+      unsigned newType = analysis.correctives[correctivePos++];
+      codec->encodeLDTypeCorrection(type, newType);
+      type = newType;
+    } else {
+      codec->encodeLDTypeCorrection(type, type);
+    }
+    if (type != TCT_BITS) {
+      // Repeat code: predicted count, plus the real count when bit 8 is set.
+      unsigned predRepeat = analysis.correctives[correctivePos++];
+      if (info & 8) {
+        unsigned newRepeat = analysis.correctives[correctivePos++];
+        codec->encodeRepeatCountCorrection(predRepeat, newRepeat, type);
+      } else {
+        codec->encodeRepeatCountCorrection(predRepeat, predRepeat, type);
+      }
+    } else {
+      // Plain bit length: predicted value and difference to the real one.
+      unsigned bl_pred = analysis.correctives[correctivePos++];
+      int bl_diff = analysis.correctives[correctivePos++];
+      codec->encodeLDBitLengthCorrection(bl_pred, bl_pred + bl_diff);
+    }
+  }
+  // Tree-code count: flag, and on a miss the real count minus 4 in 4 bits.
+  unsigned blcount = analysis.tokenInfo[infoPos++];
+  info = analysis.tokenInfo[infoPos++];
+  codec->encodeTreeCodeCountMisprediction(info);
+  if (info) {
+    codec->encodeValue(blcount - 4, 4);
+  }
+  // Tree-code bit lengths: predicted value and difference per entry.
+  for (unsigned i = 0; i < blcount; ++i) {
+    int bl_pred = analysis.correctives[correctivePos++];
+    int bl_diff = analysis.correctives[correctivePos++];
+    codec->encodeTreeCodeBitLengthCorrection(bl_pred, bl_pred + bl_diff);
+  }
+}
+
+/* Feeds the analysis of block `blockno` into the statistics counters of
+ * `model->treecode`. Walks tokenInfo and correctives in the same order as
+ * encodeBlock, but only counts events instead of encoding them. Blocks that
+ * are not DYNAMIC_HUFF are ignored.
+ * `blockno` must refer to an existing entry of analysisResults; it is not
+ * range-checked here.
+ */
+void PreflateTreePredictor::updateCounters(
+    PreflateStatisticsCounter* model,
+    const unsigned blockno) {
+  BlockAnalysisResult& analysis = analysisResults[blockno];
+  if (analysis.blockType != PreflateTokenBlock::DYNAMIC_HUFF) {
+    return;
+  }
+
+  unsigned infoPos = 0, correctivePos = 0;
+  // Tree size mispredictions: only the flags are counted, the corrected
+  // sizes themselves are skipped.
+  unsigned char info = analysis.tokenInfo[infoPos++];
+  model->treecode.incLiteralCountPredictionWrong(info);
+  if (info) {
+    correctivePos++;
+  }
+  info = analysis.tokenInfo[infoPos++];
+  model->treecode.incDistanceCountPredictionWrong(info);
+  if (info) {
+    correctivePos++;
+  }
+
+  // One entry per tree code until the 0xff terminator.
+  while ((info = analysis.tokenInfo[infoPos++]) != 0xff) {
+    unsigned type = (info & 3);
+    model->treecode.incLDCodeTypePredictionWrong(type, (info & 4) != 0);
+    if (info & 4) {
+      unsigned newType = analysis.correctives[correctivePos++];
+      model->treecode.incLDCodeTypeReplacement(newType);
+      type = newType;
+    }
+    if (type != TCT_BITS) {
+      // Repeat code: count the difference between real and predicted count.
+      unsigned predRepeat = analysis.correctives[correctivePos++];
+      if (info & 8) {
+        unsigned newRepeat = analysis.correctives[correctivePos++];
+        model->treecode.incLDCodeRepeatDiffToPrediction(newRepeat - predRepeat);
+      } else {
+        model->treecode.incLDCodeRepeatDiffToPrediction(0);
+      }
+    } else {
+      // Plain bit length: the predicted value is skipped, only the stored
+      // difference is counted.
+      /*unsigned bl_pred = analysis.correctives[*/correctivePos++/*]*/;
+      int bl_diff = analysis.correctives[correctivePos++];
+      model->treecode.incLDCodeLengthDiffToPrediction(bl_diff);
+    }
+  }
+  unsigned blcount = analysis.tokenInfo[infoPos++];
+  info = analysis.tokenInfo[infoPos++];
+  model->treecode.incTreeCodeCountPredictionWrong(info);
+  // Tree-code bit lengths: again only the differences are counted.
+  for (unsigned i = 0; i < blcount; ++i) {
+    /*int bl_pred = analysis.correctives[*/correctivePos++/*]*/;
+    int bl_diff = analysis.correctives[correctivePos++];
+    model->treecode.incTreeCodeLengthDiffToPrediction(bl_diff);
+  }
+}
+
+
+/* Decoder side: rebuilds the coded literal/length + distance bit length
+ * sequence from the predictions (made from symBitLen) and the corrections
+ * read from `codec`.
+ *
+ * codec          - source of type, repeat count and bit length corrections
+ * frequencies    - CODETREE_CODE_COUNT counters, zeroed here, incremented
+ *                  once per reconstructed code
+ * targetCodes    - output buffer; repeat codes take two bytes (code 16..18
+ *                  followed by the repeat count), plain lengths one byte
+ * targetCodeSize - capacity of targetCodes in bytes
+ * symBitLen      - predicted bit lengths, symLCount literal/length entries
+ *                  followed by symDCount distance entries
+ *
+ * Returns the number of bytes written to targetCodes, or 0 after a failure.
+ * predictionFailure is set when the output buffer is too small, when a
+ * decoded bit length is outside the code-length alphabet, or when the codes
+ * do not cover exactly symLCount + symDCount lengths.
+ */
+unsigned PreflateTreePredictor::reconstructLDTrees(
+    PreflatePredictionDecoder* codec,
+    unsigned* frequencies,
+    unsigned char* targetCodes,
+    const unsigned targetCodeSize,
+    const unsigned char* symBitLen,
+    const unsigned symLCount,
+    const unsigned symDCount) {
+  memset(frequencies, 0, sizeof(unsigned) * PreflateConstants::CODETREE_CODE_COUNT);
+  const unsigned frequencyCount = PreflateConstants::CODETREE_CODE_COUNT;
+  const unsigned char* ptr = symBitLen;
+  unsigned osize = 0;
+  // count1: lengths left in the tree currently being walked,
+  // count2: lengths of the tree still to come (0 once it has been merged in).
+  unsigned count1 = symLCount;
+  unsigned count2 = symDCount;
+  bool first = true;
+  while (count1 + count2 > 0) {
+    TreeCodeType predictedTreeCodeType = predictCodeType(ptr, count1, first);
+    // The decoder returns the actual type; values outside the enum leave the
+    // prediction untouched.
+    unsigned newType = codec->decodeLDTypeCorrection(predictedTreeCodeType);
+    switch (newType) {
+    case TCT_BITS:
+      predictedTreeCodeType = TCT_BITS;
+      break;
+    case TCT_REP:
+      predictedTreeCodeType = TCT_REP;
+      break;
+    case TCT_REPZS:
+      predictedTreeCodeType = TCT_REPZS;
+      break;
+    case TCT_REPZL:
+      predictedTreeCodeType = TCT_REPZL;
+      break;
+    }
+    unsigned char predictedTreeCodeData = predictCodeData(ptr, predictedTreeCodeType, count1, first);
+    first = false;
+    if (predictedTreeCodeType != TCT_BITS) {
+      predictedTreeCodeData = codec->decodeRepeatCountCorrection(predictedTreeCodeData, predictedTreeCodeType);
+    } else {
+      predictedTreeCodeData = codec->decodeLDBitLengthCorrection(predictedTreeCodeData);
+    }
+    // l is the number of bit lengths covered by this code.
+    unsigned l;
+    if (predictedTreeCodeType != TCT_BITS) {
+      frequencies[predictedTreeCodeType + 15]++;
+      l = predictedTreeCodeData;
+      if (osize + 2 > targetCodeSize) {
+        predictionFailure = true;
+        break;
+      }
+      targetCodes[osize++] = predictedTreeCodeType + 15;
+      targetCodes[osize++] = predictedTreeCodeData;
+    } else {
+      // The decoded length indexes `frequencies`; reject values that do not
+      // fit instead of writing past the end of the counter array.
+      if (predictedTreeCodeData >= frequencyCount) {
+        predictionFailure = true;
+        break;
+      }
+      frequencies[predictedTreeCodeData]++;
+      l = 1;
+      if (osize >= targetCodeSize) {
+        predictionFailure = true;
+        break;
+      }
+      targetCodes[osize++] = predictedTreeCodeData;
+    }
+    ptr += l;
+    if (count1 > l) {
+      count1 -= l;
+    } else {
+      // Current tree is used up (possibly by a run that continues into the
+      // next tree): merge in the remaining tree and restart with first = true.
+      count1 += count2;
+      count2 = 0;
+      first = true;
+      if (count1 >= l) {
+        count1 -= l;
+      } else {
+        predictionFailure = true;
+        break;
+      }
+    }
+  }
+  if (count1 + count2 != 0) {
+    predictionFailure = true;
+  }
+  return predictionFailure ? 0 : osize;
+}
+
+/* Decoder side: reconstructs the tree description of `block` (nlen, ndist,
+ * ncode and treecodes) from the token statistics plus the corrections read
+ * from `codec`. Blocks that are not DYNAMIC_HUFF need no tree description and
+ * succeed immediately.
+ *
+ * Returns false, with predictionFailure set, when the trees cannot be
+ * reconstructed. This includes tree sizes from the decoder that would not fit
+ * the fixed-size work arrays; they are rejected before they are used as
+ * offsets or loop bounds.
+ */
+bool PreflateTreePredictor::decodeBlock(
+    PreflateTokenBlock& block,
+    PreflatePredictionDecoder* codec) {
+  if (block.type != PreflateTokenBlock::DYNAMIC_HUFF) {
+    return true;
+  }
+
+  const unsigned bitLengthCapacity = PreflateConstants::LITLENDIST_CODE_COUNT;
+  const unsigned distCodeCount = PreflateConstants::DIST_CODE_COUNT;
+  const unsigned treeCodeCapacity = PreflateConstants::CODETREE_CODE_COUNT;
+
+  // Token statistics drive all tree predictions; this also advances `input`.
+  unsigned Lcodes[PreflateConstants::LITLEN_CODE_COUNT], Dcodes[PreflateConstants::DIST_CODE_COUNT];
+  unsigned Lcount = 0, Dcount = 0;
+  collectTokenStatistics(Lcodes, Dcodes, Lcount, Dcount, block);
+
+  unsigned char bitLengths[PreflateConstants::LITLENDIST_CODE_COUNT];
+  memset(bitLengths, 0, sizeof(bitLengths));
+  unsigned predictedLTreeSize = buildLBitlenghs(bitLengths, Lcodes);
+  if (codec->decodeLiteralCountMisprediction()) {
+    predictedLTreeSize = codec->decodeValue(5) + PreflateConstants::NONLEN_CODE_COUNT;
+  }
+  // The distance lengths (DIST_CODE_COUNT entries) are written directly
+  // behind the literal/length entries, so both must fit into bitLengths.
+  if (predictedLTreeSize + distCodeCount > bitLengthCapacity) {
+    predictionFailure = true;
+    return false;
+  }
+  block.nlen = predictedLTreeSize;
+
+  unsigned predictedDTreeSize = buildDBitlenghs(bitLengths + predictedLTreeSize, Dcodes);
+  if (codec->decodeDistanceCountMisprediction()) {
+    predictedDTreeSize = codec->decodeValue(5);
+  }
+  // reconstructLDTrees reads nlen + ndist entries of bitLengths.
+  if (predictedLTreeSize + predictedDTreeSize > bitLengthCapacity) {
+    predictionFailure = true;
+    return false;
+  }
+  block.ndist = predictedDTreeSize;
+
+  unsigned BLfreqs[PreflateConstants::CODETREE_CODE_COUNT];
+  unsigned char compressedLDtrees[PreflateConstants::LITLENDIST_CODE_COUNT];
+  unsigned targetCodeSize = reconstructLDTrees(codec, BLfreqs, compressedLDtrees, PreflateConstants::LITLENDIST_CODE_COUNT,
+                                               bitLengths, predictedLTreeSize, predictedDTreeSize);
+  if (predictionFailure) {
+    return false;
+  }
+
+  unsigned char simpleCodeTree[PreflateConstants::CODETREE_CODE_COUNT];
+  unsigned predictedCTreeSize = buildTCBitlengths(simpleCodeTree, BLfreqs);
+  if (codec->decodeTreeCodeCountMisprediction()) {
+    predictedCTreeSize = codec->decodeValue(4) + 4;
+  }
+  // The count bounds the loop over treeCodeOrderTable / shuffledCodeTree.
+  if (predictedCTreeSize > treeCodeCapacity) {
+    predictionFailure = true;
+    return false;
+  }
+  block.ncode = predictedCTreeSize;
+  unsigned char shuffledCodeTree[PreflateConstants::CODETREE_CODE_COUNT];
+  for (unsigned i = 0; i < predictedCTreeSize; ++i) {
+    unsigned predictedBL = simpleCodeTree[PreflateConstants::treeCodeOrderTable[i]];
+    shuffledCodeTree[i] = codec->decodeTreeCodeBitLengthCorrection(predictedBL);
+  }
+  // treecodes = ncode tree-code bit lengths followed by the coded L/D lengths.
+  block.treecodes.reserve(predictedCTreeSize + targetCodeSize);
+  block.treecodes.insert(block.treecodes.end(), shuffledCodeTree, shuffledCodeTree + predictedCTreeSize);
+  block.treecodes.insert(block.treecodes.end(), compressedLDtrees, compressedLDtrees + targetCodeSize);
+  return true;
+}
--- a/contrib/preflate/preflate_tree_predictor.h
+++ b/contrib/preflate/preflate_tree_predictor.h
@@ -0,0 +1,99 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef PREFLATE_TREE_PREDICTOR_H
+#define PREFLATE_TREE_PREDICTOR_H
+
+#include <vector>
+
+#include "preflate_constants.h"
+#include "preflate_input.h"
+#include "preflate_parameter_estimator.h"
+
+struct PreflateStatisticsCounter;
+struct PreflatePredictionDecoder;
+struct PreflatePredictionEncoder;
+
+// Kind of entry in the coded bit length sequence of the literal/length and
+// distance trees. The predictor maps the codes 16, 17 and 18 to TCT_REP,
+// TCT_REPZS and TCT_REPZL (enum value + 15); every other code is TCT_BITS,
+// i.e. a plain bit length.
+enum TreeCodeType {
+  TCT_BITS = 0, TCT_REP = 1, TCT_REPZS = 2, TCT_REPZL = 3
+};
+
+// Predicts the Huffman tree description of dynamic-Huffman blocks from the
+// block's tokens, and records (encoder side) or applies (decoder side) the
+// corrections needed to reproduce the real description.
+struct PreflateTreePredictor {
+  PreflateInput input;     // read position in the uncompressed data
+  bool predictionFailure;  // set once a block could not be analyzed/decoded
+
+  // Analysis of one block, filled by analyzeBlock.
+  struct BlockAnalysisResult {
+    PreflateTokenBlock::Type blockType;
+    std::vector<unsigned char> tokenInfo;  // flags and counts, in encode order
+    std::vector<signed> correctives;       // predicted values and corrections
+  };
+  std::vector<BlockAnalysisResult> analysisResults;  // indexed by block number
+
+  // Counts literal/length and distance code usage of the block's tokens and
+  // advances `input` over the data they cover.
+  void collectTokenStatistics(
+      unsigned LcodeFrequencies[],
+      unsigned DcodeFrequencies[],
+      unsigned& Lcount,
+      unsigned& Dcount,
+      const PreflateTokenBlock& block);
+  // Bit lengths of the literal/length, distance and tree-code alphabets; each
+  // returns the predicted number of codes of its tree.
+  unsigned buildLBitlenghs(
+      unsigned char bitLengths[],
+      unsigned Lcodes[]);
+  unsigned buildDBitlenghs(
+      unsigned char bitLengths[],
+      unsigned Dcodes[]);
+  unsigned buildTCBitlengths(
+      unsigned char (&bitLengths)[PreflateConstants::CODETREE_CODE_COUNT],
+      unsigned (&BLfreqs)[PreflateConstants::CODETREE_CODE_COUNT]);
+
+
+  // Huffman code lengths, limited to maxBits, for symCount symbols; returns
+  // max(minMaxCode, highest used symbol + 1).
+  unsigned calcBitLengths(unsigned char* symBitLen,
+                          const unsigned* symFreq,
+                          const unsigned symCount,
+                          const unsigned maxBits,
+                          const unsigned minMaxCode);
+
+  // Prediction of the next entry of the coded bit length sequence: its type
+  // and its payload (bit length or repeat count).
+  TreeCodeType predictCodeType(const unsigned char* symBitLen,
+                       const unsigned symCount,
+                       const bool first);
+  unsigned char predictCodeData(const unsigned char* symBitLen,
+                               const TreeCodeType type,
+                               const unsigned symCount,
+                               const bool first);
+  // Encoder side: compares the real coded sequence with the predictions and
+  // stores the differences in `analysis`.
+  void predictLDTrees(BlockAnalysisResult& analysis,
+                      unsigned* frequencies,
+                      const unsigned char* symBitLen,
+                      const unsigned symLCount,
+                      const unsigned symDCount,
+                      const unsigned char* targetCodes,
+                      const unsigned targetCodeSize);
+  // Decoder side: rebuilds the coded sequence from predictions plus decoded
+  // corrections; returns the number of bytes written, 0 on failure.
+  unsigned reconstructLDTrees(PreflatePredictionDecoder* codec,
+                      unsigned* frequencies,
+                      unsigned char* targetCodes,
+                      unsigned targetCodeSize,
+                      const unsigned char* symBitLen,
+                      const unsigned symLCount,
+                      const unsigned symDCount);
+
+  // `dump` is the data to read from, `offset` the position to start at.
+  PreflateTreePredictor(const std::vector<unsigned char>& dump, const size_t offset);
+  // Encoder side, per block; blocks have to be analyzed in ascending order.
+  void analyzeBlock(const unsigned blockno,
+                    const PreflateTokenBlock& block);
+  void updateCounters(PreflateStatisticsCounter*,
+                   const unsigned blockno);
+  void encodeBlock(PreflatePredictionEncoder*,
+                   const unsigned blockno);
+
+  // Decoder side; returns false when the trees could not be reconstructed.
+  bool decodeBlock(PreflateTokenBlock& block, PreflatePredictionDecoder*);
+};
+
+#endif /* PREFLATE_TREE_PREDICTOR_H */
--- a/contrib/preflate/support/arithmetic_coder.cpp
+++ b/contrib/preflate/support/arithmetic_coder.cpp
@@ -0,0 +1,232 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include <algorithm>
+#include "arithmetic_coder.h"
+#include "array_helper.h"
+#include "bit_helper.h"
+
+// Lookup table for _needsNormalization(): indexed by the top three bits of
+// _low (_low >> 29); bit (_high >> 29) of the entry is set when normalization
+// is required. With _low index 0 that is the case for _high index 0 or 1
+// (and 4, 5), with _low index 1 for _high index 0..2 (and 4..6), and always
+// for _low index 2 and 3. Entries 4..7 repeat entries 0..3.
+const uint8_t ArithmeticCodecBase::_normCheckLUT[8] = {
+  0x33, 0x77, 0xff, 0xff, 0x33, 0x77, 0xff, 0xff
+};
+
+// Starts with the full 31-bit coding interval [0, 0x7fffffff].
+ArithmeticCodecBase::ArithmeticCodecBase()
+  : _low(0)
+  , _high(0x7fffffff) {}
+
+
+// Binds the encoder to the output bit stream `bos`; the count of pending E3
+// bits starts at zero.
+ArithmeticEncoder::ArithmeticEncoder(BitOutputStream& bos)
+  : _bos(bos)
+  , _e3cnt(0) {}
+
+// Flushes the pending E3 count: hands `w` to the bit stream with a bit count
+// of at most 16 per call until `_e3cnt` has been used up (it is 0 afterwards).
+void ArithmeticEncoder::_writeE3(const unsigned w) {
+  for (uint32_t chunk; _e3cnt > 0; _e3cnt -= chunk) {
+    chunk = min(_e3cnt, 16u);
+    _bos.put(w, chunk);
+  }
+}
+
+// Terminates the current code word and resets the coding interval to its
+// initial state [0, 0x7fffffff].
+void ArithmeticEncoder::flush() {
+  const bool lowBelowQuarter = _low < 0x20000000;
+  if (!lowBelowQuarter) {
+    _bos.put(1, 1);
+  } else {
+    // case a.): put the 2-bit value 2 (i.e. write 0, 1), then the pending
+    // E3 bits
+    _bos.put(2, 2);
+    _writeE3(~0u);
+  }
+  _high = 0x7fffffff;
+  _low = 0;
+}
+
+// Renormalizes the 31-bit interval [_low, _high] in two steps:
+//  1. leading bits on which _low and _high agree are written to the stream
+//     (the first one followed by the pending E3 bits) and shifted out;
+//  2. while _low starts with 01 and _high with 10 the interval is widened
+//     around the middle and the number of such steps is added to _e3cnt.
+void ArithmeticEncoder::_normalize() {
+#ifdef _DEBUG
+  _ASSERT(_low <= _high && _high < 0x80000000);
+#endif
+  // write determined bits
+  // this is the case if _low features 1 bits
+  // or _high features 0 bits
+  uint32_t lh = ~_low & _high;
+  if ((lh & 0x40000000) == 0) {
+    // Top bit is decided: write it, then the pending E3 bits (w - 1 is all
+    // ones for w == 0 and zero for w == 1).
+    unsigned w = (_low & 0x40000000) != 0;
+    _bos.put(w, 1);
+    _writeE3(w - 1);
+    if ((lh & 0x20000000) == 0) {
+      // More decided bits follow: l of them, written in pieces of at most
+      // 16 bits per putReverse call.
+      unsigned l = bitLeadingZeroes((lh << 2) + 3);
+      if (l <= 16) {
+        _bos.putReverse(_low >> (30 - l), l);
+      } else {
+        _bos.putReverse(_low >> (30 - 16), 16);
+        _bos.putReverse(_low >> (30 - l), l - 16);
+      }
+      // Shift out l + 1 bits: _low is refilled with zeros, _high with ones.
+      _low  = (_low << (l + 1)) & 0x7fffffff;
+      _high = (((_high + 1) << (l + 1)) - 1) & 0x7fffffff;
+    } else {
+      _low = (_low << 1) & 0x7fffffff;
+      _high = ((_high << 1) + 1) & 0x7fffffff;
+    }
+  }
+
+  // count undetermined bits
+  lh = ~_low | _high;
+  if ((lh & 0x20000000) == 0) {
+    // low starts with 01, high starts with 10
+    unsigned l = bitLeadingZeroes((lh << 2) + 3);
+    _e3cnt += l;
+    // Drop the l undetermined bits below the top bit; afterwards _low has
+    // top bit 0 and _high has top bit 1.
+    _low = (_low << l) & 0x3fffffff;
+    _high = ((((_high + 1) << l) - 1) & 0x3fffffff)
+            | 0x40000000;
+  }
+#ifdef _DEBUG
+  _ASSERT(_low <= _high && _high < 0x80000000);
+#endif
+}
+
+// Binds the decoder to the input bit stream `bis` and preloads the 31-bit
+// code value from it in two reads (16 bits, then 15 bits).
+ArithmeticDecoder::ArithmeticDecoder(BitInputStream& bis) 
+  : _bis(bis)
+  , _value(0) {
+  _value = _bis.getReverse(16) << 15;
+  _value |= _bis.getReverse(15);
+}
+// Decoder counterpart of ArithmeticEncoder::_normalize(): applies the same
+// interval updates to _low and _high and keeps _value in step by shifting in
+// as many new bits from the input stream as were shifted out.
+void ArithmeticDecoder::_normalize() {
+#ifdef _DEBUG
+  _ASSERT(_low <= _value && _value <= _high && _high < 0x80000000);
+#endif
+  // skip determined bits
+  // this is the case if _low features 1 bits
+  // or _high features 0 bits
+  uint32_t lh = ~_low & _high;
+  if ((lh & 0x40000000) == 0) {
+    //unsigned w = (_low & 0x40000000) != 0;
+    if ((lh & 0x20000000) == 0) {
+      // l + 1 leading bits are decided; refill _value in reads of at most
+      // 16 bits.
+      unsigned l = bitLeadingZeroes((lh << 2) + 3);
+      _low = (_low << (l + 1)) & 0x7fffffff;
+      _high = (((_high + 1) << (l + 1)) - 1) & 0x7fffffff;
+      if (l <= 15) {
+        _value = ((_value << (l + 1)) + _bis.getReverse(l + 1)) & 0x7fffffff;
+      } else {
+        _value = ((_value << 16) + _bis.getReverse(16)) & 0x7fffffff;
+        _value = ((_value << (l - 15)) + _bis.getReverse(l - 15)) & 0x7fffffff;
+      }
+    } else {
+      // Exactly one decided bit.
+      _low = (_low << 1) & 0x7fffffff;
+      _high = ((_high << 1) + 1) & 0x7fffffff;
+      _value = ((_value << 1) + _bis.get(1)) & 0x7fffffff;
+    }
+  }
+
+  // count undetermined bits
+  lh = ~_low | _high;
+  if ((lh & 0x20000000) == 0) {
+    // low starts with 01, high starts with 10
+    unsigned l = bitLeadingZeroes((lh << 2) + 3);
+    _low = (_low << l) & 0x3fffffff;
+    _high = ((((_high + 1) << l) - 1) & 0x3fffffff)
+      | 0x40000000;
+    // _value is shifted the same way; subtracting 0x40000000 is applied
+    // after the shift, before masking back to 31 bits.
+    if (l <= 16) {
+      _value = (((_value << l) + _bis.getReverse(l)) -0x40000000) & 0x7fffffff;
+    } else {
+      _value = ((_value << 16) + _bis.getReverse(16));
+      _value = (((_value << (l - 16)) + _bis.getReverse(l - 16)) - 0x40000000) & 0x7fffffff;
+    }
+  }
+#ifdef _DEBUG
+  _ASSERT(_low <= _value && _value <= _high && _high < 0x80000000);
+#endif
+}
+
+// Checks whether at most one of the N counts in bounds[0..N) is non-zero.
+// Returns false, leaving all arrays untouched, as soon as a second non-zero
+// count is found. Otherwise the single used symbol is mapped to the last
+// slot (ids[N - 1] / rids[symbol]), its count is cleared and bounds[N] is set
+// to 1 << 16. bounds must provide N + 1 entries.
+bool modelCheckFixed(unsigned bounds[], unsigned short ids[], unsigned short rids[],
+                     const unsigned N) {
+  unsigned used = N;  // N means "no used symbol seen yet"
+  for (unsigned sym = 0; sym != N; ++sym) {
+    if (bounds[sym] == 0) {
+      continue;
+    }
+    if (used != N) {
+      return false;
+    }
+    used = sym;
+  }
+  const unsigned lastSlot = N - 1;
+  ids[lastSlot] = used;
+  rids[used] = lastSlot;
+  bounds[used] = 0;
+  bounds[N] = 1 << 16;
+  return true;
+}
+
+// Sorts the N counts in bounds[0..N) into ascending order, equal counts
+// keeping ascending symbol order. Afterwards ids[pos] is the symbol stored at
+// position pos, rids[symbol] is the position of that symbol, and backup holds
+// the original, unsorted counts.
+void modelSortBounds(unsigned bounds[], unsigned short ids[], unsigned short rids[],
+                     unsigned backup[], const unsigned N) {
+  for (unsigned sym = 0; sym != N; ++sym) {
+    backup[sym] = bounds[sym];
+    ids[sym] = sym;
+  }
+
+  const auto countThenSymbol = [backup](unsigned lhs, unsigned rhs) {
+    return backup[lhs] < backup[rhs]
+           || (backup[lhs] == backup[rhs] && lhs < rhs);
+  };
+  std::sort(ids, ids + N, countThenSymbol);
+
+  for (unsigned pos = 0; pos != N; ++pos) {
+    const unsigned short sym = ids[pos];
+    bounds[pos] = backup[sym];
+    rids[sym] = pos;
+  }
+}
+
+// Converts the N counts in bounds[0..N) in place into N + 1 cumulative
+// bounds in bounds[0..N], scaled so that the sum of all counts corresponds to
+// 1 << 16. bounds must provide N + 1 entries (bounds[N] is read and written).
+//
+// For every non-zero count the step to the next bound is the distance from
+// the previous bound to the exactly scaled cumulative value; only the top
+// k = 5 significant bits of that step are kept, and the step is at least 1.
+// A zero count repeats the previous bound.
+// NOTE(review): bitLength() and sumArray() are defined elsewhere; bitLength
+// is presumably the number of significant bits of its argument — confirm.
+void modelRecreateBounds(unsigned bounds[], const unsigned N) {
+  unsigned sum = sumArray(bounds, N), acc, prev;
+  // `prev` always holds the original count of the current symbol, read
+  // before its slot is overwritten with a bound.
+  prev = bounds[0];
+  bounds[0] = acc = 0;
+  for (unsigned i = 0; i < N; ++i) {
+    if (prev) {
+      acc += prev;
+      prev = bounds[i + 1];
+      // Distance from the previous bound to the exactly scaled position.
+      int diff = (((uint64_t)acc) << 16) / sum - bounds[i];
+      unsigned diff_bits = bitLength(diff);
+      const unsigned k = 5;
+      if (diff > 0 && diff_bits > k) {
+        // Keep only the top k bits of the step.
+        diff = diff & (((1 << k) - 1) << (diff_bits - k));
+      }
+      bounds[i + 1] = bounds[i] + diff;
+      // A used symbol must get a non-empty range.
+      if (bounds[i + 1] <= bounds[i]) {
+        bounds[i + 1] = bounds[i] + 1;
+      }
+    } else {
+      prev = bounds[i + 1];
+      bounds[i + 1] = bounds[i];
+    }
+  }
+  // Pin the upper end to the full scale unless every count was zero.
+  if (bounds[N] > 0) {
+    bounds[N] = 1 << 16;
+  }
+}
+
+// Builds the two-symbol model from the counts in bounds[0] and bounds[1].
+// If one of the counts is zero the model is marked as fixed: the other
+// symbol (symbol 0 when both are zero) is placed in slot 1 and the bounds
+// become {0, 0, 1 << 16}. Otherwise the symbols are ordered by ascending
+// count, ids/rids record the mapping, and the cumulative bounds are
+// recreated from the counts.
+void ACFixedScaleBinaryModel::build() {
+  const bool firstUnused = bounds[0] == 0;
+  if (firstUnused || bounds[1] == 0) {
+    _fixed = true;
+    ids[1] = firstUnused;
+    rids[ids[1]] = 1;
+    bounds[0] = 0;
+    bounds[1] = 0;
+    bounds[2] = 1 << 16;
+    return;
+  }
+
+  const bool needsSwap = bounds[1] < bounds[0];
+  ids[0] = needsSwap ? 1 : 0;
+  ids[1] = needsSwap ? 0 : 1;
+  if (needsSwap) {
+    std::swap(bounds[0], bounds[1]);
+  }
+  rids[ids[0]] = 0;
+  rids[ids[1]] = 1;
+
+  modelRecreateBounds(bounds, 2);
+}
--- a/contrib/preflate/support/arithmetic_coder.h
+++ b/contrib/preflate/support/arithmetic_coder.h
@@ -0,0 +1,260 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef ARITHMETIC_CODER_H
+#define ARITHMETIC_CODER_H
+
+#include <stdint.h>
+#include <string.h>
+#include "bitstream.h"
+#include "const_division.h"
+
+// State shared by the arithmetic encoder and decoder: the current coding
+// interval [_low, _high] held in 32-bit registers.
+class ArithmeticCodecBase {
+public:
+  ArithmeticCodecBase();
+  
+  // array for fast check if normalization is required
+  static const uint8_t _normCheckLUT[8];
+  // Looks up the top three bits of _low in the table and tests the bit
+  // selected by the top three bits of _high.
+  bool _needsNormalization() const {
+    return (_normCheckLUT[_low >> 29] & (1 << (_high >> 29))) != 0;
+  }
+  // arithmetic coding variables
+  uint32_t _low;
+  uint32_t _high;
+};
+
+// Arithmetic encoder writing to a BitOutputStream.
+// Every encode variant narrows [_low, _high] to the sub-interval
+// [low, high) of the symbol; they differ only in how the width of one
+// unit ("step") is derived from the current range.
+class ArithmeticEncoder : public ArithmeticCodecBase {
+public:
+  ArithmeticEncoder(BitOutputStream& bos);
+  void flush();
+  // Encodes [low, high) out of `scale` units; step = range / scale.
+  void encode(const uint32_t scale, const uint32_t low, const uint32_t high) {
+    // update steps, low count, high count
+    uint32_t step = ((_high - _low) + 1) / scale;
+    // _high must be derived before _low is moved.
+    _high = _low + step * high - 1;
+    _low += step * low;
+    _checkNormalize();
+  }
+  // Same as encode() for a scale of 1 << shift; step = range >> shift.
+  void encodeShiftScale(const uint32_t shift, const uint32_t low, const uint32_t high) {
+    // update steps, low count, high count
+    uint32_t step = ((_high - _low) + 1) >> shift;
+    _high = _low + step * high - 1;
+    _low += step * low;
+    _checkNormalize();
+  }
+  // Same as encode() with the division done through a precomputed divider.
+  void encode(const udivider_t<32>& scale, const uint32_t low, const uint32_t high) {
+    // update steps, low count, high count
+    uint32_t step = divide((_high - _low) + 1, scale);
+    _high = _low + step * high - 1;
+    _low += step * low;
+    _checkNormalize();
+  }
+  // Encodes `value` as one of 1 << bits equally sized intervals.
+  // Unlike the other variants this always calls _normalize().
+  void encodeBits(const uint32_t value, const uint32_t bits) {
+    uint32_t step = ((_high - _low) + 1) >> bits;
+    _low += step * value;
+    _high = _low + step - 1;
+    _normalize();
+  }
+
+private:
+  // Normalizes only when the lookup table says it is required.
+  void _checkNormalize() {
+    if (_needsNormalization()) {
+      _normalize();
+    }
+  }
+  void _normalize();
+  void _writeE3(const unsigned w);
+
+  BitOutputStream& _bos;
+
+  // arithmetic coding variables
+  uint32_t _e3cnt;
+};
+
+// Arithmetic decoder reading from a BitInputStream.
+// The decode variants mirror the encoder: they derive the unit width
+// ("step") from the current range, find the interval of `bounds` that
+// contains the code value and narrow [_low, _high] to it.
+class ArithmeticDecoder : public ArithmeticCodecBase {
+public:
+  ArithmeticDecoder(BitInputStream& bis);
+  // Decodes one of N intervals; bounds holds N + 1 boundaries out of `scale`.
+  // Returns the index of the interval (a position, not a symbol id).
+  unsigned decode(const uint32_t scale, const unsigned bounds[], const unsigned N) {
+    uint32_t step = ((_high - _low) + 1) / scale;
+    return _decode(step, bounds, N);
+  }
+  // Same as decode() for a scale of 1 << shift.
+  unsigned decodeShiftScale(const uint32_t shift, const unsigned bounds[], const unsigned N) {
+    uint32_t step = ((_high - _low) + 1) >> shift;
+    return _decode(step, bounds, N);
+  }
+  // Same as decode() with the division done through a precomputed divider.
+  unsigned decode(const udivider_t<32>& scale, const unsigned bounds[], const unsigned N) {
+    uint32_t step = divide((_high - _low) + 1, scale);
+    return _decode(step, bounds, N);
+  }
+
+  // Two-interval variants; bounds holds three boundaries. Returns 0 or 1.
+  unsigned decodeBinary(const uint32_t scale, const unsigned bounds[]) {
+    uint32_t step = ((_high - _low) + 1) / scale;
+    return _decodeBinary(step, bounds);
+  }
+  unsigned decodeBinaryShiftScale(const uint32_t shift, const unsigned bounds[]) {
+    uint32_t step = ((_high - _low) + 1) >> shift;
+    return _decodeBinary(step, bounds);
+  }
+  unsigned decodeBinary(const udivider_t<32>& scale, const unsigned bounds[]) {
+    uint32_t step = divide((_high - _low) + 1, scale);
+    return _decodeBinary(step, bounds);
+  }
+
+  // Decodes a value that was encoded as one of 1 << bits equally sized
+  // intervals. Always calls _normalize().
+  unsigned decodeBits(const uint32_t bits) {
+    uint32_t step = ((_high - _low) + 1) >> bits;
+    unsigned result = (_value - _low) / step;
+    _low += step * result;
+    _high = _low + step - 1;
+    _normalize();
+    return result;
+  }
+  
+private:
+  // Scans from the top and returns the largest index i in [1, N - 1] with
+  // val >= bounds[i], or 0 if there is none.
+  unsigned _findIndex(const unsigned bounds[],
+                      const unsigned N,
+                      const unsigned val) {
+    for (unsigned i = N; i > 1; --i) {
+      if (val >= bounds[i - 1]) {
+        return i - 1;
+      }
+    }
+    return 0;
+  }
+
+  // Maps the code value to units of `step`, locates its interval and
+  // narrows the range to it. _high is derived before _low is moved.
+  unsigned _decode(const uint32_t step, const unsigned bounds[], const unsigned N) {
+    uint32_t val = (_value - _low) / step;
+    unsigned result = _findIndex(bounds, N, val);
+    _high = _low + step * bounds[result + 1] - 1;
+    _low += step * bounds[result];
+    _checkNormalize();
+    return result;
+  }
+  // Two-interval case: a single comparison against the middle boundary
+  // replaces the division and the search.
+  unsigned _decodeBinary(const uint32_t step, const unsigned bounds[]) {
+    unsigned result = (_value >= _low + bounds[1] * step);
+    _high = _low + step * bounds[result + 1] - 1;
+    _low += step * bounds[result];
+    _checkNormalize();
+    return result;
+  }
+  // Normalizes only when the lookup table says it is required.
+  void _checkNormalize() {
+    if (_needsNormalization()) {
+      _normalize();
+    }
+  }
+  void _normalize();
+
+  BitInputStream& _bis;
+
+  // arithmetic coding variables
+  uint32_t _value;
+};
+
+// Helpers used by the model classes below to prepare their tables.
+// modelCheckFixed: its result is stored as the model's _fixed flag; when it
+// returns false the model is sorted and rebuilt with the two helpers below.
+bool modelCheckFixed(unsigned bounds[], unsigned short ids[], unsigned short rids[],
+                     const unsigned N);
+// modelSortBounds: sorts the N weights in bounds ascending, filling ids
+// (position -> symbol) and rids (symbol -> position); backup is scratch
+// space for N entries.
+void modelSortBounds(unsigned bounds[], unsigned short ids[], unsigned short rids[],
+                     unsigned backup[], const unsigned N);
+// modelRecreateBounds: turns N weights into N + 1 cumulative boundaries on
+// a scale of 1 << 16; bounds must provide N + 1 entries.
+void modelRecreateBounds(unsigned bounds[], const unsigned N);
+
+// Storage shared by the fixed-scale models: N + 1 cumulative boundaries
+// plus the symbol permutation (ids: position -> symbol, rids: symbol ->
+// position) and the "fixed" flag.
+template <unsigned N>
+struct ACModelBase {
+  static const unsigned L = N;
+  // Compares two models: all N + 1 boundaries must match, and the symbol
+  // id of a position must match whenever that position's upper boundary is
+  // non-zero. rids and _fixed are not compared.
+  bool isEqualTo(const ACModelBase& m) const {
+    unsigned slot = 0;
+    while (slot < N) {
+      const bool sameBound = bounds[slot] == m.bounds[slot];
+      if (!sameBound) {
+        return false;
+      }
+      const bool idMatters = bounds[slot + 1] > 0;
+      if (idMatters && ids[slot] != m.ids[slot]) {
+        return false;
+      }
+      ++slot;
+    }
+    return bounds[N] == m.bounds[N];
+  }
+
+  unsigned bounds[N + 1];
+  unsigned short ids[N], rids[N];
+  bool _fixed;
+};
+
+// Two-symbol model coded against a fixed total scale of 1 << 16.
+struct ACFixedScaleBinaryModel : public ACModelBase<2> {
+  // Leaves every member unset; fill bounds[0..1] and call build() before use.
+  ACFixedScaleBinaryModel() {}
+  // Takes the two symbol weights from arr and builds the model right away.
+  ACFixedScaleBinaryModel(const unsigned(&arr)[2]) {
+    memcpy(this->bounds, arr, sizeof(arr));
+    build();
+  }
+  void build();
+  // Encodes symbol `item`. A fixed model writes nothing.
+  void encode(ArithmeticEncoder* encoder, const unsigned item) {
+    if (this->_fixed) {
+      return;
+    }
+    const unsigned slot = this->rids[item];
+    const unsigned lower = this->bounds[slot];
+    const unsigned upper = this->bounds[slot + 1];
+    encoder->encodeShiftScale(16, lower, upper);
+  }
+#if 0
+  unsigned decode(aricoder* codec) {
+    symbol s;
+    s.scale = 1 << 16;
+    unsigned cnt = codec->decode_count(&s);
+    for (unsigned i = 0; i < N; ++i) {
+      if (cnt < bounds[i + 1]) {
+        s.low_count = bounds[i];
+        s.high_count = bounds[i + 1];
+        codec->decode(&s);
+        return ids[i];
+      }
+    }
+    return 0;
+  }
+#endif
+};
+
+// N-symbol model coded against a fixed total scale of 1 << 16.
+template <unsigned N>
+struct ACFixedScaleModel : public ACModelBase<N> {
+  // Leaves every member unset; fill bounds[0..N-1] and call build() before use.
+  ACFixedScaleModel() {}
+  // Takes the N symbol weights from arr and builds the model right away.
+  ACFixedScaleModel(const unsigned(&arr)[N]) {
+    memcpy(this->bounds, arr, sizeof(arr));
+    build();
+  }
+  // Records whether the model is fixed; if it is not, sorts the weights
+  // and converts them into cumulative boundaries.
+  void build() {
+    this->_fixed = modelCheckFixed(this->bounds, this->ids, this->rids, N);
+    if (this->_fixed) {
+      return;
+    }
+    unsigned backup[N];
+    modelSortBounds(this->bounds, this->ids, this->rids, backup, N);
+    modelRecreateBounds(this->bounds, N);
+  }
+  // Encodes symbol `item`. A fixed model writes nothing.
+  void encode(ArithmeticEncoder* encoder, const unsigned item) {
+    if (this->_fixed) {
+      return;
+    }
+    const unsigned slot = this->rids[item];
+    const unsigned lower = this->bounds[slot];
+    const unsigned upper = this->bounds[slot + 1];
+    encoder->encodeShiftScale(16, lower, upper);
+  }
+#if 0
+  unsigned decode(aricoder* codec) {
+    symbol s;
+    s.scale = 1 << 16;
+    unsigned cnt = codec->decode_count(&s);
+    for (unsigned i = 0; i < N; ++i) {
+      if (cnt < bounds[i + 1]) {
+        s.low_count = bounds[i];
+        s.high_count = bounds[i + 1];
+        codec->decode(&s);
+        return this->ids[i];
+      }
+    }
+    return 0;
+  }
+#endif
+};
+
+#endif /* ARITHMETIC_CODER_H */
--- a/contrib/preflate/support/array_helper.cpp
+++ b/contrib/preflate/support/array_helper.cpp
@@ -0,0 +1,24 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include "array_helper.h"
+
+// Returns the sum of the first n elements of data, computed in unsigned
+// arithmetic (so it wraps on overflow). Returns 0 for n == 0.
+unsigned sumArray(const unsigned* data, const unsigned n) {
+  unsigned total = 0;
+  const unsigned* const end = data + n;
+  for (const unsigned* cursor = data; cursor != end; ++cursor) {
+    total += *cursor;
+  }
+  return total;
+}
--- a/contrib/preflate/support/array_helper.h
+++ b/contrib/preflate/support/array_helper.h
@@ -0,0 +1,25 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef ARRAY_HELPER_H
+#define ARRAY_HELPER_H
+
+// Returns the sum of the first n elements of data.
+unsigned sumArray(const unsigned* data, const unsigned n);
+
+// Convenience overload for built-in arrays: sums all N elements, with N
+// deduced from the array type.
+template <unsigned N>
+inline unsigned sumArray(const unsigned (&data)[N]) {
+  return sumArray(data, N);
+}
+
+#endif /* ARRAY_HELPER_H */
--- a/contrib/preflate/support/bit_helper.cpp
+++ b/contrib/preflate/support/bit_helper.cpp
@@ -0,0 +1,73 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include "bit_helper.h"
+
+// Returns the number of bits needed to represent value, i.e. the position
+// of its highest set bit plus one; 0 for value == 0.
+unsigned bitLength(unsigned value) {
+  unsigned bits = 0;
+  for (; value != 0; value >>= 1) {
+    ++bits;
+  }
+  return bits;
+}
+
+// reverse4[v] is the 4-bit value v with its bit order reversed.
+static unsigned char reverse4[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15};
+// Reverses the low 8 bits of value; higher bits are ignored.
+static unsigned bitReverse8(const unsigned value) {
+  const unsigned lowNibble = value & 0x0f;
+  const unsigned highNibble = (value >> 4) & 0x0f;
+  return (reverse4[lowNibble] << 4) | reverse4[highNibble];
+}
+// Reverses the low 16 bits of value by swapping two reversed bytes.
+static unsigned bitReverse16(const unsigned value) {
+  const unsigned reversedLow = bitReverse8(value & 0xff);
+  const unsigned reversedHigh = bitReverse8(value >> 8);
+  return (reversedLow << 8) | reversedHigh;
+}
+// Reverses all 32 bits of value by swapping two reversed half-words.
+static unsigned bitReverse32(const unsigned value) {
+  const unsigned reversedLow = bitReverse16(value & 0xffff);
+  const unsigned reversedHigh = bitReverse16(value >> 16);
+  return (reversedLow << 16) | reversedHigh;
+}
+// Reverses the order of the lowest `bits` bits of value (bits <= 32).
+// The narrowest helper that covers `bits` is used and the result is
+// shifted down so that it occupies the low `bits` bits.
+unsigned bitReverse(const unsigned value, const unsigned bits) {
+  if (bits > 16) {
+    return bitReverse32(value) >> (32 - bits);
+  }
+  if (bits > 8) {
+    return bitReverse16(value) >> (16 - bits);
+  }
+  return bitReverse8(value) >> (8 - bits);
+}
+
+// leading4[v] is the number of leading zero bits of the 4-bit value v.
+static unsigned char leading4[16] = {4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0};
+// Returns the number of zero bits above the highest set bit of a 32-bit
+// value; 32 for zero. Whole zero nibbles are skipped first, the remaining
+// top nibble is resolved through the table.
+unsigned bitLeadingZeroes(const unsigned value_) {
+  if (value_ == 0) {
+    return 32;
+  }
+  unsigned skipped = 0;
+  unsigned shifted = value_;
+  for (; (shifted & 0xf0000000) == 0; shifted <<= 4) {
+    skipped += 4;
+  }
+  return skipped + leading4[shifted >> 28];
+}
+// trailing4[v] is the number of trailing zero bits of the 4-bit value v.
+static unsigned char trailing4[16] = {4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0};
+// Returns the number of zero bits below the lowest set bit of value; 32 for
+// zero. Whole zero nibbles are skipped first, the remaining low nibble is
+// resolved through the table.
+unsigned bitTrailingZeroes(const unsigned value_) {
+  if (value_ == 0) {
+    return 32;
+  }
+  unsigned skipped = 0;
+  unsigned shifted = value_;
+  for (; (shifted & 0xf) == 0; shifted >>= 4) {
+    skipped += 4;
+  }
+  return skipped + trailing4[shifted & 0xf];
+}
+
--- a/contrib/preflate/support/bit_helper.h
+++ b/contrib/preflate/support/bit_helper.h
@@ -0,0 +1,23 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef BIT_HELPER_H
+#define BIT_HELPER_H
+
+// Number of bits needed to represent value (0 for 0).
+unsigned bitLength(unsigned value);
+// Reverses the order of the lowest `bits` bits of value (bits <= 32).
+unsigned bitReverse(const unsigned value, const unsigned bits);
+// Number of leading zero bits of a 32-bit value (32 for 0).
+unsigned bitLeadingZeroes(const unsigned value);
+// Number of trailing zero bits of value (32 for 0).
+unsigned bitTrailingZeroes(const unsigned value);
+
+#endif /* BIT_HELPER_H */
--- a/contrib/preflate/support/bitstream.cpp
+++ b/contrib/preflate/support/bitstream.cpp
@@ -0,0 +1,176 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include <algorithm>
+#include <memory.h>
+#include "bitstream.h"
+
+// Binds the reader to `is` and starts with empty byte and bit buffers;
+// nothing is read from the stream until bits are first requested.
+BitInputStream::BitInputStream(InputStream& is)
+  : _input(is)
+  , _bufPos(0)
+  , _bufSize(0)
+  , _bufFastLimit(0)
+  , _eof(false)
+  , _bits(0)
+  , _bitsRemaining(0)
+  , _totalBitPos(0)
+{}
+
+// Refills the byte buffer once the read position has reached the fast
+// limit and the input has not yet signalled its end.
+// Unread bytes are moved in front of offset PRE_BUF_EXTRA, then up to
+// BUF_SIZE new bytes are read behind them. A short read marks end of input.
+// NOTE(review): `max` is used unqualified; presumably it is supplied by
+// pch.h -- confirm.
+void BitInputStream::_fillBytes() {
+  // free space in bit buffer
+  if (_bufPos >= _bufFastLimit) {
+    if (!_eof) {
+      unsigned remaining = _bufSize - _bufPos;
+      memcpy(_buffer + PRE_BUF_EXTRA - remaining,
+             _buffer + _bufPos, remaining);
+      _bufPos = PRE_BUF_EXTRA - remaining;
+      _bufSize = PRE_BUF_EXTRA + _input.read(_buffer + PRE_BUF_EXTRA, BUF_SIZE);
+      // Up to this position _fill() may read bytes without bounds checks.
+      _bufFastLimit = max(_bufPos, _bufSize - PRE_BUF_EXTRA);
+      _eof = _bufSize != PRE_BUF_EXTRA + BUF_SIZE;
+    }
+  }
+}
+// Tops up the bit accumulator with whole bytes, each new byte going above
+// the bits that are still pending.
+// Below the fast limit bytes are taken without a bounds check; at or past
+// it the byte buffer is refilled first (unless the input has ended) and
+// every byte is checked against the buffer end.
+void BitInputStream::_fill() {
+  const bool nearBufferEnd = _bufPos >= _bufFastLimit;
+  if (nearBufferEnd && !_eof) {
+    _fillBytes();
+  }
+  while (_bitsRemaining <= BITS - 8 && (!nearBufferEnd || _bufPos < _bufSize)) {
+    const size_t nextByte = _buffer[_bufPos++];
+    _bits |= nextByte << _bitsRemaining;
+    _bitsRemaining += 8;
+  }
+}
+// Copies up to len bytes from the current position to `output` and returns
+// the number of bytes copied. Returns 0 without copying if the number of
+// buffered bits is not a multiple of 8.
+// NOTE(review): `min` is used unqualified; presumably it is supplied by
+// pch.h -- confirm.
+size_t BitInputStream::copyBytesTo(OutputStream& output, const size_t len) {
+  if (_bitsRemaining & 7) {
+    return 0;
+  }
+  // First drain the whole bytes still held in the bit accumulator.
+  uint8_t a[sizeof(_bits)];
+  size_t l = 0;
+  while (_bitsRemaining > 0 && l < len) {
+    a[l++] = _bits & 0xff;
+    _bitsRemaining -= 8;
+    _bits >>= 8;
+    _totalBitPos += 8;
+  }
+  size_t w = output.write(a, l);
+  if (w != l) {
+    return w;
+  }
+  // Then pass bytes straight from the byte buffer, refilling as needed.
+  while (l < len) {
+    unsigned todo = min(len - l, (size_t)(_bufSize - _bufPos));
+    w = output.write(_buffer + _bufPos, todo);
+    _totalBitPos += 8 * w;
+    _bufPos += w;
+    l += w;
+    // Stop on a short write or when the input is exhausted.
+    if (w != todo || eof()) {
+      return l;
+    }
+    _fillBytes();
+  }
+  return l;
+}
+// Skips to the next byte boundary, then reads up to size_ bytes into data.
+// Returns the number of bytes stored, which is less than size_ only when
+// the input ends first.
+// NOTE(review): `min` is used unqualified; presumably it is supplied by
+// pch.h -- confirm.
+size_t BitInputStream::getBytes(uint8_t* data, const size_t size_) {
+  skipToByte();
+  size_t size = size_;
+  // First drain the whole bytes still held in the bit accumulator.
+  while (_bitsRemaining > 0 && size > 0) {
+    *data++ = _bits & 0xff;
+    _bitsRemaining -= 8;
+    _bits >>= 8;
+    _totalBitPos += 8;
+    size--;
+  }
+  // Then copy straight from the byte buffer, refilling as needed.
+  while (size > 0) {
+    unsigned todo = min(size, (size_t)(_bufSize - _bufPos));
+    memcpy(data, _buffer + _bufPos, todo);
+    data += todo;
+    _totalBitPos += 8 * todo;
+    _bufPos += todo;
+    size -= todo;
+    if (eof()) {
+      return size_ - size;
+    }
+    _fillBytes();
+  }
+  return size_;
+}
+// Reads a variable-length integer as written by BitOutputStream::putVLI().
+// The first group uses the bits left in the current byte (1..8), every
+// further group uses 8 bits. The top bit of a group flags a continuation,
+// the remaining bits carry payload, least significant group first.
+uint64_t BitInputStream::getVLI() {
+  uint64_t result = 0, o = 0;
+  unsigned s = 0, c;
+  // Bits up to the next byte boundary; 8 when already on a boundary.
+  unsigned bitsRemaining = ((_bitsRemaining - 1) & 7) + 1;
+  unsigned limit = 1 << (bitsRemaining - 1);
+  while ((c = get(bitsRemaining)) >= limit) {
+    result += ((uint64_t)(c & (limit - 1))) << s;
+    s += (bitsRemaining - 1);
+    // o accumulates the offset that undoes the "- 1" applied per
+    // continuation group by putVLI().
+    o = (o + 1) << (bitsRemaining - 1);
+    bitsRemaining = 8;
+    limit = 128;
+  }
+  return result + o + (((uint64_t)c) << s);
+}
+
+// Binds the writer to `output` and starts with empty byte and bit buffers.
+BitOutputStream::BitOutputStream(OutputStream& output)
+  : _output(output)
+  , _bufPos(0)
+  , _bits(0)
+  , _bitPos(0) {}
+
+// Moves every complete byte from the bit accumulator into the byte buffer
+// (fewer than 8 bits stay pending). Once BUF_SIZE bytes have accumulated,
+// they are written out and the overflow beyond BUF_SIZE is moved to the
+// front of the buffer.
+// NOTE(review): the return value of _output.write() is not checked.
+void BitOutputStream::_flush() {
+  while (_bitPos >= 8) {
+    _buffer[_bufPos++] = _bits & 0xff;
+    _bits >>= 8;
+    _bitPos -= 8;
+  }
+  if (_bufPos >= BUF_SIZE) {
+    _output.write(_buffer, BUF_SIZE);
+    memcpy(_buffer, _buffer + BUF_SIZE, _bufPos - BUF_SIZE);
+    _bufPos -= BUF_SIZE;
+  }
+}
+// Writes everything buffered so far to the output. A trailing partial
+// byte is emitted as a full byte, so the stream is byte aligned and both
+// buffers are empty afterwards.
+void BitOutputStream::flush() {
+  _flush();
+
+  const bool partialBytePending = _bitPos != 0;
+  if (partialBytePending) {
+    _buffer[_bufPos] = _bits & 0xff;
+    ++_bufPos;
+    _bitPos = 0;
+    _bits   = 0;
+  }
+
+  _output.write(_buffer, _bufPos);
+  _bufPos = 0;
+}
+// Flushes all pending bits (padding to a byte boundary) and then writes
+// the size raw bytes directly to the output, bypassing the byte buffer.
+void BitOutputStream::putBytes(const uint8_t* data, const size_t size) {
+  flush();
+  _output.write(data, size);
+}
+// Writes size_ as a variable-length integer, the counterpart of
+// BitInputStream::getVLI(). The first group fills the current byte up to
+// its boundary (1..8 bits), every further group uses 8 bits. The top bit
+// of a group flags a continuation, the remaining bits carry payload.
+void BitOutputStream::putVLI(const uint64_t size_) {
+  uint64_t size = size_;
+  unsigned bitsRemaining = 8 - (_bitPos & 7);
+  unsigned limit = 1 << (bitsRemaining - 1);
+  while (size >= limit) {
+    // put() keeps only the low bitsRemaining bits: payload plus flag.
+    put(size | limit, bitsRemaining);
+    // The "- 1" is undone by the offset getVLI() accumulates.
+    size = (size >> (bitsRemaining - 1)) - 1;
+    bitsRemaining = 8;
+    limit = 128;
+  }
+  put(size, bitsRemaining);
+ 
+}
--- a/contrib/preflate/support/bitstream.h
+++ b/contrib/preflate/support/bitstream.h
@@ -0,0 +1,130 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef BITSTREAM_H
+#define BITSTREAM_H
+
+#include <algorithm>
+#include "bit_helper.h"
+#include "stream.h"
+
+// Bit-level reader on top of an InputStream. Bytes are appended above the
+// pending bits of the accumulator and bits are handed out starting from
+// its least significant end.
+class BitInputStream {
+public:
+  BitInputStream(InputStream&);
+
+  // True once the input has ended and no buffered bytes or bits remain.
+  bool eof() const {
+    return _eof && _bufPos == _bufSize && !_bitsRemaining;
+  }
+
+  // Total number of bits consumed so far.
+  size_t bitPos() const {
+    return _totalBitPos;
+  }
+
+  // Returns the next n bits without consuming them, refilling the
+  // accumulator first if it holds fewer than n bits.
+  size_t peek(const unsigned n) {
+    if (_bitsRemaining < n) {
+      _fill();
+    }
+    return _bits & _lowMask(n);
+  }
+  // Consumes n bits. The pending-bit counter never goes below zero, the
+  // total position always advances by n.
+  void skip(const unsigned n) {
+    _bitsRemaining -= min(n, _bitsRemaining);
+    _bits >>= n;
+    _totalBitPos += n;
+  }
+  // Returns and consumes the next n bits.
+  size_t get(const unsigned n) {
+    size_t v = peek(n);
+    skip(n);
+    return v;
+  }
+  // Like get(), with the order of the n bits reversed.
+  size_t getReverse(const unsigned n) {
+    return bitReverse(get(n), n);
+  }
+  // Discards the bits up to the next byte boundary.
+  void skipToByte() {
+    skip(_bitsRemaining & 7);
+  }
+  // True if all bits up to the next byte boundary are zero.
+  bool checkLastBitsOfByteAreZero() {
+    return peek(_bitsRemaining & 7) == 0;
+  }
+  // Ensures a refill has been attempted if fewer than n bits are pending,
+  // so that fastPeek()/fastGet() can be used afterwards.
+  void fastFill(const unsigned n) {
+    if (_bitsRemaining < n) {
+      _fill();
+    }
+  }
+  // peek() without the refill check; call fastFill() first.
+  size_t fastPeek(const unsigned n) {
+    return _bits & _lowMask(n);
+  }
+  // get() without the refill check; call fastFill() first.
+  size_t fastGet(const unsigned n) {
+    size_t v = fastPeek(n);
+    skip(n);
+    return v;
+  }
+  size_t copyBytesTo(OutputStream& output, const size_t len);
+  size_t getBytes(uint8_t* data, const size_t size);
+  uint64_t getVLI();
+
+private:
+  void _fillBytes();
+  void _fill();
+
+  enum { BUF_SIZE = 1024, PRE_BUF_EXTRA = 16, BITS = sizeof(size_t)*8 };
+
+  // Mask selecting the n lowest bits of the accumulator. It is built in
+  // size_t rather than int so that n >= 31 does not overflow the shift;
+  // n >= BITS selects every bit.
+  static size_t _lowMask(const unsigned n) {
+    return n >= (unsigned)BITS ? ~(size_t)0 : (((size_t)1) << n) - 1;
+  }
+
+  InputStream& _input;
+  unsigned char _buffer[PRE_BUF_EXTRA + BUF_SIZE];
+  unsigned _bufPos, _bufSize, _bufFastLimit;
+  bool _eof;
+  size_t _bits;            // bit accumulator, next bit at the low end
+  unsigned _bitsRemaining; // number of valid bits in _bits
+  size_t _totalBitPos;
+};
+
+// Bit-level writer on top of an OutputStream. Bits are collected in an
+// accumulator starting at its least significant end and moved to a byte
+// buffer one whole byte at a time.
+class BitOutputStream {
+public:
+  BitOutputStream(OutputStream&);
+
+  // Appends the n lowest bits of value. Complete bytes are moved out of
+  // the accumulator first if the new bits would not fit.
+  void put(const size_t value, const unsigned n) {
+    if (_bitPos + n >= BITS) {
+      _flush();
+    }
+    _bits   |= (value & _lowMask(n)) << _bitPos;
+    _bitPos += n;
+  }
+  // Like put(), with the order of the n bits reversed.
+  void putReverse(const size_t value, const unsigned n) {
+    put(bitReverse(value, n), n);
+  }
+  // Advances to the next byte boundary; the skipped bits stay zero.
+  void fillByte() {
+    _bitPos = (_bitPos + 7) & ~7;
+  }
+  void flush();
+  // Number of bits currently pending in the accumulator.
+  unsigned bitPos() const {
+    return _bitPos;
+  }
+  void putBytes(const uint8_t* data, const size_t size);
+  void putVLI(const uint64_t size);
+
+private:
+  void _flush();
+
+  enum {
+    BUF_SIZE = 1024, BUF_EXTRA = 64, BITS = sizeof(size_t) * 8
+  };
+
+  // Mask selecting the n lowest bits of a value. It is built in size_t
+  // rather than int so that n >= 31 does not overflow the shift;
+  // n >= BITS selects every bit.
+  static size_t _lowMask(const unsigned n) {
+    return n >= (unsigned)BITS ? ~(size_t)0 : (((size_t)1) << n) - 1;
+  }
+
+  OutputStream& _output;
+  unsigned char _buffer[BUF_SIZE + BUF_EXTRA];
+  unsigned _bufPos;
+  size_t _bits;     // bit accumulator, oldest bit at the low end
+  unsigned _bitPos; // number of valid bits in _bits
+};
+
+#endif /* BITSTREAM_H */
--- a/contrib/preflate/support/const_division.cpp
+++ b/contrib/preflate/support/const_division.cpp
@@ -0,0 +1,115 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include "bit_helper.h"
+#include "const_division.h"
+
+// Based on "N-Bit Unsigned Division Via N-Bit Multiply-Add"
+// by Robison
+
+// Precomputes the constants that let divide() replace an unsigned N-bit
+// division by d with a multiply, an add and shifts.
+// NOTE(review): d == 0 is not handled; presumably callers never pass it.
+template <unsigned N>
+udivider_t<N> build_udivider(const typename divider_uint_t<N>::type d) {
+  typedef typename divider_uint_t<N * 2>::type T1;
+  typedef typename divider_uint_t<N>::type T2;
+  udivider_t<N> result;
+  // Index of the highest set bit of d.
+  result.shift = bitLength(d) - 1;
+  if ((d & (d - 1)) == 0) {
+    // d is a power of two: factor and addend are all ones, the division
+    // itself is done by the final shift.
+    result.magic1 = result.magic2 = ~(T2)0;
+  } else {
+    T2 shm = 1 << result.shift;
+    // t = floor(2^(N + shift) / d), computed in double width.
+    T2 t = (((T1)shm) << N) / d;
+    // (t + 1) * d, reduced to N bits.
+    T2 r = t * d + d;
+    if (r <= shm) {
+      // Rounding the factor up is sufficient; no addend needed.
+      result.magic1 = t + 1;
+      result.magic2 = 0;
+    } else {
+      // Keep the rounded-down factor and add it once as correction.
+      result.magic1 = t;
+      result.magic2 = t;
+    }
+  }
+  return result;
+}
+
+// Non-template entry points that instantiate build_udivider for 16 and
+// 32 bit divisors.
+udivider_t<16> build_udivider_16(const uint16_t d) {
+  return build_udivider<16>(d);
+}
+udivider_t<32> build_udivider_32(const uint32_t d) {
+  return build_udivider<32>(d);
+}
+
+// Compact variant of build_udivider: the shift is stored in the low bits
+// of ctrl and bit 7 of ctrl records whether the factor must also be added
+// (in that case the addend equals the factor).
+// NOTE(review): d == 0 is not handled; presumably callers never pass it.
+template <unsigned N>
+ucdivider_t<N> build_ucdivider(const typename divider_uint_t<N>::type d) {
+  typedef typename divider_uint_t<N * 2>::type T1;
+  typedef typename divider_uint_t<N>::type T2;
+  ucdivider_t<N> result;
+  // Index of the highest set bit of d.
+  result.ctrl = bitLength(d) - 1;
+  if ((d & (d - 1)) == 0) {
+    // d is a power of two: all-ones factor, added once.
+    result.magic = ~(T2)0;
+    result.ctrl |= 0x80;
+  } else {
+    T2 shm = 1 << result.ctrl;
+    // t = floor(2^(N + shift) / d), computed in double width.
+    T2 t = (((T1)shm) << N) / d;
+    // (t + 1) * d, reduced to N bits.
+    T2 r = t * d + d;
+    if (r <= shm) {
+      // Rounding the factor up is sufficient; no add required.
+      result.magic = t + 1;
+    } else {
+      result.magic = t;
+      result.ctrl |= 0x80;
+    }
+  }
+  return result;
+}
+
+// Non-template entry points that instantiate build_ucdivider for 16 and
+// 32 bit divisors.
+ucdivider_t<16> build_ucdivider_16(const uint16_t d) {
+  return build_ucdivider<16>(d);
+}
+ucdivider_t<32> build_ucdivider_32(const uint32_t d) {
+  return build_ucdivider<32>(d);
+}
+
+// Builds a signed divider: the constants of the unsigned divider for |d_|
+// plus a sign field (-1 for a negative divisor, 0 otherwise).
+template <unsigned N>
+sdivider_t<N> build_sdivider(const typename divider_int_t<N>::type d_) {
+  const bool negative = d_ < 0;
+  const udivider_t<N> magnitude = build_udivider<N>(negative ? -d_ : d_);
+  sdivider_t<N> result;
+  result.shift = magnitude.shift;
+  result.magic2 = magnitude.magic2;
+  result.magic1 = magnitude.magic1;
+  result.sign = negative ? -1 : 0;
+  return result;
+}
+// Non-template entry points that instantiate build_sdivider for 16 and
+// 32 bit divisors.
+sdivider_t<16> build_sdivider_16(const int16_t d) {
+  return build_sdivider<16>(d);
+}
+sdivider_t<32> build_sdivider_32(const int32_t d) {
+  return build_sdivider<32>(d);
+}
+
+// Builds a compact signed divider: factor and ctrl of the compact unsigned
+// divider for |d_|, with bit 6 of ctrl set for a negative divisor.
+template <unsigned N>
+scdivider_t<N> build_scdivider(const typename divider_int_t<N>::type d_) {
+  const bool negative = d_ < 0;
+  const ucdivider_t<N> magnitude = build_ucdivider<N>(negative ? -d_ : d_);
+  scdivider_t<N> result;
+  result.ctrl  = magnitude.ctrl;
+  result.magic = magnitude.magic;
+  if (negative) {
+    result.ctrl |= 0x40;
+  }
+  return result;
+}
+// Non-template entry points that instantiate build_scdivider for 16 and
+// 32 bit divisors.
+scdivider_t<16> build_scdivider_16(const int16_t d) {
+  return build_scdivider<16>(d);
+}
+scdivider_t<32> build_scdivider_32(const int32_t d) {
+  return build_scdivider<32>(d);
+}
--- a/contrib/preflate/support/const_division.h
+++ b/contrib/preflate/support/const_division.h
@@ -0,0 +1,170 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef CONST_DIVISION_H
+#define CONST_DIVISION_H
+
+#include <stdint.h>
+
+// Map a bit width N to the signed / unsigned integer type of that width.
+// Only the widths specialized below are available (signed: 16, 32;
+// unsigned: 16, 32, 64).
+template <unsigned N> struct divider_int_t;
+template <unsigned N> struct divider_uint_t;
+template <> struct divider_int_t<16> {
+  typedef int16_t type; 
+};
+template <> struct divider_int_t<32> {
+  typedef int32_t type;
+};
+template <> struct divider_uint_t<16> {
+  typedef uint16_t type;
+};
+template <> struct divider_uint_t<32> {
+  typedef uint32_t type;
+};
+template <> struct divider_uint_t<64> {
+  typedef uint64_t type;
+};
+
+
+// Precomputed constants for unsigned division by a fixed N-bit divisor:
+// quotient = ((dividend * magic1 + magic2) >> N) >> shift.
+template <unsigned N>
+struct udivider_t {
+  typename divider_uint_t<N>::type magic1; // factor
+  typename divider_uint_t<N>::type magic2; // addend
+  uint8_t shift;
+};
+
+// Compact form of udivider_t: one constant serves as factor and, when
+// bit 7 of ctrl is set, also as addend.
+template <unsigned N>
+struct ucdivider_t {
+  typename divider_uint_t<N>::type magic; // factor/addend
+  uint8_t ctrl; // bits 0..3/4/5 - shift, bit 7 - add required
+};
+
+// Precomputed constants for signed division by a fixed N-bit divisor:
+// the unsigned constants for the divisor's magnitude plus its sign.
+//
+// If it wasn't for +/-1, the signed dividers wouldn't
+// need the add-term (magic2), as they could just
+// use a factor (magic1) with one more bit precision.
+template <unsigned N>
+struct sdivider_t {
+  typename divider_uint_t<N>::type magic1; // factor
+  typename divider_uint_t<N>::type magic2; // addend
+  uint8_t shift; 
+  int8_t sign; // -1 if negative, 0 otherwise
+};
+
+// Compact form of sdivider_t: one constant serves as factor and optional
+// addend, the divisor's sign is kept in bit 6 of ctrl.
+template <unsigned N>
+struct scdivider_t {
+  typename divider_uint_t<N>::type magic;
+  uint8_t ctrl; // bits 0..3/4/5 - shift, bit 6 - negative, bit 7 - add required
+};
+
+// Builders: precompute the divider constants for divisor d.
+// Unsigned, separate factor and addend.
+udivider_t<16> build_udivider_16(const uint16_t d);
+udivider_t<32> build_udivider_32(const uint32_t d);
+
+// Unsigned, compact representation.
+ucdivider_t<16> build_ucdivider_16(const uint16_t d);
+ucdivider_t<32> build_ucdivider_32(const uint32_t d);
+
+// Signed, separate factor and addend.
+sdivider_t<16> build_sdivider_16(const int16_t d);
+sdivider_t<32> build_sdivider_32(const int32_t d);
+
+// Signed, compact representation.
+scdivider_t<16> build_scdivider_16(const int16_t d);
+scdivider_t<32> build_scdivider_32(const int32_t d);
+
+// Unsigned division of an N1-bit dividend by a precomputed N2-bit divider:
+// multiply-add in double width, drop the low N2 bits, then apply the
+// divider's shift.
+template <unsigned N1, unsigned N2>
+inline typename divider_uint_t<N1>::type 
+divide_template(const typename divider_uint_t<N1>::type dividend, 
+                const udivider_t<N2>& divisor) {
+  typedef typename divider_uint_t<N1 * 2>::type T1;
+  typedef typename divider_uint_t<N1>::type T2;
+  T1 t = ((T1)dividend) * divisor.magic1 + divisor.magic2;
+  T2 u = (T2)(t >> N2);
+  return u >> divisor.shift;
+}
+// Unsigned division by a compact divider: the factor is added as well
+// when bit 7 of ctrl is set, and the shift is taken from the low bits of
+// ctrl (masked with N2 - 1).
+template <unsigned N1, unsigned N2>
+inline typename divider_uint_t<N1>::type
+divide_template(const typename divider_uint_t<N1>::type dividend,
+                const ucdivider_t<N2>& divisor) {
+  typedef typename divider_uint_t<N1 * 2>::type T1;
+  typedef typename divider_uint_t<N1>::type T2;
+  T1 t = ((T1)dividend) * divisor.magic
+        + (divisor.ctrl & 0x80 ? divisor.magic : 0);
+  T2 u = (T2)(t >> N2);
+  return u >> (divisor.ctrl & (N2 - 1));
+}
+// Signed division by a precomputed divider: divide the magnitude of the
+// dividend with the unsigned constants, then negate the quotient if the
+// signs of dividend and divisor differ.
+template <unsigned N1, unsigned N2>
+inline typename divider_int_t<N1>::type
+divide_template(const typename divider_int_t<N1>::type dividend,
+                const sdivider_t<N2>& divisor) {
+  typedef typename divider_uint_t<N1 * 2>::type T1;
+  typedef typename divider_uint_t<N1>::type T2;
+  // s is all ones for a negative dividend, zero otherwise.
+  T2 s = dividend < 0 ? -1 : 0;
+  // (x ^ s) - s negates x when s is all ones and leaves it as is for 0.
+  T1 t = ((T1)(T2)((dividend ^ s) - s)) * divisor.magic1 
+        + divisor.magic2;
+  T2 u = (T2)(t >> N2) >> divisor.shift;
+  // Combine both signs, then apply the result's sign the same way.
+  s ^= divisor.sign;
+  return (u ^ s) - s;
+}
+// Signed division by a compact divider: same scheme as the sdivider_t
+// overload, with the add flag (bit 7), the divisor sign (bit 6) and the
+// shift (low bits) all taken from ctrl.
+template <unsigned N1, unsigned N2>
+inline typename divider_int_t<N1>::type
+divide_template(const typename divider_int_t<N1>::type dividend,
+                const scdivider_t<N2>& divisor) {
+  typedef typename divider_uint_t<N1 * 2>::type T1;
+  typedef typename divider_uint_t<N1>::type T2;
+  
+  // s is all ones for a negative dividend, zero otherwise.
+  T2 s = dividend < 0 ? -1 : 0;
+  // (x ^ s) - s negates x when s is all ones and leaves it as is for 0.
+  T1 t = ((T1)(T2)((dividend ^ s) - s)) * divisor.magic
+        + (divisor.ctrl & 0x80 ? divisor.magic : 0);
+  T2 u = (T2)(t >> N2) >> (divisor.ctrl & (N2 - 1));
+  // Combine both signs, then apply the result's sign the same way.
+  s ^= (divisor.ctrl & 0x40 ? -1 : 0);
+  return (u ^ s) - s;
+}
+
+// divide() overloads for udivider_t; each forwards to divide_template
+// with the dividend width and divider width as template arguments.
+inline uint16_t divide(const uint16_t dividend, const udivider_t<16>& divisor) {
+  return divide_template<16, 16>(dividend, divisor);
+}
+inline uint32_t divide(const uint32_t dividend, const udivider_t<16>& divisor) {
+  return divide_template<32, 16>(dividend, divisor);
+}
+inline uint32_t divide(const uint32_t dividend, const udivider_t<32>& divisor) {
+  return divide_template<32, 32>(dividend, divisor);
+}
+
+// divide() overloads for ucdivider_t; each forwards to divide_template
+// with the dividend width and divider width as template arguments.
+inline uint16_t divide(const uint16_t dividend, const ucdivider_t<16>& divisor) {
+  return divide_template<16, 16>(dividend, divisor);
+}
+inline uint32_t divide(const uint32_t dividend, const ucdivider_t<16>& divisor) {
+  return divide_template<32, 16>(dividend, divisor);
+}
+inline uint32_t divide(const uint32_t dividend, const ucdivider_t<32>& divisor) {
+  return divide_template<32, 32>(dividend, divisor);
+}
+
+// divide() overloads for sdivider_t; each forwards to divide_template
+// with the dividend width and divider width as template arguments.
+inline int16_t divide(const int16_t dividend, const sdivider_t<16>& divisor) {
+  return divide_template<16, 16>(dividend, divisor);
+}
+inline int32_t divide(const int32_t dividend, const sdivider_t<16>& divisor) {
+  return divide_template<32, 16>(dividend, divisor);
+}
+inline int32_t divide(const int32_t dividend, const sdivider_t<32>& divisor) {
+  return divide_template<32, 32>(dividend, divisor);
+}
+
+// divide() overloads for scdivider_t; each forwards to divide_template
+// with the dividend width and divider width as template arguments.
+inline int16_t divide(const int16_t dividend, const scdivider_t<16>& divisor) {
+  return divide_template<16, 16>(dividend, divisor);
+}
+inline int32_t divide(const int32_t dividend, const scdivider_t<16>& divisor) {
+  return divide_template<32, 16>(dividend, divisor);
+}
+inline int32_t divide(const int32_t dividend, const scdivider_t<32>& divisor) {
+  return divide_template<32, 32>(dividend, divisor);
+}
+
+#endif /* CONST_DIVISION_H */
--- a/contrib/preflate/support/filestream.cpp
+++ b/contrib/preflate/support/filestream.cpp
@@ -0,0 +1,41 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include <algorithm>
+#include <string.h>
+#include <stdio.h>
+#include "filestream.h"
+
+// Wraps an already opened FILE. Nothing in this file closes it.
+FileStream::FileStream(FILE* f) : _f(f) {}
+
+// Reports the FILE's end-of-file indicator as returned by feof().
+bool FileStream::eof() const {
+  return feof(_f);
+}
+// Reads up to size bytes into buffer and returns the number of bytes
+// actually read, as reported by fread().
+size_t FileStream::read(unsigned char* buffer, const size_t size) {
+  return fread(buffer, 1, size, _f);
+}
+
+// Writes size bytes from buffer and returns the number of bytes actually
+// written, as reported by fwrite().
+size_t FileStream::write(const unsigned char* buffer, const size_t size) {
+  return fwrite(buffer, 1, size, _f);
+}
+
+// Returns the current file position as reported by _ftelli64().
+// NOTE(review): a failure result from _ftelli64() is converted to
+// uint64_t without being checked.
+uint64_t FileStream::tell() const {
+  return _ftelli64(_f);
+}
+// Moves the file position to newPos (measured from the start of the file)
+// and returns the position the file had before the call.
+// NOTE(review): the result of _fseeki64() is not checked.
+uint64_t FileStream::seek(const uint64_t newPos) {
+  uint64_t oldPos = tell();
+  _fseeki64(_f, newPos, SEEK_SET);
+  return oldPos;
+}
--- a/contrib/preflate/support/filestream.h
+++ b/contrib/preflate/support/filestream.h
@@ -0,0 +1,38 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef FILESTREAM_H
+#define FILESTREAM_H
+
+#include <stdint.h>
+#include <vector>
+#include "stream.h"
+
+// Stream adapter over a C stdio FILE handle that implements the read, write
+// and seek operations of SeekableInputOutputStream. The handle is borrowed:
+// this class declares no destructor and never closes it.
+class FileStream : public SeekableInputOutputStream {
+public:
+  // Wraps the already-open handle `f`.
+  FileStream(FILE* f);
+
+  // InputStream interface. `override` lets the compiler verify that these
+  // signatures still match the base class.
+  bool eof() const override;
+  size_t read(unsigned char* buffer, const size_t size) override;
+
+  // OutputStream interface.
+  size_t write(const unsigned char* buffer, const size_t size) override;
+
+  // SeekableStream interface. seek() returns the position before the move.
+  uint64_t tell() const override;
+  uint64_t seek(const uint64_t newPos) override;
+
+private:
+  FILE* _f;  // not owned
+};
+
+#endif /* FILESTREAM_H */
--- a/contrib/preflate/support/huffman_decoder.cpp
+++ b/contrib/preflate/support/huffman_decoder.cpp
@@ -0,0 +1,111 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include <algorithm>
+#include "huffman_decoder.h"
+#include "huffman_helper.h"
+#include "bit_helper.h"
+
+// Builds the decoding tables from one code length per symbol. When
+// _constructTables() rejects the input, the decoder is switched to the
+// error table and error() reports true.
+HuffmanDecoder::HuffmanDecoder(
+    const unsigned char* symbolBitLengths,
+    const size_t symbolCount,
+    const bool disableZeroBitSymbols,
+    const unsigned char maxBitsPerTable
+) : _error(false) {
+  const bool tablesOk = _constructTables(symbolBitLengths, symbolCount,
+                                         disableZeroBitSymbols, maxBitsPerTable);
+  if (tablesOk) {
+    return;
+  }
+  _constructErrorTable();
+}
+
+// Continues a decode that the root table could not resolve: consumes the
+// root-table bits, then follows sub-table links until a symbol entry is
+// reached. Non-negative entries hold (symbol << 4) | bitsToSkip; negative
+// entries hold the bitwise complement of the next sub-table index.
+size_t HuffmanDecoder::_decodeDeeper(
+    BitInputStream& bis,
+    const size_t tableId_
+) const {
+  bis.skip(_table0.peekBits);
+  for (size_t current = tableId_;;) {
+    const Table& sub = _tables[current];
+    const size_t index = bis.peek(sub.peekBits);
+    const signed short entry = sub.lookup[index];
+    if (entry < 0) {
+      // Link entry: drop this table's bits and descend one level.
+      bis.skip(sub.peekBits);
+      current = ~entry;
+      continue;
+    }
+    bis.skip(entry & 0xf);
+    return entry >> 4;
+  }
+}
+// Builds the root lookup table and any linked sub-tables from one code
+// length per symbol.
+//
+// Returns false when maxBitsPerTable is outside 1..15 or when
+// HuffmanHelper::countSymbols() rejects the lengths; true otherwise.
+//
+// Table entries are signed shorts: a non-negative entry packs
+// (symbol << 4) | bitsConsumedInThisTable, a negative entry is the bitwise
+// complement of an index into _tables.
+// NOTE(review): `min` is used unqualified; presumably it is supplied through
+// pch.h - confirm.
+bool HuffmanDecoder::_constructTables(
+    const unsigned char* symbolBitLengths,
+    const size_t symbolCount,
+    const bool disableZeroBitSymbols,
+    const unsigned char maxBitsPerTable
+) {
+  if (maxBitsPerTable < 1 || maxBitsPerTable > 15) {
+    return false;
+  }
+  // nextCode / minLength / maxLength use lengths biased by one (index =
+  // bit length + 1), matching the `+ 1` applied to every length below.
+  unsigned nextCode[HuffmanHelper::MAX_BL + 2];
+  unsigned char minLength, maxLength;
+  if (!HuffmanHelper::countSymbols(nextCode, minLength, maxLength,
+                                   symbolBitLengths, symbolCount, 
+                                   disableZeroBitSymbols)) {
+    return false;
+  }
+
+  // The root table peeks at most maxBitsPerTable bits, fewer when the
+  // longest code (maxLength - 1 bits) is shorter than that.
+  _table0.peekBits = min((unsigned char)(maxLength - 1), maxBitsPerTable);
+  _table0.lookup.resize(1 << _table0.peekBits);
+  std::fill(_table0.lookup.begin(), _table0.lookup.end(), 0);
+
+  // Smallest biased length that receives a code: 2 skips zero-bit symbols.
+  unsigned char minL = disableZeroBitSymbols ? 2 : 1;
+
+  for (unsigned i = 0; i < symbolCount; ++i) {
+    // l: biased length, k: number of code bits still to be placed.
+    unsigned char l = (unsigned char)(symbolBitLengths[i] + 1);
+    if (l < minL) {
+      continue;
+    }
+    unsigned char k = l - 1, maxK = maxLength - 1;
+    // Take the next free code of this length and bit-reverse it over k bits.
+    unsigned code = bitReverse(nextCode[l]++, k);
+    Table* t = &_table0;
+    // While the remaining code is longer than the current table's index
+    // width, consume peekBits of it and step into (or create) the sub-table
+    // linked from that slot.
+    while (k > t->peekBits) {
+      k -= t->peekBits;
+      maxK -= t->peekBits;
+      unsigned subbits = code & ((1 << t->peekBits) - 1);
+      code >>= t->peekBits;
+      signed short v = t->lookup[subbits];
+      if (v >= 0) {
+        // No link in this slot yet: append a new sub-table. The link is
+        // stored through `t` before push_back, and `t` is re-pointed
+        // afterwards.
+        unsigned newTableId = _tables.size();
+        t->lookup[subbits] = ~newTableId;
+        _tables.push_back(Table());
+        t = &_tables[newTableId];
+        t->peekBits = min(maxK, maxBitsPerTable);
+        t->lookup.resize(1 << t->peekBits);
+        std::fill(t->lookup.begin(), t->lookup.end(), 0);
+      } else {
+        t = &_tables[~v];
+      }
+    }
+    // Store the symbol at every index whose low k bits equal `code`
+    // (stepping by 1 << k through the table).
+    do {
+      t->lookup[code] = (i << 4) | k;
+      code += 1 << k;
+    } while (code < t->lookup.size());
+  }
+  return true;
+}
+// Puts the decoder into its error state: error() becomes true and the root
+// table is reduced to a single zero entry indexed with zero peek bits.
+void HuffmanDecoder::_constructErrorTable() {
+  _table0.lookup.assign(1, 0);
+  _table0.peekBits = 0;
+  _error = true;
+}
--- a/contrib/preflate/support/huffman_decoder.h
+++ b/contrib/preflate/support/huffman_decoder.h
@@ -0,0 +1,62 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef HUFFMAN_DECODER_H
+#define HUFFMAN_DECODER_H
+
+#include <vector>
+#include "bitstream.h"
+
+// Table-driven Huffman decoder. The root table is indexed with the next
+// `peekBits` bits of the input; codes that do not fit continue in linked
+// sub-tables, which _decodeDeeper() walks.
+class HuffmanDecoder {
+public:
+  HuffmanDecoder(const unsigned char* symbolBitLengths,
+                 const size_t symbolCount,
+                 const bool disableZeroBitSymbols,
+                 const unsigned char maxBitsPerTable);
+
+  // True when the constructor had to fall back to the error table.
+  bool error() const { return _error; }
+
+  // Decodes one symbol from `bis`, consuming the bits of its code.
+  size_t decode(BitInputStream& bis) const {
+    const size_t index = bis.peek(_table0.peekBits);
+    const signed short entry = _table0.lookup[index];
+    if (entry < 0) {
+      // Negative entries are complemented sub-table indices.
+      return _decodeDeeper(bis, ~entry);
+    }
+    // Non-negative entries pack (symbol << 4) | bits to skip.
+    bis.skip(entry & 0xf);
+    return entry >> 4;
+  }
+
+private:
+  size_t _decodeDeeper(BitInputStream& bis, const size_t tableId) const;
+  bool _constructTables(const unsigned char* symbolBitLengths,
+                        const size_t symbolCount,
+                        const bool disableZeroBitSymbols,
+                        const unsigned char maxBitsPerTable);
+  void _constructErrorTable();
+
+private:
+  // One lookup level: `lookup` has 1 << peekBits entries once built.
+  struct Table {
+    unsigned char peekBits;
+    std::vector<signed short> lookup;
+  };
+
+  Table _table0;               // root table
+  std::vector<Table> _tables;  // sub-tables, addressed by complemented index
+  bool _error;
+};
+
+#endif /* HUFFMAN_DECODER_H */
--- a/contrib/preflate/support/huffman_encoder.cpp
+++ b/contrib/preflate/support/huffman_encoder.cpp
@@ -0,0 +1,65 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include <algorithm>
+#include "huffman_encoder.h"
+#include "huffman_helper.h"
+#include "bit_helper.h"
+
+// Builds the per-symbol code table from one code length per symbol. When
+// _constructTables() rejects the input, an all-zero table of the same size
+// is installed and error() reports true.
+HuffmanEncoder::HuffmanEncoder(
+    const unsigned char* symbolBitLengths,
+    const unsigned symbolCount,
+    const bool disableZeroBitSymbols
+) : _error(false) {
+  const bool tablesOk = _constructTables(symbolBitLengths, symbolCount,
+                                         disableZeroBitSymbols);
+  if (tablesOk) {
+    return;
+  }
+  _constructErrorTable(symbolCount);
+}
+
+// Fills _lookup with one packed entry per symbol:
+// (bit-reversed code << 5) | code length. Symbols excluded by the length
+// filter get the entry 0. Returns false when HuffmanHelper::countSymbols()
+// rejects the lengths.
+bool HuffmanEncoder::_constructTables(
+    const unsigned char* symbolBitLengths,
+    const unsigned symbolCount,
+    const bool disableZeroBitSymbols
+) {
+  unsigned nextCode[HuffmanHelper::MAX_BL + 2];
+  unsigned char shortest, longest;
+  const bool lengthsOk = HuffmanHelper::countSymbols(
+      nextCode, shortest, longest,
+      symbolBitLengths, symbolCount, disableZeroBitSymbols);
+  if (!lengthsOk) {
+    return false;
+  }
+
+  // Lengths are handled with a bias of one, so a zero-bit symbol has the
+  // biased length 1 and is filtered out when zero-bit symbols are disabled.
+  const unsigned char firstUsedLength = disableZeroBitSymbols ? 2 : 1;
+
+  _lookup.resize(symbolCount);
+  for (unsigned sym = 0; sym < symbolCount; ++sym) {
+    const unsigned char biased = (unsigned char)(symbolBitLengths[sym] + 1);
+    if (biased >= firstUsedLength) {
+      const unsigned char bits = biased - 1;
+      // Take the next free code of this length, bit-reversed over `bits`.
+      const unsigned code = bitReverse(nextCode[biased]++, bits);
+      _lookup[sym] = (code << 5) | bits;
+    } else {
+      _lookup[sym] = 0;
+    }
+  }
+  return true;
+}
+// Puts the encoder into its error state: error() becomes true and _lookup
+// holds `symbolCount` zero entries.
+void HuffmanEncoder::_constructErrorTable(
+    const unsigned symbolCount
+) {
+  _lookup.assign(symbolCount, 0);
+  _error = true;
+}
--- a/contrib/preflate/support/huffman_encoder.h
+++ b/contrib/preflate/support/huffman_encoder.h
@@ -0,0 +1,48 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef HUFFMAN_ENCODER_H
+#define HUFFMAN_ENCODER_H
+
+#include <vector>
+#include "bitstream.h"
+
+// Huffman encoder: writes the code assigned to a symbol into a bit stream.
+// Codes are derived from one bit length per symbol; when the lengths are
+// rejected, error() is true and every symbol maps to the entry 0.
+class HuffmanEncoder {
+public:
+  HuffmanEncoder(const unsigned char* symbolBitLengths,
+                 const unsigned symbolCount,
+                 const bool disableZeroBitSymbols);
+
+  // True when the code lengths given to the constructor were rejected.
+  bool error() const { return _error; }
+
+  // Emits the code of `symbol`. `symbol` indexes _lookup without a range
+  // check, so it has to be below the symbolCount given to the constructor.
+  void encode(BitOutputStream& bos, const unsigned symbol) const {
+    const unsigned packed = _lookup[symbol];
+    const unsigned code = packed >> 5;
+    const unsigned bits = packed & 0x1f;
+    bos.put(code, bits);
+  }
+
+private:
+  bool _constructTables(const unsigned char* symbolBitLengths,
+                        const unsigned symbolCount,
+                        const bool disableZeroBitSymbols);
+  void _constructErrorTable(const unsigned symbolCount);
+
+private:
+  std::vector<unsigned> _lookup;  // per symbol: (code << 5) | bit count
+  bool _error;
+};
+
+#endif /* HUFFMAN_ENCODER_H */
--- a/contrib/preflate/support/huffman_helper.cpp
+++ b/contrib/preflate/support/huffman_helper.cpp
@@ -0,0 +1,75 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include <algorithm>
+#include <string.h>
+#include "huffman_helper.h"
+#include "bit_helper.h"
+
+// Validates a set of Huffman code lengths and computes the first code of
+// every length.
+//
+// All lengths are handled with a bias of one: a symbol of bit length n is
+// counted at index n + 1, so index 1 holds the zero-bit symbols. minLength,
+// maxLength and the indices of nextCode use the same biased scale.
+//
+// Returns false when symbolCount is outside 1..1023, when a biased length
+// exceeds MAX_BL + 1, when no biased length in 1..MAX_BL + 1 occurs, or when
+// the final check finds holes in the code space. A code made of exactly one
+// symbol is accepted without that check. nextCode is only written for the
+// indices minLength..maxLength.
+bool HuffmanHelper::countSymbols(
+    unsigned(&nextCode)[MAX_BL + 2],
+    unsigned char& minLength,
+    unsigned char& maxLength,
+    const unsigned char* symbolBitLengths,
+    const unsigned symbolCount,
+    const bool disableZeroBitSymbols
+) {
+  if (symbolCount == 0 || symbolCount > 1023) {
+    return false;
+  }
+
+  // Histogram of biased lengths.
+  unsigned short blCount[MAX_BL + 2] = {};
+  const unsigned char* const lengthsEnd = symbolBitLengths + symbolCount;
+  for (const unsigned char* p = symbolBitLengths; p != lengthsEnd; ++p) {
+    const unsigned char biased = (unsigned char)(*p + 1);
+    if (biased > MAX_BL + 1) {
+      return false;
+    }
+    ++blCount[biased];
+  }
+
+  // Smallest and largest biased length that actually occurs.
+  minLength = 1;
+  while (minLength <= MAX_BL + 1 && blCount[minLength] == 0) {
+    ++minLength;
+  }
+  maxLength = MAX_BL + 1;
+  while (maxLength >= minLength && blCount[maxLength] == 0) {
+    --maxLength;
+  }
+  if (minLength > maxLength) {
+    return false;
+  }
+
+  // Symbols in slot 0 (and zero-bit symbols, if disabled) get no code.
+  blCount[0] = 0;
+  if (disableZeroBitSymbols) {
+    blCount[1] = 0;
+  }
+
+  // First code of each length: previous start plus previous count, shifted.
+  unsigned code = 0;
+  for (unsigned len = minLength; len <= maxLength; ++len) {
+    code += blCount[len - 1];
+    code <<= 1;
+    nextCode[len] = code;
+  }
+
+  const unsigned longestCount = blCount[maxLength];
+  if (minLength == maxLength && longestCount == 1) {
+    return true;
+  }
+
+  // The codes of the longest length must end exactly at 1 << (bit length).
+  return nextCode[maxLength] + longestCount == (unsigned)(1 << (maxLength - 1));
+}
--- a/contrib/preflate/support/huffman_helper.h
+++ b/contrib/preflate/support/huffman_helper.h
@@ -0,0 +1,34 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef HUFFMAN_HELPER_H
+#define HUFFMAN_HELPER_H
+
+#include <vector>
+
+// Huffman helper: code-length bookkeeping shared by HuffmanEncoder and
+// HuffmanDecoder.
+class HuffmanHelper {
+public:
+  enum {
+    // Largest bit length countSymbols() accepts for a single symbol.
+    MAX_BL = 25
+  };
+  // Counts the symbols per bit length and fills nextCode with the first
+  // code of each length; returns false when the lengths are rejected.
+  // Indices of nextCode and the values of minLength / maxLength are bit
+  // lengths biased by one.
+  static bool countSymbols(unsigned(&nextCode)[MAX_BL + 2],
+                           unsigned char& minLength,
+                           unsigned char& maxLength,
+                           const unsigned char* symbolBitLengths,
+                           const unsigned symbolCount,
+                           const bool disableZeroBitSymbols);
+};
+
+#endif /* HUFFMAN_HELPER_H */
--- a/contrib/preflate/support/memstream.cpp
+++ b/contrib/preflate/support/memstream.cpp
@@ -0,0 +1,57 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include <algorithm>
+#include <string.h>
+#include "memstream.h"
+
+// Creates an empty stream with the cursor at offset 0.
+MemStream::MemStream()
+  : _pos(0) {
+}
+// Creates a stream that holds its own copy of `content`, cursor at 0.
+MemStream::MemStream(const std::vector<uint8_t>& content)
+  : _data(content), _pos(0) {
+}
+// Creates a stream holding a copy of at most `sz` bytes of `content`,
+// starting at offset `off`, with the cursor at 0. The range is clipped to
+// the end of `content`; an offset at or past the end gives an empty stream.
+MemStream::MemStream(const std::vector<uint8_t>& content, const size_t off, const size_t sz)
+  : _pos(0) {
+  // Clip without ever forming `off + sz`: that sum wraps around for a large
+  // `sz` (e.g. SIZE_MAX for "everything from off") and used to produce an
+  // empty stream. Clamping `off` first also avoids forming a pointer past
+  // the end of `content`.
+  const size_t start = off < content.size() ? off : content.size();
+  const size_t available = content.size() - start;
+  const size_t length = sz < available ? sz : available;
+  _data.assign(content.begin() + start, content.begin() + start + length);
+}
+
+// True when no byte is left to read at the cursor. The comparison is `>=`
+// because replaceData() and extractData() change the buffer without moving
+// the cursor, which can leave it beyond the end; that state is end-of-stream
+// too.
+bool MemStream::eof() const {
+  return _pos >= _data.size();
+}
+// Copies up to `size` bytes from the cursor into `buffer`, advances the
+// cursor and returns the number of bytes copied (0 at end of stream).
+size_t MemStream::read(unsigned char* buffer, const size_t size) {
+  // replaceData()/extractData() can leave the cursor beyond the buffer. The
+  // subtraction below would then wrap around and memcpy would read past the
+  // end, so that case is answered as end-of-stream.
+  if (_pos >= _data.size()) {
+    return 0;
+  }
+  const size_t available = _data.size() - _pos;
+  const size_t toCopy = size < available ? size : available;
+  memcpy(buffer, _data.data() + _pos, toCopy);
+  _pos += toCopy;
+  return toCopy;
+}
+
+// Writes `size` bytes at the cursor, overwriting existing bytes and growing
+// the buffer as needed, then advances the cursor. Always returns `size`.
+size_t MemStream::write(const unsigned char* buffer, const size_t size) {
+  if (size == 0) {
+    return 0;  // nothing to copy; also keeps memcpy away from null data()
+  }
+  // Compare end positions instead of computing `_data.size() - _pos`: that
+  // difference wraps around when the cursor sits beyond the buffer (possible
+  // after replaceData()/extractData()), which skipped the resize and wrote
+  // out of bounds. A gap created by such a cursor is zero-filled by resize.
+  const size_t end = _pos + size;
+  if (end > _data.size()) {
+    _data.resize(end);
+  }
+  memcpy(_data.data() + _pos, buffer, size);
+  _pos = end;
+  return size;
+}
+
+// Returns the cursor offset from the start of the buffer.
+uint64_t MemStream::tell() const {
+  const uint64_t position = _pos;
+  return position;
+}
+// Moves the cursor to `newPos`, clamped to the current buffer size, and
+// returns the cursor position from before the move.
+uint64_t MemStream::seek(const uint64_t newPos) {
+  const size_t previous = _pos;
+  const uint64_t limit = (uint64_t)_data.size();
+  _pos = newPos < limit ? newPos : limit;
+  return previous;
+}
--- a/contrib/preflate/support/memstream.h
+++ b/contrib/preflate/support/memstream.h
@@ -0,0 +1,52 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef MEMSTREAM_H
+#define MEMSTREAM_H
+
+#include <stdint.h>
+#include <vector>
+#include "stream.h"
+
+// Growable in-memory stream backed by a byte vector, with one cursor shared
+// by read(), write() and seek().
+class MemStream : public SeekableInputOutputStream {
+public:
+  // Empty stream.
+  MemStream();
+  // Stream over a copy of `content`.
+  MemStream(const std::vector<uint8_t>& content);
+  // Stream over a copy of the `sz` bytes of `content` that start at `off`.
+  MemStream(const std::vector<uint8_t>& content, const size_t off, const size_t sz);
+
+  // InputStream interface. `override` lets the compiler verify that these
+  // signatures still match the base class.
+  bool eof() const override;
+  size_t read(unsigned char* buffer, const size_t size) override;
+
+  // OutputStream interface.
+  size_t write(const unsigned char* buffer, const size_t size) override;
+
+  // SeekableStream interface. seek() returns the position before the move.
+  uint64_t tell() const override;
+  uint64_t seek(const uint64_t newPos) override;
+
+  // Replaces the buffer with a copy of `content`. The cursor is not
+  // changed, so it may end up beyond the new end of the buffer.
+  void replaceData(const std::vector<uint8_t>& content) {
+    _data = content;
+  }
+
+  // Read-only view of the whole buffer.
+  const std::vector<uint8_t>& data() const {
+    return _data;
+  }
+  // Moves the buffer out to the caller. The cursor is not changed.
+  std::vector<uint8_t> extractData() {
+    return std::move(_data);
+  }
+
+private:
+  std::vector<uint8_t> _data;
+  size_t _pos;  // cursor: offset of the next byte to read or write
+};
+
+#endif /* MEMSTREAM_H */
--- a/contrib/preflate/support/outputcachestream.cpp
+++ b/contrib/preflate/support/outputcachestream.cpp
@@ -0,0 +1,30 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include <algorithm>
+#include "outputcachestream.h"
+
+// Binds the cache to the stream `os` that flushes are written to. The cache
+// starts empty at absolute position 0. Only a reference to `os` is kept.
+OutputCacheStream::OutputCacheStream(OutputStream& os)
+  : _os(os), _cacheStartPos(0) {
+}
+// Intentionally empty: bytes still in the cache are not flushed here.
+OutputCacheStream::~OutputCacheStream() {}
+
+// Writes the cached bytes that lie before the absolute position
+// `newStartPos` to the wrapped stream and drops them from the cache. The
+// cache start advances by the number of bytes the wrapped stream accepted.
+void OutputCacheStream::flushUpTo(const uint64_t newStartPos) {
+  // A target at or before the cache start means there is nothing to flush.
+  // Subtracting unconditionally wrapped around for such targets and flushed
+  // the entire cache.
+  uint64_t span = 0;
+  if (newStartPos > _cacheStartPos) {
+    span = newStartPos - _cacheStartPos;
+  }
+  const size_t cached = _cache.size();
+  const size_t toWrite = span < (uint64_t)cached ? (size_t)span : cached;
+  size_t written = _os.write(_cache.data(), toWrite);
+  // Never erase more than was offered, whatever the wrapped stream reports.
+  if (written > toWrite) {
+    written = toWrite;
+  }
+  _cacheStartPos += written;
+  _cache.erase(_cache.begin(), _cache.begin() + written);
+}
--- a/contrib/preflate/support/outputcachestream.h
+++ b/contrib/preflate/support/outputcachestream.h
@@ -0,0 +1,67 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef OUTPUTCACHESTREAM_H
+#define OUTPUTCACHESTREAM_H
+
+#include <algorithm>
+#include <vector>
+#include "stream.h"
+
+// Output stream that buffers everything written to it and hands the data to
+// the wrapped stream only when flush() or flushUpTo() is called. Positions
+// are absolute offsets in the overall output; the cache holds the bytes from
+// cacheStartPos() up to cacheEndPos().
+class OutputCacheStream : public OutputStream {
+public:
+  // `os` receives the flushed bytes; only a reference to it is stored.
+  OutputCacheStream(OutputStream& os);
+  virtual ~OutputCacheStream();
+
+  // Appends `size` bytes to the cache and returns `size`. Nothing reaches
+  // the wrapped stream here.
+  size_t write(const unsigned char* buffer, const size_t size) override {
+    _cache.insert(_cache.end(), buffer, buffer + size);
+    return size;
+  }
+  // Makes room for `len` more bytes. When the capacity has to grow, it
+  // grows by the larger of `len` and half the current capacity.
+  void reserve(const size_t len) {
+    const size_t cap = _cache.capacity();
+    if (_cache.size() + len > cap) {
+      const size_t half = cap >> 1;
+      _cache.reserve(cap + (half > len ? half : len));
+    }
+  }
+  // Flushes the whole cache.
+  void flush() {
+    flushUpTo(cacheEndPos());
+  }
+  // Flushes the cached bytes that lie before `newStartPos`.
+  void flushUpTo(const uint64_t newStartPos);
+  // Absolute position of the first cached byte.
+  uint64_t cacheStartPos() const {
+    return _cacheStartPos;
+  }
+  // Absolute position just past the last cached byte.
+  uint64_t cacheEndPos() const {
+    return _cacheStartPos + _cache.size();
+  }
+  // Pointer to the cached byte at absolute position `pos`. No range check
+  // is made; `pos` is expected to lie within the cached range.
+  const unsigned char* cacheData(const uint64_t pos) const {
+    return _cache.data() + (ptrdiff_t)(pos - _cacheStartPos);
+  }
+  // Pointer just past the last cached byte.
+  const unsigned char* cacheEnd() const {
+    return _cache.data() + _cache.size();
+  }
+  // Number of bytes currently cached.
+  size_t cacheSize() const {
+    return _cache.size();
+  }
+
+private:
+  OutputStream& _os;
+  std::vector<unsigned char> _cache;
+  uint64_t _cacheStartPos;
+};
+
+#endif /* OUTPUTCACHESTREAM_H */
--- a/contrib/preflate/support/stream.h
+++ b/contrib/preflate/support/stream.h
@@ -0,0 +1,51 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef STREAM_H
+#define STREAM_H
+
+#include <stdint.h>
+
+// Abstract byte source.
+class InputStream {
+public:
+  virtual ~InputStream() = default;
+
+  // True when the source has no more data to deliver.
+  virtual bool eof() const = 0;
+  // Reads up to `size` bytes into `buffer`; returns the number of bytes read.
+  virtual size_t read(unsigned char* buffer, const size_t size) = 0;
+};
+
+// Abstract byte sink.
+class OutputStream {
+public:
+  virtual ~OutputStream() = default;
+
+  // Writes `size` bytes from `buffer`; returns the number of bytes accepted.
+  virtual size_t write(const unsigned char* buffer, const size_t size) = 0;
+};
+
+// Abstract positionable stream.
+class SeekableStream {
+public:
+  virtual ~SeekableStream() = default;
+
+  // Current absolute position.
+  virtual uint64_t tell() const = 0;
+  // Moves to the absolute position `newPos`. FileStream and MemStream
+  // return the position from before the move.
+  virtual uint64_t seek(const uint64_t newPos) = 0;
+};
+
+// Interface combination: a byte source that can also be repositioned.
+// Adds no members of its own.
+class SeekableInputStream 
+  : public InputStream
+  , public SeekableStream {};
+// Interface combination: a repositionable byte source that is also a byte
+// sink. Adds no members of its own.
+class SeekableInputOutputStream 
+  : public SeekableInputStream
+  , public OutputStream {};
+
+
+#endif /* STREAM_H */
--- a/contrib/preflate/support/support_tests.cpp
+++ b/contrib/preflate/support/support_tests.cpp
@@ -0,0 +1,181 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "pch.h"
+#include <stdio.h>
+#include "array_helper.h"
+#include "bit_helper.h"
+#include "bitstream.h"
+#include "const_division.h"
+#include "huffman_decoder.h"
+#include "huffman_encoder.h"
+#include "huffman_helper.h"
+#include "memstream.h"
+#include "outputcachestream.h"
+#include "stream.h"
+
+// Self test for the support library: array/bit helpers, MemStream, the bit
+// streams, the Huffman encoder/decoder pair and the constant dividers.
+// Stops at the first failing check, prints a message naming it to stdout and
+// returns false; returns true when every check passes.
+bool support_self_tests() {
+  // --- array and bit helpers ---
+  unsigned arr[] = {1,2,3,4,5};
+  if (sumArray(arr) != 15
+      || sumArray(arr, sizeof(arr) / sizeof(arr[0])) != 15) {
+    printf("sumArray failed\n");
+    return false;
+  }
+  if (bitLength(0) != 0 
+      || bitLength(15) != 4
+      || bitLength(0xffffffff) != 32) {
+    printf("bitLength failed\n");
+    return false;
+  }
+  if (bitReverse(1, 3) != 4
+      || bitReverse(0x12345678, 32) != 0x1e6a2c48
+      || bitReverse(0xfedcba90, 32) != 0x095d3b7f) {
+    printf("bitReverse failed\n");
+    return false;
+  }
+
+  // --- MemStream: write, read at end, seek, read back ---
+  // `mem` is reused by all stream-based sections below.
+  MemStream mem;
+  mem.write((const uint8_t*)"Hello", 5);
+  if (mem.tell() != 5 || !mem.eof()) {
+    printf("MemStream/1 failed\n");
+    return false;
+  }
+  mem.write((const uint8_t*)"!", 1);
+  uint8_t tmp[5], tmp2[2];
+  // The cursor sits at the end after the writes, so nothing can be read.
+  if (mem.read(tmp, 5) != 0) {
+    printf("MemStream/2 failed\n");
+    return false;
+  }
+  // seek() reports the previous position (6 bytes were written).
+  if (mem.seek(0) != 6) {
+    printf("MemStream/3 failed\n");
+    return false;
+  }
+  if (mem.tell() != 0) {
+    printf("MemStream/4 failed\n");
+    return false;
+  }
+  if (mem.read(tmp, 5) != 5 || tmp[0] != 'H' || tmp[4] != 'o') {
+    printf("MemStream/5 failed\n");
+    return false;
+  }
+  // Only one byte is left, so a 2-byte read is cut short.
+  if (mem.read(tmp2, 2) != 1 || tmp2[0] != '!') {
+    printf("MemStream/6 failed\n");
+    return false;
+  }
+  if (!mem.eof()) {
+    printf("MemStream/7 failed\n");
+    return false;
+  }
+
+  // --- bit streams: write the value i in i bits, then read it back ---
+  mem.seek(0);
+  {
+    BitOutputStream bos(mem);
+    for (unsigned i = 0; i <= HuffmanHelper::MAX_BL; ++i) {
+      bos.put(i, i);
+    }
+    bos.flush();
+  }
+  mem.seek(0);
+  {
+    BitInputStream bis(mem);
+    for (unsigned i = 0; i <= HuffmanHelper::MAX_BL; ++i) {
+      if (bis.get(i) != i) {
+       printf("BitStreams failed\n");
+       return false;
+      }
+    }
+  }
+
+  // --- Huffman round trip ---
+  // 26 symbols with the code lengths 1..25 plus a second 25. The decoder is
+  // limited to 7 bits per table, so the longer codes need sub-tables.
+  unsigned char lengths[] = {
+    1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,
+    17,18,19,20,21,22,23,24,25,25
+  };
+  unsigned count = sizeof(lengths) / sizeof(lengths[0]);
+  HuffmanEncoder henc(lengths, count, false);
+  HuffmanDecoder hdec(lengths, count, false, 7);
+  // Note: the same message is printed for an encoder or a decoder
+  // construction error.
+  if (henc.error() || hdec.error()) {
+    printf("HuffmanEncoder failed\n");
+    return false;
+  }
+  mem.seek(0);
+  {
+    BitOutputStream bos(mem);
+    for (unsigned i = 0; i < count; ++i) {
+      henc.encode(bos, i);
+    }
+    bos.flush();
+  }
+  mem.seek(0);
+  {
+    BitInputStream bis(mem);
+    for (unsigned i = 0; i < count; ++i) {
+      if (hdec.decode(bis) != i) {
+        printf("HuffmanDecoder failed\n");
+        return false;
+      }
+    }
+  }
+
+  // --- 16-bit dividers: every 16-bit dividend, compared with operator/ ---
+  uint16_t divtest16[] = {1, 3, 5, 7, 9, 11, 13, 17, 32767};
+  for (int i = 0, n = sizeof(divtest16) / sizeof(divtest16[0]); i < n; ++i) {
+    udivider_t<16> du   = build_udivider_16(divtest16[i]);
+    ucdivider_t<16> duc = build_ucdivider_16(divtest16[i]);
+    sdivider_t<16> ds = build_sdivider_16(divtest16[i]);
+    scdivider_t<16> dsc = build_scdivider_16(divtest16[i]);
+
+    for (int k = 0; k < 65536; ++k) {
+      // Unsigned: k covers 0..65535.
+      uint16_t c1 = divide((uint16_t)k, du);
+      uint16_t c2 = divide((uint16_t)k, duc);
+      uint16_t r = k / divtest16[i];
+      if (c1 != r || c2 != r) {
+        printf("16bit divider/1 failed\n");
+        return false;
+      }
+
+      // Signed: k - 32768 covers -32768..32767.
+      int16_t d1 = divide((int16_t)(k - 32768), ds);
+      int16_t d2 = divide((int16_t)(k - 32768), dsc);
+      int16_t s = ((int16_t)(k - 32768)) / (int16_t)divtest16[i];
+      if (d1 != s || d2 != s) {
+        printf("16bit divider/2 failed\n");
+        return false;
+      }
+    }
+  }
+  // --- 32-bit dividers: dividends sampled in steps of 65536 ---
+  uint32_t divtest32[] = {1, 3, 5, 7, 9, 11, 13, 17, 0x7fff, 0x7fffffff};
+  for (int i = 0, n = sizeof(divtest32) / sizeof(divtest32[0]); i < n; ++i) {
+    udivider_t<32> du = build_udivider_32(divtest32[i]);
+    ucdivider_t<32> duc = build_ucdivider_32(divtest32[i]);
+    sdivider_t<32> ds = build_sdivider_32(divtest32[i]);
+    scdivider_t<32> dsc = build_scdivider_32(divtest32[i]);
+
+    for (int k = 0; k < 65536; ++k) {
+      uint32_t c1 = divide(((uint32_t)k)* 65536, du);
+      uint32_t c2 = divide(((uint32_t)k) * 65536, duc);
+      uint32_t r = (((uint32_t)k) * 65536) / divtest32[i];
+      if (c1 != r || c2 != r) {
+        printf("32bit divider/1 failed\n");
+        return false;
+      }
+
+      // Signed dividends are (k - 32768) * 65536.
+      int32_t d1 = divide((int32_t)(k - 32768) * 65536, ds);
+      int32_t d2 = divide((int32_t)(k - 32768) * 65536, dsc);
+      int32_t s = ((int32_t)(k - 32768)) * 65536 / (int32_t)divtest32[i];
+      if (d1 != s || d2 != s) {
+        printf("32bit divider/2 failed\n");
+        return false;
+      }
+    }
+  }
+  return true;
+}
--- a/contrib/preflate/support/support_tests.h
+++ b/contrib/preflate/support/support_tests.h
@@ -0,0 +1,20 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef SUPPORT_TESTS_H
+#define SUPPORT_TESTS_H
+
+// Runs the self tests of the support library (helpers, MemStream, bit
+// streams, Huffman coder, constant dividers). Returns true when all checks
+// pass; on the first failure it prints a message to stdout and returns false.
+bool support_self_tests();
+
+#endif /* SUPPORT_TESTS_H */
--- a/contrib/preflate/support/task_pool.cpp
+++ b/contrib/preflate/support/task_pool.cpp
@@ -0,0 +1,58 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+#include "pch.h"
+#include "task_pool.h"
+
+#include <memory>
+
+/*TaskPool globalTaskPool;
+
+TaskPool::TaskPool()
+  : _state(INIT)
+  , _threadLimit(max(1u, std::thread::hardware_concurrency()) - 1) {
+}
+
+void TaskPool::_init() {
+  _state = RUN;
+  std::function<void(void)> workerLoop = [this] {
+    for (;;) {
+      std::function<void()> task;
+
+      {
+        std::unique_lock<std::mutex> lock(this->_mutex);
+        this->_condition.wait(lock,
+                             [this] { return this->_state == FINISH || !this->_tasks.empty(); });
+        if (this->_state == FINISH) {
+          return;
+        }
+        task = std::move(this->_tasks.front());
+        this->_tasks.pop();
+      }
+      task();
+    }
+  };
+  for (unsigned i = 0, n = max((size_t)1, _threadLimit); i < n; ++i) {
+    _workers.emplace_back(workerLoop);
+  }
+}
+
+TaskPool::~TaskPool() {
+  _state = FINISH;
+  _condition.notify_all();
+  for (auto& thr : _workers) {
+    if (thr.joinable()) {
+      thr.join();
+    }
+  }
+} */
--- a/contrib/preflate/support/task_pool.h
+++ b/contrib/preflate/support/task_pool.h
@@ -0,0 +1,70 @@
+/* Copyright 2018 Dirk Steinke
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+// #ifndef TASK_POOL_H
+// #define TASK_POOL_H
+
+#include <condition_variable>
+#include <functional>
+#include <future>
+#include <memory>
+#include <mutex>
+#include <queue>
+#include <thread>
+#include <vector>
+
+ /* class TaskPool {
+public:
+  TaskPool();
+  ~TaskPool();
+
+  template<class F, class... Args>
+  auto addTask(F&& f, Args&&... args)
+    -> std::future<typename std::result_of<F(Args...)>::type> {
+    using R = typename std::result_of<F(Args...)>::type;
+    auto task = std::make_shared<std::packaged_task<R()>>(
+      std::bind(std::forward<F>(f), std::forward<Args>(args)...));
+
+    if (_state == INIT) {
+      _init();
+    }
+    std::future<R> res = task->get_future();
+    {
+      std::unique_lock<std::mutex> lock(_mutex);
+      _tasks.emplace([task]() { (*task)(); });
+    }
+    _condition.notify_one();
+    return res;
+  }
+
+  size_t extraThreadCount() const {
+    return _threadLimit;
+  }
+
+private:
+  enum State { INIT, RUN, FINISH };
+
+  void _init();
+
+  State _state;
+  size_t _threadLimit;
+  std::vector<std::thread> _workers;
+  std::mutex _mutex;
+  std::condition_variable _condition;
+  std::queue<std::function<void()>> _tasks;
+};
+
+extern TaskPool globalTaskPool;
+
+#endif */ /* TASK_POOL_H */