source upload

This commit is contained in:
Razor12911
2022-01-17 22:16:47 +02:00
parent 12936d065b
commit 098e8c48de
1778 changed files with 1206749 additions and 0 deletions

View File

@@ -0,0 +1,69 @@
// dllmain.cpp : Defines the entry point for the DLL application.
#include "pch.h"
#include <algorithm>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <string.h>
#include <cstdint>
#include <iterator>
#include <vector>
#include "preflate_decoder.h"
#include "preflate_reencoder.h"
// DLL entry point.
// The library keeps no per-process or per-thread state, so every notification
// (process/thread attach and detach) is simply acknowledged.
BOOL APIENTRY DllMain(HMODULE hModule,
                      DWORD ul_reason_for_call,
                      LPVOID lpReserved) {
  // None of the arguments is needed; reference them to document that fact.
  (void)hModule;
  (void)ul_reason_for_call;
  (void)lpReserved;
  return TRUE;
}
/**
 * Splits a raw deflate stream into its unpacked data and a preflate diff.
 *
 * @param src           raw deflate stream (may be null only if srcSize is 0)
 * @param srcSize       number of bytes readable at src; must not be negative
 * @param dst1          receives the unpacked data
 * @param dst1Capacity  in: capacity of dst1 in bytes; out (success only):
 *                      number of bytes written to dst1
 * @param dst2          receives the preflate diff
 * @param dst2Capacity  in: capacity of dst2 in bytes (also forwarded unchanged
 *                      as the fourth argument of preflate_decode); out
 *                      (success only): number of bytes written to dst2
 * @return true on success. false if an argument is invalid, preflate_decode
 *         fails, a result does not fit its buffer, or an exception (e.g. out
 *         of memory) is raised. Nothing is written on failure.
 */
extern "C" __declspec(dllexport) bool decode(const unsigned char* src,
    int srcSize, unsigned char* dst1, int* dst1Capacity, unsigned char* dst2,
    int* dst2Capacity) {
  // Reject arguments that would otherwise lead to undefined behaviour:
  // a negative capacity would pass the unsigned size comparison below.
  if (!dst1Capacity || !dst2Capacity
      || srcSize < 0 || *dst1Capacity < 0 || *dst2Capacity < 0
      || (!src && srcSize > 0)) {
    return false;
  }
  try {
    std::vector<unsigned char> deflate_raw(src, src + srcSize);
    std::vector<unsigned char> unpacked_output;
    std::vector<unsigned char> preflate_diff;
    if (!preflate_decode(unpacked_output, preflate_diff, deflate_raw, *dst2Capacity)) {
      return false;
    }
    const size_t unpackedSize = unpacked_output.size();
    const size_t diffSize = preflate_diff.size();
    if (unpackedSize > static_cast<size_t>(*dst1Capacity)
        || diffSize > static_cast<size_t>(*dst2Capacity)) {
      return false;
    }
    // A destination pointer is only required when there is data to copy.
    if ((!dst1 && unpackedSize != 0) || (!dst2 && diffSize != 0)) {
      return false;
    }
    if (unpackedSize != 0) {
      memcpy(dst1, unpacked_output.data(), unpackedSize);
    }
    if (diffSize != 0) {
      memcpy(dst2, preflate_diff.data(), diffSize);
    }
    // Both sizes are bounded by the (int) capacities checked above.
    *dst1Capacity = static_cast<int>(unpackedSize);
    *dst2Capacity = static_cast<int>(diffSize);
    return true;
  } catch (...) {
    // C++ exceptions must never cross the C ABI boundary of the DLL export.
    return false;
  }
}
/**
 * Rebuilds the original raw deflate stream from unpacked data and the
 * preflate diff produced by decode().
 *
 * @param src1         unpacked data (may be null only if src1Size is 0)
 * @param src1Size     number of bytes readable at src1; must not be negative
 * @param src2         preflate diff (may be null only if src2Size is 0)
 * @param src2Size     number of bytes readable at src2; must not be negative
 * @param dst          receives the re-encoded deflate stream
 * @param dstCapacity  in: capacity of dst in bytes; out (success only):
 *                     number of bytes written to dst
 * @return true on success. false if an argument is invalid, preflate_reencode
 *         fails, the result does not fit into dst, or an exception (e.g. out
 *         of memory) is raised. Nothing is written on failure.
 */
extern "C" __declspec(dllexport) bool reencode(const unsigned char* src1,
    int src1Size, const unsigned char* src2,
    int src2Size, unsigned char* dst, int* dstCapacity) {
  // A negative capacity would pass the unsigned size comparison below and
  // overflow dst, so it is rejected up front together with bad sizes.
  if (!dstCapacity || *dstCapacity < 0
      || src1Size < 0 || src2Size < 0
      || (!src1 && src1Size > 0) || (!src2 && src2Size > 0)) {
    return false;
  }
  try {
    std::vector<unsigned char> unpacked_input(src1, src1 + src1Size);
    std::vector<unsigned char> preflate_diff(src2, src2 + src2Size);
    std::vector<unsigned char> deflate_raw;
    if (!preflate_reencode(deflate_raw, preflate_diff, unpacked_input)) {
      return false;
    }
    const size_t rawSize = deflate_raw.size();
    if (rawSize > static_cast<size_t>(*dstCapacity)) {
      return false;
    }
    // The destination pointer is only required when there is data to copy.
    if (!dst && rawSize != 0) {
      return false;
    }
    if (rawSize != 0) {
      memcpy(dst, deflate_raw.data(), rawSize);
    }
    // rawSize is bounded by the (int) capacity checked above.
    *dstCapacity = static_cast<int>(rawSize);
    return true;
  } catch (...) {
    // C++ exceptions must never cross the C ABI boundary of the DLL export.
    return false;
  }
}

View File

@@ -0,0 +1,5 @@
#pragma once
#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers
// Windows Header Files
#include <windows.h>

5
contrib/preflate/pch.cpp Normal file
View File

@@ -0,0 +1,5 @@
// pch.cpp: source file corresponding to the pre-compiled header
#include "pch.h"
// When you are using pre-compiled headers, this source file is necessary for compilation to succeed.

13
contrib/preflate/pch.h Normal file
View File

@@ -0,0 +1,13 @@
// pch.h: This is a precompiled header file.
// Files listed below are compiled only once, improving build performance for future builds.
// This also affects IntelliSense performance, including code completion and many code browsing features.
// However, files listed here are ALL re-compiled if any one of them is updated between builds.
// Do not add files here that you will be updating frequently as this negates the performance advantage.
#ifndef PCH_H
#define PCH_H
// add headers that you want to pre-compile here
#include "framework.h"
#endif //PCH_H

View File

@@ -0,0 +1,21 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef PREFLATE_H
#define PREFLATE_H
#include "preflate_decoder.h"
#include "preflate_reencoder.h"
#endif /* PREFLATE_H */

View File

@@ -0,0 +1,192 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include <algorithm>
#include <string.h>
#include "preflate_block_decoder.h"
#include "preflate_block_trees.h"
#include "support/bit_helper.h"
// Binds the decoder to its bit input and its cached output stream.
// The active Huffman decoder pointers start out null; readBlock() points them
// at the static or the per-block dynamic decoders before they are used.
PreflateBlockDecoder::PreflateBlockDecoder(
    BitInputStream& input,
    OutputCacheStream& output)
  : _input(input)
  , _output(output)
  , _errorCode(OK)
  , _litLenDecoder(nullptr)
  , _distDecoder(nullptr)
  , _dynamicLitLenDecoder(nullptr, 0, false, 0)
  , _dynamicDistDecoder(nullptr, 0, false, 0) {
}
bool PreflateBlockDecoder::_error(const ErrorCode code) {
_errorCode = code;
return false;
}
// Decodes one deflate block from the input, appends the unpacked bytes to the
// output stream and records the block structure (type, tokens, tree codes,
// padding) in `block`.
//
// block: receives the description of the decoded block
// last:  set to the block's "final block" header bit
// Returns true if the block was decoded completely, false on end of input,
// an invalid block type, or malformed block contents. Only a stored-block
// length mismatch sets a dedicated error code; other failures just return
// false.
bool PreflateBlockDecoder::readBlock(PreflateTokenBlock &block, bool &last) {
  block.uncompressedStartPos = _output.cacheEndPos();
  // Positions are relative to the start of this block; earliest_reference
  // tracks the smallest (possibly negative) position a match refers to.
  int32_t earliest_reference = INT32_MAX, curPos = 0;

  if (_input.eof()) {
    return false;
  }
  last = _readBit() != 0;
  unsigned char mode = _readBits(2);
  switch (mode) {
  default:
    // Block type 3 is reserved.
    return false;
  case 0: {
    block.type = PreflateTokenBlock::STORED;
    // A stored block continues at the next byte boundary. The skipped bits
    // are kept so that the stream can be reproduced bit-exactly. The count is
    // the number of bits *remaining* in the current byte (same formula as the
    // end-of-stream padding computed in preflate_checker).
    block.paddingBitCount = (8 - _input.bitPos()) & 7;
    block.paddingBits = _input.get(block.paddingBitCount);
    size_t len = _readBits(16);
    size_t ilen = _readBits(16);
    // The second field must be the one's complement of the length.
    if ((len ^ ilen) != 0xffff) {
      return _error(STORED_BLOCK_LEN_MISMATCH);
    }
    block.uncompressedLen = len;
    block.contextLen = 0;
    return _input.copyBytesTo(_output, len) == len;
  }
  case 1:
  case 2:
    if (mode == 1) {
      block.type = PreflateTokenBlock::STATIC_HUFF;
      _setupStaticTables();
    } else {
      block.type = PreflateTokenBlock::DYNAMIC_HUFF;
      if (!_readDynamicTables(block)) {
        return false;
      }
    }
    while (true) {
      if (_input.eof()) {
        return false;
      }
      unsigned litLen = _litLenDecoder->decode(_input);
      if (litLen < 256) {
        // Plain literal byte.
        _writeLiteral(litLen);
        block.tokens.push_back(PreflateToken(PreflateToken::LITERAL));
        curPos++;
      } else if (litLen == 256) {
        // End-of-block symbol.
        block.uncompressedLen = _output.cacheEndPos() - block.uncompressedStartPos;
        block.contextLen = -earliest_reference;
        return true;
      } else {
        // Length/distance pair.
        unsigned lcode = litLen - PreflateConstants::NONLEN_CODE_COUNT;
        if (lcode >= PreflateConstants::LEN_CODE_COUNT) {
          return false;
        }
        unsigned len = PreflateConstants::MIN_MATCH
          + PreflateConstants::lengthBaseTable[lcode]
          + _readBits(PreflateConstants::lengthExtraTable[lcode]);
        // Length 258 has its own code (the last one); reaching it through
        // another code plus extra bits must be remembered for re-encoding.
        bool irregular258 = len == 258 && lcode != PreflateConstants::LEN_CODE_COUNT - 1;
        unsigned dcode = _distDecoder->decode(_input);
        if (dcode >= PreflateConstants::DIST_CODE_COUNT) {
          return false;
        }
        unsigned dist = 1
          + PreflateConstants::distBaseTable[dcode]
          + _readBits(PreflateConstants::distExtraTable[dcode]);
        // A match must not reach back before the start of the output.
        if (dist > _output.cacheEndPos()) {
          return false;
        }
        _writeReference(dist, len);
        block.tokens.push_back(PreflateToken(PreflateToken::REFERENCE, len, dist, irregular258));
        // Parenthesised so that a function-like `min` macro (e.g. from
        // <windows.h>) cannot interfere with the standard algorithm.
        earliest_reference = (std::min)(earliest_reference, curPos - (int32_t)dist);
        curPos += len;
      }
    }
  }
}
void PreflateBlockDecoder::_setupStaticTables() {
_litLenDecoder = PreflateBlockTrees::staticLitLenTreeDecoder();
_distDecoder = PreflateBlockTrees::staticDistTreeDecoder();
}
// Reads the Huffman table description of a dynamic block.
// The three counts, the raw code-length-code lengths and the decoded
// code-length symbols (each repeat symbol followed by its run length) are
// recorded in `block`; the resulting literal/length and distance decoders
// become the active decoders.
// Returns false if the description is malformed.
bool PreflateBlockDecoder::_readDynamicTables(PreflateTokenBlock& block) {
// Header: number of literal/length codes, distance codes and code-length codes.
block.nlen = PreflateConstants::NONLEN_CODE_COUNT + _readBits(5);
block.ndist = 1 + _readBits(5);
block.ncode = 4 + _readBits(4);
if (block.nlen > PreflateConstants::LITLEN_CODE_COUNT || block.ndist > PreflateConstants::DIST_CODE_COUNT) {
return false;
}
block.treecodes.clear();
block.treecodes.reserve(block.nlen + block.ndist + block.ncode);
unsigned char tcBitLengths[PreflateConstants::CODETREE_CODE_COUNT];
unsigned char ldBitLengths[PreflateConstants::LITLENDIST_CODE_COUNT];
memset(tcBitLengths, 0, sizeof(tcBitLengths));
memset(ldBitLengths, 0, sizeof(ldBitLengths));
// The code-length-code lengths are 3 bits each and are stored in the
// permuted order given by treeCodeOrderTable.
for (unsigned i = 0, n = block.ncode; i < n; ++i) {
unsigned char tc = _readBits(3);
block.treecodes.push_back(tc);
tcBitLengths[PreflateConstants::treeCodeOrderTable[i]] = tc;
}
HuffmanDecoder tcTree(tcBitLengths, PreflateConstants::CODETREE_CODE_COUNT, true, 7);
if (tcTree.error()) {
return false;
}
// Decode the bit lengths of the literal/length codes immediately followed
// by those of the distance codes.
for (unsigned i = 0, n = block.nlen + block.ndist; i < n; ++i) {
unsigned char code = tcTree.decode(_input);
if (code > 18) {
return false;
}
block.treecodes.push_back(code);
// Symbols 0..15 are literal bit lengths.
if (code < 16) {
ldBitLengths[i] = code;
continue;
}
// Symbols 16..18 are run-length codes.
unsigned char len = 0, tocopy = 0;
switch (code) {
case 16:
// Repeat the previous length 3..6 times; needs a previous entry.
if (i == 0) {
return false;
}
tocopy = ldBitLengths[i - 1];
len = 3 + _readBits(2);
break;
case 17:
// Run of 3..10 zero lengths.
tocopy = 0;
len = 3 + _readBits(3);
break;
case 18:
// Run of 11..138 zero lengths.
tocopy = 0;
len = 11 + _readBits(7);
break;
}
// A run must not extend past the announced number of codes.
if (i + len > n) {
return false;
}
block.treecodes.push_back(len);
memset(ldBitLengths + i, tocopy, len);
// The loop's ++i accounts for the last entry of the run.
i += len - 1;
}
// Symbol 256 (end of block) must have a code.
if (!ldBitLengths[256]) {
return false;
}
_dynamicLitLenDecoder = HuffmanDecoder(ldBitLengths, block.nlen, true, 15);
if (_dynamicLitLenDecoder.error()) {
return false;
}
_litLenDecoder = &_dynamicLitLenDecoder;
// The distance code lengths follow directly after the literal/length ones.
_dynamicDistDecoder = HuffmanDecoder(ldBitLengths + block.nlen, block.ndist, true, 15);
if (_dynamicDistDecoder.error()) {
return false;
}
_distDecoder = &_dynamicDistDecoder;
return true;
}

View File

@@ -0,0 +1,83 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef PREFLATE_BLOCK_DECODER_H
#define PREFLATE_BLOCK_DECODER_H
#include "preflate_constants.h"
#include "preflate_hash_chain.h"
#include "preflate_input.h"
#include "preflate_token.h"
#include "support/bitstream.h"
#include "support/huffman_decoder.h"
#include "support/outputcachestream.h"
// Decodes deflate blocks from a bit stream into token blocks while writing
// the unpacked bytes to an output cache stream.
class PreflateBlockDecoder {
public:
  // Status values reported by status().
  enum ErrorCode {
    OK,
    STORED_BLOCK_LEN_MISMATCH,
    STORED_BLOCK_PADDING_MISMATCH,
    BADLY_CODED_MAX_LENGTH
  };

  PreflateBlockDecoder(BitInputStream& input,
                       OutputCacheStream& output);

  // Decodes the next block; `last` receives the block's final-block flag.
  bool readBlock(PreflateTokenBlock&, bool& last);

  // Most recent error code recorded by the decoder (OK if none).
  ErrorCode status() const {
    return _errorCode;
  }

private:
  // Records the error code and returns false.
  bool _error(const ErrorCode);

  // Thin wrappers around the bit input stream.
  unsigned char _readBit() {
    return _input.get(1);
  }
  unsigned _readBits(const unsigned bits) {
    return _input.get(bits);
  }
  void _skipToByte() {
    _input.skipToByte();
  }
  bool _checkLastBitsOfByte() {
    return _input.checkLastBitsOfByteAreZero();
  }

  // Appends a single literal byte to the output.
  void _writeLiteral(const unsigned char l) {
    _output.write(&l, 1);
  }

  // Appends `len` bytes copied from `dist` bytes before the current end of
  // the output.
  void _writeReference(const size_t dist, const size_t len) {
    // NOTE(review): reserve() is presumably what keeps the source pointer
    // below valid while writing - confirm against OutputCacheStream.
    _output.reserve(len);
    if (len > dist) {
      // Source and destination overlap: the bytes written here are read
      // again later in the same copy, so go one byte at a time.
      const uint8_t* source = _output.cacheEnd() - dist;
      size_t copied = 0;
      while (copied < len) {
        _output.write(&source[copied], 1);
        ++copied;
      }
      return;
    }
    // No overlap: the whole run can be written at once.
    _output.write(_output.cacheEnd() - dist, len);
  }

  void _setupStaticTables();
  bool _readDynamicTables(PreflateTokenBlock&);

  BitInputStream& _input;
  OutputCacheStream& _output;
  ErrorCode _errorCode;

  // Decoders in use for the current block: either the shared static ones or
  // the two dynamic members below.
  const HuffmanDecoder* _litLenDecoder;
  const HuffmanDecoder* _distDecoder;
  HuffmanDecoder _dynamicLitLenDecoder;
  HuffmanDecoder _dynamicDistDecoder;
};
#endif /* PREFLATE_BLOCK_DECODER_H */

View File

@@ -0,0 +1,195 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include <algorithm>
#include <string.h>
#include "preflate_block_reencoder.h"
#include "preflate_block_trees.h"
#include "support/bit_helper.h"
// Binds the re-encoder to its bit output and to the unpacked data, starting
// at `uncompressedOffset`.
// The active Huffman encoder pointers start out null; writeBlock() points
// them at the static or the per-block dynamic encoders before tokens are
// written.
PreflateBlockReencoder::PreflateBlockReencoder(
    BitOutputStream& bos,
    const std::vector<unsigned char>& uncompressedData,
    const size_t uncompressedOffset)
  : _output(bos)
  , _uncompressedData(uncompressedData)
  , _uncompressedDataPos(uncompressedOffset)
  , _errorCode(OK)
  , _litLenEncoder(nullptr)
  , _distEncoder(nullptr)
  , _dynamicLitLenEncoder(nullptr, 0, false)
  , _dynamicDistEncoder(nullptr, 0, false) {
}
bool PreflateBlockReencoder::_error(const ErrorCode code) {
_errorCode = code;
return false;
}
void PreflateBlockReencoder::_setupStaticTables() {
_litLenEncoder = PreflateBlockTrees::staticLitLenTreeEncoder();
_distEncoder = PreflateBlockTrees::staticDistTreeEncoder();
}
// Writes the Huffman table description of a dynamic block and builds the
// matching literal/length and distance encoders.
//
// block.treecodes is expected to hold block.ncode code-length-code lengths
// followed by the code-length symbols; every repeat symbol (16, 17, 18) is
// followed by its run length.
// Returns false and records an error code if the description is out of range
// or inconsistent.
bool PreflateBlockReencoder::_buildAndWriteDynamicTables(const PreflateTokenBlock& block) {
  if (block.ncode < 4 || block.ncode > PreflateConstants::CODETREE_CODE_COUNT
      || block.treecodes.size() < (size_t)block.ncode
      || block.nlen < PreflateConstants::NONLEN_CODE_COUNT
      || block.nlen > PreflateConstants::LITLEN_CODE_COUNT
      || block.ndist < 1 || block.ndist > PreflateConstants::DIST_CODE_COUNT) {
    return _error(TREE_OUT_OF_RANGE);
  }

  unsigned char tcBitLengths[PreflateConstants::CODETREE_CODE_COUNT];
  unsigned char ldBitLengths[PreflateConstants::LITLENDIST_CODE_COUNT];
  memset(tcBitLengths, 0, sizeof(tcBitLengths));
  memset(ldBitLengths, 0, sizeof(ldBitLengths));

  // Code-length-code lengths: 3 bits each, in the permuted order given by
  // treeCodeOrderTable.
  for (unsigned i = 0, n = block.ncode; i < n; ++i) {
    unsigned char tc = block.treecodes[i];
    _output.put(tc, 3);
    tcBitLengths[PreflateConstants::treeCodeOrderTable[i]] = tc;
  }
  HuffmanEncoder tcTree(tcBitLengths, PreflateConstants::CODETREE_CODE_COUNT, true);
  if (tcTree.error()) {
    return _error(BAD_CODE_TREE);
  }

  // For the repeat symbols 16, 17 and 18: number of extra bits and the
  // smallest run length that can be expressed.
  static const unsigned char repExtraBits[3] = {2, 3, 7};
  static const unsigned char repOffset[3] = {3, 3, 11};

  // Unpack the tree codes into the bit lengths of the literal/length codes
  // followed by those of the distance codes, writing them as we go.
  unsigned o = 0, maxo = block.nlen + block.ndist;
  for (auto i = block.treecodes.begin() + block.ncode, e = block.treecodes.end(); i != e; ++i) {
    unsigned char code = *i;
    if (code > 18) {
      return _error(BAD_LD_TREE);
    }
    tcTree.encode(_output, code);
    if (code < 16) {
      // Literal bit length.
      if (o >= maxo) {
        return _error(BAD_LD_TREE);
      }
      ldBitLengths[o++] = code;
      continue;
    }
    // Repeat symbol: its run length must follow, and symbol 16 additionally
    // needs a previous entry to repeat.
    if (i + 1 == e) {
      return _error(BAD_LD_TREE);
    }
    if (code == 16 && o == 0) {
      return _error(BAD_LD_TREE);
    }
    const unsigned len = *++i;
    const unsigned base = repOffset[code - 16];
    const unsigned extraBits = repExtraBits[code - 16];
    // The run length must be expressible in the extra bits; otherwise the
    // written value would not match the tables built below and the resulting
    // stream would be silently corrupt.
    if (len < base || len - base >= (1u << extraBits)) {
      return _error(BAD_LD_TREE);
    }
    const unsigned char tocopy = code == 16 ? ldBitLengths[o - 1] : 0;
    _output.put(len - base, extraBits);
    if (o + len > maxo) {
      return _error(BAD_LD_TREE);
    }
    memset(ldBitLengths + o, tocopy, len);
    o += len;
  }
  // Every announced code must have received a length.
  if (o != maxo) {
    return _error(BAD_LD_TREE);
  }
  // Symbol 256 (end of block) must have a code.
  if (!ldBitLengths[256]) {
    return _error(BAD_LD_TREE);
  }

  _dynamicLitLenEncoder = HuffmanEncoder(ldBitLengths, block.nlen, true);
  if (_dynamicLitLenEncoder.error()) {
    return _error(BAD_LD_TREE);
  }
  _litLenEncoder = &_dynamicLitLenEncoder;
  // The distance code lengths follow directly after the literal/length ones.
  _dynamicDistEncoder = HuffmanEncoder(ldBitLengths + block.nlen, block.ndist, true);
  if (_dynamicDistEncoder.error()) {
    return _error(BAD_LD_TREE);
  }
  _distEncoder = &_dynamicDistEncoder;
  return true;
}
bool PreflateBlockReencoder::_writeTokens(const std::vector<PreflateToken>& tokens) {
for (size_t i = 0; i < tokens.size(); ++i) {
PreflateToken token = tokens[i];
if (token.len == 1) {
if (_uncompressedDataPos >= _uncompressedData.size()) {
return _error(LITERAL_OUT_OF_BOUNDS);
}
unsigned char literal = _uncompressedData[_uncompressedDataPos++];
_litLenEncoder->encode(_output, literal);
} else {
// handle irregular length of 258
if (token.irregular258) {
_litLenEncoder->encode(_output, PreflateConstants::LITLEN_CODE_COUNT - 2);
_output.put(31, 5);
} else {
unsigned lencode = PreflateConstants::LCode(token.len);
_litLenEncoder->encode(_output, PreflateConstants::NONLEN_CODE_COUNT + lencode);
unsigned lenextra = PreflateConstants::lengthExtraTable[lencode];
if (lenextra) {
_output.put(token.len - PreflateConstants::MIN_MATCH - PreflateConstants::lengthBaseTable[lencode], lenextra);
}
}
unsigned distcode = PreflateConstants::DCode(token.dist);
_distEncoder->encode(_output, distcode);
unsigned distextra = PreflateConstants::distExtraTable[distcode];
if (distextra) {
_output.put(token.dist - 1 - PreflateConstants::distBaseTable[distcode], distextra);
}
_uncompressedDataPos += token.len;
}
}
_litLenEncoder->encode(_output, PreflateConstants::LITERAL_COUNT); // EOB
return true;
}
// Writes one deflate block (header, tables if any, and contents).
// `last` becomes the block's final-block flag.
// Returns false if the re-encoder is already in an error state or if writing
// the block fails.
bool PreflateBlockReencoder::writeBlock(const PreflateTokenBlock& block, bool last) {
  if (status() != OK) {
    return false;
  }

  // Final-block flag, then the 2-bit block type.
  _output.put(last, 1);
  switch (block.type) {
  case PreflateTokenBlock::STORED:
    _output.put(0, 2);
    // Reproduce the original padding bits, then align to a byte boundary.
    _output.put(block.paddingBits, block.paddingBitCount);
    _output.fillByte();
    // Length and its one's complement.
    _output.put(block.uncompressedLen, 16);
    _output.put(~block.uncompressedLen, 16);
    if (_uncompressedDataPos + block.uncompressedLen > _uncompressedData.size()) {
      return _error(LITERAL_OUT_OF_BOUNDS);
    }
    _output.putBytes(_uncompressedData.data() + _uncompressedDataPos, block.uncompressedLen);
    _uncompressedDataPos += block.uncompressedLen;
    break;

  case PreflateTokenBlock::STATIC_HUFF:
    _output.put(1, 2);
    _setupStaticTables();
    if (!_writeTokens(block.tokens)) {
      return false;
    }
    break;

  case PreflateTokenBlock::DYNAMIC_HUFF:
    _output.put(2, 2);
    // Code counts, each stored relative to its minimum.
    _output.put(block.nlen - PreflateConstants::NONLEN_CODE_COUNT, 5);
    _output.put(block.ndist - 1, 5);
    _output.put(block.ncode - 4, 4);
    if (!_buildAndWriteDynamicTables(block) || !_writeTokens(block.tokens)) {
      return false;
    }
    break;
  }
  return true;
}
void PreflateBlockReencoder::flush() {
_output.flush();
}

View File

@@ -0,0 +1,77 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef PREFLATE_BLOCK_REENCODER_H
#define PREFLATE_BLOCK_REENCODER_H
#include "preflate_constants.h"
#include "preflate_token.h"
#include "support/bitstream.h"
#include "support/huffman_encoder.h"
// Re-creates deflate blocks from token blocks and the unpacked data, writing
// them to a bit output stream.
class PreflateBlockReencoder {
public:
  // Status values reported by status().
  enum ErrorCode {
    OK,
    LITERAL_OUT_OF_BOUNDS,
    TREE_OUT_OF_RANGE,
    BAD_CODE_TREE,
    BAD_LD_TREE,
  };

  // Disabled declarations, kept for reference only:
  /*  enum {
        BUFSIZE = 1024
      };
      std::vector<unsigned char> output;
      unsigned char buffer[BUFSIZE];
      unsigned bufferpos;
      unsigned bitbuffer;
      unsigned bitbuffersize;*/
  /*  unsigned short litLenDistCodeStorage[PreflateConstants::LD_CODES];
      unsigned short treeCodeStorage[PreflateConstants::BL_CODES];
      unsigned char litLenDistBitStorage[PreflateConstants::LD_CODES];
      unsigned char treeBitStorage[PreflateConstants::BL_CODES];
      const unsigned short *litLenCode, *distCode, *treeCode;
      const unsigned char *litLenBits, *distBits, *treeBits;*/

  // uncompressedOffset is the position in uncompressedData at which the
  // first block's data starts.
  PreflateBlockReencoder(BitOutputStream& bos,
                         const std::vector<unsigned char>& uncompressedData,
                         const size_t uncompressedOffset);

  // Writes one block; `last` becomes its final-block flag.
  bool writeBlock(const PreflateTokenBlock&, const bool last);

  // Flushes the bit output stream.
  void flush();

  // Most recent error code recorded by the re-encoder (OK if none).
  ErrorCode status() const {
    return _errorCode;
  }

private:
  // Records the error code and returns false.
  bool _error(const ErrorCode);

  void _setupStaticTables();
  bool _buildAndWriteDynamicTables(const PreflateTokenBlock&);
  bool _writeTokens(const std::vector<PreflateToken>& tokens);

  BitOutputStream& _output;
  const std::vector<unsigned char>& _uncompressedData;
  size_t _uncompressedDataPos;
  ErrorCode _errorCode;

  // Encoders in use for the current block: either the shared static ones or
  // the two dynamic members below.
  const HuffmanEncoder* _litLenEncoder;
  const HuffmanEncoder* _distEncoder;
  HuffmanEncoder _dynamicLitLenEncoder;
  HuffmanEncoder _dynamicDistEncoder;
};
#endif /* PREFLATE_BLOCK_REENCODER_H */

View File

@@ -0,0 +1,66 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include <algorithm>
#include "preflate_block_trees.h"
#include "support/bit_helper.h"
// Lazily created coders for the fixed Huffman codes; each stays null until
// the corresponding PreflateBlockTrees accessor is called for the first time.
static HuffmanDecoder* staticLitLenDecoder = nullptr;
static HuffmanDecoder* staticDistDecoder = nullptr;
static HuffmanEncoder* staticLitLenEncoder = nullptr;
static HuffmanEncoder* staticDistEncoder = nullptr;
// Fills `a` with the code lengths of the fixed literal/length code:
// symbols 0..143 use 8 bits, 144..255 use 9, 256..279 use 7, 280..287 use 8.
static void setLitLenBitLengths(unsigned char(&a)[288]) {
  for (unsigned symbol = 0; symbol < 288; ++symbol) {
    unsigned char bits;
    if (symbol < 144) {
      bits = 8;
    } else if (symbol < 256) {
      bits = 9;
    } else if (symbol < 280) {
      bits = 7;
    } else {
      bits = 8;
    }
    a[symbol] = bits;
  }
}
// Fills `a` with the code lengths of the fixed distance code: 5 bits for
// every one of the 32 symbols.
static void setDistBitLengths(unsigned char(&a)[32]) {
  for (unsigned char& bits : a) {
    bits = 5;
  }
}
// Returns the decoder for the fixed literal/length code, creating it on the
// first call. The instance is kept for later calls and never released here.
const HuffmanDecoder* PreflateBlockTrees::staticLitLenTreeDecoder() {
  if (staticLitLenDecoder) {
    return staticLitLenDecoder;
  }
  unsigned char bitLengths[288];
  setLitLenBitLengths(bitLengths);
  staticLitLenDecoder = new HuffmanDecoder(bitLengths, 288, true, 15);
  return staticLitLenDecoder;
}
// Returns the decoder for the fixed distance code, creating it on the first
// call. The instance is kept for later calls and never released here.
const HuffmanDecoder* PreflateBlockTrees::staticDistTreeDecoder() {
  if (staticDistDecoder) {
    return staticDistDecoder;
  }
  unsigned char bitLengths[32];
  setDistBitLengths(bitLengths);
  staticDistDecoder = new HuffmanDecoder(bitLengths, 32, true, 15);
  return staticDistDecoder;
}
// Returns the encoder for the fixed literal/length code, creating it on the
// first call. The instance is kept for later calls and never released here.
const HuffmanEncoder* PreflateBlockTrees::staticLitLenTreeEncoder() {
  if (staticLitLenEncoder) {
    return staticLitLenEncoder;
  }
  unsigned char bitLengths[288];
  setLitLenBitLengths(bitLengths);
  staticLitLenEncoder = new HuffmanEncoder(bitLengths, 288, true);
  return staticLitLenEncoder;
}
// Returns the encoder for the fixed distance code, creating it on the first
// call. The instance is kept for later calls and never released here.
const HuffmanEncoder* PreflateBlockTrees::staticDistTreeEncoder() {
  if (staticDistEncoder) {
    return staticDistEncoder;
  }
  unsigned char bitLengths[32];
  setDistBitLengths(bitLengths);
  staticDistEncoder = new HuffmanEncoder(bitLengths, 32, true);
  return staticDistEncoder;
}

View File

@@ -0,0 +1,32 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef PREFLATE_BLOCK_TREES_H
#define PREFLATE_BLOCK_TREES_H
#include "preflate_constants.h"
#include "preflate_hash_chain.h"
#include "preflate_input.h"
#include "preflate_token.h"
#include "support/huffman_decoder.h"
#include "support/huffman_encoder.h"
// Accessors for the Huffman coders of the fixed (static) deflate codes.
// Each accessor returns a pointer to a shared instance that is created on
// first use; callers must not delete it.
struct PreflateBlockTrees {
// Decoder for the fixed literal/length code (288 symbols).
static const HuffmanDecoder* staticLitLenTreeDecoder();
// Decoder for the fixed distance code (32 symbols, 5 bits each).
static const HuffmanDecoder* staticDistTreeDecoder();
// Encoder for the fixed literal/length code (288 symbols).
static const HuffmanEncoder* staticLitLenTreeEncoder();
// Encoder for the fixed distance code (32 symbols, 5 bits each).
static const HuffmanEncoder* staticDistTreeEncoder();
};
#endif /* PREFLATE_BLOCK_TREES_H */

View File

@@ -0,0 +1,289 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include <stdio.h>
#include <string.h>
#include "preflate_block_decoder.h"
#include "preflate_block_reencoder.h"
#include "preflate_checker.h"
#include "preflate_parameter_estimator.h"
#include "preflate_statistical_model.h"
#include "preflate_token_predictor.h"
#include "preflate_tree_predictor.h"
#include "support/bitstream.h"
#include "support/memstream.h"
#include "support/outputcachestream.h"
#include <algorithm>
#include <chrono>
// Round-trip self test for a raw deflate stream.
//
// The stream is inflated into token blocks, the prediction diff is encoded,
// decoded again, and the deflate stream is re-created from the unpacked data
// plus the decoded diff. Progress, timings and the first detected mismatch
// are printed to stdout.
//
// deflate_raw: the raw deflate stream to check
// Returns true only if every stage succeeds and the re-created stream is
// byte-identical to deflate_raw.
bool preflate_checker(const std::vector<unsigned char>& deflate_raw) {
  printf("Checking raw deflate file of size %d\n", (int)deflate_raw.size());

  // ---- Stage 1: inflate and collect the token blocks ----
  MemStream decIn(deflate_raw);
  MemStream decUnc;
  BitInputStream decInBits(decIn);
  OutputCacheStream decOutCache(decUnc);
  std::vector<PreflateTokenBlock> blocks;
  auto ts_start = std::chrono::steady_clock::now();
  PreflateBlockDecoder bdec(decInBits, decOutCache);
  if (bdec.status() != PreflateBlockDecoder::OK) {
    return false;
  }
  bool last = false;
  do {
    PreflateTokenBlock newBlock;
    bool ok = bdec.readBlock(newBlock, last);
    if (!ok) {
      printf("inflating error (preflate)\n");
      return false;
    }
    blocks.push_back(newBlock);
  } while (!last);
  // Bits between the end of the last block and the next byte boundary.
  uint8_t remaining_bit_count = (8 - decInBits.bitPos()) & 7;
  uint8_t remaining_bits = decInBits.get(remaining_bit_count);
  decOutCache.flush();
  std::vector<unsigned char> unpacked_output = decUnc.extractData();
  auto ts_end = std::chrono::steady_clock::now();
  printf("Unpacked data has size %d\n", (int)unpacked_output.size());
  printf("Unpacking took %g seconds\n", std::chrono::duration<double>(ts_end - ts_start).count());

  // ---- Stage 2: estimate parameters and analyze the blocks ----
  // Encode
  PreflateParameters paramsE = estimatePreflateParameters(unpacked_output, 0, blocks);
  printf("prediction parameters: w %d, c %d, m %d, zlib %d, farL3M %d, very far M %d, M2S %d, log2CD %d\n",
         paramsE.windowBits, paramsE.compLevel, paramsE.memLevel,
         paramsE.zlibCompatible, paramsE.farLen3MatchesDetected,
         paramsE.veryFarMatchesDetected, paramsE.matchesToStartDetected,
         paramsE.log2OfMaxChainDepthM1);
  ts_start = std::chrono::steady_clock::now();
  PreflateStatisticsCounter counterE;
  memset(&counterE, 0, sizeof(counterE));
  PreflateTokenPredictor tokenPredictorE(paramsE, unpacked_output, 0);
  PreflateTreePredictor treePredictorE(unpacked_output, 0);
  for (unsigned i = 0, n = blocks.size(); i < n; ++i) {
    tokenPredictorE.analyzeBlock(i, blocks[i]);
    if (tokenPredictorE.predictionFailure) {
      printf("block %d: compress failed token prediction\n", i);
      return false;
    }
    treePredictorE.analyzeBlock(i, blocks[i]);
    if (treePredictorE.predictionFailure) {
      printf("block %d: compress failed tree prediction\n", i);
      return false;
    }
    tokenPredictorE.updateCounters(&counterE, i);
    treePredictorE.updateCounters(&counterE, i);
  }
  counterE.block.incNonZeroPadding(remaining_bits != 0);
  ts_end = std::chrono::steady_clock::now();
  printf("Prediction took %g seconds\n", std::chrono::duration<double>(ts_end - ts_start).count());
  counterE.print();

  // ---- Stage 3: encode the prediction diff ----
  ts_start = std::chrono::steady_clock::now();
  PreflateMetaEncoder codecE;
  if (codecE.error()) {
    return false;
  }
  PreflatePredictionEncoder pcodecE;
  unsigned modelId = codecE.addModel(counterE, paramsE);
  if (!codecE.beginMetaBlockWithModel(pcodecE, modelId)) {
    return false;
  }
  for (unsigned i = 0, n = blocks.size(); i < n; ++i) {
    tokenPredictorE.encodeBlock(&pcodecE, i);
    if (tokenPredictorE.predictionFailure) {
      printf("block %d: compress failed token encoding\n", i);
      return false;
    }
    treePredictorE.encodeBlock(&pcodecE, i);
    if (treePredictorE.predictionFailure) {
      printf("block %d: compress failed tree encoding\n", i);
      return false;
    }
    tokenPredictorE.encodeEOF(&pcodecE, i, i + 1 == blocks.size());
  }
  // Non-zero trailing padding is stored as its bit length plus all bits
  // below the (implicit) top bit.
  pcodecE.encodeNonZeroPadding(remaining_bits != 0);
  if (remaining_bits != 0) {
    unsigned bitsToSave = bitLength(remaining_bits);
    pcodecE.encodeValue(bitsToSave, 3);
    if (bitsToSave > 1) {
      pcodecE.encodeValue(remaining_bits & ((1 << (bitsToSave - 1)) - 1), bitsToSave - 1);
    }
  }
  if (!codecE.endMetaBlock(pcodecE, unpacked_output.size())) {
    return false;
  }
  std::vector<unsigned char> preflate_diff = codecE.finish();
  ts_end = std::chrono::steady_clock::now();
  printf("Prediction diff has size %d\n", (int)preflate_diff.size());
  printf("Encoding diff took %g seconds\n", std::chrono::duration<double>(ts_end - ts_start).count());

  // ---- Stage 4: decode the diff and re-predict the blocks ----
  // Decode
  ts_start = std::chrono::steady_clock::now();
  PreflateMetaDecoder codecD(preflate_diff, unpacked_output.size());
  PreflatePredictionDecoder pcodecD;
  PreflateParameters paramsD;
  if (codecD.error() || codecD.metaBlockCount() != 1) {
    return false;
  }
  if (!codecD.beginMetaBlock(pcodecD, paramsD, 0)) {
    return false;
  }
  PreflateTokenPredictor tokenPredictorD(paramsD, unpacked_output, 0);
  PreflateTreePredictor treePredictorD(unpacked_output, 0);
  MemStream mem;
  BitOutputStream bos(mem);
  std::vector<PreflateTokenBlock> dblocks;
  unsigned blockno = 0;
  bool eof = true;
  do {
    PreflateTokenBlock block = tokenPredictorD.decodeBlock(&pcodecD);
    if (tokenPredictorD.predictionFailure) {
      printf("block %d: token uncompress failed\n", blockno);
      return false;
    }
    if (!treePredictorD.decodeBlock(block, &pcodecD)) {
      printf("block %d: tree uncompress failed\n", blockno);
      return false;
    }
    if (treePredictorD.predictionFailure) {
      printf("block %d: tree uncompress failed\n", blockno);
      return false;
    }
    eof = tokenPredictorD.decodeEOF(&pcodecD);
    dblocks.push_back(block);
    ++blockno;
  } while (!eof);
  ts_end = std::chrono::steady_clock::now();
  printf("Decoding diff and reprediction took %g seconds\n", std::chrono::duration<double>(ts_end - ts_start).count());

  // ---- Stage 5: compare decoded parameters, model and blocks ----
  if (paramsD.windowBits != paramsE.windowBits) {
    printf("parameter decoding failed: windowBits mismatch\n");
    return false;
  }
  if (paramsD.memLevel != paramsE.memLevel) {
    printf("parameter decoding failed: memLevel mismatch\n");
    return false;
  }
  if (paramsD.compLevel != paramsE.compLevel) {
    printf("parameter decoding failed: compLevel mismatch\n");
    return false;
  }
  if (paramsD.zlibCompatible != paramsE.zlibCompatible) {
    printf("parameter decoding failed: zlib compatible flag mismatch\n");
    return false;
  }
  if (!paramsD.zlibCompatible && (0
      // || paramsD.farLen3MatchesDetected != paramsE.farLen3MatchesDetected
      || paramsD.veryFarMatchesDetected != paramsE.veryFarMatchesDetected
      || paramsD.matchesToStartDetected != paramsE.matchesToStartDetected
      || paramsD.log2OfMaxChainDepthM1 != paramsE.log2OfMaxChainDepthM1)) {
    printf("parameter decoding failed: non-zlib flag mismatch\n");
    return false;
  }
  if (!isEqual(pcodecD, pcodecE)) {
    printf("decoded model differs from original\n");
    return false;
  }
  // (std::min) is parenthesised so that a function-like `min` macro (e.g.
  // from <windows.h>) cannot interfere with the standard algorithm.
  for (size_t blockno = 0, n = (std::min)(blocks.size(), dblocks.size()); blockno < n; ++blockno) {
    if (dblocks[blockno].type != blocks[blockno].type) {
      printf("block %zu: type differs: org %d, new %d\n", blockno, blocks[blockno].type, dblocks[blockno].type);
      return false;
    }
    for (unsigned t = 0, tokenCount = (std::min)(dblocks[blockno].tokens.size(), blocks[blockno].tokens.size()); t < tokenCount; ++t) {
      PreflateToken orgToken = blocks[blockno].tokens[t];
      PreflateToken newToken = dblocks[blockno].tokens[t];
      if (newToken.len != orgToken.len || newToken.dist != orgToken.dist) {
        printf("block %zu: generated token %d differs: org(%d,%d), new(%d,%d)\n",
               blockno, t, orgToken.len, orgToken.dist, newToken.len, newToken.dist);
        return false;
      }
    }
    if (dblocks[blockno].tokens.size() != blocks[blockno].tokens.size()) {
      printf("block %zu: differing token count: org %d, new %d\n",
             blockno, (int)blocks[blockno].tokens.size(), (int)dblocks[blockno].tokens.size());
      return false;
    }
    if (dblocks[blockno].type == PreflateTokenBlock::DYNAMIC_HUFF) {
      if (dblocks[blockno].nlen != blocks[blockno].nlen) {
        printf("block %zu: literal/len count differs: org %d, new %d\n",
               blockno, blocks[blockno].nlen, dblocks[blockno].nlen);
        return false;
      }
      if (dblocks[blockno].ndist != blocks[blockno].ndist) {
        printf("block %zu: dist count differs: org %d, new %d\n",
               blockno, blocks[blockno].ndist, dblocks[blockno].ndist);
        return false;
      }
      if (dblocks[blockno].ncode != blocks[blockno].ncode) {
        printf("block %zu: tree code count differs: org %d, new %d\n",
               blockno, blocks[blockno].ncode, dblocks[blockno].ncode);
        return false;
      }
      if (dblocks[blockno].treecodes != blocks[blockno].treecodes) {
        printf("block %zu: generated tree codes differs\n", blockno);
        return false;
      }
    }
  }

  // ---- Stage 6: re-create the deflate stream and compare it ----
  ts_start = std::chrono::steady_clock::now();
  PreflateBlockReencoder deflater(bos, unpacked_output, 0);
  for (size_t i = 0; i < dblocks.size(); ++i) {
    // A failed block leaves the output stream incomplete, so stop right away
    // instead of reporting a misleading byte mismatch later.
    if (!deflater.writeBlock(dblocks[i], i + 1 == dblocks.size())) {
      printf("block %d: reencoding failed\n", (int)i);
      return false;
    }
  }
  bool non_zero_bits = pcodecD.decodeNonZeroPadding();
  if (non_zero_bits) {
    unsigned bitsToLoad = pcodecD.decodeValue(3);
    unsigned padding = 0;
    if (bitsToLoad > 0) {
      // Restore the implicit top bit that was not stored.
      padding = (1 << (bitsToLoad - 1)) + pcodecD.decodeValue(bitsToLoad - 1);
    }
    bos.put(padding, bitsToLoad);
  }
  if (!codecD.endMetaBlock(pcodecD)) {
    return false;
  }
  deflater.flush();
  std::vector<unsigned char> deflate_raw_out = mem.extractData();
  ts_end = std::chrono::steady_clock::now();
  printf("Reencoding deflate stream took %g seconds\n", std::chrono::duration<double>(ts_end - ts_start).count());
  for (unsigned i = 0, n = (std::min)(deflate_raw.size(), deflate_raw_out.size()); i < n; ++i) {
    if (deflate_raw[i] != deflate_raw_out[i]) {
      printf("created deflate stream differs at offset %d\n", i);
      return false;
    }
  }
  if (deflate_raw.size() != deflate_raw_out.size()) {
    printf("created deflate streams differs in size: org %d, new %d\n",
           (int)deflate_raw.size(), (int)deflate_raw_out.size());
    return false;
  }
  printf("Success\n");
  return true;
}

View File

@@ -0,0 +1,22 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef PREFLATE_CHECKER_H
#define PREFLATE_CHECKER_H
#include <vector>
// Self-check entry point taking a raw deflate stream.
// NOTE(review): presumably returns true only when the stream can be decoded
// and re-encoded to a byte-identical result — confirm against the definition.
bool preflate_checker(const std::vector<unsigned char>& deflate_raw);
#endif /* PREFLATE_CHECKER_H */

View File

@@ -0,0 +1,222 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include <algorithm>
#include <string.h>
#include "preflate_complevel_estimator.h"
#include "preflate_constants.h"
// Builds the estimator: one "slow" hash chain plus three "fast" hash chains
// over the same uncompressed data, a window of 1 << wbits bytes, and a
// zeroed result record in which every compression level 1..9 is a candidate.
PreflateCompLevelEstimatorState::PreflateCompLevelEstimatorState(
    const int wbits,
    const int mbits,
    const std::vector<unsigned char>& unpacked_output_,
    const size_t off0_,
    const std::vector<PreflateTokenBlock>& blocks_)
  : slowHash(unpacked_output_, mbits)
  , fastL1Hash(unpacked_output_, mbits)
  , fastL2Hash(unpacked_output_, mbits)
  , fastL3Hash(unpacked_output_, mbits)
  , blocks(blocks_)
  , wsize(1 << wbits)
  , off0(off0_) {
  // Clear all counters and flags of the result record.
  memset(&info, 0, sizeof(info));

  // Bits 1 through 9 set, bit 0 clear: bit k stands for compression level k.
  const unsigned allLevelBits = (1u << 10) - (1u << 1);
  info.possibleCompressionLevels = allLevelBits;

  // Advance every hash chain over the first off0 bytes of the data.
  updateHash(off0);
}
// Advances the hash chains by `len` bytes. A fast chain is only updated
// while its compression level (1, 2 or 3) is still a candidate; the slow
// chain is always updated.
void PreflateCompLevelEstimatorState::updateHash(const unsigned len) {
  const unsigned candidates = info.possibleCompressionLevels;

  const bool level1Alive = (candidates & (1 << 1)) != 0;
  const bool level2Alive = (candidates & (1 << 2)) != 0;
  const bool level3Alive = (candidates & (1 << 3)) != 0;

  if (level1Alive) {
    fastL1Hash.updateHash(len);
  }
  if (level2Alive) {
    fastL2Hash.updateHash(len);
  }
  if (level3Alive) {
    fastL3Hash.updateHash(len);
  }
  slowHash.updateHash(len);
}
// Advances one fast hash chain by `len` bytes: matches longer than the
// configuration's max_lazy are skipped over, anything else is hashed.
void PreflateCompLevelEstimatorState::updateOrSkipSingleFastHash(
    PreflateHashChainExt& hash,
    const unsigned len,
    const PreflateParserConfig& config) {
  if (len > config.max_lazy) {
    hash.skipHash(len);
    return;
  }
  hash.updateHash(len);
}
// Advances all hash chains past a match of `len` bytes. Each fast chain whose
// level (1..3) is still a candidate is updated or skipped according to that
// level's parser settings; the slow chain is always fully updated.
void PreflateCompLevelEstimatorState::updateOrSkipHash(const unsigned len) {
  const unsigned candidates = info.possibleCompressionLevels;

  if ((candidates & (1 << 1)) != 0) {
    updateOrSkipSingleFastHash(fastL1Hash, len, fastPreflateParserSettings[0]);
  }
  if ((candidates & (1 << 2)) != 0) {
    updateOrSkipSingleFastHash(fastL2Hash, len, fastPreflateParserSettings[1]);
  }
  if ((candidates & (1 << 3)) != 0) {
    updateOrSkipSingleFastHash(fastL3Hash, len, fastPreflateParserSettings[2]);
  }
  slowHash.updateHash(len);
}
unsigned short PreflateCompLevelEstimatorState::matchDepth(
const unsigned hashHead,
const PreflateToken& targetReference,
const PreflateHashChainExt& hash) {
unsigned curPos = hash.input().pos();
unsigned curMaxDist = min(curPos, windowSize());
unsigned startDepth = hash.getNodeDepth(hashHead);
PreflateHashIterator chainIt = hash.iterateFromPos(curPos - targetReference.dist, curPos, curMaxDist);
if (!chainIt.curPos || targetReference.dist > curMaxDist) {
return 0xffffu;
}
unsigned endDepth = chainIt.depth();
return min(startDepth - endDepth, 0xffffu);
}
// Returns true when the reference in `token` is found in `hash` within the
// chain length allowed by `config` (depth <= config.max_chain).
bool PreflateCompLevelEstimatorState::checkMatchSingleFastHash(
    const PreflateToken& token,
    const PreflateHashChainExt& hash,
    const PreflateParserConfig& config,
    const unsigned hashHead) {
  const unsigned depth = matchDepth(hash.getHead(hashHead), token, hash);
  return depth <= config.max_chain;
}
void PreflateCompLevelEstimatorState::checkMatch(const PreflateToken& token) {
unsigned hashHead = slowHash.curHash();
if (slowHash.input().pos() >= token.dist + off0) {
if (info.possibleCompressionLevels & (1 << 1)) {
if (!checkMatchSingleFastHash(token, fastL1Hash, fastPreflateParserSettings[0], hashHead)) {
info.possibleCompressionLevels &= ~(1 << 1);
}
}
if (info.possibleCompressionLevels & (1 << 2)) {
if (!checkMatchSingleFastHash(token, fastL2Hash, fastPreflateParserSettings[1], hashHead)) {
info.possibleCompressionLevels &= ~(1 << 2);
}
}
if (info.possibleCompressionLevels & (1 << 3)) {
if (!checkMatchSingleFastHash(token, fastL3Hash, fastPreflateParserSettings[2], hashHead)) {
info.possibleCompressionLevels &= ~(1 << 3);
}
}
}
if (slowHash.input().pos() >= token.dist) {
info.referenceCount++;
unsigned short mdepth = matchDepth(slowHash.getHead(hashHead), token, slowHash);
if (mdepth >= 0x8001) {
info.unfoundReferences++;
} else {
info.maxChainDepth = max(info.maxChainDepth, mdepth);
}
if (token.dist == slowHash.input().pos()) {
info.matchToStart = true;
}
if (mdepth == 0) {
info.longestDistAtHop0 = max(info.longestDistAtHop0, token.dist);
} else {
info.longestDistAtHop1Plus = max(info.longestDistAtHop1Plus, token.dist);
}
if (token.len == 3) {
info.longestLen3Dist = max(info.longestLen3Dist, token.dist);
}
if (info.possibleCompressionLevels & ((1 << 10) - (1 << 4))) {
for (unsigned i = 0; i < 6; ++i) {
if (!(info.possibleCompressionLevels & (1 << (4 + i)))) {
continue;
}
const PreflateParserConfig& config = slowPreflateParserSettings[i];
if (mdepth > config.max_chain) {
info.possibleCompressionLevels &= ~(1 << (4 + i));
}
}
}
}
}
void PreflateCompLevelEstimatorState::checkDump(bool early_out) {
for (unsigned i = 0, n = blocks.size(); i < n; ++i) {
const PreflateTokenBlock& b = blocks[i];
if (b.type == PreflateTokenBlock::STORED) {
updateHash(b.uncompressedLen);
continue;
}
for (unsigned j = 0, m = b.tokens.size(); j < m; ++j) {
const PreflateToken& t = b.tokens[j];
if (t.len == 1) {
updateHash(1);
} else {
checkMatch(t);
updateOrSkipHash(t.len);
}
if (early_out && (info.possibleCompressionLevels & (info.possibleCompressionLevels - 1)) == 0) {
return;
}
}
}
}
void PreflateCompLevelEstimatorState::recommend() {
info.recommendedCompressionLevel = 9;
info.veryFarMatches = !(info.longestDistAtHop0 <= windowSize() - PreflateConstants::MIN_LOOKAHEAD
&& info.longestDistAtHop1Plus < windowSize() - PreflateConstants::MIN_LOOKAHEAD);
info.farLen3Matches = info.longestLen3Dist > 4096;
info.zlibCompatible = info.possibleCompressionLevels > 1
&& !info.matchToStart
&& !info.veryFarMatches
&& (!info.farLen3Matches || (info.possibleCompressionLevels & 0xe) != 0);
if (info.unfoundReferences) {
return;
}
if (info.zlibCompatible && info.possibleCompressionLevels > 1) {
unsigned l = info.possibleCompressionLevels >> 1;
info.recommendedCompressionLevel = 1;
while ((l & 1) == 0) {
info.recommendedCompressionLevel++;
l >>= 1;
}
return;
}
for (int i = 0; i < 6; ++i) {
const PreflateParserConfig& config = slowPreflateParserSettings[i];
if (info.maxChainDepth <= config.max_chain) {
info.recommendedCompressionLevel = 4 + i;
return;
}
}
}
// Convenience wrapper: builds an estimator over the given data and token
// blocks, replays the tokens (optionally stopping early), computes the
// recommendation and returns the resulting info record.
PreflateCompLevelInfo estimatePreflateCompLevel(
    const int wbits,
    const int mbits,
    const std::vector<unsigned char>& unpacked_output,
    const size_t off0,
    const std::vector<PreflateTokenBlock>& blocks,
    const bool early_out) {
  PreflateCompLevelEstimatorState estimator(wbits, mbits, unpacked_output, off0, blocks);
  estimator.checkDump(early_out);
  estimator.recommend();
  return estimator.info;
}

View File

@@ -0,0 +1,76 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef PREFLATE_COMPLEVEL_ESTIMATOR_H
#define PREFLATE_COMPLEVEL_ESTIMATOR_H
#include "preflate_predictor_state.h"
#include "preflate_token.h"
// Result record of the compression level estimation.
struct PreflateCompLevelInfo {
// Bit mask of levels not yet ruled out; bit k stands for level k.
// Initialised to bits 1..9 by the estimator state's constructor.
unsigned possibleCompressionLevels;
// Level chosen by PreflateCompLevelEstimatorState::recommend() (defaults to 9).
unsigned recommendedCompressionLevel;
// Computed in recommend() from the candidate levels and the flags below.
bool zlibCompatible;
// Number of match tokens whose reference lies within the available input.
unsigned referenceCount;
// Number of those references for which the chain depth came back >= 0x8001.
unsigned unfoundReferences;
// Largest chain depth among the references that were found.
unsigned short maxChainDepth;
// Largest distance seen on a match of length 3.
unsigned short longestLen3Dist;
// Largest distance seen on a match with chain depth 0.
unsigned short longestDistAtHop0;
// Largest distance seen on a match with any other chain depth.
unsigned short longestDistAtHop1Plus;
// Set when a match distance equals the current input position.
bool matchToStart;
// Set in recommend() when the longest distances exceed the window size
// minus PreflateConstants::MIN_LOOKAHEAD.
bool veryFarMatches;
// Set in recommend() when longestLen3Dist is greater than 4096.
bool farLen3Matches;
};
// Working state of the compression level estimator: replays a token stream
// against several hash chains and records the findings in `info`.
struct PreflateCompLevelEstimatorState {
// Hash chain that is always fully updated; used for levels 4..9 and statistics.
PreflateHashChainExt slowHash;
// Hash chains for levels 1, 2 and 3; only maintained while the respective
// level is still a candidate.
PreflateHashChainExt fastL1Hash;
PreflateHashChainExt fastL2Hash;
PreflateHashChainExt fastL3Hash;
// Token blocks to analyse (referenced, not owned).
const std::vector<PreflateTokenBlock>& blocks;
// Accumulated results; see PreflateCompLevelInfo.
PreflateCompLevelInfo info;
// Window size, initialised to 1 << wbits.
uint16_t wsize;
// Number of leading bytes hashed up front by the constructor.
size_t off0;
// Sets up the hash chains, clears `info` and hashes the first off0 bytes.
PreflateCompLevelEstimatorState(const int wbits, const int mbits,
const std::vector<unsigned char>& unpacked_output,
const size_t off0,
const std::vector<PreflateTokenBlock>& blocks);
// Advances all (still relevant) hash chains by `len` bytes.
void updateHash(const unsigned len);
// Like updateHash, but lets each fast chain skip instead of hash
// when `len` exceeds that level's max_lazy.
void updateOrSkipHash(const unsigned len);
// Checks one match token and rules out levels that cannot have produced it.
void checkMatch(const PreflateToken& token);
// Replays all blocks; with early_out, stops once at most one level remains.
void checkDump(bool early_out);
// Fills in the summary flags and the recommended level of `info`.
void recommend();
private:
void updateOrSkipSingleFastHash(PreflateHashChainExt&, const unsigned len, const PreflateParserConfig&);
bool checkMatchSingleFastHash(const PreflateToken& token, const PreflateHashChainExt&, const PreflateParserConfig&,
const unsigned hashHead);
// Chain depth between hashHead and the referenced position, 0xffff if not found.
uint16_t matchDepth(const unsigned hashHead, const PreflateToken& targetReference,
const PreflateHashChainExt& hash);
unsigned windowSize() const {
return wsize;
}
};
// Estimates which compression levels could have produced `blocks` for the
// given uncompressed data.
//   wbits            - window size is 1 << wbits
//   mbits            - passed on to the hash chains
//   unpacked_output  - uncompressed data the tokens refer to
//   off0             - number of leading bytes hashed before replaying tokens
//   blocks           - token blocks to analyse
//   early_out        - stop as soon as at most one candidate level remains
// Returns the filled-in PreflateCompLevelInfo.
PreflateCompLevelInfo estimatePreflateCompLevel(
const int wbits,
const int mbits,
const std::vector<unsigned char>& unpacked_output,
const size_t off0,
const std::vector<PreflateTokenBlock>& blocks,
const bool early_out);
#endif /* PREFLATE_COMPLEVEL_ESTIMATOR_H */

View File

@@ -0,0 +1,88 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include <algorithm>
#include "preflate_block_reencoder.h"
#include "support/bit_helper.h"
/* tables taken from zlib */
// Distance -> distance code lookup, used through PreflateConstants::DCode():
// entries 0..255 are indexed by (dist - 1) for distances up to 256,
// entries 256..511 by 256 + ((dist - 1) >> 7) for larger distances.
const unsigned char PreflateConstants::distCodeTable[512] = {
0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8,
8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13,
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 16, 17,
18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22,
23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29
};
// Match length -> length code lookup, used through PreflateConstants::LCode():
// indexed by (len - MIN_MATCH), yielding codes 0..28.
const unsigned char PreflateConstants::lengthCodeTable[MAX_MATCH - MIN_MATCH + 1 ] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28
};
// One entry per length code: the smallest (len - MIN_MATCH) value mapped to
// that code, i.e. the first index at which the code appears in lengthCodeTable.
const unsigned char PreflateConstants::lengthBaseTable[LEN_CODE_COUNT] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
64, 80, 96, 112, 128, 160, 192, 224, 255
};
// One entry per distance code.
// NOTE(review): presumably the smallest (dist - 1) value mapped to that code,
// mirroring zlib's base_dist table — confirm against the users of this table.
const unsigned short PreflateConstants::distBaseTable[DIST_CODE_COUNT] = {
0, 1, 2, 3, 4, 6, 8, 12, 16, 24,
32, 48, 64, 96, 128, 192, 256, 384, 512, 768,
1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576
};
// Number of extra bits that follow each of the LEN_CODE_COUNT length codes.
const unsigned char PreflateConstants::lengthExtraTable[LEN_CODE_COUNT] = { /* extra bits for each length code */
0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0
};
// Number of extra bits that follow each of the DIST_CODE_COUNT distance codes.
const unsigned char PreflateConstants::distExtraTable[DIST_CODE_COUNT] = { /* extra bits for each distance code */
0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13
};
// Permutation of the CODETREE_CODE_COUNT code-length symbols; the values match
// the order in which RFC 1951 (section 3.2.7) stores code length code lengths
// in a dynamic Huffman block header.
const unsigned char PreflateConstants::treeCodeOrderTable[CODETREE_CODE_COUNT] = {
16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15
};

View File

@@ -0,0 +1,53 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef PREFLATE_CONSTANTS_H
#define PREFLATE_CONSTANTS_H
// Deflate format constants plus the lookup tables (defined in the matching
// .cpp file) and helpers that map match lengths and distances to their codes.
struct PreflateConstants {
  enum {
    LITERAL_COUNT = 256,
    NONLEN_CODE_COUNT = LITERAL_COUNT + 1, // EOB
    LEN_CODE_COUNT = 29,
    LITLEN_CODE_COUNT = NONLEN_CODE_COUNT + LEN_CODE_COUNT,
    DIST_CODE_COUNT = 30,
    LITLENDIST_CODE_COUNT = LITLEN_CODE_COUNT + DIST_CODE_COUNT,
    CODETREE_CODE_COUNT = 19,
    MIN_MATCH = 3,
    MAX_MATCH = 258,
    MAX_BITS = 15,
    MIN_LOOKAHEAD = MAX_MATCH + MIN_MATCH + 1,
  };

  static const unsigned char distCodeTable[512];
  static const unsigned char lengthCodeTable[MAX_MATCH - MIN_MATCH + 1];
  static const unsigned char lengthBaseTable[LEN_CODE_COUNT];
  static const unsigned short distBaseTable[DIST_CODE_COUNT];
  static const unsigned char lengthExtraTable[LEN_CODE_COUNT];
  static const unsigned char distExtraTable[DIST_CODE_COUNT];
  static const unsigned char treeCodeOrderTable[CODETREE_CODE_COUNT];

  // Distance code for `dist`: distances up to 256 use the first half of
  // distCodeTable directly, larger ones use the second half in steps of 128.
  static inline unsigned DCode(const unsigned dist) {
    const unsigned zeroBased = dist - 1;
    if (dist <= 256) {
      return distCodeTable[zeroBased];
    }
    return distCodeTable[256 + (zeroBased >> 7)];
  }

  // Length code for a match of `len` bytes (table is indexed from MIN_MATCH).
  static inline unsigned LCode(const unsigned len) {
    const unsigned tableIndex = len - MIN_MATCH;
    return lengthCodeTable[tableIndex];
  }
};
#endif /* PREFLATE_CONSTANTS_H */

View File

@@ -0,0 +1,278 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include <string.h>
#include <functional>
#include "preflate_block_decoder.h"
#include "preflate_decoder.h"
#include "preflate_parameter_estimator.h"
#include "preflate_statistical_model.h"
#include "preflate_token_predictor.h"
#include "preflate_tree_predictor.h"
#include "support/bitstream.h"
#include "support/memstream.h"
#include "support/outputcachestream.h"
class PreflateDecoderHandler : public PreflateDecoderTask::Handler {
public:
PreflateDecoderHandler(std::function<void(void)> progressCallback_)
: progressCallback(progressCallback_) {}
bool finish(std::vector<uint8_t>& reconstructionData) {
reconstructionData = encoder.finish();
return !encoder.error();
}
bool error() const {
return encoder.error();
}
virtual uint32_t setModel(const PreflateStatisticsCounter& counters, const PreflateParameters& parameters) {
return encoder.addModel(counters, parameters);
}
virtual bool beginEncoding(const uint32_t metaBlockId, PreflatePredictionEncoder& codec, const uint32_t modelId) {
return encoder.beginMetaBlockWithModel(codec, modelId);
}
virtual bool endEncoding(const uint32_t metaBlockId, PreflatePredictionEncoder& codec, const size_t uncompressedSize) {
return encoder.endMetaBlock(codec, uncompressedSize);
}
virtual void markProgress() {
std::unique_lock<std::mutex> lock(this->_mutex);
progressCallback();
}
private:
PreflateMetaEncoder encoder;
std::function<void(void)> progressCallback;
std::mutex _mutex;
};
// Creates the task for one meta block. The token blocks and the uncompressed
// data are taken over from the caller.
//   handler_            - receives model, encoding and progress calls
//   metaBlockId_        - id reported back to the handler
//   tokenData_          - token blocks of this meta block (moved from)
//   uncompressedData_   - uncompressed bytes, including uncompressedOffset_
//                         leading bytes of earlier context (moved from)
//   lastMetaBlock_      - true for the final meta block of the stream
//   paddingBits_        - value of the padding bits after the last block
PreflateDecoderTask::PreflateDecoderTask(PreflateDecoderTask::Handler& handler_,
                                         const uint32_t metaBlockId_,
                                         std::vector<PreflateTokenBlock>&& tokenData_,
                                         std::vector<uint8_t>&& uncompressedData_,
                                         const size_t uncompressedOffset_,
                                         const bool lastMetaBlock_,
                                         const uint32_t paddingBits_)
  : handler(handler_)
  , metaBlockId(metaBlockId_)
  // A named rvalue reference is an lvalue: without std::move these would be
  // full copies of potentially very large vectors.
  , tokenData(std::move(tokenData_))
  , uncompressedData(std::move(uncompressedData_))
  , uncompressedOffset(uncompressedOffset_)
  , lastMetaBlock(lastMetaBlock_)
  , paddingBits(paddingBits_) {
}
// First pass over the meta block: estimates the encoder parameters, creates
// the token and tree predictors, analyses every block and accumulates the
// statistics counters. Returns false as soon as a predictor reports a failure.
bool PreflateDecoderTask::analyze() {
  params = estimatePreflateParameters(uncompressedData, uncompressedOffset, tokenData);
  memset(&counter, 0, sizeof(counter));
  tokenPredictor.reset(new PreflateTokenPredictor(params, uncompressedData, uncompressedOffset));
  treePredictor.reset(new PreflateTreePredictor(uncompressedData, uncompressedOffset));

  const unsigned blockCount = tokenData.size();
  for (unsigned blockIdx = 0; blockIdx < blockCount; ++blockIdx) {
    tokenPredictor->analyzeBlock(blockIdx, tokenData[blockIdx]);
    treePredictor->analyzeBlock(blockIdx, tokenData[blockIdx]);

    const bool failed = tokenPredictor->predictionFailure || treePredictor->predictionFailure;
    if (failed) {
      return false;
    }

    tokenPredictor->updateCounters(&counter, blockIdx);
    treePredictor->updateCounters(&counter, blockIdx);
    handler.markProgress();
  }

  // Record whether the stream ends with non-zero padding bits.
  const bool hasPadding = paddingBits != 0;
  counter.block.incNonZeroPadding(hasPadding);
  return true;
}
// Second pass: registers the model gathered by analyze() with the handler and
// encodes the prediction corrections of every block. For the last meta block
// the EOF markers and the padding bits are encoded as well.
// Returns false if the handler refuses the meta block or a predictor fails.
bool PreflateDecoderTask::encode() {
  PreflatePredictionEncoder pcodec;
  const unsigned modelId = handler.setModel(counter, params);
  if (!handler.beginEncoding(metaBlockId, pcodec, modelId)) {
    return false;
  }

  const unsigned blockCount = tokenData.size();
  for (unsigned blockIdx = 0; blockIdx < blockCount; ++blockIdx) {
    tokenPredictor->encodeBlock(&pcodec, blockIdx);
    treePredictor->encodeBlock(&pcodec, blockIdx);
    if (tokenPredictor->predictionFailure || treePredictor->predictionFailure) {
      return false;
    }
    if (lastMetaBlock) {
      const bool isFinalBlock = blockIdx + 1 == tokenData.size();
      tokenPredictor->encodeEOF(&pcodec, blockIdx, isFinalBlock);
    }
  }

  if (lastMetaBlock) {
    const bool hasPadding = paddingBits != 0;
    pcodec.encodeNonZeroPadding(hasPadding);
    if (hasPadding) {
      // Store the bit length in 3 bits, then the bits below the leading one.
      const unsigned bitsToSave = bitLength(paddingBits);
      pcodec.encodeValue(bitsToSave, 3);
      if (bitsToSave > 1) {
        const unsigned payloadBits = bitsToSave - 1;
        pcodec.encodeValue(paddingBits & ((1 << payloadBits) - 1), payloadBits);
      }
    }
  }

  const size_t encodedBytes = uncompressedData.size() - uncompressedOffset;
  return handler.endEncoding(metaBlockId, pcodec, encodedBytes);
}
// Decodes a raw deflate stream and produces the data needed to recreate it.
//   unpacked_output   - receives the uncompressed data
//   preflate_diff     - receives the reconstruction data
//   deflate_size      - receives the number of deflate bytes consumed
//                       (bit count rounded up to whole bytes)
//   deflate_raw       - the deflate stream to read
//   block_callback    - invoked after every decoded deflate block and on
//                       every progress mark of the analysis
//   min_deflate_size  - streams shorter than this are rejected
//   metaBlockSize     - target uncompressed size of a meta block; clamped to
//                       the range [1 << 18, (1 << 31) - 1]
// Returns true when the whole stream was decoded and encoded successfully.
bool preflate_decode(OutputStream& unpacked_output,
                     std::vector<unsigned char>& preflate_diff,
                     uint64_t& deflate_size,
                     InputStream& deflate_raw,
                     std::function<void(void)> block_callback,
                     const size_t min_deflate_size,
                     const size_t metaBlockSize) {
  deflate_size = 0;
  uint64_t deflate_bits = 0;
  size_t prevBitPos = 0;
  BitInputStream decInBits(deflate_raw);
  OutputCacheStream decOutCache(unpacked_output);
  PreflateBlockDecoder bdec(decInBits, decOutCache);
  if (bdec.status() != PreflateBlockDecoder::OK) {
    return false;
  }
  bool last = false;
  std::vector<PreflateTokenBlock> blocks;
  std::vector<uint32_t> blockSizes;
  uint64_t sumBlockSizes = 0;
  uint64_t lastEndPos = 0;
  uint64_t uncompressedMetaStart = 0;
  size_t MBSize = std::min<size_t>(std::max<size_t>(metaBlockSize, 1u << 18), (1u << 31) - 1);
  // A meta block is only cut once 1.5 times the target size has accumulated.
  size_t MBThreshold = (MBSize * 3) >> 1;
  PreflateDecoderHandler encoder(block_callback);
  size_t MBcount = 0;
  // Nothing in this function enqueues futures or raises queueLimit, so every
  // task below is processed synchronously.
  std::queue<std::future<std::shared_ptr<PreflateDecoderTask>>> futureQueue;
  size_t queueLimit = 0;
  bool fail = false;
  do {
    PreflateTokenBlock newBlock;
    bool ok = bdec.readBlock(newBlock, last);
    if (!ok) {
      fail = true;
      break;
    }
    uint64_t blockSize = decOutCache.cacheEndPos() - lastEndPos;
    lastEndPos = decOutCache.cacheEndPos();
    // The limit must be computed in 64 bits: a plain `1 << 31` is a negative
    // int that converts to a huge unsigned value, which disabled this check
    // and let blockSize be truncated when stored as uint32_t below.
    if (blockSize >= (static_cast<uint64_t>(1) << 31)) {
      // No mega blocks
      fail = true;
      break;
    }
    blocks.push_back(newBlock);
    blockSizes.push_back(blockSize);
    block_callback();
    deflate_bits += decInBits.bitPos() - prevBitPos;
    prevBitPos = decInBits.bitPos();
    sumBlockSizes += blockSize;
    if (last || sumBlockSizes >= MBThreshold) {
      size_t blockCount, blockSizeSum;
      if (last) {
        // The final meta block takes everything that is left.
        blockCount = blockSizes.size();
        blockSizeSum = sumBlockSizes;
      } else {
        // Take leading blocks until the target meta block size is reached.
        blockCount = 0;
        blockSizeSum = 0;
        for (const auto bs : blockSizes) {
          blockSizeSum += bs;
          ++blockCount;
          if (blockSizeSum >= MBSize) {
            break;
          }
        }
      }
      std::vector<PreflateTokenBlock> blocksForMeta;
      for (size_t j = 0; j < blockCount; ++j) {
        blocksForMeta.push_back(std::move(blocks[j]));
      }
      blocks.erase(blocks.begin(), blocks.begin() + blockCount);
      blockSizes.erase(blockSizes.begin(), blockSizes.begin() + blockCount);
      sumBlockSizes -= blockSizeSum;
      // Every meta block after the first also carries the preceding
      // 1 << 15 bytes of output as context.
      size_t uncompressedOffset = MBcount == 0 ? 0 : 1 << 15;
      std::vector<uint8_t> uncompressedDataForMeta(
        decOutCache.cacheData(uncompressedMetaStart - uncompressedOffset),
        decOutCache.cacheData(uncompressedMetaStart - uncompressedOffset) + blockSizeSum + uncompressedOffset);
      uncompressedMetaStart += blockSizeSum;
      size_t paddingBits = 0;
      if (last) {
        // Read the bits that pad the stream to the next byte boundary.
        uint8_t remaining_bit_count = (8 - deflate_bits) & 7;
        paddingBits = decInBits.get(remaining_bit_count);
        deflate_bits += decInBits.bitPos() - prevBitPos;
        prevBitPos = decInBits.bitPos();
      }
      if (futureQueue.empty() && (queueLimit == 0 || last)) {
        PreflateDecoderTask task(encoder, MBcount,
                                 std::move(blocksForMeta),
                                 std::move(uncompressedDataForMeta),
                                 uncompressedOffset,
                                 last, paddingBits);
        if (!task.analyze() || !task.encode()) {
          fail = true;
          break;
        }
      }
      if (!last) {
        // Keep the last 1 << 15 bytes cached as context for the next meta block.
        decOutCache.flushUpTo(uncompressedMetaStart - (1 << 15));
      }
      MBcount++;
    }
  } while (!fail && !last);
  while (!futureQueue.empty()) {
    std::future<std::shared_ptr<PreflateDecoderTask>> first = std::move(futureQueue.front());
    futureQueue.pop();
    std::shared_ptr<PreflateDecoderTask> data = first.get();
    if (fail || !data || !data->encode()) {
      fail = true;
    }
  }
  decOutCache.flush();
  deflate_size = (deflate_bits + 7) >> 3;
  if (deflate_size < min_deflate_size) {
    return false;
  }
  return !fail && encoder.finish(preflate_diff);
}
bool preflate_decode(std::vector<unsigned char>& unpacked_output,
std::vector<unsigned char>& preflate_diff,
uint64_t& deflate_size,
InputStream& deflate_raw,
std::function<void(void)> block_callback,
const size_t min_deflate_size,
const size_t metaBlockSize) {
MemStream uncompressedOutput;
bool result = preflate_decode(uncompressedOutput, preflate_diff, deflate_size, deflate_raw,
block_callback, min_deflate_size, metaBlockSize);
unpacked_output = uncompressedOutput.extractData();
return result;
}
bool preflate_decode(std::vector<unsigned char>& unpacked_output,
std::vector<unsigned char>& preflate_diff,
const std::vector<unsigned char>& deflate_raw,
const size_t metaBlockSize) {
MemStream mem(deflate_raw);
uint64_t raw_size;
return preflate_decode(unpacked_output, preflate_diff,
raw_size, mem, [] {}, 0, metaBlockSize)
&& raw_size == deflate_raw.size();
}

View File

@@ -0,0 +1,90 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef PREFLATE_DECODER_H
#define PREFLATE_DECODER_H
#include <functional>
#include <queue>
#include <vector>
#include "preflate_statistical_codec.h"
#include "preflate_token.h"
#include "support/stream.h"
#include "support/task_pool.h"
struct PreflateTokenPredictor;
struct PreflateTreePredictor;
// Work item for one meta block of a deflate stream: analyze() gathers the
// statistics, encode() writes the prediction corrections via the handler.
class PreflateDecoderTask {
public:
  // Callback interface through which a task registers its model, brackets
  // the encoding of its meta block and reports progress.
  class Handler {
  public:
    virtual ~Handler() {}
    // Registers a model; the returned id is later passed to beginEncoding().
    virtual uint32_t setModel(const PreflateStatisticsCounter&, const PreflateParameters&) = 0;
    // Called before the meta block is encoded; false aborts the task.
    virtual bool beginEncoding(const uint32_t metaBlockId, PreflatePredictionEncoder&, const uint32_t modelId) = 0;
    // Called after the meta block is encoded; its result is returned by encode().
    virtual bool endEncoding(const uint32_t metaBlockId, PreflatePredictionEncoder&, const size_t uncompressedSize) = 0;
    // Called once per analysed block.
    virtual void markProgress() = 0;
  };

  // uncompressedData holds uncompressedOffset bytes of earlier context
  // followed by the bytes of this meta block.
  PreflateDecoderTask(Handler& handler,
                      const uint32_t metaBlockId,
                      std::vector<PreflateTokenBlock>&& tokenData,
                      std::vector<uint8_t>&& uncompressedData,
                      const size_t uncompressedOffset,
                      const bool lastMetaBlock,
                      const uint32_t paddingBits);

  // Estimates parameters and collects statistics; false on prediction failure.
  bool analyze();
  // Encodes the meta block through the handler; must follow a successful analyze().
  bool encode();

  // Id of the meta block this task was created for.
  uint32_t id() const {
    return metaBlockId;
  }

private:
  Handler& handler;
  uint32_t metaBlockId;
  std::vector<PreflateTokenBlock> tokenData;
  std::vector<uint8_t> uncompressedData;
  size_t uncompressedOffset;
  bool lastMetaBlock;
  uint32_t paddingBits;
  // Filled in by analyze() and consumed by encode().
  PreflateParameters params;
  PreflateStatisticsCounter counter;
  std::unique_ptr<PreflateTokenPredictor> tokenPredictor;
  std::unique_ptr<PreflateTreePredictor> treePredictor;
};
// Decodes the deflate stream read from deflate_raw, writing the uncompressed
// data to the stream unpacked_output and the reconstruction data to
// preflate_diff. deflate_size receives the number of deflate bytes consumed;
// block_callback is invoked as blocks are processed. Fails when fewer than
// min_deflate_size bytes were consumed.
bool preflate_decode(OutputStream& unpacked_output,
std::vector<unsigned char>& preflate_diff,
uint64_t& deflate_size,
InputStream& deflate_raw,
std::function<void(void)> block_callback,
const size_t min_deflate_size,
const size_t metaBlockSize = INT32_MAX);
// In-memory convenience overload: succeeds only if the deflate stream spans
// exactly the whole of deflate_raw.
bool preflate_decode(std::vector<unsigned char>& unpacked_output,
std::vector<unsigned char>& preflate_diff,
const std::vector<unsigned char>& deflate_raw,
const size_t metaBlockSize = INT32_MAX);
// Same as the stream-based overload, but returns the uncompressed data in a vector.
bool preflate_decode(std::vector<unsigned char>& unpacked_output,
std::vector<unsigned char>& preflate_diff,
uint64_t& deflate_size,
InputStream& deflate_raw,
std::function<void (void)> block_callback,
const size_t min_deflate_size,
const size_t metaBlockSize = INT32_MAX);
#endif /* PREFLATE_DECODER_H */

View File

@@ -0,0 +1,25 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16
VisualStudioVersion = 16.0.30204.135
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "preflate_dll", "preflate_dll.vcxproj", "{C4097C5B-2BFC-499A-BEB4-4B709B576722}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Release|x64 = Release|x64
Release|x86 = Release|x86
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{C4097C5B-2BFC-499A-BEB4-4B709B576722}.Release|x64.ActiveCfg = Release|x64
{C4097C5B-2BFC-499A-BEB4-4B709B576722}.Release|x64.Build.0 = Release|x64
{C4097C5B-2BFC-499A-BEB4-4B709B576722}.Release|x86.ActiveCfg = Release|Win32
{C4097C5B-2BFC-499A-BEB4-4B709B576722}.Release|x86.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {6A2147EA-B1D0-47D9-8DB6-5B335F718795}
EndGlobalSection
EndGlobal

View File

@@ -0,0 +1,179 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<VCProjectVersion>16.0</VCProjectVersion>
<Keyword>Win32Proj</Keyword>
<ProjectGuid>{c4097c5b-2bfc-499a-beb4-4b709b576722}</ProjectGuid>
<RootNamespace>preflatedll</RootNamespace>
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v142</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v142</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>WIN32;NDEBUG;PREFLATEDLL_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<PrecompiledHeader>Use</PrecompiledHeader>
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<Optimization>Full</Optimization>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateDebugInformation>false</GenerateDebugInformation>
<EnableUAC>false</EnableUAC>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>NDEBUG;PREFLATEDLL_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<PrecompiledHeader>Use</PrecompiledHeader>
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<Optimization>Full</Optimization>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateDebugInformation>false</GenerateDebugInformation>
<EnableUAC>false</EnableUAC>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClInclude Include="framework.h" />
<ClInclude Include="pch.h" />
<ClInclude Include="preflate.h" />
<ClInclude Include="preflate_block_decoder.h" />
<ClInclude Include="preflate_block_reencoder.h" />
<ClInclude Include="preflate_block_trees.h" />
<ClInclude Include="preflate_checker.h" />
<ClInclude Include="preflate_complevel_estimator.h" />
<ClInclude Include="preflate_constants.h" />
<ClInclude Include="preflate_decoder.h" />
<ClInclude Include="preflate_hash_chain.h" />
<ClInclude Include="preflate_info.h" />
<ClInclude Include="preflate_input.h" />
<ClInclude Include="preflate_parameter_estimator.h" />
<ClInclude Include="preflate_parser_config.h" />
<ClInclude Include="preflate_predictor_state.h" />
<ClInclude Include="preflate_reencoder.h" />
<ClInclude Include="preflate_seq_chain.h" />
<ClInclude Include="preflate_statistical_codec.h" />
<ClInclude Include="preflate_statistical_model.h" />
<ClInclude Include="preflate_token.h" />
<ClInclude Include="preflate_token_predictor.h" />
<ClInclude Include="preflate_tree_predictor.h" />
<ClInclude Include="support\arithmetic_coder.h" />
<ClInclude Include="support\array_helper.h" />
<ClInclude Include="support\bitstream.h" />
<ClInclude Include="support\bit_helper.h" />
<ClInclude Include="support\const_division.h" />
<ClInclude Include="support\filestream.h" />
<ClInclude Include="support\huffman_decoder.h" />
<ClInclude Include="support\huffman_encoder.h" />
<ClInclude Include="support\huffman_helper.h" />
<ClInclude Include="support\memstream.h" />
<ClInclude Include="support\outputcachestream.h" />
<ClInclude Include="support\stream.h" />
<ClInclude Include="support\support_tests.h" />
<ClInclude Include="support\task_pool.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="dllmain.cpp" />
<ClCompile Include="pch.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
</ClCompile>
<ClCompile Include="preflate_block_decoder.cpp" />
<ClCompile Include="preflate_block_reencoder.cpp" />
<ClCompile Include="preflate_block_trees.cpp" />
<ClCompile Include="preflate_checker.cpp" />
<ClCompile Include="preflate_complevel_estimator.cpp" />
<ClCompile Include="preflate_constants.cpp" />
<ClCompile Include="preflate_decoder.cpp" />
<ClCompile Include="preflate_hash_chain.cpp" />
<ClCompile Include="preflate_info.cpp" />
<ClCompile Include="preflate_parameter_estimator.cpp" />
<ClCompile Include="preflate_parser_config.cpp" />
<ClCompile Include="preflate_predictor_state.cpp" />
<ClCompile Include="preflate_reencoder.cpp" />
<ClCompile Include="preflate_seq_chain.cpp" />
<ClCompile Include="preflate_statistical_codec.cpp" />
<ClCompile Include="preflate_statistical_debug.cpp" />
<ClCompile Include="preflate_statistical_model.cpp" />
<ClCompile Include="preflate_token.cpp" />
<ClCompile Include="preflate_token_predictor.cpp" />
<ClCompile Include="preflate_tree_predictor.cpp" />
<ClCompile Include="support\arithmetic_coder.cpp" />
<ClCompile Include="support\array_helper.cpp" />
<ClCompile Include="support\bitstream.cpp" />
<ClCompile Include="support\bit_helper.cpp" />
<ClCompile Include="support\const_division.cpp" />
<ClCompile Include="support\filestream.cpp" />
<ClCompile Include="support\huffman_decoder.cpp" />
<ClCompile Include="support\huffman_encoder.cpp" />
<ClCompile Include="support\huffman_helper.cpp" />
<ClCompile Include="support\memstream.cpp" />
<ClCompile Include="support\outputcachestream.cpp" />
<ClCompile Include="support\support_tests.cpp" />
<ClCompile Include="support\task_pool.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

View File

@@ -0,0 +1,237 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions>
</Filter>
<Filter Include="Resource Files">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
</ItemGroup>
<ItemGroup>
<ClInclude Include="framework.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="pch.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="preflate.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="preflate_block_decoder.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="preflate_block_reencoder.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="preflate_block_trees.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="preflate_checker.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="preflate_complevel_estimator.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="preflate_constants.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="preflate_decoder.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="preflate_hash_chain.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="preflate_info.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="preflate_input.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="preflate_parameter_estimator.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="preflate_parser_config.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="preflate_predictor_state.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="preflate_reencoder.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="preflate_seq_chain.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="preflate_statistical_codec.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="preflate_statistical_model.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="preflate_token.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="preflate_token_predictor.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="preflate_tree_predictor.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="support\arithmetic_coder.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="support\array_helper.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="support\bit_helper.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="support\bitstream.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="support\const_division.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="support\filestream.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="support\huffman_decoder.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="support\huffman_encoder.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="support\huffman_helper.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="support\memstream.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="support\outputcachestream.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="support\stream.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="support\support_tests.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="support\task_pool.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="dllmain.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="pch.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="preflate_block_decoder.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="preflate_block_reencoder.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="preflate_block_trees.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="preflate_checker.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="preflate_complevel_estimator.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="preflate_constants.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="preflate_decoder.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="preflate_hash_chain.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="preflate_info.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="preflate_parameter_estimator.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="preflate_parser_config.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="preflate_predictor_state.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="preflate_reencoder.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="preflate_seq_chain.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="preflate_statistical_codec.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="preflate_statistical_debug.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="preflate_statistical_model.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="preflate_token.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="preflate_token_predictor.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="preflate_tree_predictor.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="support\arithmetic_coder.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="support\array_helper.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="support\bit_helper.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="support\bitstream.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="support\const_division.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="support\filestream.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="support\huffman_decoder.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="support\huffman_encoder.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="support\huffman_helper.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="support\memstream.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="support\outputcachestream.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="support\support_tests.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="support\task_pool.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
</Project>

View File

@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup />
</Project>

View File

@@ -0,0 +1,116 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include <algorithm>
#include <string.h>
#include "preflate_constants.h"
#include "preflate_hash_chain.h"
// Sets up a hash chain over `input_`.
//
// The hash width is memLevel + 7 bits. `head` gets one slot per hash value,
// `prev` and `chainDepth` get 65536 slots each; all three start zero-filled.
// When more than two input bytes are available the running hash is primed
// with the first two of them.
PreflateHashChainExt::PreflateHashChainExt(
    const std::vector<unsigned char>& input_,
    const unsigned char memLevel)
  : _input(input_)
  , totalShift(-8) {
  const unsigned windowEntries = 1u << 16;

  hashBits = memLevel + 7;
  hashShift = (hashBits + PreflateConstants::MIN_MATCH - 1) / PreflateConstants::MIN_MATCH;
  hashMask = (1 << hashBits) - 1;
  runningHash = 0;

  // The trailing "()" value-initialises the arrays, i.e. fills them with zero.
  const unsigned headEntries = hashMask + 1;
  head = new unsigned short[headEntries]();
  prev = new unsigned short[windowEntries]();
  chainDepth = new unsigned[windowEntries]();

  if (_input.remaining() > 2) {
    for (int k = 0; k < 2; ++k) {
      updateRunningHash(_input.curChar(k));
    }
  }
}
// Frees the three tables allocated by the constructor.
PreflateHashChainExt::~PreflateHashChainExt() {
  delete[] prev;
  delete[] chainDepth;
  delete[] head;
}
// Inserts the next `l` input positions into the hash chain and advances the
// input by `l` bytes.
//
// Requests longer than 0x180 positions are processed in chunks of at most
// 0x180, so the reshift check below runs once per chunk.
//
// Uses (std::min) instead of the unqualified `min`: the latter only resolves
// through the min/max macros of <windows.h>; the parenthesised standard
// function works whether or not those macros are defined.
void PreflateHashChainExt::updateHash(const unsigned l) {
  const unsigned maxChunk = 0x180u;
  if (l > maxChunk) {
    for (unsigned left = l; left > 0; ) {
      const unsigned blk = (std::min)(left, maxChunk);
      updateHash(blk);
      left -= blk;
    }
    return;
  }

  const unsigned char* b = _input.curChars();
  const unsigned pos = _input.pos();
  if (pos - totalShift >= 0xfe08) {
    reshift();
  }

  // The hash of position k covers bytes k..k+2, hence the offset of 2 between
  // the byte being hashed (i) and the position being inserted (i - 2).
  // The input is not advanced inside the loop, so the bound is loop-invariant.
  const unsigned end = (std::min)(l + 2, _input.remaining());
  for (unsigned i = 2; i < end; ++i) {
    updateRunningHash(b[i]);
    const unsigned h = runningHash & hashMask;
    const unsigned p = (pos + i - 2) - totalShift;
    chainDepth[p] = chainDepth[head[h]] + 1;
    prev[p] = head[h];
    head[h] = p;
  }
  _input.advance(l);
}
// Advances the input by `l` bytes, linking only the first of those positions
// into the hash chain. The remaining skipped positions merely get a marker
// chain depth, and the running hash is re-primed with the bytes at offsets
// `l` and `l + 1` when they exist.
void PreflateHashChainExt::skipHash(const unsigned l) {
  const unsigned char* bytes = _input.curChars();
  const unsigned start = _input.pos();
  if (start - totalShift >= 0xfe08) {
    reshift();
  }

  const unsigned avail = _input.remaining();
  if (avail <= 2) {
    // Too little data left to form a hash; just move on.
    _input.advance(l);
    return;
  }

  // Insert the first position as usual.
  updateRunningHash(bytes[2]);
  const unsigned slot = runningHash & hashMask;
  const unsigned first = start - totalShift;
  const unsigned short oldHead = head[slot];
  chainDepth[first] = chainDepth[oldHead] + 1;
  prev[first] = oldHead;
  head[slot] = first;

  // Skipped data is not inserted into the hash chain,
  // but we must still update the chainDepth, to avoid
  // bad analysis results
  for (unsigned i = 1; i < l; ++i) {
    chainDepth[first + i] = 0xffff8000;
  }

  // l must be at least 3
  if (avail > l) {
    updateRunningHash(bytes[l]);
    if (avail > l + 1) {
      updateRunningHash(bytes[l + 1]);
    }
  }
  _input.advance(l);
}
// Slides the chain tables down by 0x7e00 positions and adds that amount to
// `totalShift`.
//
// Entries of `head` and `prev` that refer to positions at or below the shift
// distance are clamped to 0. The clamp is written out explicitly instead of
// going through the unqualified `max`, which only resolves through the
// <windows.h> min/max macros.
void PreflateHashChainExt::reshift() {
  const unsigned short delta = 0x7e00;

  for (unsigned i = 0, n = hashMask + 1; i < n; ++i) {
    const unsigned short v = head[i];
    head[i] = static_cast<unsigned short>(v > delta ? v - delta : 0);
  }
  // The first 8 slots are left in place; everything above them moves down.
  for (unsigned i = delta + 8, n = 1 << 16; i < n; ++i) {
    const unsigned short v = prev[i];
    prev[i - delta] = static_cast<unsigned short>(v > delta ? v - delta : 0);
  }
  memmove(chainDepth + 8, chainDepth + 8 + delta, (0x10000 - 8 - delta) * sizeof(chainDepth[0]));
  totalShift += delta;
}

View File

@@ -0,0 +1,125 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef PREFLATE_HASH_CHAIN_H
#define PREFLATE_HASH_CHAIN_H
#include <algorithm>
#include "preflate_input.h"
// Walks a hash chain backwards from a start node.
//
// `chain` maps a node to its predecessor and `chainDepth` holds the depth of
// each node. The iterator is valid while the distance between `refPos` and
// the current node does not exceed `maxDist`; after a step it additionally
// becomes invalid when the chain reaches node 0.
struct PreflateHashIterator {
  const unsigned short* chain;
  const unsigned * chainDepth;
  const unsigned refPos;
  const unsigned maxDist;
  unsigned curPos;
  unsigned curDist;
  bool isValid;

  PreflateHashIterator(
      const unsigned short* chain_,
      const unsigned * depth_,
      const unsigned refPos_,
      const unsigned maxDist_,
      unsigned startPos_)
    : chain(chain_)
    , chainDepth(depth_)
    , refPos(refPos_)
    , maxDist(maxDist_)
    , curPos(startPos_)
    , curDist(refPos_ - startPos_)
    , isValid(refPos_ - startPos_ <= maxDist_) {}

  // True while the current node is within reach.
  inline bool valid() const {
    return isValid;
  }
  // True once the iterator has run out of usable nodes.
  inline bool operator !() const {
    return !valid();
  }

  // Unsigned distance from p2 up to p1 (wraps if p2 > p1).
  static inline unsigned dist(const unsigned p1, const unsigned p2) {
    return p1 - p2;
  }
  // Distance from the reference position to the current node.
  inline unsigned dist() const {
    return curDist;
  }
  // Chain depth recorded for the current node.
  inline unsigned depth() const {
    return chainDepth[curPos];
  }

  // Steps to the predecessor node; returns whether it is still usable.
  inline bool next() {
    const unsigned following = chain[curPos];
    curPos = following;
    curDist = dist(refPos, following);
    isValid = !(following == 0 || curDist > maxDist);
    return isValid;
  }
};
// Hash chain over an input buffer, with a per-node chain depth.
//
// `head` maps a masked hash value to the most recent node, `prev` links a node
// to its predecessor and `chainDepth` stores each node's depth. Node indices
// are input positions minus `totalShift`.
//
// The three tables are allocated with new[] in the constructor and released
// in the destructor, so the object must not be copied: a copy would share the
// pointers and free them twice. Copying is therefore disabled.
struct PreflateHashChainExt {
  PreflateInput _input;
  unsigned short* head;
  unsigned * chainDepth;
  unsigned short* prev;
  unsigned char hashBits, hashShift;
  unsigned short runningHash, hashMask;
  unsigned totalShift;

  PreflateHashChainExt(const std::vector<unsigned char>& input_, const unsigned char memLevel);
  ~PreflateHashChainExt();

  // Owning raw pointers: forbid copies to rule out double deletion.
  PreflateHashChainExt(const PreflateHashChainExt&) = delete;
  PreflateHashChainExt& operator=(const PreflateHashChainExt&) = delete;

  // Running hash after appending byte `b` (not masked, state unchanged).
  unsigned nextHash(const unsigned char b) const {
    return ((runningHash << hashShift) ^ b);
  }
  // Running hash after appending `b1` and then `b2` (not masked).
  unsigned nextHash(const unsigned char b1, const unsigned char b2) const {
    return ((((runningHash << hashShift) ^ b1) << hashShift) ^ b2);
  }
  // Folds byte `b` into the stored running hash.
  void updateRunningHash(const unsigned char b) {
    runningHash = (runningHash << hashShift) ^ b;
  }
  // Slides the tables down and increases totalShift accordingly.
  void reshift();

  // Most recent node for `hash` (the hash is masked here).
  unsigned getHead(const unsigned hash) const {
    return head[hash & hashMask];
  }
  // Chain depth stored for `node`.
  unsigned getNodeDepth(const unsigned node) const {
    return chainDepth[node];
  }
  // Depth of node `head` relative to the node at input position `refPos`.
  unsigned getRelPosDepth(const unsigned refPos, const unsigned head) const {
    return chainDepth[head] - chainDepth[refPos - totalShift];
  }

  // Iterators over the chain; `refPos` and `pos` are input positions and are
  // translated to node indices here, `node` is used as given.
  PreflateHashIterator iterateFromHead(const unsigned hash, const unsigned refPos, const unsigned maxDist) const {
    return PreflateHashIterator(prev, chainDepth, refPos - totalShift, maxDist, head[hash & hashMask]);
  }
  PreflateHashIterator iterateFromNode(const unsigned node, const unsigned refPos, const unsigned maxDist) const {
    return PreflateHashIterator(prev, chainDepth, refPos - totalShift, maxDist, node);
  }
  PreflateHashIterator iterateFromPos(const unsigned pos, const unsigned refPos, const unsigned maxDist) const {
    return PreflateHashIterator(prev, chainDepth, refPos - totalShift, maxDist, pos - totalShift);
  }

  const PreflateInput& input() const {
    return _input;
  }
  // Hash for the current input position (uses the byte at offset 2).
  unsigned curHash() const {
    return nextHash(_input.curChar(2));
  }
  // Hash for the position after the current one (bytes at offsets 2 and 3).
  unsigned curPlus1Hash() const {
    return nextHash(_input.curChar(2), _input.curChar(3));
  }

  // Inserts the next `l` positions into the chain and advances the input.
  void updateHash(const unsigned l);
  // NOTE(review): no definition of updateHashLong is visible next to
  // updateHash/skipHash -- confirm it exists before calling it.
  void updateHashLong(const unsigned l);
  // Advances the input by `l`, inserting only the first position.
  void skipHash(const unsigned l);

private:
  // NOTE(review): declared only; no definition visible alongside the others.
  void _updateHashSimple(const unsigned l);
};
#endif /* PREFLATE_HASH_CHAIN_H */

View File

@@ -0,0 +1,56 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include <algorithm>
#include <string.h>
#include "preflate_info.h"
// -----------------------------------------
// Gathers summary statistics over the token blocks of a deflate stream.
//
// Stored blocks are only counted. Every other block contributes its tokens to
// the literal/reference counts (a token of length 1 is a literal) and its
// largest reference distance to `maxDist`. A block whose largest distance is
// 0 is counted as a Huffman-only block, one whose largest distance is 1 as an
// RLE block.
//
// Uses (std::max) rather than the unqualified `max`, which only resolves
// through the <windows.h> min/max macros.
PreflateStreamInfo extractPreflateInfo(const std::vector<PreflateTokenBlock>& blocks) {
  PreflateStreamInfo result = {};  // all counters start at zero
  result.countBlocks = static_cast<unsigned>(blocks.size());

  for (unsigned i = 0, n = result.countBlocks; i < n; ++i) {
    const PreflateTokenBlock& b = blocks[i];
    if (b.type == PreflateTokenBlock::STORED) {
      result.countStoredBlocks++;
      continue;
    }
    if (b.type == PreflateTokenBlock::STATIC_HUFF) {
      result.countStaticHuffTreeBlocks++;
    }

    const unsigned blockTokens = static_cast<unsigned>(b.tokens.size());
    result.tokenCount += blockTokens;
    result.maxTokensPerBlock = (std::max)(result.maxTokensPerBlock, blockTokens);

    unsigned blockMaxDist = 0;
    for (unsigned j = 0; j < blockTokens; ++j) {
      const PreflateToken& t = b.tokens[j];
      if (t.len == 1) {
        result.literalCount++;
      } else {
        result.referenceCount++;
        blockMaxDist = (std::max)(blockMaxDist, static_cast<unsigned>(t.dist));
      }
    }

    result.maxDist = (std::max)(result.maxDist, blockMaxDist);
    if (blockMaxDist == 0) {
      result.countHuffBlocks++;
    } else if (blockMaxDist == 1) {
      result.countRLEBlocks++;
    }
  }
  return result;
}

View File

@@ -0,0 +1,35 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef PREFLATE_INFO_H
#define PREFLATE_INFO_H
#include "preflate_token.h"
// Summary statistics of a token stream, as filled in by extractPreflateInfo().
// Kept as a plain aggregate: extractPreflateInfo() zeroes it with memset.
struct PreflateStreamInfo {
// Tokens in all non-stored blocks.
unsigned tokenCount;
// Tokens with length 1.
unsigned literalCount;
// Tokens with any other length.
unsigned referenceCount;
// Largest reference distance over all non-stored blocks.
unsigned maxDist;
// Largest token count of any single non-stored block.
unsigned maxTokensPerBlock;
// Total number of blocks, stored ones included.
unsigned countBlocks;
// Blocks of type STORED.
unsigned countStoredBlocks;
// Non-stored blocks whose largest reference distance is 0.
unsigned countHuffBlocks;
// Non-stored blocks whose largest reference distance is 1.
unsigned countRLEBlocks;
// Blocks of type STATIC_HUFF.
unsigned countStaticHuffTreeBlocks;
};
// Computes the PreflateStreamInfo statistics for the given token blocks.
PreflateStreamInfo extractPreflateInfo(const std::vector<PreflateTokenBlock>& blocks);
#endif /* PREFLATE_INFO_H */

View File

@@ -0,0 +1,53 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef PREFLATE_INPUT_H
#define PREFLATE_INPUT_H
#include <vector>
// Read cursor over a byte vector.
//
// Only a pointer to the vector's storage is kept (nullptr for an empty
// vector); the bytes are not copied. No bounds checking is performed by any
// accessor.
class PreflateInput {
public:
  PreflateInput(const std::vector<unsigned char>& v)
    : _data(v.empty() ? nullptr : &v.front())
    , _size(v.size())
    , _pos(0) {}

  // Current read position.
  const unsigned pos() const {
    return _pos;
  }
  // Total number of bytes.
  const unsigned size() const {
    return _size;
  }
  // Number of bytes from the current position to the end.
  const unsigned remaining() const {
    return _size - _pos;
  }

  // Pointer to the byte `offset` positions away from the cursor.
  const unsigned char* curChars(int offset = 0) const {
    return cursor() + offset;
  }
  // Byte `offset` positions away from the cursor.
  const unsigned char curChar(int offset = 0) const {
    return *(_data + (_pos + offset));
  }

  // Moves the cursor forward by `l` bytes.
  void advance(const unsigned l) {
    _pos += l;
  }

private:
  const unsigned char* cursor() const {
    return _data + _pos;
  }

  const unsigned char* _data;
  unsigned _size;
  unsigned _pos;
};
#endif /* PREFLATE_INPUT_H */

View File

@@ -0,0 +1,83 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include <algorithm>
#include "preflate_complevel_estimator.h"
#include "preflate_constants.h"
#include "preflate_info.h"
#include "preflate_parameter_estimator.h"
#include "preflate_token_predictor.h"
#include "support/bit_helper.h"
// Derives a mem level from the largest number of tokens seen in one block.
//
// The bit length of `maxBlockSize_` is clamped to [7, 15] and reduced by 6,
// so the result lies in [1, 9].
//
// Uses (std::min)/(std::max) rather than the unqualified names, which only
// resolve through the <windows.h> min/max macros.
unsigned char estimatePreflateMemLevel(const unsigned maxBlockSize_) {
  unsigned mbits = 0;
  for (unsigned rest = maxBlockSize_; rest > 0; rest >>= 1) {
    ++mbits;
  }
  mbits = (std::min)((std::max)(mbits, 7u), 15u);
  return static_cast<unsigned char>(mbits - 6);
}
// Derives the window bits from the largest reference distance.
//
// MIN_LOOKAHEAD is added to the distance, the bit length of that sum minus
// one is taken, and the result is clamped to [9, 15].
//
// Uses (std::min)/(std::max) rather than the unqualified names, which only
// resolve through the <windows.h> min/max macros.
unsigned char estimatePreflateWindowBits(const unsigned maxDist_) {
  const unsigned reach = maxDist_ + PreflateConstants::MIN_LOOKAHEAD;
  const unsigned wbits = bitLength(reach - 1);
  return static_cast<unsigned char>((std::min)((std::max)(wbits, 9u), 15u));
}
// Picks the strategy implied by the block statistics: a strategy is chosen
// only when every block falls into its category, checked in the order
// stored, Huffman-only, RLE-only; anything else is the default strategy.
PreflateStrategy estimatePreflateStrategy(const PreflateStreamInfo& info) {
  const unsigned total = info.countBlocks;
  if (total == info.countStoredBlocks) {
    return PREFLATE_STORE;
  }
  if (total == info.countHuffBlocks) {
    return PREFLATE_HUFF_ONLY;
  }
  return total == info.countRLEBlocks ? PREFLATE_RLE_ONLY : PREFLATE_DEFAULT;
}
// Classifies the Huffman tree usage: static when every block uses the static
// tree, dynamic when none does, mixed otherwise. The "all blocks" test is
// made first.
PreflateHuffStrategy estimatePreflateHuffStrategy(const PreflateStreamInfo& info) {
  const unsigned staticBlocks = info.countStaticHuffTreeBlocks;
  if (staticBlocks == info.countBlocks) {
    return PREFLATE_HUFF_STATIC;
  }
  return staticBlocks == 0 ? PREFLATE_HUFF_DYNAMIC : PREFLATE_HUFF_MIXED;
}
// Estimates the full parameter set for a decoded stream.
//
// Strategy, Huffman strategy, window bits and mem level come from the block
// statistics; the remaining fields are taken from the compression level
// estimation, which is run with the estimated window bits and mem level.
PreflateParameters estimatePreflateParameters(const std::vector<unsigned char>& unpacked_output,
                                              const size_t off0,
                                              const std::vector<PreflateTokenBlock>& blocks) {
  const PreflateStreamInfo info = extractPreflateInfo(blocks);

  PreflateParameters params;
  params.strategy = estimatePreflateStrategy(info);
  params.huffStrategy = estimatePreflateHuffStrategy(info);
  params.windowBits = estimatePreflateWindowBits(info.maxDist);
  params.memLevel = estimatePreflateMemLevel(info.maxTokensPerBlock);

  PreflateCompLevelInfo cl = estimatePreflateCompLevel(params.windowBits, params.memLevel,
                                                       unpacked_output, off0, blocks, false);
  params.compLevel = cl.recommendedCompressionLevel;
  params.zlibCompatible = cl.zlibCompatible;
  params.farLen3MatchesDetected = cl.farLen3Matches;
  params.veryFarMatchesDetected = cl.veryFarMatches;
  params.matchesToStartDetected = cl.matchToStart;

  // A maximal chain depth of 0 is stored as 0.
  params.log2OfMaxChainDepthM1 = 0;
  if (cl.maxChainDepth != 0) {
    params.log2OfMaxChainDepthM1 = bitLength(cl.maxChainDepth - 1);
  }
  return params;
}

View File

@@ -0,0 +1,110 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef PREFLATE_PARAMETER_ESTIMATOR_H
#define PREFLATE_PARAMETER_ESTIMATOR_H
/* deflate has four parameters:
* - strategy: the strategy can usually be guessed by looking at the given deflate stream
* (e.g. only stored blocks -> stored,
* max distance = 0 -> huffman-only,
* max distance = 1 -> rle,
* only fixed huffman trees -> fixed-huffman-tree,
* otherwise default)
* - window bits: known by max distance, less window bits would be impossible, more window
* bits would be pointless
* - mem level: used for hash calculation and number of tokens per block
* the latter can be used to put a lower limit on mem level
* - compression level: parameters for the reference finder
*
* When reencoding a deflate stream, the predictor has to make a token proposal (either to
* encode a literal or a (dist, len) pair). A correction data stream will either accept the
* proposal, or change it to the correct values. The corrected values are then fed to the
* deflate encoder, and to the predictor.
*
* The main problem is to find the missing deflate parameters (compression level and
* mem level) to minimize the number and complexity of required corrections.
* Data streams that were encoded with zlib should get perfect recognition,
* requiring only the detected deflate parameters to be encoded for perfect reconstruction.
* Data streams from other encoders (7zip, kzip, ...) should be reconstructible with minimal
* corrective instructions, similar to reflate.
*
* kzip does not limit block size to < 64k tokens, while zlib enforces it for various reasons
* (and defaults to max 16k tokens).
* Prediction for end-of-block is therefore independent of literal/reference prediction.
*
* Mixing or interpolating the prediction from different parameter packs is
* possible, but not planned right now.
*/
#include "preflate_info.h"
#include "preflate_parser_config.h"
#include "preflate_token.h"
// Overall compression strategy attributed to a deflate stream.
enum PreflateStrategy {
  PREFLATE_DEFAULT   = 0,
  PREFLATE_RLE_ONLY  = 1,
  PREFLATE_HUFF_ONLY = 2,
  PREFLATE_STORE     = 3
};
// Huffman tree usage of a stream, as returned by
// estimatePreflateHuffStrategy() (declared below).
// NOTE(review): the per-value meanings are inferred from the names only
// (dynamic trees / a mix of both / static i.e. fixed trees) -- confirm
// against the estimator implementation.
enum PreflateHuffStrategy {
PREFLATE_HUFF_DYNAMIC,
PREFLATE_HUFF_MIXED,
PREFLATE_HUFF_STATIC,
};
// Bundle of deflate encoder parameters plus a few flags describing
// behaviour detected in a stream. config() maps the compression level
// to the matching entry of the parser settings tables.
struct PreflateParameters {
  PreflateStrategy strategy;
  PreflateHuffStrategy huffStrategy;
  bool zlibCompatible;
  unsigned char windowBits;
  unsigned char memLevel;
  unsigned char compLevel;

  // Set when length-3 matches with a distance above 4096 are allowed
  // (zlib level 4 and up never emits those).
  bool farLen3MatchesDetected;

  // Set when matches with a distance >= 32768 - (MAX_MATCH + MIN_MATCH + 1)
  // are allowed (or > that bound for the first node of the hash chain);
  // zlib never emits those.
  bool veryFarMatchesDetected;

  // Set when matches reaching back to the very start of the stream are
  // allowed; zlib never emits those.
  bool matchesToStartDetected;

  // log2 of (maximal found chain depth - 1); e.g. depths 9..16 are stored as 3.
  unsigned char log2OfMaxChainDepthM1;

  // Levels 1..3 use the "fast" parser settings table.
  bool isFastCompressor() const {
    return !(compLevel < 1 || compLevel > 3);
  }

  // Levels 4..9 use the "slow" (lazy matching) parser settings table.
  bool isSlowCompressor() const {
    return !(compLevel < 4 || compLevel > 9);
  }

  // Returns the parser settings for compLevel.
  // Any level outside 1..9 falls back to the last slow entry (index 5).
  const PreflateParserConfig& config() const {
    if (isFastCompressor()) {
      return fastPreflateParserSettings[compLevel - 1];
    }
    const int slowIndex = isSlowCompressor() ? compLevel - 4 : 5;
    return slowPreflateParserSettings[slowIndex];
  }
};
// Estimators for the individual deflate parameters described in the header
// comment of this file. Only the declarations are visible here.
// NOTE(review): the short descriptions below are derived from the names and
// parameter names -- confirm against the implementations.

// Estimates the mem level from the largest block size seen.
unsigned char estimatePreflateMemLevel(const unsigned maxBlockSize);
// Estimates the matching strategy from collected stream statistics.
PreflateStrategy estimatePreflateStrategy(const PreflateStreamInfo&);
// Estimates the huffman tree strategy from collected stream statistics.
PreflateHuffStrategy estimatePreflateHuffStrategy(const PreflateStreamInfo&);
// Estimates the window bits from the largest match distance seen.
unsigned char estimatePreflateWindowBits(const unsigned maxDist);
// Estimates the complete parameter set for the given token blocks;
// off0 is presumably the offset of the first block's data inside
// unpacked_output -- TODO confirm.
PreflateParameters estimatePreflateParameters(const std::vector<unsigned char>& unpacked_output,
const size_t off0,
const std::vector<PreflateTokenBlock>& blocks);
#endif /* PREFLATE_PARAMETER_ESTIMATOR_H */

View File

@@ -0,0 +1,35 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include "preflate_parser_config.h"
#include <algorithm>
// -----------------------------------------
/* good lazy nice chain */
// Parser settings for compression levels 1..3 (index = level - 1).
// Each row initializes a PreflateParserConfig in member order:
// {good_length, max_lazy, nice_length, max_chain}.
const PreflateParserConfig fastPreflateParserSettings[3] = {
/* 1 */ {4, 4, 8, 4}, /* max speed, no lazy matches */
/* 2 */ {4, 5, 16, 8},
/* 3 */ {4, 6, 32, 32},
};
// Parser settings for compression levels 4..9 (index = level - 4).
// Each row initializes a PreflateParserConfig in member order:
// {good_length, max_lazy, nice_length, max_chain}.
const PreflateParserConfig slowPreflateParserSettings[6] = {
/* 4 */ {4, 4, 16, 16}, /* lazy matches */
/* 5 */ {8, 16, 32, 32},
/* 6 */ {8, 16, 128, 128},
/* 7 */ {8, 32, 128, 256},
/* 8 */ {32, 128, 258, 1024},
/* 9 */ {32, 258, 258, 4096}, /* max compression */
};

View File

@@ -0,0 +1,35 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef PREFLATE_PARSER_CONFIG
#define PREFLATE_PARSER_CONFIG
/* Values for max_lazy_match, good_match and max_chain_length, depending on
* the desired pack level (0..9). The values given below have been tuned to
* exclude worst case performance for pathological files. Better values may be
* found for specific files.
*/
// One row of match-finder tuning values. The member order matters: the
// settings tables initialize this struct with positional brace lists.
struct PreflateParserConfig {
unsigned char good_length; /* reduce lazy search above this match length */
unsigned short max_lazy; /* do not perform lazy search above this match length */
unsigned short nice_length; /* quit search above this match length */
unsigned short max_chain; /* maximum number of hash chain entries to visit */
};
// Settings tables: three "fast" entries and six "slow" entries.
// NOTE(review): presumably indexed by (level - 1) and (level - 4)
// respectively -- confirm against the table definitions.
extern const PreflateParserConfig fastPreflateParserSettings[3];
extern const PreflateParserConfig slowPreflateParserSettings[6];
#endif
/* PREFLATE_PARSER_CONFIG */

View File

@@ -0,0 +1,449 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include "preflate_constants.h"
#include "preflate_predictor_state.h"
#include <algorithm>
// Binds the predictor to its hash chain, sequence chain and parser
// settings, and derives the window size (1 << wbits bytes) and the
// per-block token limit ((1 << (6 + mbits)) - 1) from the given bit counts.
PreflatePredictorState::PreflatePredictorState(
    const PreflateHashChainExt& hash_,
    const PreflateSeqChain& seq_,
    const PreflateParserConfig& config_,
    const int wbits,
    const int mbits)
  : hash(hash_),
    seq(seq_),
    windowBytes(static_cast<unsigned short>(1 << wbits)),
    maxTokenCount(static_cast<unsigned>((1 << (6 + mbits)) - 1)),
    config(config_) {}
/* deflate has four parameters:
* - strategy: the strategy can usually be guessed by looking at the given deflate stream
* (e.g. only stored blocks -> stored,
* max distance = 0 -> huffman-only,
* max distance = 1 -> rle,
* only fixed huffman trees -> fixed-huffman-tree,
* otherwise default)
* - window bits: known by max distance, less window bits would be impossible, more window
* bits would be pointless
* - mem level: used for hash calculation and number of tokens per block
* the latter can be used to put a lower limit on mem level
* - compression level: parameters for the reference finder
*
* When reencoding a deflate stream, the predictor has to make a token proposal (either to
* encode a literal or a (dist, len) pair). A correction data stream will either accept the
* proposal, or change it to the correct values. The corrected values are then fed to the
* deflate encoder, and to the predictor.
*
* The main problem is to find the missing deflate parameters (compression level and
* mem level) to minimize the number and complexity of required corrections.
* Data streams that were encoded with zlib should get perfect recognition,
* requiring only the detected deflate parameters to be encoded for perfect reconstruction.
* Data streams from other encoders (7zip, kzip, ...) should be reconstructible with minimal
* corrective instructions, similar to reflate.
*
* kzip does not limit block size to < 64k tokens, while zlib enforces it for various reasons
* (and defaults to max 16k tokens).
* Prediction for end-of-block is therefore independent of literal/reference prediction.
*
* Mixing or interpolating the prediction from different parameter packs is
* possible, but not planned right now.
*/
// Returns the length of the common prefix of s1 and s2, or 0 when the
// candidate cannot beat bestLen.
//   s1, s2   the two byte sequences to compare
//   bestLen  length already achieved; the bytes at this index are checked
//            first so that hopeless candidates are rejected quickly
//   maxLen   upper bound for the compared length
// The first three bytes are always compared, so a non-zero result is never
// smaller than 3, even if maxLen is.
unsigned PreflatePredictorState::prefixCompare(
    const unsigned char* s1,
    const unsigned char* s2,
    const unsigned bestLen,
    const unsigned maxLen) {
  // Quick rejection: a longer match must agree at index bestLen.
  if (s1[bestLen] != s2[bestLen]) {
    return 0;
  }
  // A usable match needs at least three equal leading bytes.
  if (s1[0] != s2[0] || s1[1] != s2[1] || s1[2] != s2[2]) {
    return 0;
  }
  // Extend byte by byte from index 3 up to the limit.
  unsigned matched = 3;
  while (matched < maxLen && s2[matched] == s1[matched]) {
    ++matched;
  }
  return matched;
}
// Counts how many leading bytes of s1 and s2 are equal, or returns 0 when
// the bytes at index bestLen already differ.
// The first byte pair is compared before maxLen is consulted, so the
// function reads at least one byte from each input and can return 1 even
// when maxLen is 0.
unsigned PreflatePredictorState::suffixCompare(
    const unsigned char* s1,
    const unsigned char* s2,
    const unsigned bestLen,
    const unsigned maxLen) {
  // Quick rejection: a longer match must agree at index bestLen.
  if (s1[bestLen] != s2[bestLen]) {
    return 0;
  }
  unsigned matched = 0;
  for (;;) {
    if (s1[matched] != s2[matched]) {
      break;
    }
    ++matched;
    if (matched >= maxLen) {
      break;
    }
  }
  return matched;
}
// Fills `helper` with the limits for a match search starting at startPos.
//   prevLen         length that a new match has to exceed
//   veryFarMatches  allow distances up to the full window size
//   matchesToStart  allow a match to reach position 0 of the input
//   maxDepth        if > 0: explicit chain depth, and no "nice length" cut-off
// Returns false (with only helper.maxLen written) when the remaining input
// is too short for a match longer than prevLen.
bool PreflatePredictorState::createMatchHelper(
    MatchHelper& helper,
    const unsigned prevLen,
    const unsigned startPos,
    const bool veryFarMatches,
    const bool matchesToStart,
    const unsigned maxDepth) {
  const unsigned bytesLeft = totalInputSize() - startPos;
  helper.maxLen = min(bytesLeft, (unsigned)PreflateConstants::MAX_MATCH);
  const unsigned shortestUsefulLen
    = std::max<uint32_t>(prevLen + 1, PreflateConstants::MIN_MATCH);
  if (helper.maxLen < shortestUsefulLen) {
    return false;
  }
  helper.startPos = startPos;

  // Distance limits: hop 0 (head of the chain) may reach one byte further
  // than the later hops unless very far matches are allowed.
  const unsigned distToStart = matchesToStart ? startPos : startPos - 1;
  unsigned hop0Limit = windowSize();
  unsigned hop1PlusLimit = hop0Limit;
  if (!veryFarMatches) {
    hop0Limit = windowSize() - PreflateConstants::MIN_LOOKAHEAD;
    hop1PlusLimit = hop0Limit - 1;
  }
  helper.curMaxDistHop0 = min(distToStart, hop0Limit);
  helper.curMaxDistHop1Plus = min(distToStart, hop1PlusLimit);

  if (maxDepth > 0) {
    // Explicit depth: search that deep and accept only the longest match.
    helper.maxChain = maxDepth;
    helper.niceLen = helper.maxLen;
    return true;
  }

  // Configured limits; a good previous match quarters the chain budget.
  helper.maxChain = maxChainLength();
  if (prevLen >= goodMatchLength()) {
    helper.maxChain >>= 2;
  }
  helper.niceLen = min(niceMatchLength(), helper.maxLen);
  return true;
}
// Walks the hash chain starting at node hashHead and returns the longest
// reference found for the input at (current position + offset).
// Only matches longer than prevLen are reported; otherwise the returned
// token is PreflateToken::NONE. The search ends early once a match of at
// least the "nice" length is found or the chain budget is used up.
PreflateToken PreflatePredictorState::match(
    const unsigned hashHead,
    const unsigned prevLen,
    const unsigned offset,
    const bool veryFarMatches,
    const bool matchesToStart,
    const unsigned maxDepth) {
  PreflateToken best(PreflateToken::NONE);
  MatchHelper h;
  const unsigned searchPos = currentInputPos() + offset;
  if (!createMatchHelper(h, prevLen, searchPos,
                         veryFarMatches, matchesToStart, maxDepth)) {
    return best;
  }

  PreflateHashIterator chainIt = iterateFromNode(hashHead, h.startPos, h.curMaxDistHop1Plus);
  // Handle ZLIB quirk: the very first entry in the hash chain can have a
  // larger distance than all following entries
  if (chainIt.dist() > h.curMaxDistHop0) {
    return best;
  }

  const unsigned char* const cursor = inputCursor() + offset;
  unsigned lenToBeat = prevLen;
  unsigned chainBudget = h.maxChain;
  for (;;) {
    const unsigned char* const candidate = cursor - chainIt.dist();
    const unsigned candidateLen = prefixCompare(candidate, cursor, lenToBeat, h.maxLen);
    if (candidateLen > lenToBeat) {
      lenToBeat = candidateLen;
      best = PreflateToken(PreflateToken::REFERENCE, candidateLen, chainIt.dist());
      if (lenToBeat >= h.niceLen) {
        break;
      }
    }
    if (!chainIt.next()) {
      break;
    }
    // The budget counts visited entries; stop once the last one is spent.
    if (chainBudget-- <= 1) {
      break;
    }
  }
  return best;
}
// Variant of match() that walks the sequence chain `seq` for startPos and
// only consults the hash chain (via hash.getRelPosDepth) to find out how
// deep a candidate would sit in the chain that starts at hashHead.
// Returns the best reference longer than prevLen, or a PreflateToken::NONE
// token if there is none.
// NOTE(review): the sequence chain presumably links runs of identical
// bytes -- confirm against PreflateSeqChain.
PreflateToken PreflatePredictorState::seqMatch(
const unsigned startPos,
const unsigned hashHead,
const unsigned prevLen,
const bool veryFarMatches,
const bool matchesToStart,
const unsigned maxDepth) {
PreflateToken bestMatch(PreflateToken::NONE);
MatchHelper h;
if (!createMatchHelper(h, prevLen, startPos,
veryFarMatches, matchesToStart, maxDepth)) {
return bestMatch;
}
PreflateSeqIterator chainIt = seq.iterateFromPos(startPos);
if (!chainIt) {
// No sequence chain entry for startPos: nothing to match against.
return bestMatch;
}
// Sequence length recorded for startPos, capped at the longest possible match.
unsigned curSeqLen = std::min<uint32_t>(seq.len(startPos), h.maxLen);
// Distance limit for the candidate currently examined; it is reset to the
// hop-1+ limit after the first candidate (see the end of the loop below).
unsigned curMaxDist = h.curMaxDistHop1Plus;
unsigned bestLen = prevLen;
if (curSeqLen < PreflateConstants::MIN_MATCH) {
// startPos is part of a bigger sequence,
// and the ZLIB quirk does not apply, yeah!
curSeqLen = min(chainIt.len() - chainIt.dist(), h.maxLen);
// The rest of the enclosing sequence is proposed as a distance-1 reference.
if (curSeqLen > prevLen && 1 <= h.curMaxDistHop0) {
bestLen = curSeqLen;
bestMatch = PreflateToken(PreflateToken::REFERENCE, curSeqLen, 1);
}
if (bestLen >= h.niceLen || !chainIt.next()) {
return bestMatch;
}
// The next chain entry is out of range even for its last MIN_MATCH bytes.
if (chainIt.dist() > h.curMaxDistHop1Plus + chainIt.len() - PreflateConstants::MIN_MATCH) {
return bestMatch;
}
} else {
// Slack by which the entry's distance may exceed the limit while its
// last MIN_MATCH bytes are still within range.
unsigned minDistOff = chainIt.len() - PreflateConstants::MIN_MATCH;
if (chainIt.dist() > h.curMaxDistHop1Plus + minDistOff) {
if (chainIt.dist() > h.curMaxDistHop0 + minDistOff) {
return bestMatch;
}
// Handle ZLIB quirk: the very first entry in the hash chain can have a larger
// distance than all following entries
unsigned latestPos = h.startPos - chainIt.dist() + minDistOff;
unsigned depth = hash.getRelPosDepth(latestPos, hashHead);
if (depth == 0) {
curMaxDist = h.curMaxDistHop0;
}
}
}
// startPos may differ from the current input position; rebase the cursor.
const unsigned char* input = inputCursor() + startPos - currentInputPos();
unsigned bestSeqLen = min(curSeqLen, bestLen);
do {
if (chainIt.len() < bestSeqLen) {
// If we do not even meet the already matched number of sequence bytes,
// we can just skip this
// (`continue` in a do/while jumps to the loop condition, so this still
// advances the iterator via chainIt.next())
continue;
}
unsigned oldBestSeqLen = bestSeqLen;
bestSeqLen = std::min<uint32_t>(std::min<uint32_t>(curSeqLen, chainIt.len()), h.niceLen);
// Distance at which exactly bestSeqLen bytes of the candidate sequence remain.
unsigned bestDist = chainIt.dist() - chainIt.len() + bestSeqLen;
// Number of bytes by which the candidate violates the distance and/or
// chain depth limit; length and distance are both reduced by this amount.
unsigned error = 0;
if (bestDist > curMaxDist) {
// best subsequence is already beyond the search range
error = bestDist - curMaxDist;
if (error > chainIt.len() - PreflateConstants::MIN_MATCH) {
break;
}
}
unsigned bestChainDepth = hash.getRelPosDepth(h.startPos - bestDist + error, hashHead);
if (bestChainDepth >= h.maxChain) {
// best subsequence is already beyond the search range
error += bestChainDepth - h.maxChain + 1;
if (error > chainIt.len() - PreflateConstants::MIN_MATCH) {
break;
}
}
if (error) {
// Accept the shortened match only if it is still an improvement and
// at least MIN_MATCH bytes long.
if (bestSeqLen > std::max<uint32_t>(oldBestSeqLen, PreflateConstants::MIN_MATCH - 1) + error) {
bestMatch = PreflateToken(PreflateToken::REFERENCE, bestSeqLen - error, bestDist - error);
}
// Since we had to correct the length down, we know that
// the comparer cannot find a better match
break;
}
if (bestSeqLen == h.maxLen) {
// The sequence part alone already has the maximal length.
bestMatch = PreflateToken(PreflateToken::REFERENCE, bestSeqLen, bestDist);
break;
} else {
// Extend the match past the sequence part by comparing the following bytes.
const unsigned char* match = input - bestDist;
unsigned matchLength = bestSeqLen + suffixCompare(match + bestSeqLen, input + bestSeqLen, max(bestLen, bestSeqLen) - bestSeqLen, h.maxLen - bestSeqLen);
if (matchLength > bestLen) {
bestLen = matchLength;
bestMatch = PreflateToken(PreflateToken::REFERENCE, matchLength, bestDist);
if (bestLen >= h.niceLen) {
break;
}
}
}
// Only the first examined candidate may use the larger hop-0 limit.
curMaxDist = h.curMaxDistHop1Plus;
} while (chainIt.next());
return bestMatch;
}
// Starting at the chain entry of targetReference, looks further down the
// hash chain for the first entry that yields a longer match than
// targetReference.len.
// On success the result holds that match's length, distance and chain
// depth; otherwise it keeps the "not found" values
// (nextChainDepth = 0xffff, nextLen = 0, nextDist = 0xffff).
// Note: the `hash` parameter shadows the member of the same name.
PreflateNextMatchInfo PreflatePredictorState::nextMatchInfo(
    const unsigned hashHead,
    const PreflateToken& targetReference,
    const PreflateHashChainExt& hash) {
  PreflateNextMatchInfo info;
  info.nextChainDepth = (unsigned short)~0u;
  info.nextLen = 0;
  info.nextDist = 0xffff;

  const unsigned lenLimit = min(availableInputSize(), (unsigned)PreflateConstants::MAX_MATCH);
  if (lenLimit < (unsigned)PreflateConstants::MIN_MATCH) {
    return info;
  }

  // Distance limits: the head of the chain may be one byte further away
  // than any later entry.
  const unsigned laterHopWindow = windowSize() - PreflateConstants::MIN_LOOKAHEAD - 1;
  const unsigned pos = currentInputPos();
  const unsigned laterHopLimit = min(pos - 1, laterHopWindow);
  const unsigned headHopLimit = min(pos - 1, windowSize() - PreflateConstants::MIN_LOOKAHEAD);

  const unsigned char* const cursor = inputCursor();
  const unsigned headDepth = hash.getNodeDepth(hashHead);
  const unsigned chainLimit = maxChainLength();  /* max hash chain length */

  PreflateHashIterator chainIt = hash.iterateFromPos(pos - targetReference.dist, pos, laterHopLimit);
  if (!chainIt.curPos) {
    return info;
  }
  const unsigned distLimit = (hashHead == chainIt.curPos) ? headHopLimit : laterHopLimit;
  if (chainIt.dist() > distLimit) {
    return info;
  }

  // Hops already spent reaching the target entry reduce the remaining budget.
  const unsigned targetDepth = chainIt.depth();
  unsigned hopsLeft = chainLimit - min(headDepth - targetDepth, 0xffffu);
  const unsigned lenToBeat = targetReference.len;
  for (; hopsLeft > 0; --hopsLeft) {
    if (!chainIt.next()) {
      break;
    }
    const unsigned char* const candidate = cursor - chainIt.dist();
    const unsigned candidateLen = prefixCompare(candidate, cursor, lenToBeat, lenLimit);
    if (candidateLen > lenToBeat) {
      info.nextLen = candidateLen;
      info.nextChainDepth = chainLimit - hopsLeft;
      info.nextDist = chainIt.dist();
      break;
    }
  }
  return info;
}
// Walks the hash chain from node hashHead up to the distance of
// targetReference and reports
//   firstMatchDepth      depth of the first entry that matches at least
//                        targetReference.len bytes (0xffff if none),
//   requestedMatchDepth  depth of the entry whose distance equals
//                        targetReference.dist (0xffff if not reached),
//   condensedHops        number of visited entries that match at least
//                        targetReference.len bytes.
// firstMatchDist is not computed by this function; it is set to 0 so that
// the returned struct never carries an indeterminate value.
PreflateRematchInfo PreflatePredictorState::rematchInfo(
    const unsigned hashHead,
    const PreflateToken& targetReference) {
  PreflateRematchInfo result;
  result.firstMatchDepth = 0xffff;
  result.firstMatchDist = 0;
  result.requestedMatchDepth = 0xffff;
  result.condensedHops = 0;
  unsigned maxLen = min(availableInputSize(), (unsigned)PreflateConstants::MAX_MATCH);
  if (maxLen < targetReference.len) {
    return result;
  }
  unsigned maxDist = windowSize();
  unsigned curPos = currentInputPos();
  unsigned curMaxDist = min(curPos, maxDist);
  PreflateHashIterator chainIt = hash.iterateFromNode(hashHead, curPos, curMaxDist);
  if (!chainIt) {
    return result;
  }
  const unsigned char* input = inputCursor();
  unsigned maxChainOrg = 0xffff;   /* max hash chain length */
  unsigned maxChain = maxChainOrg; /* remaining chain budget */
  unsigned bestLen = targetReference.len;
  do {
    const unsigned char* match = input - chainIt.dist();
    // Only the first bestLen bytes matter, so the comparison is capped there.
    unsigned matchLength = prefixCompare(match, input, bestLen - 1, bestLen);
    if (matchLength >= bestLen) {
      result.firstMatchDepth = min((unsigned)result.firstMatchDepth, maxChainOrg - maxChain);
      result.condensedHops++;
    }
    if (chainIt.dist() >= targetReference.dist) {
      // Reached (or passed) the requested distance: the walk ends here.
      if (chainIt.dist() == targetReference.dist) {
        result.requestedMatchDepth = maxChainOrg - maxChain;
      }
      return result;
    }
    chainIt.next();
  } while (!!chainIt && maxChain-- > 1);
  return result;
}
// Returns the distance of the first hash chain entry (for the hash at the
// current input position) that matches at least `len` bytes, or 0 if there
// is no such entry or not enough input left.
unsigned PreflatePredictorState::firstMatch(const unsigned len) {
  const unsigned lenLimit = min(availableInputSize(), (unsigned)PreflateConstants::MAX_MATCH);
  const unsigned requiredLen = max(len, (unsigned)PreflateConstants::MIN_MATCH);
  if (lenLimit < requiredLen) {
    return 0;
  }

  const unsigned pos = currentInputPos();
  const unsigned distLimit = min(pos, windowSize());
  const unsigned hashValue = calculateHash();
  PreflateHashIterator chainIt = iterateFromHead(hashValue, pos, distLimit);
  if (!chainIt) {
    return 0;
  }

  const unsigned char* const cursor = inputCursor();
  for (;;) {
    const unsigned char* const candidate = cursor - chainIt.dist();
    // Only the first `len` bytes matter, so the comparison is capped there.
    if (prefixCompare(candidate, cursor, len - 1, len) >= len) {
      return chainIt.dist();
    }
    if (!chainIt.next()) {
      break;
    }
  }
  return 0;
}
unsigned PreflatePredictorState::hopMatch(const PreflateToken& targetReference, const unsigned hops) {
if (hops == 0) {
return targetReference.dist;
}
unsigned curPos = currentInputPos();
unsigned errorDist = 0;
unsigned maxLen = min(availableInputSize(), (unsigned)PreflateConstants::MAX_MATCH);
if (maxLen < targetReference.len) {
return errorDist;
}
unsigned maxDist = windowSize();
unsigned curMaxDist = min(curPos, maxDist);
PreflateHashIterator chainIt = iterateFromDist(targetReference.dist, curPos, curMaxDist);
if (!chainIt) {
return 0;
}
const unsigned char* input = inputCursor();
unsigned bestLen = targetReference.len;
for (unsigned todo = hops; todo > 0; ) {
if (!chainIt.next()) {
break;
}
const unsigned char* match = input - chainIt.dist();
unsigned matchLength = prefixCompare(match, input - targetReference.dist, bestLen - 1, bestLen);
if (matchLength >= bestLen) {
if (--todo == 0) {
return chainIt.dist();
}
}
}
return errorDist;
}

View File

@@ -0,0 +1,176 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef PREFLATE_PREDICTOR_STATE_H
#define PREFLATE_PREDICTOR_STATE_H
#include <vector>
#include "preflate_input.h"
#include "preflate_hash_chain.h"
#include "preflate_parser_config.h"
#include "preflate_seq_chain.h"
#include "preflate_token.h"
// Fixed-size table of 256 tokens.
// NOTE(review): how the table is indexed is not visible in this header --
// confirm with the code that fills it.
struct PreflatePreviousMatchInfo {
PreflateToken previousMatches[256];
};
// Result type of PreflatePredictorState::nextMatchInfo().
// NOTE(review): member meanings below are taken from the names -- confirm
// against the implementation of nextMatchInfo().
struct PreflateNextMatchInfo {
unsigned short nextChainDepth; // chain depth of the next longer match
unsigned short nextLen; // length of that match
unsigned short nextDist; // distance of that match
};
// Result type of PreflatePredictorState::rematchInfo().
// NOTE(review): member meanings below are taken from the names -- confirm
// against the implementation of rematchInfo().
struct PreflateRematchInfo {
unsigned short firstMatchDepth; // chain depth of the first sufficiently long match
unsigned short firstMatchDist; // presumably the distance of that match -- TODO confirm it is ever set
unsigned short requestedMatchDepth; // chain depth of the requested reference
unsigned short condensedHops; // number of sufficiently long matches seen on the way
};
// Read-only view used by the predictor: combines the hash chain, the
// sequence chain and the parser settings, and offers the match searches
// built on top of them. All three are held by reference, so they have to
// outlive this object.
struct PreflatePredictorState {
const PreflateHashChainExt& hash;
const PreflateSeqChain& seq;
// NOTE(review): the constructor stores 1 << wbits here; with a 16-bit
// unsigned short that only fits for wbits <= 15.
unsigned short windowBytes;
unsigned maxTokenCount;
const PreflateParserConfig& config;
// wbits / mbits: bit counts from which windowBytes and maxTokenCount are
// derived by the constructor.
PreflatePredictorState(const PreflateHashChainExt&,
const PreflateSeqChain&,
const PreflateParserConfig&,
const int wbits,
const int mbits);
// --- accessors forwarding to the input stream of the hash chain ---
unsigned currentInputPos() const {
return hash.input().pos();
}
const unsigned char* inputCursor() const {
return hash.input().curChars();
}
unsigned windowSize() const {
return windowBytes;
}
unsigned totalInputSize() const {
return hash.input().size();
}
unsigned availableInputSize() const {
return hash.input().remaining();
}
// --- accessors forwarding to the parser settings ---
unsigned maxChainLength() const {
return config.max_chain;
}
unsigned niceMatchLength() const {
return config.nice_length;
}
unsigned goodMatchLength() const {
return config.good_length;
}
unsigned lazyMatchLength() const {
return config.max_lazy;
}
// --- accessors forwarding to the hash chain ---
unsigned calculateHash() const {
return hash.curHash();
}
unsigned calculateHashNext() const {
return hash.curPlus1Hash();
}
unsigned getCurrentHashHead(const unsigned hashNext) const {
return hash.getHead(hashNext);
}
PreflateHashIterator iterateFromHead(const unsigned hash_, const unsigned refPos, const unsigned maxDist) const {
return hash.iterateFromHead(hash_, refPos, maxDist);
}
PreflateHashIterator iterateFromNode(const unsigned node_, const unsigned refPos, const unsigned maxDist) const {
return hash.iterateFromNode(node_, refPos, maxDist);
}
// Starts iterating at the position that lies dist_ bytes before refPos.
PreflateHashIterator iterateFromDist(const unsigned dist_, const unsigned refPos, const unsigned maxDist) const {
return hash.iterateFromPos(refPos - dist_, refPos, maxDist);
}
// Byte comparison helpers; both take the already achieved length (bestLen)
// and an upper bound (maxLen) for the comparison.
static unsigned prefixCompare(
const unsigned char* s1,
const unsigned char* s2,
const unsigned bestLen,
const unsigned maxLen);
static unsigned suffixCompare(
const unsigned char* s1,
const unsigned char* s2,
const unsigned bestLen,
const unsigned maxLen);
// NOTE(review): betterMatchPossible() and matchHop0MaxDist() are only
// declared here; confirm that definitions exist before calling them.
bool betterMatchPossible(
const unsigned prevLen,
const unsigned startPos);
PreflateToken matchHop0MaxDist(
const unsigned hashHead,
const unsigned prevLen,
const unsigned offset,
const bool veryFarMatches,
const bool matchesToStart);
// Hash chain based search for a reference longer than prevLen at
// (current position + offset).
PreflateToken match(
const unsigned hashHead,
const unsigned prevLen,
const unsigned offset,
const bool veryFarMatches,
const bool matchesToStart,
const unsigned maxDepth);
// Sequence chain based search for a reference longer than prevLen at startPos.
PreflateToken seqMatch(
const unsigned startPos,
const unsigned hashHead,
const unsigned prevLen,
const bool veryFarMatches,
const bool matchesToStart,
const unsigned maxDepth);
/*
unsigned short matchDepth(const unsigned hashHead, const PreflateToken& targetReference,
const PreflateHashChainExt&);*/
PreflateNextMatchInfo nextMatchInfo(const unsigned hashHead, const PreflateToken& targetReference,
const PreflateHashChainExt&);
PreflateRematchInfo rematchInfo(const unsigned hashHead, const PreflateToken& targetReference);
unsigned firstMatch(const unsigned len);
unsigned hopMatch(const PreflateToken& token, const unsigned hops);
private:
// Limits for one match search, filled in by createMatchHelper().
struct MatchHelper {
unsigned startPos;
unsigned maxLen;
// Largest allowed distance for the first chain entry (hop 0) ...
unsigned curMaxDistHop0;
// ... and for all later entries.
unsigned curMaxDistHop1Plus;
unsigned maxChain;
unsigned niceLen;
bool validHop0Dist(const unsigned d) const {
return d <= curMaxDistHop0;
}
bool validHop1PlusDist(const unsigned d) const {
return d <= curMaxDistHop1Plus;
}
};
// Returns false if no match longer than prevLen fits at startPos.
bool createMatchHelper(
MatchHelper& helper,
const unsigned prevLen,
const unsigned startPos,
const bool veryFarMatches,
const bool matchesToStart,
const unsigned maxDepth);
};
#endif /* PREFLATE_PREDICTOR_STATE_H */

View File

@@ -0,0 +1,205 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include <functional>
#include "preflate_block_reencoder.h"
#include "preflate_reencoder.h"
#include "preflate_statistical_codec.h"
#include "preflate_token_predictor.h"
#include "preflate_tree_predictor.h"
#include "support/bitstream.h"
#include "support/memstream.h"
class PreflateReencoderHandler : public PreflateReencoderTask::Handler {
public:
PreflateReencoderHandler(BitOutputStream& bos_,
const std::vector<uint8_t>& reconData,
const size_t uncompressedSize,
std::function<void(void)> progressCallback_)
: decoder(reconData, uncompressedSize)
, progressCallback(progressCallback_)
, bos(bos_) {}
size_t metaBlockCount() const {
return decoder.metaBlockCount();
}
size_t metaBlockUncompressedSize(const size_t metaBlockId) const {
return decoder.metaBlockUncompressedSize(metaBlockId);
}
bool error() const {
return decoder.error();
}
bool finish() {
decoder.finish();
return !decoder.error();
}
virtual bool beginDecoding(const uint32_t metaBlockId,
PreflatePredictionDecoder& codec, PreflateParameters& params) {
return decoder.beginMetaBlock(codec, params, metaBlockId);
}
virtual bool endDecoding(const uint32_t metaBlockId, PreflatePredictionDecoder& codec,
std::vector<PreflateTokenBlock>&& tokenData,
std::vector<uint8_t>&& uncompressedData,
const size_t uncompressedOffset,
const size_t paddingBitCount,
const size_t paddingValue) {
if (!decoder.endMetaBlock(codec)) {
return false;
}
PreflateBlockReencoder deflater(bos, uncompressedData, uncompressedOffset);
for (size_t j = 0, n = tokenData.size(); j < n; ++j) {
deflater.writeBlock(tokenData[j],
metaBlockId + 1 == decoder.metaBlockCount() && j + 1 == n);
markProgress();
}
bos.put(paddingValue, paddingBitCount);
return true;
}
virtual void markProgress() {
std::unique_lock<std::mutex> lock(this->_mutex);
progressCallback();
}
private:
PreflateMetaDecoder decoder;
std::function<void(void)> progressCallback;
BitOutputStream& bos;
std::mutex _mutex;
};
// Creates the task for one meta block.
//   handler_             receives the begin/end decoding and progress calls;
//                        held by reference and must outlive the task
//   metaBlockId_         index of the meta block handled by this task
//   uncompressedData_    uncompressed bytes; the buffer is taken over (moved
//                        from) instead of being copied
//   uncompressedOffset_  offset inside uncompressedData_ that is passed on to
//                        the predictors and the block reencoder
//   lastMetaBlock_       true for the final meta block of the stream
PreflateReencoderTask::PreflateReencoderTask(PreflateReencoderHandler::Handler& handler_,
                                             const uint32_t metaBlockId_,
                                             std::vector<uint8_t>&& uncompressedData_,
                                             const size_t uncompressedOffset_,
                                             const bool lastMetaBlock_)
  : handler(handler_)
  , metaBlockId(metaBlockId_)
  // A named rvalue reference is an lvalue: without std::move the whole
  // buffer would be copied.
  , uncompressedData(std::move(uncompressedData_))
  , uncompressedOffset(uncompressedOffset_)
  , lastMetaBlock(lastMetaBlock_) {}
// Decodes all token blocks of this task's meta block into tokenData and,
// for the last meta block, the padding bits that follow the final block.
// Returns false if the handler refuses the meta block, a block cannot be
// decoded, or one of the predictors reports a prediction failure.
bool PreflateReencoderTask::decodeAndRepredict() {
  PreflateParameters params;
  if (!handler.beginDecoding(metaBlockId, pcodec, params)) {
    return false;
  }

  PreflateTokenPredictor tokenPredictor(params, uncompressedData, uncompressedOffset);
  PreflateTreePredictor treePredictor(uncompressedData, uncompressedOffset);

  for (;;) {
    PreflateTokenBlock block = tokenPredictor.decodeBlock(&pcodec);
    if (!treePredictor.decodeBlock(block, &pcodec)) {
      return false;
    }
    if (tokenPredictor.predictionFailure || treePredictor.predictionFailure) {
      return false;
    }
    tokenData.push_back(std::move(block));

    // Only the last meta block carries an explicit end-of-stream decision;
    // the others end when the predictor has consumed its input.
    const bool finished = lastMetaBlock ? tokenPredictor.decodeEOF(&pcodec)
                                        : tokenPredictor.inputEOF();
    handler.markProgress();
    if (finished) {
      break;
    }
  }

  paddingBitCount = 0;
  paddingBits = 0;
  if (!lastMetaBlock) {
    return true;
  }
  if (!pcodec.decodeNonZeroPadding()) {
    return true;
  }

  // Non-zero padding: a 3 bit count, then the value without its top bit
  // (the top bit is implied to be set).
  paddingBitCount = pcodec.decodeValue(3);
  if (paddingBitCount > 0) {
    paddingBits = (1 << (paddingBitCount - 1)) + pcodec.decodeValue(paddingBitCount - 1);
  }
  return true;
}
// Hands the decoded tokens, the uncompressed data and the padding
// information of this meta block to the handler, which writes the deflate
// output. The two buffers are passed as rvalues.
bool PreflateReencoderTask::reencode() {
  const bool written = handler.endDecoding(
      metaBlockId,
      pcodec,
      std::move(tokenData),
      std::move(uncompressedData),
      uncompressedOffset,
      paddingBitCount,
      paddingBits);
  return written;
}
// Re-creates a deflate stream from its uncompressed data and the
// reconstruction data produced by the decoder.
//   os              receives the deflate stream
//   preflate_diff   reconstruction data
//   is              source of the uncompressed data, read meta block by meta block
//   unpacked_size   total number of uncompressed bytes
//   block_callback  progress callback
// Returns false if the reconstruction data is invalid, the input is too
// short, or a meta block cannot be decoded or re-encoded. The bit stream is
// only flushed on success.
bool preflate_reencode(OutputStream& os,
                       const std::vector<unsigned char>& preflate_diff,
                       InputStream& is,
                       const uint64_t unpacked_size,
                       std::function<void(void)> block_callback) {
  BitOutputStream bos(os);
  PreflateReencoderHandler decoder(bos, preflate_diff, unpacked_size, block_callback);
  if (decoder.error()) {
    return false;
  }

  // Number of trailing bytes kept as history for the next meta block.
  const size_t historySize = size_t(1) << 15;

  std::vector<uint8_t> uncompressedData;
  for (size_t j = 0, n = decoder.metaBlockCount(); j < n; ++j) {
    const bool isLastMetaBlock = j + 1 == n;

    // Append the data of this meta block behind the kept history.
    const size_t curUncSize = uncompressedData.size();
    const size_t newSize = decoder.metaBlockUncompressedSize(j);
    uncompressedData.resize(curUncSize + newSize);
    if (is.read(uncompressedData.data() + curUncSize, newSize) != newSize) {
      return false;
    }

    // Meta blocks are processed one after the other; the task works on its
    // own copy of the data.
    PreflateReencoderTask task(decoder, j, std::vector<uint8_t>(uncompressedData),
                               curUncSize, isLastMetaBlock);
    if (!task.decodeAndRepredict() || !task.reencode()) {
      return false;
    }

    if (!isLastMetaBlock) {
      // Keep only the last historySize bytes. The number of bytes to drop
      // is computed first so that no iterator beyond end() is ever formed.
      const size_t bufferedSize = uncompressedData.size();
      const size_t dropCount = bufferedSize > historySize ? bufferedSize - historySize : 0;
      uncompressedData.erase(uncompressedData.begin(),
                             uncompressedData.begin() + dropCount);
    }
  }

  bos.flush();
  return !decoder.error();
}
// Convenience overload: wraps the in-memory uncompressed data in a
// MemStream and forwards to the stream based overload.
bool preflate_reencode(OutputStream& os,
                       const std::vector<unsigned char>& preflate_diff,
                       const std::vector<unsigned char>& unpacked_input,
                       std::function<void(void)> block_callback) {
  MemStream unpackedStream(unpacked_input);
  const uint64_t unpackedSize = unpacked_input.size();
  return preflate_reencode(os, preflate_diff, unpackedStream, unpackedSize, block_callback);
}
bool preflate_reencode(std::vector<unsigned char>& deflate_raw,
const std::vector<unsigned char>& preflate_diff,
const std::vector<unsigned char>& unpacked_input) {
MemStream mem;
bool result = preflate_reencode(mem, preflate_diff, unpacked_input, [] {});
deflate_raw = mem.extractData();
return result;
}

View File

@@ -0,0 +1,79 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef PREFLATE_REENCODER_H
#define PREFLATE_REENCODER_H
#include <vector>
#include "preflate_statistical_codec.h"
#include "support/stream.h"
#include "support/task_pool.h"
// Re-encoding work for a single meta block, split into two steps:
// decodeAndRepredict() and reencode().
class PreflateReencoderTask {
public:
// Callback interface through which a task begins and ends the decoding of
// its meta block and reports progress.
class Handler {
public:
virtual ~Handler() {}
// Called before decoding of the meta block starts; a false return makes
// the task give up.
virtual bool beginDecoding(const uint32_t metaBlockId,
PreflatePredictionDecoder&, PreflateParameters&) = 0;
// Called with the decoded tokens, the uncompressed data and the padding
// information of the meta block.
virtual bool endDecoding(const uint32_t metaBlockId, PreflatePredictionDecoder&,
std::vector<PreflateTokenBlock>&& tokenData,
std::vector<uint8_t>&& uncompressedData,
const size_t uncompressedOffset,
const size_t paddingBitCount,
const size_t paddingValue) = 0;
// Called to report progress.
virtual void markProgress() = 0;
};
// The handler is held by reference and has to outlive the task.
PreflateReencoderTask(Handler& handler,
const uint32_t metaBlockId,
std::vector<uint8_t>&& uncompressedData,
const size_t uncompressedOffset,
const bool lastMetaBlock);
// Step 1: decodes the token blocks (and padding) of the meta block.
bool decodeAndRepredict();
// Step 2: passes the results of step 1 to Handler::endDecoding().
bool reencode();
uint32_t id() {
return metaBlockId;
}
private:
Handler& handler;
uint32_t metaBlockId;
std::vector<uint8_t> uncompressedData;
size_t uncompressedOffset;
bool lastMetaBlock;
// Filled by decodeAndRepredict(), consumed by reencode().
std::vector<PreflateTokenBlock> tokenData;
PreflatePredictionDecoder pcodec;
size_t paddingBitCount;
size_t paddingBits;
};
// Re-creates a deflate stream from uncompressed data plus the
// reconstruction data (preflate_diff). All overloads return true on success.

// In-memory variant: the result is stored in deflate_raw.
bool preflate_reencode(std::vector<unsigned char>& deflate_raw,
const std::vector<unsigned char>& preflate_diff,
const std::vector<unsigned char>& unpacked_input);
// Stream variant: reads unpacked_size bytes of uncompressed data from
// unpacked_input and writes the deflate stream to os; block_callback is
// the progress callback.
bool preflate_reencode(OutputStream& os,
const std::vector<unsigned char>& preflate_diff,
InputStream& unpacked_input,
const uint64_t unpacked_size,
std::function<void(void)> block_callback);
// Like the stream variant, but with the uncompressed data held in memory.
bool preflate_reencode(OutputStream& os,
const std::vector<unsigned char>& preflate_diff,
const std::vector<unsigned char>& unpacked_input,
std::function<void(void)> block_callback);
#endif /* PREFLATE_REENCODER_H */

View File

@@ -0,0 +1,140 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include <algorithm>
#include <string.h>
#include "preflate_constants.h"
#include "preflate_seq_chain.h"
// Allocates the 64K-entry chain table, clears the per-byte heads and indexes
// the first window of the input. Table slots start at offset 8, which is why
// totalShift starts out as (unsigned)-8.
PreflateSeqChain::PreflateSeqChain(
    const std::vector<unsigned char>& input_)
    : _input(input_)
    , totalShift(-8)
    , curPos(0) {
    const uint32_t tableEntries = 1 << 16;
    prev = new SeqChainEntry[tableEntries];
    memset(heads, 0x00, sizeof(heads));
    // Index as much input as fits behind the 8 reserved leading slots.
    const uint32_t firstWindow = std::min<uint32_t>(tableEntries - 8, _input.remaining());
    _build(8, firstWindow);
}
// Frees the chain table that the constructor allocated with new[].
PreflateSeqChain::~PreflateSeqChain() {
    delete[] prev;
}
// Slides the chain table forward by `delta` entries (called from updateSeq
// when the cursor gets close to the end of the 64K table): repairs a run that
// straddles the cut, rebases the heads, moves the surviving entries to the
// front of the table and indexes the next stretch of input behind them.
void PreflateSeqChain::_reshift() {
    const unsigned short delta = 0x7e00;
    unsigned remaining = (1 << 16) - (delta + 8);
    // If the head of a large sequence is shifted out,
    // but the tail remains in the cache,
    // we need to adapt the head and all pointers to it,
    // that is all members, the next non-member pointing to it
    // or heads
    if (prev[delta + 8].distToNext != 0xffff && prev[delta + 8].length < PreflateConstants::MIN_MATCH) {
        unsigned d = prev[delta + 8].distToNext;
        prev[delta + 8].distToNext = 0xffff;
        prev[delta + 8].length = prev[delta + 8 - d].length - d;
        for (unsigned i = 3; i < prev[delta + 8].length; ++i) {
            prev[delta + 8 + i - 2].distToNext -= d;
        }
        uint8_t c = *_input.curChars(-(int)remaining);
        if (heads[c] == delta + 8 - d) {
            heads[c] += d;
        } else {
            for (unsigned i = prev[delta + 8].length; i < remaining; ++i) {
                if (prev[delta + 8 + i].distToNext == i + d) {
                    prev[delta + 8 + i].distToNext -= d;
                    break;
                }
            }
        }
    }
    for (unsigned i = 0; i < 256; ++i) {
        // std::max with an explicit template argument instead of the bare
        // max(), which only exists as a <windows.h> macro. The explicit
        // argument also keeps that macro from expanding on this call.
        // Heads that pointed into the discarded part clamp to 0.
        heads[i] = std::max<uint16_t>(heads[i], delta) - delta;
    }
    memmove(prev + 8, prev + (delta + 8), sizeof(SeqChainEntry) * remaining);
    totalShift += delta;
    _build(8 + remaining, std::min<uint32_t>(delta, _input.remaining()));
}
// Indexes `size` input bytes into the chain table, starting at table offset
// off0, and then advances _input by `size`.
// The table records runs of identical bytes: the first entry of a run counts
// the run length in `length`, every further byte of the run stores the
// distance back to that first entry in `distToNext`, and as soon as a run
// reaches three bytes its first entry is linked (via heads[]) to the previous
// run start of the same byte value. 0xffff in distToNext means "no link".
void PreflateSeqChain::_build(const unsigned off0, const unsigned size) {
if (!size) {
return;
}
const unsigned char* b = _input.curChars();
uint8_t curChar = b[0];
// Template for a fresh, unlinked entry: no link, length 0.
SeqChainEntry startOfSeq = {0xffff, 0x0}, *ptrToFirstOfSeq;
unsigned startOff = off0;
prev[off0] = startOfSeq;
// off0 > 8 means earlier input is already indexed, so the bytes before b[0]
// may be looked at to continue a run across the block boundary.
if (off0 > 8 && curChar == b[-1]) {
--startOff;
// new block continues the old
if (curChar == b[-2]) {
--startOff;
// this is definitely a sequence
if (curChar == b[-3]) {
// This was already a sequence in the previous block,
// just append
startOff = heads[curChar];
// The two trailing bytes of the old block get re-linked to the run start
// here (they are the entries the end-of-run handling below unlinks).
prev[off0 - 2].distToNext = off0 - startOff - 2;
prev[off0 - 1].distToNext = off0 - startOff - 1;
prev[off0].distToNext = off0 - startOff;
prev[off0].length = 1;
} else {
// Otherwise enter the sequence in the books
prev[startOff].distToNext = startOff - heads[curChar];
prev[startOff + 1].distToNext = 1;
prev[startOff + 2].distToNext = 2;
prev[startOff + 2].length = 1;
heads[curChar] = startOff;
}
} else {
// Only two equal bytes so far: mark the current byte as member of a run
// that starts one position earlier.
prev[startOff + 1].distToNext = 1;
prev[startOff + 1].length = 1;
}
}
ptrToFirstOfSeq = &prev[startOff];
// Count the current byte in the run it belongs to.
++ptrToFirstOfSeq->length;
uint8_t prevChar = curChar;
for (unsigned i = 1; i < size; ++i) {
curChar = b[i];
if (prevChar == curChar) {
// Run continues; on reaching length 3 it becomes the newest run start
// for this byte value.
if (++ptrToFirstOfSeq->length == 3) {
prev[startOff].distToNext = startOff - heads[prevChar];
heads[prevChar] = startOff;
}
prev[off0 + i].distToNext = off0 + i - startOff;
prev[off0 + i].length = 1;
} else {
// Last two of a sequence are not a sequence themselves
if (ptrToFirstOfSeq->length >= 2) {
if (ptrToFirstOfSeq->length >= 3) {
prev[off0 + i - 2].distToNext = 0xffff;
}
prev[off0 + i - 1].distToNext = 0xffff;
}
// Start a new run at the current position.
prev[off0 + i] = startOfSeq;
startOff = off0 + i;
ptrToFirstOfSeq = &prev[startOff];
++ptrToFirstOfSeq->length;
}
prevChar = curChar;
}
// Last two of a sequence are not a sequence themselves
if (ptrToFirstOfSeq->length >= 2) {
if (ptrToFirstOfSeq->length >= 3) {
prev[off0 + size - 2].distToNext = 0xffff;
}
prev[off0 + size - 1].distToNext = 0xffff;
}
_input.advance(size);
}

View File

@@ -0,0 +1,88 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef PREFLATE_SEQ_CHAIN_H
#define PREFLATE_SEQ_CHAIN_H
#include <algorithm>
#include "preflate_input.h"
// One slot of the PreflateSeqChain table. The field order matters: the
// entries are aggregate-initialised as {distToNext, length}.
struct SeqChainEntry {
// Distance (in table slots) to the linked earlier entry; 0xffff marks
// "no link" (see PreflateSeqChain::valid).
uint16_t distToNext;
// Length counter of a byte run; PreflateSeqChain::_build increments it on
// the first entry of a run and sets it to 1 on the entries that follow.
uint16_t length;
};
struct PreflateSeqIterator {
const SeqChainEntry* chain;
const unsigned refPos;
unsigned curDist;
PreflateSeqIterator(
const SeqChainEntry* chain_,
const unsigned refPos_)
: chain(chain_)
, refPos(refPos_)
, curDist(chain_[refPos_].distToNext) {
}
inline bool valid() const {
return curDist <= refPos - 8;
}
inline bool operator !() const {
return !valid();
}
inline unsigned dist() const {
return curDist;
}
inline uint16_t len() const {
return chain[refPos - curDist].length;
}
inline bool next() {
curDist += chain[refPos - curDist].distToNext;
return valid();
}
};
// Sliding-window index of byte runs in the input. `prev` is a table of
// SeqChainEntry slots allocated by the constructor and released by the
// destructor; absolute input positions are mapped to table slots by
// subtracting totalShift.
struct PreflateSeqChain {
    PreflateInput _input;
    SeqChainEntry* prev;     // owning pointer to the chain table (new[] / delete[])
    unsigned totalShift;     // input position minus table slot
    unsigned curPos;         // current input position, advanced by updateSeq
    uint16_t heads[256];     // per byte value: table slot of the newest run start

    PreflateSeqChain(const std::vector<unsigned char>& input_);
    ~PreflateSeqChain();

    // The struct owns `prev` through a raw pointer, so an implicit copy would
    // make two objects delete[] the same table. Copying is disabled.
    PreflateSeqChain(const PreflateSeqChain&) = delete;
    PreflateSeqChain& operator=(const PreflateSeqChain&) = delete;

    // True if the entry for input position refPos carries a link.
    bool valid(const unsigned refPos) const {
        return prev[refPos - totalShift].distToNext != 0xffff;
    }
    // Length value stored for input position refPos.
    uint16_t len(const unsigned refPos) const {
        return prev[refPos - totalShift].length;
    }
    // Iterator over the chain links starting at input position refPos.
    PreflateSeqIterator iterateFromPos(const unsigned refPos) const {
        return PreflateSeqIterator(prev, refPos - totalShift);
    }
    // Advances the cursor by l bytes, sliding the table whenever the cursor
    // reaches slot 0xfe08 or beyond.
    void updateSeq(const unsigned l) {
        curPos += l;
        while (curPos - totalShift >= 0xfe08) {
            _reshift();
        }
    }
private:
    void _reshift();
    void _build(const unsigned off0, const unsigned size);
};
#endif /* PREFLATE_SEQ_CHAIN_H */

View File

@@ -0,0 +1,795 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include <algorithm>
#include <string.h>
#include "preflate_parameter_estimator.h"
#include "preflate_statistical_codec.h"
#include "preflate_statistical_model.h"
#include "support/array_helper.h"
#include "support/bit_helper.h"
#include <stdint.h>
// Builds the model from N occurrence counts in `arr`.
// Symbols are sorted by ascending count; `ids` maps sorted slot -> symbol,
// `rids` maps symbol -> sorted slot, and `bounds` becomes the cumulative
// distribution over the sorted slots, scaled so that bounds[N] == 1 << 16.
// `defval` is the symbol a default model would assume; it only feeds the
// isDefault flag. `prec` is not referenced in this function.
template <unsigned N>
void PreflateSubModel<N>::build_impl(const unsigned* arr, const unsigned defval, const uint8_t prec) {
if (N == 0) {
isDefault = true;
return;
}
for (unsigned i = 0; i < N; ++i) {
ids[i] = i;
}
// Ascending by count, ties broken by symbol index so the order is stable.
std::sort(ids, ids + N, [=](unsigned i1, unsigned i2) {
if (arr[i1] != arr[i2]) {
return arr[i1] < arr[i2];
}
return i1 < i2;
});
for (unsigned i = 0; i < N; ++i) {
bounds[i] = arr[ids[i]];
rids[ids[i]] = i;
}
// From here on `prev` holds the raw count of the slot being processed while
// bounds[] is overwritten in place with cumulative values.
unsigned sum = sumArray(bounds, N), acc, prev;
prev = bounds[0];
bounds[0] = acc = 0;
for (unsigned i = 0; i < N; ++i) {
if (prev) {
acc += prev;
// NOTE(review): for i == N - 1 this reads bounds[N], which this function
// has not assigned yet; the value is not used after the loop.
prev = bounds[i + 1];
// Width of this slot in 16-bit fixed point, relative to the (already
// quantised) lower bound.
int diff = (((uint64_t)acc) << 16) / sum - bounds[i];
unsigned diff_bits = bitLength(diff);
// Keep only the k most significant bits of the width; encodeProb()
// transmits widths with that many significant bits.
const unsigned k = 5;
if (diff > 0 && diff_bits > k) {
diff = diff & (((1 << k) - 1) << (diff_bits - k));
}
// bounds[i + 1] = (((uint64_t)acc) << 16) / sum;
bounds[i + 1] = bounds[i] + diff;
// A symbol that occurred must get a non-empty interval.
if (bounds[i + 1] <= bounds[i]) {
bounds[i + 1] = bounds[i] + 1;
}
} else {
// Count of zero: empty interval.
prev = bounds[i + 1];
bounds[i + 1] = bounds[i];
}
}
// Pin the total to exactly 1 << 16 unless every count was zero.
if (bounds[N] > 0) {
bounds[N] = 1 << 16;
}
// Default means: nothing was counted at all, or the whole range belongs to
// the last sorted slot and that slot is the default symbol.
isDefault = N == 0 || bounds[N] == 0 || (bounds[N - 1] == 0 && ids[N - 1] == defval);
build_scale_down();
}
// Builds the default model: the whole probability range is given to the last
// slot, which is mapped to symbol `defval`.
template <unsigned N>
void PreflateSubModel<N>::buildDefault(const unsigned defval) {
    isDefault = true;
    if (N == 0) {
        return;
    }
    // Slots 0 .. N-1 start at 0; only the final bound is non-zero.
    for (unsigned slot = 0; slot < N; ++slot) {
        bounds[slot] = 0;
        scaledDownBounds[slot] = 0;
    }
    bounds[N] = 0x10000;
    const unsigned lastSlot = N - 1;
    ids[lastSlot] = defval;
    rids[defval] = lastSlot;
    build_scale_down();
}
// Derives scaledDownBounds from bounds by dropping the trailing zero bits
// that all bounds have in common, and records how many bits remain in
// scaleDownBits. Also sets isFixed when only the last slot has any weight.
// (Using the bounds unscaled with scaleDownBits = 16 would be the
// non-reduced alternative.)
template <unsigned N>
void PreflateSubModel<N>::build_scale_down() {
    // All bits above bit 15 are preset so that the trailing-zero count can
    // never exceed 16.
    unsigned combined = ~0xFFFFu;
    for (const unsigned bound : bounds) {
        combined |= bound;
    }
    const unsigned droppedBits = bitTrailingZeroes(combined);
    scaleDownBits = (16 - droppedBits);
    for (unsigned slot = 0; slot < N + 1; ++slot) {
        scaledDownBounds[slot] = bounds[slot] >> droppedBits;
    }
    isFixed = (bounds[N - 1] == 0);
}
// Writes an interval width in a compact floating-point-like form: a 4-bit
// exponent (bit length minus one) followed by the bits below the leading one,
// at most four of them.
static void encodeProb(ArithmeticEncoder& codec, const unsigned val) {
    const unsigned width = bitLength(val);
    // exponent
    codec.encodeBits(width - 1, 4);
    // mantissa
    if (width < 5) {
        // Small value: send everything below the leading bit exactly.
        const unsigned leadingBit = 1 << (width - 1);
        codec.encodeBits(val & ~leadingBit, width - 1);
        return;
    }
    // Large value: send only the four bits right below the leading bit.
    codec.encodeBits((val >> (width - 5)) & 0xf, 4);
}
// Writes a symbol id with a fixed width: just enough bits to hold count - 1.
static void encodeId(ArithmeticEncoder& codec,
                     const unsigned id, const unsigned count) {
    const unsigned idWidth = bitLength(count - 1);
    codec.encodeBits(id, idWidth);
}
// Reads an interval width written by encodeProb(): a 4-bit exponent followed
// by the mantissa bits, with the implicit leading one restored.
static unsigned decodeProb(ArithmeticDecoder& codec) {
    // decode exponent
    const unsigned width = codec.decodeBits(4) + 1;
    // decode mantissa
    if (width < 5) {
        return codec.decodeBits(width - 1) | (1 << (width - 1));
    }
    return (codec.decodeBits(4) | 0x10) << (width - 5);
}
// Reads a symbol id written by encodeId(), i.e. bitLength(count - 1) bits.
static unsigned decodeId(ArithmeticDecoder& codec, const unsigned count) {
    const unsigned idWidth = bitLength(count - 1);
    return codec.decodeBits(idWidth);
}
// Serialises the model: the number of leading empty slots, then the interval
// widths of the remaining slots, then the symbol ids of all non-empty slots.
template <unsigned N>
void PreflateSubModel<N>::write(ArithmeticEncoder& codec, const uint8_t) const {
    // Count how many bounds directly after bounds[0] are still zero.
    unsigned zeros = 0;
    while (zeros + 1 < N && !bounds[zeros + 1]) {
        ++zeros;
    }
    codec.encodeBits(zeros, bitLength(N - 1));
    // Transmit values
    for (unsigned slot = zeros + 1; slot < N; ++slot) {
        const unsigned width = bounds[slot] - bounds[slot - 1];
        encodeProb(codec, width);
    }
    // Transmit ids
    for (unsigned slot = zeros; slot < N; ++slot) {
        encodeId(codec, ids[slot], N);
    }
}
// Deserialises a model written by write(): the number of leading empty
// slots, the interval widths of the remaining slots and the symbol ids of
// all non-empty slots. The data comes from the reconstruction stream and is
// therefore not trusted.
template <unsigned N>
void PreflateSubModel<N>::read(ArithmeticDecoder& codec, const uint8_t) {
    const unsigned zeros = codec.decodeBits(bitLength(N - 1));
    memset(bounds, 0, sizeof(bounds));
    // Receive values
    for (unsigned i = 1 + zeros; i < N; ++i) {
        bounds[i] = decodeProb(codec) + bounds[i - 1];
    }
    bounds[N] = 1 << 16;
    // Receive ids
    for (unsigned i = zeros; i < N; ++i) {
        unsigned id = decodeId(codec, N);
        // An id occupies bitLength(N - 1) bits, so a corrupt stream can carry
        // a value >= N when N is not a power of two. Clamp it so that neither
        // the rids[] write below nor later users of ids[] leave the arrays.
        if (id >= N) {
            id = N - 1;
        }
        ids[i] = id;
        rids[id] = i;
    }
    build_scale_down();
}
// Builds the three sub models of a signed correction from one histogram.
// `arr` holds NEG negative buckets, the zero bucket at arr[NEG], and POS
// positive buckets after it. `sign` models zero/positive/negative, `pos` and
// `neg` model the magnitude (1-based, hence the "- 1") on either side.
template <unsigned NEG, unsigned POS>
void PreflateCorrectionSubModel<NEG, POS>::build_impl(const unsigned* arr, const int defval, const uint8_t prec) {
    // Sign model: [0] = no correction, [1] = positive, [2] = negative.
    const unsigned zeroCount = arr[NEG];
    const unsigned positiveCount = sumArray(arr + NEG + 1, POS);
    const unsigned negativeCount = sumArray(arr, NEG);
    unsigned signArr[3] = {zeroCount, positiveCount, negativeCount};
    unsigned signDefault = 2;
    if (defval == 0) {
        signDefault = 0;
    } else if (defval > 0) {
        signDefault = 1;
    }
    sign.build_impl(signArr, signDefault, prec);

    // Positive magnitudes, in the order they appear behind the zero bucket.
    unsigned posArr[POS + 1];
    const unsigned* positiveBuckets = arr + NEG + 1;
    for (unsigned k = 0; k < POS; ++k) {
        posArr[k] = positiveBuckets[k];
    }
    const bool posInRange = defval > 0 && static_cast<unsigned>(defval) <= POS;
    const unsigned posDefault = posInRange ? static_cast<unsigned>(defval) - 1 : POS - 1;
    pos.build_impl(posArr, posDefault, prec);

    // Negative magnitudes, walking away from the zero bucket (reversed).
    unsigned negArr[NEG + 1];
    for (unsigned k = 0; k < NEG; ++k) {
        negArr[k] = arr[NEG - 1 - k];
    }
    const int negated = -defval;
    const bool negInRange = negated > 0 && static_cast<unsigned>(negated) <= NEG;
    const unsigned negDefault = negInRange ? static_cast<unsigned>(negated) - 1 : NEG - 1;
    neg.build_impl(negArr, negDefault, prec);

    isDefault = sign.isDefault && pos.isDefault && neg.isDefault;
}
// Builds the default sign/pos/neg models for the default correction
// `defval`, choosing the same default symbols as build_impl() does, so that
// a model flagged as default by the encoder is rebuilt identically here.
// The parameter is declared unsigned; a negative default arrives as its
// two's-complement value and is interpreted as signed again. For defval == 0
// the result is the same as before this was made sign-aware.
template <unsigned NEG, unsigned POS>
void PreflateCorrectionSubModel<NEG, POS>::buildDefault(const unsigned defval) {
    const int signedDefault = static_cast<int>(defval);
    // Magnitude computed in unsigned arithmetic to avoid overflow on INT_MIN.
    const unsigned magnitude = signedDefault < 0 ? 0u - defval : defval;
    sign.buildDefault(signedDefault == 0 ? 0 : (signedDefault > 0 ? 1 : 2));
    pos.buildDefault(signedDefault > 0 && magnitude <= POS ? magnitude - 1 : POS - 1);
    neg.buildDefault(signedDefault < 0 && magnitude <= NEG ? magnitude - 1 : NEG - 1);
    isDefault = sign.isDefault && pos.isDefault && neg.isDefault;
}
// Serialises the sign model followed by the magnitude models that exist
// (a side with zero buckets has nothing to write).
template <unsigned NEG, unsigned POS>
void PreflateCorrectionSubModel<NEG, POS>::write(ArithmeticEncoder& codec, const uint8_t prec) const {
    const bool hasPositiveSide = POS > 0;
    const bool hasNegativeSide = NEG > 0;
    sign.write(codec, prec);
    if (hasPositiveSide) {
        pos.write(codec, prec);
    }
    if (hasNegativeSide) {
        neg.write(codec, prec);
    }
}
// Deserialises the sign model followed by the magnitude models that exist,
// in the order write() emitted them.
template <unsigned NEG, unsigned POS>
void PreflateCorrectionSubModel<NEG, POS>::read(ArithmeticDecoder& codec, const uint8_t prec) {
    const bool hasPositiveSide = POS > 0;
    const bool hasNegativeSide = NEG > 0;
    sign.read(codec, prec);
    if (hasPositiveSide) {
        pos.read(codec, prec);
    }
    if (hasNegativeSide) {
        neg.read(codec, prec);
    }
}
// -------------------------------------
// A base model starts out attached to neither an encoder nor a decoder.
PreflateBaseModel::PreflateBaseModel()
    : encoder(nullptr)
    , decoder(nullptr) {
}
// Attaches (or, with nullptr, detaches) the arithmetic encoder used by the
// write helpers. The pointer is stored as-is.
void PreflateBaseModel::setEncoderStream(ArithmeticEncoder* codec_) {
    this->encoder = codec_;
}
// Attaches (or, with nullptr, detaches) the arithmetic decoder used by the
// read helpers. The pointer is stored as-is.
void PreflateBaseModel::setDecoderStream(ArithmeticDecoder* codec_) {
    this->decoder = codec_;
}
// Restores one sub model from the decoder stream. Nothing is read when the
// whole group is flagged as default; otherwise one flag decides between the
// default model and an explicitly transmitted one.
template <unsigned N>
void PreflateBaseModel::readSubModel(PreflateSubModel<N>& sm, const bool isFullDef, const PreflateModelCodec& cc,
                                     const unsigned defVal, const uint8_t prec) {
    const bool useDefault = isFullDef || cc.nonDefaultValue.decode(*decoder) == 0;
    if (!useDefault) {
        sm.read(*decoder, prec);
        return;
    }
    sm.buildDefault(defVal);
}
// Correction-model flavour of readSubModel: same protocol, i.e. nothing is
// read for a fully-default group, otherwise one flag selects between the
// default model and transmitted model data.
template <unsigned N, unsigned M>
void PreflateBaseModel::readSubModel(PreflateCorrectionSubModel<N, M>& sm, const bool isFullDef, const PreflateModelCodec& cc,
                                     const unsigned defVal, const uint8_t prec) {
    const bool useDefault = isFullDef || cc.nonDefaultValue.decode(*decoder) == 0;
    if (!useDefault) {
        sm.read(*decoder, prec);
        return;
    }
    sm.buildDefault(defVal);
}
// Stores one sub model in the encoder stream. A fully-default group writes
// nothing; otherwise a flag is written, followed by the model data if the
// model is not the default one.
template <unsigned N>
void PreflateBaseModel::writeSubModel(const PreflateSubModel<N>& sm, const bool isFullDef, const PreflateModelCodec& cc,
                                      const unsigned defVal, const uint8_t prec) {
    if (!isFullDef) {
        const bool customModel = !sm.isDefault;
        cc.nonDefaultValue.encode(*encoder, customModel);
        if (customModel) {
            sm.write(*encoder, prec);
        }
    }
}
// Correction-model flavour of writeSubModel: nothing for a fully-default
// group, otherwise the default/non-default flag plus the model data when the
// model is not the default one.
template <unsigned N, unsigned M>
void PreflateBaseModel::writeSubModel(const PreflateCorrectionSubModel<N, M>& sm, const bool isFullDef, const PreflateModelCodec& cc,
                                      const unsigned defVal, const uint8_t prec) {
    if (!isFullDef) {
        const bool customModel = !sm.isDefault;
        cc.nonDefaultValue.encode(*encoder, customModel);
        if (customModel) {
            sm.write(*encoder, prec);
        }
    }
}
// Builds the block-level sub models from collected statistics.
void PreflateBlockPredictionModel::read(const PreflateStatisticsCounter::BlockPrediction& blockModel, const PreflateModelCodec& cc) {
    const auto precision = cc.MBprecision;
    blockType.build(blockModel.blockType, PreflateTokenBlock::DYNAMIC_HUFF, precision);
    EOBMisprediction.build(blockModel.EOBMisprediction, 0, precision);
    // The padding model uses its own precision setting.
    nonZeroPadding.build(blockModel.nonZeroPadding, 0, cc.MBprecisionP1);
}
// Restores the block-level sub models from the attached decoder, in the
// order writeToStream() stores them.
void PreflateBlockPredictionModel::readFromStream(const PreflateModelCodec& cc) {
    const bool allDefault = cc.blockFullDefault;
    readSubModel(blockType, allDefault, cc, PreflateTokenBlock::DYNAMIC_HUFF, cc.MBprecision);
    readSubModel(EOBMisprediction, allDefault, cc, 0, cc.MBprecision);
    readSubModel(nonZeroPadding, allDefault, cc, 0, cc.MBprecisionP1);
}
// Stores the block-level sub models through the attached encoder; the order
// must match readFromStream().
void PreflateBlockPredictionModel::writeToStream(const PreflateModelCodec& cc) {
    const bool allDefault = cc.blockFullDefault;
    writeSubModel(blockType, allDefault, cc, PreflateTokenBlock::DYNAMIC_HUFF, cc.MBprecision);
    writeSubModel(EOBMisprediction, allDefault, cc, 0, cc.MBprecision);
    writeSubModel(nonZeroPadding, allDefault, cc, 0, cc.MBprecisionP1);
}
// Builds the tree-code sub models from collected statistics and then derives
// the per-type replacement models from them.
void PreflateTreeCodePredictionModel::read(const PreflateStatisticsCounter::TreeCodePrediction& treecodeModel, const PreflateModelCodec& cc) {
    const auto precision = cc.MBprecision;
    // Count mispredictions use the configured precision ...
    TCCountMisprediction.build(treecodeModel.TCCountMisprediction, 0, precision);
    LCountMisprediction.build(treecodeModel.LCountMisprediction, 0, precision);
    DCountMisprediction.build(treecodeModel.DCountMisprediction, 0, precision);
    // ... everything else the default precision of build().
    for (unsigned type = 0; type < 4; ++type) {
        LDTypeMisprediction[type].build(treecodeModel.LDTypeMisprediction[type], 0);
    }
    LDTypeReplacementBase.build(treecodeModel.LDTypeReplacement, 0);
    TCBitlengthCorrection.build(treecodeModel.TCBitlengthCorrection, 0);
    LDBitlengthCorrection.build(treecodeModel.LDBitlengthCorrection, 0);
    LDRepeatCountCorrection.build(treecodeModel.LDRepeatCountCorrection, 0);
    deriveLDTypeReplacement();
}
// Restores the tree-code sub models from the attached decoder, in the order
// writeToStream() stores them, then derives the per-type replacement models.
void PreflateTreeCodePredictionModel::readFromStream(const PreflateModelCodec& cc) {
    const bool allDefault = cc.treecodeFullDefault;
    readSubModel(TCCountMisprediction, allDefault, cc, 0, cc.MBprecision);
    readSubModel(LCountMisprediction, allDefault, cc, 0, cc.MBprecision);
    readSubModel(DCountMisprediction, allDefault, cc, 0, cc.MBprecision);
    for (unsigned type = 0; type < 4; ++type) {
        readSubModel(LDTypeMisprediction[type], allDefault, cc, 0);
    }
    readSubModel(LDTypeReplacementBase, allDefault, cc, 0);
    readSubModel(TCBitlengthCorrection, allDefault, cc, 0);
    readSubModel(LDBitlengthCorrection, allDefault, cc, 0);
    readSubModel(LDRepeatCountCorrection, allDefault, cc, 0);
    deriveLDTypeReplacement();
}
// Stores the tree-code sub models through the attached encoder; the order
// must match readFromStream().
void PreflateTreeCodePredictionModel::writeToStream(const PreflateModelCodec& cc) {
    const bool allDefault = cc.treecodeFullDefault;
    writeSubModel(TCCountMisprediction, allDefault, cc, 0, cc.MBprecision);
    writeSubModel(LCountMisprediction, allDefault, cc, 0, cc.MBprecision);
    writeSubModel(DCountMisprediction, allDefault, cc, 0, cc.MBprecision);
    for (unsigned type = 0; type < 4; ++type) {
        writeSubModel(LDTypeMisprediction[type], allDefault, cc, 0);
    }
    writeSubModel(LDTypeReplacementBase, allDefault, cc, 0);
    writeSubModel(TCBitlengthCorrection, allDefault, cc, 0);
    writeSubModel(LDBitlengthCorrection, allDefault, cc, 0);
    writeSubModel(LDRepeatCountCorrection, allDefault, cc, 0);
}
// Derives DerivedLDTypeReplacement[i] for each of the four types by combining
// the weights of LDTypeReplacementBase with those of LDTypeMisprediction[i].
// Neither the stream nor the statistics are consulted here; everything is
// computed from the two already-built models.
void PreflateTreeCodePredictionModel::deriveLDTypeReplacement() {
unsigned arr[4], arr_mp[2], miss[4], hit[4], sumhit;
// arr[j]: interval width of type j in the base replacement model.
LDTypeReplacementBase.extract(arr);
for (unsigned i = 0; i < 4; ++i) {
// arr_mp[0] / arr_mp[1]: interval widths of the two symbols of the
// misprediction model for type i.
LDTypeMisprediction[i].extract(arr_mp);
if (arr_mp[1] == 0) {
// Symbol 1 never occurs for this type: use the default model with i.
DerivedLDTypeReplacement[i].buildDefault(i);
} else {
// NOTE(review): with arr_mp[0] == 0 only the ratios between the miss[]
// entries matter (all hit[] are 0), so the factor is presumably reduced
// to 1 to keep the products below small - confirm.
if (arr_mp[0] == 0) {
arr_mp[1] = 1;
}
sumhit = 0;
for (unsigned j = 0; j < 4; ++j) {
hit[j] = arr[j] * arr_mp[0];
miss[j] = arr[j] * arr_mp[1];
sumhit += hit[j];
}
// The entry for type i itself is replaced by the combined hit weight of
// the other three types.
miss[i] = sumhit - hit[i];
// Avoid the sum of all entries to exceed 32bit
// (divide by 16, but never let a non-zero weight drop to zero)
for (unsigned j = 0; j < 4; ++j) {
if (miss[j] > 0 && miss[j] < 16) {
miss[j] = 1;
} else {
miss[j] >>= 4;
}
}
DerivedLDTypeReplacement[i].build(miss, i);
}
}
}
// Builds the token-level sub models from collected statistics. All of them
// use default value 0 and the default precision of build(); `cc` is not
// consulted.
void PreflateTokenPredictionModel::read(const PreflateStatisticsCounter::TokenPrediction& tokenModel, const PreflateModelCodec& cc) {
    // literal / reference mispredictions
    LITMisprediction.build(tokenModel.LITMisprediction, 0);
    REFMisprediction.build(tokenModel.REFMisprediction, 0);
    // length and distance corrections
    LENCorrection.build(tokenModel.LENCorrection, 0);
    DISTAfterLenCorrection.build(tokenModel.DISTAfterLenCorrection, 0);
    DISTOnlyCorrection.build(tokenModel.DISTOnlyCorrection, 0);
    // irregular encoding of length 258
    IrregularLen258Encoding.build(tokenModel.LEN258IrregularEncoding, 0);
}
// Restores the token-level sub models from the attached decoder, in the
// order writeToStream() stores them.
void PreflateTokenPredictionModel::readFromStream(const PreflateModelCodec& cc) {
    const bool allDefault = cc.tokenFullDefault;
    readSubModel(LITMisprediction, allDefault, cc, 0);
    readSubModel(REFMisprediction, allDefault, cc, 0);
    readSubModel(LENCorrection, allDefault, cc, 0);
    readSubModel(DISTAfterLenCorrection, allDefault, cc, 0);
    readSubModel(DISTOnlyCorrection, allDefault, cc, 0);
    readSubModel(IrregularLen258Encoding, allDefault, cc, 0);
}
// Stores the token-level sub models through the attached encoder; the order
// must match readFromStream().
void PreflateTokenPredictionModel::writeToStream(const PreflateModelCodec& cc) {
    const bool allDefault = cc.tokenFullDefault;
    writeSubModel(LITMisprediction, allDefault, cc, 0);
    writeSubModel(REFMisprediction, allDefault, cc, 0);
    writeSubModel(LENCorrection, allDefault, cc, 0);
    writeSubModel(DISTAfterLenCorrection, allDefault, cc, 0);
    writeSubModel(DISTOnlyCorrection, allDefault, cc, 0);
    writeSubModel(IrregularLen258Encoding, allDefault, cc, 0);
}
// Construction and destruction need no work beyond what the members do.
PreflatePredictionModel::PreflatePredictionModel() {
}
PreflatePredictionModel::~PreflatePredictionModel() {
}
// Builds the block, tree-code and token models from one statistics counter.
void PreflatePredictionModel::read(const PreflateStatisticsCounter& model, const PreflateModelCodec& cc) {
    block.read(model.block, cc);
    treecode.read(model.treecode, cc);
    token.read(model.token, cc);
}
// Hands the same encoder (or nullptr to detach) to all three part models.
void PreflatePredictionModel::setEncoderStream(ArithmeticEncoder* codec) {
    block.setEncoderStream(codec);
    treecode.setEncoderStream(codec);
    token.setEncoderStream(codec);
}
// Hands the same decoder (or nullptr to detach) to all three part models.
void PreflatePredictionModel::setDecoderStream(ArithmeticDecoder* codec) {
    block.setDecoderStream(codec);
    treecode.setDecoderStream(codec);
    token.setDecoderStream(codec);
}
// Restores the part models from the attached decoder: block first, then
// tree code, then token - the order writeToStream() uses.
void PreflatePredictionModel::readFromStream(const PreflateModelCodec& cc) {
    block.readFromStream(cc);
    treecode.readFromStream(cc);
    token.readFromStream(cc);
}
// Stores the part models through the attached encoder: block first, then
// tree code, then token - the order readFromStream() expects.
void PreflatePredictionModel::writeToStream(const PreflateModelCodec& cc) {
    block.writeToStream(cc);
    treecode.writeToStream(cc);
    token.writeToStream(cc);
}
// ------------------------------------
// Leaves all members unset; initDefault(), read() or readFromStream() fill
// them in.
PreflateModelCodec::PreflateModelCodec() {
}
// Sets the codec up for the case that every model group is default: no
// models are counted and the default/non-default flag model only ever
// yields "default".
void PreflateModelCodec::initDefault() {
    blockFullDefault = true;
    treecodeFullDefault = true;
    tokenFullDefault = true;

    totalModels = 0;
    defaultingModels = 0;

    // One count for "default", none for "non-default".
    unsigned flagCounts[2] = {1, 0};
    nonDefaultValue.build(flagCounts, 0);

    MBprecision = 16;
    MBprecisionP1 = 16;
}
void PreflateModelCodec::read(const PreflateStatisticsCounter& m) {
totalModels = 0;
defaultingModels = 0;
unsigned total_block = m.block.totalModels();
unsigned defaulting_block = m.block.checkDefaultModels();
blockFullDefault = total_block == defaulting_block;
if (!blockFullDefault) {
totalModels += total_block;
defaultingModels += defaulting_block;
}
unsigned total_tree = m.treecode.totalModels();
unsigned defaulting_tree = m.treecode.checkDefaultModels();
treecodeFullDefault = total_tree == defaulting_tree;
if (!treecodeFullDefault) {
totalModels += total_tree;
defaultingModels += defaulting_tree;
}
unsigned total_token = m.token.totalModels();
unsigned defaulting_token = m.token.checkDefaultModels();
tokenFullDefault = total_token == defaulting_token;
if (!tokenFullDefault) {
totalModels += total_token;
defaultingModels += defaulting_token;
}
if (totalModels > 0) {
unsigned arr[2] = {defaultingModels, totalModels - defaultingModels};
nonDefaultValue.build(arr, 0);
}
MBprecision = 16;
MBprecisionP1 = 16;
}
// Restores the codec settings written by writeToStream(): the three
// "group is fully default" flags and the number of default models among the
// groups that are not fully default. The total is not transmitted; it is
// recomputed from the flags.
void PreflateModelCodec::readFromStream(ArithmeticDecoder& codec) {
    blockFullDefault = codec.decodeBits(1);
    treecodeFullDefault = codec.decodeBits(1);
    tokenFullDefault = codec.decodeBits(1);

    // Only groups that are not fully default contribute models.
    totalModels = 0;
    if (!blockFullDefault) {
        totalModels += PreflateStatisticsCounter::BlockPrediction::totalModels();
    }
    if (!treecodeFullDefault) {
        totalModels += PreflateStatisticsCounter::TreeCodePrediction::totalModels();
    }
    if (!tokenFullDefault) {
        totalModels += PreflateStatisticsCounter::TokenPrediction::totalModels();
    }

    const unsigned countWidth = bitLength(totalModels);
    defaultingModels = PreflateBaseModel::decodeValue(codec, countWidth);
    if (totalModels != 0) {
        unsigned flagCounts[2] = {defaultingModels, totalModels - defaultingModels};
        nonDefaultValue.build(flagCounts, 0);
    }
    MBprecision = 16;
    MBprecisionP1 = 16;
}
// Stores the codec settings: one bit per "group is fully default" flag,
// followed by the number of default models in as many bits as totalModels
// needs.
void PreflateModelCodec::writeToStream(ArithmeticEncoder& codec) {
    codec.encodeBits(blockFullDefault, 1);
    codec.encodeBits(treecodeFullDefault, 1);
    codec.encodeBits(tokenFullDefault, 1);
    const unsigned countWidth = bitLength(totalModels);
    codec.encodeBits(defaultingModels, countWidth);
}
// ------------------------------------
// An encoder owns no stream objects until start() is called.
PreflatePredictionEncoder::PreflatePredictionEncoder()
    : storage(nullptr), bos(nullptr), encoder(nullptr) {
}
// Begins encoding one meta block: takes over the given model and parameters
// and creates a fresh memory stream -> bit stream -> arithmetic encoder chain
// that the model parts write to. end() tears the chain down again.
void PreflatePredictionEncoder::start(const PreflatePredictionModel& model_, const PreflateParameters& params_,
                                      const unsigned modelId_) {
    // Copy the model into the base-class part of this object.
    PreflatePredictionModel::operator =(model_);
    params = params_;
    modelid = modelId_;

    // Each stage wraps the one created before it.
    storage = new MemStream;
    bos = new BitOutputStream(*storage);
    encoder = new ArithmeticEncoder(*bos);
    setEncoderStream(encoder);
}
// Finishes encoding: detaches the model parts from the encoder, flushes and
// destroys the stream chain created by start() and returns the bytes that
// were produced. Returns an empty vector if there is no active chain
// (start() was never called, or end() was already called).
std::vector<uint8_t> PreflatePredictionEncoder::end() {
    setEncoderStream(nullptr);
    if (!encoder || !bos || !storage) {
        return std::vector<uint8_t>();
    }
    // Tear down from the outermost stage inwards, flushing each stage into
    // the next one. Every pointer is reset after deletion - as
    // PreflatePredictionDecoder::end() does - so that a stale pointer can
    // never be flushed or deleted a second time.
    encoder->flush();
    delete encoder;
    encoder = nullptr;
    bos->flush();
    delete bos;
    bos = nullptr;
    std::vector<unsigned char> result = storage->extractData();
    delete storage;
    storage = nullptr;
    return result;
}
// A decoder owns no stream objects until start() is called.
PreflatePredictionDecoder::PreflatePredictionDecoder()
    : storage(nullptr), bis(nullptr), decoder(nullptr) {
}
// Begins decoding one meta block: takes over the given model and parameters
// and creates a memory stream over `size` bytes of storage_ starting at off0,
// wrapped by a bit stream and an arithmetic decoder that the model parts
// read from. end() tears the chain down again.
void PreflatePredictionDecoder::start(const PreflatePredictionModel& model_, const PreflateParameters& params_,
                                      const std::vector<uint8_t>& storage_, size_t off0, size_t size) {
    // Copy the model into the base-class part of this object.
    PreflatePredictionModel::operator =(model_);
    params = params_;

    // Each stage wraps the one created before it.
    storage = new MemStream(storage_, off0, size);
    bis = new BitInputStream(*storage);
    decoder = new ArithmeticDecoder(*bis);
    setDecoderStream(decoder);
}
// Finishes decoding: detaches the model parts from the decoder and destroys
// the stream chain created by start(), outermost stage first. All pointers
// are reset, so calling end() again is harmless.
void PreflatePredictionDecoder::end() {
    setDecoderStream(nullptr);

    delete decoder;
    decoder = nullptr;

    delete bis;
    bis = nullptr;

    delete storage;
    storage = nullptr;
}
// ------------------------------------
// A new meta encoder starts without an error condition.
PreflateMetaEncoder::PreflateMetaEncoder()
    : inError(false) {}
// Nothing to release explicitly.
PreflateMetaEncoder::~PreflateMetaEncoder() {
}
unsigned PreflateMetaEncoder::addModel(const PreflateStatisticsCounter& counter, const PreflateParameters& params) {
unsigned modelId = modelList.size();
modelType m;
m.counter = counter;
m.mcodec.read(counter);
m.model.read(counter, m.mcodec);
m.params = params;
m.writtenId = 0;
modelList.push_back(m);
return modelId;
}
// Starts `encoder` for a new meta block using a previously added model.
// Returns false, without touching the encoder, if modelId is unknown.
bool PreflateMetaEncoder::beginMetaBlockWithModel(PreflatePredictionEncoder& encoder, const unsigned modelId) {
    if (modelId < modelList.size()) {
        const auto& entry = modelList[modelId];
        encoder.start(entry.model, entry.params, modelId);
        return true;
    }
    return false;
}
// Ends the meta block that `encoder` was started for: records which model it
// used, how many reconstruction bytes it produced and how many uncompressed
// bytes it covers, and appends the produced bytes to the reconstruction data.
// Returns false, leaving the encoder untouched, if its model id is unknown.
bool PreflateMetaEncoder::endMetaBlock(PreflatePredictionEncoder& encoder, const size_t uncompressed) {
    const auto usedModel = encoder.modelId();
    if (!(usedModel < modelList.size())) {
        return false;
    }
    const std::vector<uint8_t> produced = encoder.end();

    metaBlockInfo info;
    info.modelId = usedModel;
    info.reconSize = produced.size();
    info.uncompressedSize = uncompressed;
    blockList.push_back(info);

    reconData.insert(reconData.end(), produced.begin(), produced.end());
    return true;
}
// Produces the complete reconstruction stream: a bit-packed header that
// describes every meta block (model parameters, optional model data, block
// sizes), padded by the final flush, followed by the concatenated
// reconstruction data of all meta blocks. PreflateMetaDecoder's constructor
// parses exactly this layout, so the order of the put() calls below is part
// of the format.
std::vector<unsigned char> PreflateMetaEncoder::finish() {
MemStream mem;
BitOutputStream bos(mem);
bos.put(0, 1); // no extension used
bos.put(blockList.size() > 1, 1); // set when there is more than one meta block
if (blockList.size() > 1) {
// The count is stored minus two because 0 and 1 blocks are covered by the flag.
bos.putVLI(blockList.size() - 2);
}
enum Mode {
CREATE_NEW_MODEL /*, REUSE_LAST_MODEL, REUSE_PREVIOUS_MODEL*/
};
for (unsigned i = 0, n = blockList.size(); i < n; ++i) {
const metaBlockInfo& mb = blockList[i];
Mode mode = CREATE_NEW_MODEL;
// The first block always creates a model, so the mode is only written
// for the blocks after it.
if (i > 0) {
bos.put(3, 2); // create new model
}
switch (mode) {
case CREATE_NEW_MODEL:
{
modelType& mt = modelList[mb.modelId];
// "Perfect zlib": every model group is default and the parameters are
// zlib compatible, so no model data needs to be stored.
bool perfectZLIB = mt.mcodec.blockFullDefault && mt.mcodec.treecodeFullDefault && mt.mcodec.tokenFullDefault
&& mt.params.zlibCompatible;
bos.put(!perfectZLIB, 1); // 0 = perfect zlib model, 1 = explicit model follows
bos.put(mt.params.compLevel, 4);
bos.put(mt.params.memLevel, 4);
bos.put(mt.params.windowBits - 8, 3);
if (!perfectZLIB) {
bos.put(mt.params.zlibCompatible, 1);
if (!mt.params.zlibCompatible) {
bos.put(mt.params.veryFarMatchesDetected, 1);
bos.put(mt.params.matchesToStartDetected, 1);
}
bos.put(mt.params.log2OfMaxChainDepthM1, 4);
// The model itself is arithmetic-coded into a temporary buffer first,
// because its byte length has to be written in front of it.
MemStream tmp_data;
{
BitOutputStream tmp_bos(tmp_data);
ArithmeticEncoder tmp_codec(tmp_bos);
mt.mcodec.writeToStream(tmp_codec);
mt.model.setEncoderStream(&tmp_codec);
mt.model.writeToStream(mt.mcodec);
// Detach before tmp_codec goes out of scope.
mt.model.setEncoderStream(nullptr);
tmp_codec.flush();
tmp_bos.flush();
}
std::vector<uint8_t> tmp_res = tmp_data.extractData();
// write length (vli) and model data
bos.putVLI(tmp_res.size());
bos.putBytes(tmp_res.data(), tmp_res.size());
}
break;
}
}
// for the last block, the size of the reconstruction data and processed uncompressed data
// is implicitly going to end of stream
// -------------------
if (i != n - 1) {
bos.putVLI(mb.reconSize);
bos.putVLI(mb.uncompressedSize);
}
}
bos.flush();
std::vector<uint8_t> result = mem.extractData();
// Header first, then the reconstruction data of all meta blocks.
result.insert(result.end(), reconData.begin(), reconData.end());
return result;
}
// Parses the header of a reconstruction stream produced by
// PreflateMetaEncoder::finish(): the number of meta blocks and, per block,
// the model parameters, the optional model data and the block sizes. After
// the header, the start offsets of every block inside reconData_ and inside
// the uncompressed data (uncompressedSize_ bytes in total) are computed.
// Any inconsistency sets inError and stops parsing. The stream is untrusted
// input, so all sizes are checked against the data that is really there.
PreflateMetaDecoder::PreflateMetaDecoder(const std::vector<uint8_t>& reconData_, const uint64_t uncompressedSize_)
    : inError(false)
    , reconData(reconData_)
    , uncompressedSize(uncompressedSize_) {
    if (reconData.size() == 0) {
        inError = true;
        return;
    }
    MemStream mem(reconData);
    BitInputStream bis(mem);
    bool extension = bis.get(1);
    if (extension) {
        // Extensions are not supported.
        inError = true;
        return;
    }
    bool singleBlock = bis.get(1) == 0;
    size_t blockCount;
    if (singleBlock) {
        blockCount = 1;
    } else {
        // Stored minus two, see PreflateMetaEncoder::finish().
        blockCount = 2 + bis.getVLI();
    }
    enum Mode {
        CREATE_NEW_MODEL /*, REUSE_LAST_MODEL, REUSE_PREVIOUS_MODEL*/
    };
    for (size_t i = 0; i < blockCount; ++i) {
        metaBlockInfo mb;
        Mode mode = CREATE_NEW_MODEL;
        if (i > 0) {
            if (bis.get(2) != 3) { // must create new model for the moment
                inError = true;
                return;
            }
        }
        switch (mode) {
        case CREATE_NEW_MODEL:
        {
            modelType mt;
            // NOTE(review): zero-filling assumes modelType is trivially
            // copyable - confirm against its declaration.
            memset(&mt, 0, sizeof(mt));
            bool perfectZLIB = bis.get(1) == 0;
            mt.params.compLevel = bis.get(4);
            mt.params.memLevel = bis.get(4);
            mt.params.windowBits = bis.get(3) + 8;
            if (perfectZLIB) {
                mt.params.zlibCompatible = true;
                mt.mcodec.blockFullDefault = true;
                mt.mcodec.treecodeFullDefault = true;
                mt.mcodec.tokenFullDefault = true;
                mt.model.readFromStream(mt.mcodec); // initialize with default model
            } else {
                mt.params.zlibCompatible = bis.get(1);
                if (!mt.params.zlibCompatible) {
                    mt.params.veryFarMatchesDetected = bis.get(1);
                    mt.params.matchesToStartDetected = bis.get(1);
                }
                mt.params.log2OfMaxChainDepthM1 = bis.get(4);
                // read length (vli) and model data
                size_t res_size = bis.getVLI();
                // interpret model data
                {
                    MemStream tmp_mem;
                    bis.copyBytesTo(tmp_mem, res_size);
                    tmp_mem.seek(0);
                    BitInputStream tmp_bis(tmp_mem);
                    ArithmeticDecoder tmp_codec(tmp_bis);
                    mt.mcodec.readFromStream(tmp_codec);
                    mt.model.setDecoderStream(&tmp_codec);
                    mt.model.readFromStream(mt.mcodec);
                    // Detach before tmp_codec goes out of scope.
                    mt.model.setDecoderStream(nullptr);
                }
            }
            mb.modelId = modelList.size();
            modelList.push_back(mt);
            break;
        }
        }
        // for the last block, the size of the reconstruction data and processed uncompressed data
        // is implicitly going to end of stream
        // -------------------
        if (i != blockCount - 1) {
            mb.reconSize = bis.getVLI();
            mb.uncompressedSize = bis.getVLI();
        }
        blockList.push_back(mb);
    }
    bis.skipToByte();
    size_t reconStart = bis.bitPos() >> 3;
    // The header itself must fit into the data. Without this check a
    // truncated stream makes the size of the last block (computed by
    // subtraction below) wrap around to a huge value.
    if (reconStart > reconData.size()) {
        inError = true;
        return;
    }
    uint64_t uncStart = 0;
    for (size_t i = 0; i < blockCount; ++i) {
        blockList[i].reconStartOfs = reconStart;
        blockList[i].uncompressedStartOfs = uncStart;
        if (i != blockCount - 1) {
            // Compare against what is left instead of adding first, so that
            // an oversized value cannot wrap around and slip past the check.
            if (blockList[i].reconSize > reconData.size() - reconStart
                || blockList[i].uncompressedSize > uncompressedSize - uncStart) {
                inError = true;
                return;
            }
            reconStart += blockList[i].reconSize;
            uncStart += blockList[i].uncompressedSize;
        } else {
            // The last block takes whatever remains.
            blockList[i].reconSize = reconData.size() - blockList[i].reconStartOfs;
            blockList[i].uncompressedSize = uncompressedSize - blockList[i].uncompressedStartOfs;
        }
    }
}
// Nothing to release explicitly.
PreflateMetaDecoder::~PreflateMetaDecoder() {
}
// Starts `decoder` on the reconstruction data of meta block `index` and
// copies that block's parameters into `params`. Returns false, changing
// nothing, if the block or the model it refers to does not exist.
bool PreflateMetaDecoder::beginMetaBlock(PreflatePredictionDecoder& decoder, PreflateParameters& params, const size_t index) {
    const bool knownBlock = index < blockList.size();
    if (!knownBlock) {
        return false;
    }
    const auto& block = blockList[index];
    const bool knownModel = block.modelId < modelList.size();
    if (!knownModel) {
        return false;
    }
    const auto& entry = modelList[block.modelId];
    params = entry.params;
    decoder.start(entry.model, entry.params, reconData, block.reconStartOfs, block.reconSize);
    return true;
}
// Ends the current meta block by shutting `decoder` down; always succeeds.
bool PreflateMetaDecoder::endMetaBlock(PreflatePredictionDecoder& decoder) {
    decoder.end();
    return true;
}
// Counterpart of PreflateMetaEncoder::finish(); decoding needs no final step.
void PreflateMetaDecoder::finish() {
}

View File

@@ -0,0 +1,628 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef PREFLATE_STATISTICAL_CODEC_H
#define PREFLATE_STATISTICAL_CODEC_H
#include <vector>
#include "support/arithmetic_coder.h"
#include "support/bit_helper.h"
#include "support/bitstream.h"
#include "support/memstream.h"
#include "preflate_parameter_estimator.h"
#include "preflate_statistical_model.h"
// Static probability model over N symbols for the arithmetic coder.
// Symbols are kept sorted by frequency: `ids` maps a sorted slot to its
// symbol, `rids` maps a symbol to its slot, and `bounds` holds the
// cumulative interval bounds of the slots (bounds[N] is the total).
// `scaledDownBounds` is `bounds` with common trailing zero bits removed,
// `scaleDownBits` the number of bits that remain.
template <unsigned N>
struct PreflateSubModel {
    static const unsigned L = N;

    PreflateSubModel() {}

    // Builds the model from N occurrence counts.
    void build(const unsigned(&arr)[N], const unsigned defval, const uint8_t prec = 16) {
        build_impl(arr, defval, prec);
    }
    // Builds the model that gives the whole range to `defval`.
    void buildDefault(const unsigned defval);
    // Writes the interval width of every symbol into arr.
    void extract(unsigned(&arr)[N]) {
        for (unsigned symbol = 0; symbol < N; ++symbol) {
            const unsigned slot = rids[symbol];
            arr[symbol] = bounds[slot + 1] - bounds[slot];
        }
    }
    void read(ArithmeticDecoder&, const uint8_t);
    void write(ArithmeticEncoder&, const uint8_t) const;
    // Encodes one symbol; a fixed model emits nothing.
    void encode(ArithmeticEncoder& codec, const unsigned item) const {
        if (isFixed) {
            return;
        }
        const size_t slot = rids[item];
        codec.encodeShiftScale(scaleDownBits, scaledDownBounds[slot], scaledDownBounds[slot + 1]);
    }
    // Decodes one symbol; a fixed model consumes nothing and yields the
    // symbol of the last slot.
    unsigned decode(ArithmeticDecoder& codec) const {
        if (!isFixed) {
            const unsigned slot = codec.decodeShiftScale(scaleDownBits, scaledDownBounds, N);
            return ids[slot];
        }
        return ids[N - 1];
    }
    bool isEqualTo(const PreflateSubModel<N>& m) const;

    unsigned bounds[N + 1];
    unsigned scaledDownBounds[N + 1];
    unsigned short ids[N + 1], rids[N + 1];
    uint8_t scaleDownBits;
    bool isDefault, isFixed;
private:
    void build_impl(const unsigned* arr, const unsigned defval, const uint8_t prec);
    void build_scale_down();
    template <unsigned NEG, unsigned POS>
    friend struct PreflateCorrectionSubModel;
};
// Specialization for an empty symbol set: every operation is a no-op,
// decode() always yields 0 and any two instances compare equal.
// isDefault / isFixed are compile-time constants (both 1) instead of
// data members.
template <>
struct PreflateSubModel<0u> {
static const unsigned L = 0u;
PreflateSubModel() {}
// void build(const unsigned(&arr)[1], const unsigned defval, const uint8_t prec = 16) {}
void buildDefault(const unsigned defval) {}
void read(ArithmeticDecoder&, const uint8_t) {}
void write(ArithmeticEncoder&, const uint8_t) const {}
void encode(ArithmeticEncoder& codec, const unsigned item) const {}
unsigned decode(ArithmeticDecoder& codec) const { return 0; }
bool isEqualTo(const PreflateSubModel<0u>& m) const { return true; }
enum { isDefault = 1, isFixed = 1 };
private:
void build_impl(const unsigned* arr, const unsigned defval, const uint8_t prec) {}
template <unsigned NEG, unsigned POS>
friend struct PreflateCorrectionSubModel;
};
// Codes the difference between an actual value and a reference (predicted)
// value. Three sub-models are used:
//  - sign: symbol 0 = no difference, 1 = positive, 2 = negative
//  - pos / neg: magnitude symbols for positive / negative differences
// Magnitudes 1 .. POS-1 (resp. 1 .. NEG-1) are coded as symbol magnitude-1.
// Magnitudes >= POS (resp. >= NEG) are coded as the last symbol (POS-1 /
// NEG-1), followed by (magnitude - POS) (resp. - NEG) as raw bits.
// encode() and decode() must compute the raw bit width identically, so any
// change to one has to be mirrored in the other.
template <unsigned NEG, unsigned POS>
struct PreflateCorrectionSubModel {
static const unsigned LNEG = NEG;
static const unsigned LPOS = POS;
PreflateCorrectionSubModel() {}
// Builds the model from NEG + 1 + POS counts (delegates to build_impl).
void build(const unsigned(&arr)[NEG + 1 + POS], const int defval, const uint8_t prec = 16) {
build_impl(arr, defval, prec);
}
void buildDefault(const unsigned defval);
void read(ArithmeticDecoder&, const uint8_t);
void write(ArithmeticEncoder&, const uint8_t) const;
// Encodes actvalue relative to refvalue. minvalue / maxvalue are only used
// to derive the bit width of the raw escape value.
void encode(ArithmeticEncoder& codec, const unsigned actvalue,
const unsigned refvalue,
const unsigned minvalue,
const unsigned maxvalue) {
// Unsigned subtraction converted to int: negative when actvalue < refvalue.
int diff = actvalue - refvalue;
if (diff == 0) {
sign.encode(codec, 0);
return;
}
if (diff > 0) {
sign.encode(codec, 1);
if (diff >= (int)POS) {
// Escape: last pos symbol, then the remainder as raw bits.
pos.encode(codec, POS - 1);
codec.encodeBits(diff - POS, bitLength(maxvalue - POS - refvalue));
} else {
pos.encode(codec, diff - 1);
}
} else {
sign.encode(codec, 2);
if (-diff >= (int)NEG) {
// Escape: last neg symbol, then the remainder as raw bits.
neg.encode(codec, NEG - 1);
codec.encodeBits(-diff - NEG, bitLength(refvalue - NEG - minvalue));
} else {
neg.encode(codec, -diff - 1);
}
}
}
// Inverse of encode(): returns the actual value for the given reference.
unsigned decode(ArithmeticDecoder& codec,
const unsigned refvalue,
const unsigned minvalue,
const unsigned maxvalue) {
unsigned s = sign.decode(codec);
if (s == 0) {
return refvalue;
}
if (s == 1) {
int diff = pos.decode(codec);
// Symbol POS-1 is the escape marker written by encode().
if (diff >= (int)(POS - 1)) {
return refvalue + codec.decodeBits(bitLength(maxvalue - POS - refvalue)) + POS;
} else {
return refvalue + diff + 1;
}
} else {
int diff = neg.decode(codec);
// Symbol NEG-1 is the escape marker written by encode().
// NOTE(review): for NEG == 0, (int)(NEG - 1) is -1, so this branch is
// always taken - presumably sign symbol 2 never occurs for such models; confirm.
if (diff >= (int)(NEG - 1)) {
return refvalue - codec.decodeBits(bitLength(refvalue - NEG - minvalue)) - NEG;
} else {
return refvalue - diff - 1;
}
}
}
bool isEqualTo(const PreflateCorrectionSubModel<NEG, POS>& m) const;
PreflateSubModel<3> sign;
PreflateSubModel<POS> pos;
PreflateSubModel<NEG> neg;
bool isDefault;
private:
void build_impl(const unsigned* arr, const int defval, const uint8_t prec);
};
// Settings that accompany a prediction model when it is read from or
// written to a stream: a two-symbol sub-model (nonDefaultValue), precision
// values, per-group "fully default" flags and model counters.
// All member functions are defined outside this header.
struct PreflateModelCodec {
PreflateSubModel<2> nonDefaultValue;
uint8_t MBprecision;
uint8_t MBprecisionP1;
// One flag per model group (block, tree code, token).
bool blockFullDefault;
bool treecodeFullDefault;
bool tokenFullDefault;
unsigned totalModels, defaultingModels;
PreflateModelCodec();
void initDefault();
// Initializes from gathered statistics.
void read(const PreflateStatisticsCounter&);
void readFromStream(ArithmeticDecoder&);
void writeToStream(ArithmeticEncoder&);
};
// Common base of the prediction models: stores the encoder / decoder
// pointers and provides raw fixed-width value coding plus helpers for
// (de)serializing sub-models.
struct PreflateBaseModel {
public:
PreflateBaseModel();
void setEncoderStream(ArithmeticEncoder*);
void setDecoderStream(ArithmeticDecoder*);
// Writes value as maxBits raw bits to codec.
static void encodeValue(ArithmeticEncoder& codec, const unsigned value, const unsigned maxBits) {
#ifdef _DEBUG
// Debug-only range check.
// NOTE(review): 1 << maxBits is a signed int shift, so it overflows for maxBits >= 31 - confirm callers stay below that.
_ASSERT(value < (1 << maxBits));
#endif
return codec.encodeBits(value, maxBits);
}
// Same as above, using the stored encoder pointer (dereferenced without a null check).
void encodeValue(const unsigned value, const unsigned maxBits) {
encodeValue(*encoder, value, maxBits);
}
// Reads maxBits raw bits from codec.
static unsigned decodeValue(ArithmeticDecoder& codec, const unsigned maxBits) {
return codec.decodeBits(maxBits);
}
// Same as above, using the stored decoder pointer (dereferenced without a null check).
unsigned decodeValue(const unsigned maxBits) {
return decodeValue(*decoder, maxBits);
}
protected:
// Sub-model (de)serialization helpers; defined outside this header.
template <unsigned N>
void readSubModel(PreflateSubModel<N>& sm, const bool isFullDef, const PreflateModelCodec& cc,
const unsigned defVal, const uint8_t prec = 16);
template <unsigned N, unsigned M>
void readSubModel(PreflateCorrectionSubModel<N, M>& sm, const bool isFullDef, const PreflateModelCodec& cc,
const unsigned defVal, const uint8_t prec = 16);
template <unsigned N>
void writeSubModel(const PreflateSubModel<N>& sm, const bool isFullDef, const PreflateModelCodec& cc,
const unsigned defVal, const uint8_t prec = 16);
template <unsigned N, unsigned M>
void writeSubModel(const PreflateCorrectionSubModel<N, M>& sm, const bool isFullDef, const PreflateModelCodec& cc,
const unsigned defVal, const uint8_t prec = 16);
ArithmeticEncoder* encoder;
ArithmeticDecoder* decoder;
};
// Block-level prediction model: the block type (3 symbols) and two
// yes/no flags (EOB misprediction, non-zero padding).
// The encode*/decode* members dereference the encoder / decoder pointers of
// the base class, so the matching stream must have been set beforehand.
struct PreflateBlockPredictionModel : public PreflateBaseModel {
public:
// Initializes the sub-models from gathered statistics.
void read(const PreflateStatisticsCounter::BlockPrediction&, const PreflateModelCodec&);
void readFromStream(const PreflateModelCodec&);
void writeToStream(const PreflateModelCodec&);
unsigned decodeBlockType() {
return blockType.decode(*decoder);
}
// The decoded symbol (unsigned) is converted to bool on return.
bool decodeEOBMisprediction() {
return EOBMisprediction.decode(*decoder);
}
bool decodeNonZeroPadding() {
return nonZeroPadding.decode(*decoder);
}
void encodeBlockType(const unsigned type) {
blockType.encode(*encoder, type);
}
// bool arguments are used as symbol index 0 / 1.
void encodeEOBMisprediction(const bool misprediction) {
EOBMisprediction.encode(*encoder, misprediction);
}
void encodeNonZeroPadding(const bool nonzeropadding) {
nonZeroPadding.encode(*encoder, nonzeropadding);
}
bool isEqualTo(const PreflateBlockPredictionModel& m) const;
private:
PreflateSubModel<3> blockType;
PreflateSubModel<2> EOBMisprediction;
PreflateSubModel<2> nonZeroPadding;
// Not referenced by any inline member in this header.
unsigned precision;
};
// Prediction model for the tree-code part of a block: count misprediction
// flags, code-type replacements, and corrections for bit lengths and
// repeat counts.
// The encode*/decode* members dereference the encoder / decoder pointers of
// the base class, so the matching stream must have been set beforehand.
struct PreflateTreeCodePredictionModel : public PreflateBaseModel {
public:
// Initializes the sub-models from gathered statistics.
void read(const PreflateStatisticsCounter::TreeCodePrediction&, const PreflateModelCodec& cc);
void readFromStream(const PreflateModelCodec& cc);
void writeToStream(const PreflateModelCodec& cc);
bool decodeTreeCodeCountMisprediction() {
return TCCountMisprediction.decode(*decoder);
}
bool decodeLiteralCountMisprediction() {
return LCountMisprediction.decode(*decoder);
}
bool decodeDistanceCountMisprediction() {
return DCountMisprediction.decode(*decoder);
}
// Returns the corrected bit length for predval; value range passed is 0..7.
int decodeTreeCodeBitLengthCorrection(unsigned predval) {
return TCBitlengthCorrection.decode(*decoder, predval, 0, 7);
}
// Decodes the actual code type using the replacement model selected by
// the predicted type. predtype indexes a 4-element array without a bounds check.
unsigned decodeLDTypeCorrection(unsigned predtype) {
return DerivedLDTypeReplacement[predtype].decode(*decoder);
}
// Decodes the actual repeat count for predval. The allowed range is
// [minVal[ldtype], minVal[ldtype] + lenVal[ldtype]]; ldtype indexes the
// 4-element tables without a bounds check.
unsigned decodeRepeatCountCorrection(const unsigned predval, const unsigned ldtype) {
static const uint8_t minVal[4] = {0, 3, 3, 11};
static const uint8_t lenVal[4] = {0, 3, 7, 127};
return LDRepeatCountCorrection.decode(*decoder, predval, minVal[ldtype], minVal[ldtype] + lenVal[ldtype]);
}
// Returns the corrected bit length for predval; value range passed is 0..15.
int decodeLDBitLengthCorrection(unsigned predval) {
return LDBitlengthCorrection.decode(*decoder, predval, 0, 15);
}
void encodeTreeCodeCountMisprediction(const bool misprediction) {
TCCountMisprediction.encode(*encoder, misprediction);
}
void encodeLiteralCountMisprediction(const bool misprediction) {
LCountMisprediction.encode(*encoder, misprediction);
}
void encodeDistanceCountMisprediction(const bool misprediction) {
DCountMisprediction.encode(*encoder, misprediction);
}
// Note the argument order: the correction model takes (actual, predicted).
void encodeTreeCodeBitLengthCorrection(const unsigned predval, const unsigned actval) {
TCBitlengthCorrection.encode(*encoder, actval, predval, 0, 7);
}
void encodeLDTypeCorrection(const unsigned predval, const unsigned actval) {
DerivedLDTypeReplacement[predval].encode(*encoder, actval);
}
// Uses the same range tables as decodeRepeatCountCorrection; the two
// copies must be kept identical.
void encodeRepeatCountCorrection(const unsigned predval, const unsigned actval, unsigned ldtype) {
static const uint8_t minVal[4] = {0, 3, 3, 11};
static const uint8_t lenVal[4] = {0, 3, 7, 127};
LDRepeatCountCorrection.encode(*encoder, actval, predval, minVal[ldtype], minVal[ldtype] + lenVal[ldtype]);
}
void encodeLDBitLengthCorrection(const unsigned predval, const unsigned actval) {
LDBitlengthCorrection.encode(*encoder, actval, predval, 0, 15);
}
bool isEqualTo(const PreflateTreeCodePredictionModel& m) const;
private:
// Presumably fills DerivedLDTypeReplacement from LDTypeMisprediction and
// LDTypeReplacementBase - defined elsewhere; confirm.
void deriveLDTypeReplacement();
PreflateSubModel<2> TCCountMisprediction;
PreflateSubModel<2> LCountMisprediction;
PreflateSubModel<2> DCountMisprediction;
PreflateSubModel<2> LDTypeMisprediction[4];
PreflateSubModel<4> LDTypeReplacementBase;
PreflateCorrectionSubModel<1, 1> LDRepeatCountCorrection;
PreflateCorrectionSubModel<3, 3> TCBitlengthCorrection;
PreflateCorrectionSubModel<4, 4> LDBitlengthCorrection;
PreflateSubModel<4> DerivedLDTypeReplacement[4];
};
// Prediction model for tokens: literal / reference misprediction flags,
// length correction, distance corrections and the irregular-258 flag.
// The encode*/decode* members dereference the encoder / decoder pointers of
// the base class, so the matching stream must have been set beforehand.
struct PreflateTokenPredictionModel : public PreflateBaseModel {
public:
// Initializes the sub-models from gathered statistics.
void read(const PreflateStatisticsCounter::TokenPrediction&, const PreflateModelCodec& cc);
void readFromStream(const PreflateModelCodec& cc);
void writeToStream(const PreflateModelCodec& cc);
bool decodeLiteralPredictionWrong() {
return LITMisprediction.decode(*decoder);
}
bool decodeReferencePredictionWrong() {
return REFMisprediction.decode(*decoder);
}
// Returns the actual length for the predicted length; range passed is 3..258.
int decodeLenCorrection(const unsigned predval) {
return LENCorrection.decode(*decoder, predval, 3, 258);
}
// Distance corrections are coded against reference 0 in range 0..32767,
// so the decoded value is the correction itself (the encoders call it "hops").
unsigned decodeDistOnlyCorrection() {
return DISTOnlyCorrection.decode(*decoder, 0, 0, 32767);
}
unsigned decodeDistAfterLenCorrection() {
return DISTAfterLenCorrection.decode(*decoder, 0, 0, 32767);
}
bool decodeIrregularLen258() {
return IrregularLen258Encoding.decode(*decoder);
}
void encodeLiteralPredictionWrong(const bool misprediction) {
LITMisprediction.encode(*encoder, misprediction);
}
void encodeReferencePredictionWrong(const bool misprediction) {
REFMisprediction.encode(*encoder, misprediction);
}
// Note the argument order: the correction model takes (actual, predicted).
void encodeLenCorrection(const unsigned predval, const unsigned actval) {
LENCorrection.encode(*encoder, actval, predval, 3, 258);
}
void encodeDistOnlyCorrection(const unsigned hops) {
DISTOnlyCorrection.encode(*encoder, hops, 0, 0, 32767);
}
void encodeDistAfterLenCorrection(const unsigned hops) {
DISTAfterLenCorrection.encode(*encoder, hops, 0, 0, 32767);
}
void encodeIrregularLen258(const bool irregular) {
IrregularLen258Encoding.encode(*encoder, irregular);
}
bool isEqualTo(const PreflateTokenPredictionModel& m) const;
private:
PreflateSubModel<2> LITMisprediction;
PreflateSubModel<2> REFMisprediction;
PreflateCorrectionSubModel<6, 6> LENCorrection;
// NEG == 0: these models have no negative-magnitude symbols.
PreflateCorrectionSubModel<0, 3> DISTAfterLenCorrection;
PreflateCorrectionSubModel<0, 3> DISTOnlyCorrection;
PreflateSubModel<2> IrregularLen258Encoding;
};
// Aggregates the three prediction models (block, tree code, token).
// All member functions are defined outside this header.
struct PreflatePredictionModel {
PreflatePredictionModel();
~PreflatePredictionModel();
// Initializes the models from gathered statistics.
void read(const PreflateStatisticsCounter& model, const PreflateModelCodec& cc);
void setEncoderStream(ArithmeticEncoder* codec);
void setDecoderStream(ArithmeticDecoder* codec);
void readFromStream(const PreflateModelCodec& cc);
void writeToStream(const PreflateModelCodec& cc);
bool isEqualTo(const PreflatePredictionModel& m) const;
protected:
// Blocks
PreflateBlockPredictionModel block;
// Tree codes
PreflateTreeCodePredictionModel treecode;
// Tokens
PreflateTokenPredictionModel token;
};
// Encoding front end: forwards each encode* call to the matching
// sub-model (block, treecode or token) of the inherited prediction model.
// start() and end() are defined outside this header; end() returns the
// produced bytes.
struct PreflatePredictionEncoder : public PreflatePredictionModel {
PreflatePredictionEncoder();
void start(const PreflatePredictionModel&, const PreflateParameters&, const unsigned modelId);
std::vector<uint8_t> end();
// Writes value as maxBits raw bits using this object's encoder pointer
// (dereferenced without a null check).
void encodeValue(const unsigned value, const unsigned maxBits) {
encoder->encodeBits(value, maxBits);
}
// Block
void encodeBlockType(const unsigned type) {
block.encodeBlockType(type);
}
void encodeEOBMisprediction(const bool misprediction) {
block.encodeEOBMisprediction(misprediction);
}
void encodeNonZeroPadding(const bool nonzeropadding) {
block.encodeNonZeroPadding(nonzeropadding);
}
// Tree codes
void encodeTreeCodeCountMisprediction(const bool misprediction) {
treecode.encodeTreeCodeCountMisprediction(misprediction);
}
void encodeLiteralCountMisprediction(const bool misprediction) {
treecode.encodeLiteralCountMisprediction(misprediction);
}
void encodeDistanceCountMisprediction(const bool misprediction) {
treecode.encodeDistanceCountMisprediction(misprediction);
}
void encodeTreeCodeBitLengthCorrection(const unsigned predval, const unsigned actval) {
treecode.encodeTreeCodeBitLengthCorrection(predval, actval);
}
void encodeLDTypeCorrection(const unsigned predval, const unsigned actval) {
treecode.encodeLDTypeCorrection(predval, actval);
}
void encodeRepeatCountCorrection(const unsigned predval, const unsigned actval, unsigned ldtype) {
treecode.encodeRepeatCountCorrection(predval, actval, ldtype);
}
void encodeLDBitLengthCorrection(const unsigned predval, const unsigned actval) {
treecode.encodeLDBitLengthCorrection(predval, actval);
}
// Token
void encodeLiteralPredictionWrong(const bool misprediction) {
token.encodeLiteralPredictionWrong(misprediction);
}
void encodeReferencePredictionWrong(const bool misprediction) {
token.encodeReferencePredictionWrong(misprediction);
}
void encodeLenCorrection(const unsigned predval, const unsigned actval) {
token.encodeLenCorrection(predval, actval);
}
void encodeDistOnlyCorrection(const unsigned hops) {
token.encodeDistOnlyCorrection(hops);
}
void encodeDistAfterLenCorrection(const unsigned hops) {
token.encodeDistAfterLenCorrection(hops);
}
void encodeIrregularLen258(const bool irregular) {
token.encodeIrregularLen258(irregular);
}
// Accessors for the stored parameters and model id.
const PreflateParameters& parameters() const {
return params;
}
unsigned modelId() const {
return modelid;
}
private:
PreflateParameters params;
unsigned modelid;
// Raw pointers; presumably created in start() and released in end() -
// lifetime is managed outside this header; confirm.
MemStream* storage;
BitOutputStream* bos;
ArithmeticEncoder* encoder;
};
// Decoding front end: forwards each decode* call to the matching
// sub-model (block, treecode or token) of the inherited prediction model.
// start() and end() are defined outside this header.
struct PreflatePredictionDecoder : public PreflatePredictionModel {
PreflatePredictionDecoder();
void start(const PreflatePredictionModel&, const PreflateParameters&,
const std::vector<uint8_t>&, size_t off0, size_t size);
void end();
// Reads maxBits raw bits using this object's decoder pointer
// (dereferenced without a null check).
unsigned decodeValue(const unsigned maxBits) {
return decoder->decodeBits(maxBits);
}
// Block
unsigned decodeBlockType() {
return block.decodeBlockType();
}
bool decodeEOBMisprediction() {
return block.decodeEOBMisprediction();
}
bool decodeNonZeroPadding() {
return block.decodeNonZeroPadding();
}
// Tree codes
bool decodeTreeCodeCountMisprediction() {
return treecode.decodeTreeCodeCountMisprediction();
}
bool decodeLiteralCountMisprediction() {
return treecode.decodeLiteralCountMisprediction();
}
bool decodeDistanceCountMisprediction() {
return treecode.decodeDistanceCountMisprediction();
}
int decodeTreeCodeBitLengthCorrection(unsigned predval) {
return treecode.decodeTreeCodeBitLengthCorrection(predval);
}
unsigned decodeLDTypeCorrection(unsigned predtype) {
return treecode.decodeLDTypeCorrection(predtype);
}
unsigned decodeRepeatCountCorrection(const unsigned predval, const unsigned ldtype) {
return treecode.decodeRepeatCountCorrection(predval, ldtype);
}
// Returns unsigned although the forwarded treecode call returns int.
unsigned decodeLDBitLengthCorrection(unsigned predval) {
return treecode.decodeLDBitLengthCorrection(predval);
}
// Token
bool decodeLiteralPredictionWrong() {
return token.decodeLiteralPredictionWrong();
}
bool decodeReferencePredictionWrong() {
return token.decodeReferencePredictionWrong();
}
int decodeLenCorrection(const unsigned predval) {
return token.decodeLenCorrection(predval);
}
unsigned decodeDistOnlyCorrection() {
return token.decodeDistOnlyCorrection();
}
unsigned decodeDistAfterLenCorrection() {
return token.decodeDistAfterLenCorrection();
}
bool decodeIrregularLen258() {
return token.decodeIrregularLen258();
}
private:
PreflateParameters params;
// Raw pointers; presumably created in start() and released in end() -
// lifetime is managed outside this header; confirm.
MemStream* storage;
BitInputStream* bis;
ArithmeticDecoder* decoder;
};
// Collects models and meta blocks during encoding and produces the final
// byte stream via finish(). All non-trivial members are defined outside
// this header.
struct PreflateMetaEncoder {
PreflateMetaEncoder();
~PreflateMetaEncoder();
// Returns the error flag.
bool error() const {
return inError;
}
// Registers a model; the returned value is presumably the id to pass to
// beginMetaBlockWithModel - confirm against the definition.
unsigned addModel(const PreflateStatisticsCounter&, const PreflateParameters&);
bool beginMetaBlockWithModel(PreflatePredictionEncoder&, const unsigned modelId);
bool endMetaBlock(PreflatePredictionEncoder&, const size_t uncompressed);
std::vector<unsigned char> finish();
private:
// Everything stored per registered model.
struct modelType {
unsigned writtenId;
PreflateStatisticsCounter counter;
PreflatePredictionModel model;
PreflateParameters params;
PreflateModelCodec mcodec;
};
// Bookkeeping for one meta block.
struct metaBlockInfo {
unsigned modelId;
size_t reconSize;
size_t uncompressedSize;
};
bool inError;
std::vector<modelType> modelList;
std::vector<metaBlockInfo> blockList;
std::vector<uint8_t> reconData;
};
// Reads models and meta block descriptions from reconstruction data and
// sets up a PreflatePredictionDecoder for each meta block.
struct PreflateMetaDecoder {
PreflateMetaDecoder(const std::vector<uint8_t>& reconData, const uint64_t uncompressedSize);
~PreflateMetaDecoder();
// Returns the error flag.
bool error() const {
return inError;
}
size_t metaBlockCount() const {
return blockList.size();
}
// The accessors below index blockList without a bounds check;
// metaBlockId must be < metaBlockCount().
uint64_t metaBlockUncompressedStartOfs(const size_t metaBlockId) const {
return blockList[metaBlockId].uncompressedStartOfs;
}
// NOTE(review): the stored size is uint64_t but is returned as size_t,
// which truncates where size_t is 32 bits wide.
size_t metaBlockUncompressedSize(const size_t metaBlockId) const {
return blockList[metaBlockId].uncompressedSize;
}
bool beginMetaBlock(PreflatePredictionDecoder&, PreflateParameters&, const size_t index);
bool endMetaBlock(PreflatePredictionDecoder&);
void finish();
private:
// Everything stored per model.
struct modelType {
PreflatePredictionModel model;
PreflateParameters params;
PreflateModelCodec mcodec;
};
// Bookkeeping for one meta block: the model to use, the slice of
// reconData belonging to it, and its position in the uncompressed data.
struct metaBlockInfo {
unsigned modelId;
size_t reconStartOfs;
size_t reconSize;
uint64_t uncompressedStartOfs;
uint64_t uncompressedSize;
};
bool inError;
// Held by reference, not copied: the vector bound here has to stay alive
// for as long as this object uses it.
const std::vector<uint8_t>& reconData;
const uint64_t uncompressedSize;
std::vector<modelType> modelList;
std::vector<metaBlockInfo> blockList;
};
// Free-function comparison of two prediction models.
bool isEqual(const PreflatePredictionModel&, const PreflatePredictionModel&);
#endif /* PREFLATE_STATISTICAL_CODEC_H */

View File

@@ -0,0 +1,187 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include <algorithm>
#include <string.h>
#include "preflate_statistical_codec.h"
#include "preflate_statistical_model.h"
#include "support/array_helper.h"
#include "support/bit_helper.h"
#include <stdint.h>
// Compares this sub-model with m.
// An empty model (N == 0), or an m whose last bound is zero, compares equal
// to anything. Otherwise all N + 1 bounds must match, and ids[i] must match
// for every slot i whose upper bound (bounds[i + 1]) is non-zero.
template <unsigned N>
bool PreflateSubModel<N>::isEqualTo(const PreflateSubModel<N>& m) const {
  if (N == 0 || m.bounds[N] == 0) {
    return true;
  }
  for (unsigned slot = 0; slot < N; ++slot) {
    const bool lowerBoundDiffers = bounds[slot] != m.bounds[slot];
    const bool idDiffers = bounds[slot + 1] > 0 && ids[slot] != m.ids[slot];
    if (lowerBoundDiffers || idDiffers) {
      return false;
    }
  }
  return bounds[N] == m.bounds[N];
}
// Two correction models are equal when their sign, positive and negative
// sub-models are all equal.
template <unsigned NEG, unsigned POS>
bool PreflateCorrectionSubModel<NEG, POS>::isEqualTo(const PreflateCorrectionSubModel<NEG, POS>& m) const {
  if (!sign.isEqualTo(m.sign)) {
    return false;
  }
  if (!pos.isEqualTo(m.pos)) {
    return false;
  }
  return neg.isEqualTo(m.neg);
}
// Compares the three block-level sub-models (block type, EOB
// misprediction, non-zero padding).
bool PreflateBlockPredictionModel::isEqualTo(const PreflateBlockPredictionModel& m) const {
  if (!blockType.isEqualTo(m.blockType)) {
    return false;
  }
  if (!EOBMisprediction.isEqualTo(m.EOBMisprediction)) {
    return false;
  }
  return nonZeroPadding.isEqualTo(m.nonZeroPadding);
}
// Compares the tree-code sub-models: the bit length correction, the three
// count misprediction flags, the four type misprediction flags, the
// replacement base and the repeat count / LD bit length corrections.
// DerivedLDTypeReplacement is not compared.
bool PreflateTreeCodePredictionModel::isEqualTo(const PreflateTreeCodePredictionModel& m) const {
  if (!TCBitlengthCorrection.isEqualTo(m.TCBitlengthCorrection)) {
    return false;
  }
  const bool countFlagsMatch = TCCountMisprediction.isEqualTo(m.TCCountMisprediction)
                               && LCountMisprediction.isEqualTo(m.LCountMisprediction)
                               && DCountMisprediction.isEqualTo(m.DCountMisprediction);
  if (!countFlagsMatch) {
    return false;
  }
  for (unsigned type = 0; type < 4; ++type) {
    if (!LDTypeMisprediction[type].isEqualTo(m.LDTypeMisprediction[type])) {
      return false;
    }
  }
  if (!LDTypeReplacementBase.isEqualTo(m.LDTypeReplacementBase)) {
    return false;
  }
  if (!LDRepeatCountCorrection.isEqualTo(m.LDRepeatCountCorrection)) {
    return false;
  }
  return LDBitlengthCorrection.isEqualTo(m.LDBitlengthCorrection);
}
// Compares all six token-level sub-models.
bool PreflateTokenPredictionModel::isEqualTo(const PreflateTokenPredictionModel& m) const {
  const bool flagsMatch = LITMisprediction.isEqualTo(m.LITMisprediction)
                          && REFMisprediction.isEqualTo(m.REFMisprediction);
  if (!flagsMatch) {
    return false;
  }
  const bool correctionsMatch = LENCorrection.isEqualTo(m.LENCorrection)
                                && DISTAfterLenCorrection.isEqualTo(m.DISTAfterLenCorrection)
                                && DISTOnlyCorrection.isEqualTo(m.DISTOnlyCorrection);
  if (!correctionsMatch) {
    return false;
  }
  return IrregularLen258Encoding.isEqualTo(m.IrregularLen258Encoding);
}
// Two prediction models are equal when their block, tree-code and token
// models are all equal.
bool PreflatePredictionModel::isEqualTo(const PreflatePredictionModel& m) const {
  if (!block.isEqualTo(m.block)) {
    return false;
  }
  if (!treecode.isEqualTo(m.treecode)) {
    return false;
  }
  return token.isEqualTo(m.token);
}
// Free-function form of PreflatePredictionModel::isEqualTo.
bool isEqual(const PreflatePredictionModel& m1, const PreflatePredictionModel& m2) {
return m1.isEqualTo(m2);
}
// ----------------------------
// Prints the share of "yes" events of a two-bucket flag counter to stdout.
//   txt  - label printed in front of the percentage
//   flag - flag[0] counts "no" events, flag[1] counts "yes" events
// Nothing is printed when flag[1] is zero, which also guarantees a non-zero
// divisor. The total is unsigned and is therefore printed with %u; the
// previous %d showed totals above INT_MAX as negative numbers.
void printFlagStatistics(const char *txt, unsigned(&flag)[2]) {
  if (flag[1]) {
    const unsigned total = flag[0] + flag[1];
    printf("%s %g%% (%u)", txt, flag[1] * 100.0 / total, total);
  }
}
// Prints the distribution of a correction histogram to stdout.
//   txt    - label printed once, before the first non-empty bucket
//   data   - bucket counters; data[offset] is the "no correction" bucket
//   size   - number of buckets in data
//   sum    - total of all buckets (used as divisor and printed at the end)
//   offset - index of the zero-difference bucket; must be < size
// Nothing is printed when every event fell into the zero-difference bucket
// (this also covers sum == 0). The first and last bucket are shown as
// "-x" / "+x" unless they are the zero-difference bucket itself; all other
// buckets are shown with their signed distance from offset.
// Unsigned values are printed with %u. The distance is computed directly on
// unsigned operands instead of going through an unsigned wrap-around, a cast
// to int and labs().
void printCorrectionStatistics(const char *txt,
                               unsigned data[], unsigned size, unsigned sum, unsigned offset) {
  if (data[offset] == sum) {
    return;
  }
  bool on = false;
  for (unsigned i = 0; i < size; ++i) {
    if (!data[i]) {
      continue;
    }
    if (!on) {
      printf("%s:", txt);
      on = true;
    }
    const double percent = data[i] * 100.0 / sum;
    const bool isOpenEndedBucket = i != offset && (i == 0 || i + 1 == size);
    if (isOpenEndedBucket) {
      printf(" %sx %g%%", i == 0 ? "-" : "+", percent);
    } else {
      const char* signText = i == offset ? "" : (i < offset ? "-" : "+");
      const unsigned distance = i < offset ? offset - i : i - offset;
      printf(" %s%u %g%%", signText, distance, percent);
    }
  }
  if (on) {
    printf(" (%u)", sum);
  }
}
// Convenience overload for fixed-size arrays: deduces the bucket count N
// from the array type and forwards to the pointer/size version.
// offset is converted from int to unsigned by that call.
template <unsigned N>
void printCorrectionStatistics(const char *txt, unsigned (&data)[N], unsigned sum, int offset) {
printCorrectionStatistics(txt, data, N, sum, offset);
}
// ----------------------------
// Prints the block type distribution and the two block-level flags.
// The dynamic-huffman share is omitted when it is the only type seen.
// A percentage is only computed for a non-zero bucket, so the total is
// non-zero whenever it is used as divisor.
void PreflateStatisticsCounter::BlockPrediction::print() {
  const unsigned total = sumArray(blockType);
  const unsigned storedCount = blockType[0];
  const unsigned dynamicCount = blockType[1];
  const unsigned staticCount = blockType[2];

  if (storedCount != 0) {
    printf(" ->STORE %g%%", storedCount * 100.0 / total);
  }
  if (dynamicCount != 0 && dynamicCount != total) {
    printf(" ->DYNHUF %g%%", dynamicCount * 100.0 / total);
  }
  if (staticCount != 0) {
    printf(" ->STATHUF %g%%", staticCount * 100.0 / total);
  }

  printFlagStatistics(", EOB MP", EOBMisprediction);
  printFlagStatistics(", PAD!=0", nonZeroPadding);
}
// Prints the tree-code statistics: count misprediction flags, per-type
// misprediction flags, the replacement type distribution and the three
// correction histograms.
void PreflateStatisticsCounter::TreeCodePrediction::print() {
  printFlagStatistics(", !CT SZ MP", TCCountMisprediction);
  printFlagStatistics(", !L SZ MP", LCountMisprediction);
  printFlagStatistics(", !D SZ MP", DCountMisprediction);

  // One flag counter per code type, printed in index order.
  static const char* const typeLabels[4] = {
    ", !T B MP", ", !T R MP", ", !T 0s MP", ", !T 0l MP"
  };
  for (unsigned type = 0; type < 4; ++type) {
    printFlagStatistics(typeLabels[type], LDTypeMisprediction[type]);
  }

  // A share is only computed for a non-zero bucket, so the divisor is
  // non-zero whenever it is used.
  const unsigned replacementTotal = sumArray(LDTypeReplacement);
  if (LDTypeReplacement[0] != 0) {
    printf(" ->T B %g", LDTypeReplacement[0] * 100.0 / replacementTotal);
  }
  if (LDTypeReplacement[1] != 0) {
    printf(" ->T R %g", LDTypeReplacement[1] * 100.0 / replacementTotal);
  }
  if (LDTypeReplacement[2] != 0) {
    printf(" ->T 0s %g", LDTypeReplacement[2] * 100.0 / replacementTotal);
  }
  if (LDTypeReplacement[3] != 0) {
    printf(" ->T 0l %g", LDTypeReplacement[3] * 100.0 / replacementTotal);
  }

  // The last argument is the index of the zero-difference bucket.
  const unsigned tcBitlengthTotal = sumArray(TCBitlengthCorrection);
  printCorrectionStatistics(", C BL", TCBitlengthCorrection, tcBitlengthTotal, 3);
  const unsigned repeatTotal = sumArray(LDRepeatCountCorrection);
  printCorrectionStatistics(" LD RP", LDRepeatCountCorrection, repeatTotal, 1);
  const unsigned ldBitlengthTotal = sumArray(LDBitlengthCorrection);
  printCorrectionStatistics(", LD BL", LDBitlengthCorrection, ldBitlengthTotal, 4);
}
// Prints the token statistics: literal / reference misprediction flags,
// the length and distance correction histograms and the irregular-258 flag.
void PreflateStatisticsCounter::TokenPrediction::print() {
  printFlagStatistics(", !LIT MP", LITMisprediction);
  printFlagStatistics(", !REF MP", REFMisprediction);

  // The last argument is the index of the zero-difference bucket.
  const unsigned lengthTotal = sumArray(LENCorrection);
  printCorrectionStatistics(" L", LENCorrection, lengthTotal, 6);
  const unsigned distAfterLenTotal = sumArray(DISTAfterLenCorrection);
  printCorrectionStatistics(" L->D+", DISTAfterLenCorrection, distAfterLenTotal, 0);
  const unsigned distOnlyTotal = sumArray(DISTOnlyCorrection);
  printCorrectionStatistics(" ->D+", DISTOnlyCorrection, distOnlyTotal, 0);

  printFlagStatistics(", !L258 MP", LEN258IrregularEncoding);
}
// Prints block, tree-code and token statistics (in that order) on one
// line, terminated by a newline.
void PreflateStatisticsCounter::print() {
block.print();
treecode.print();
token.print();
printf("\n");
}

View File

@@ -0,0 +1,53 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include "preflate_statistical_model.h"
#include "preflate_token.h"
#include "support/array_helper.h"
#include <stdio.h>
// Counts how many of the three block-level histograms have all their
// events in the default bucket: dynamic huffman for the block type, "no"
// (index 0) for both flags. An empty histogram counts as default.
unsigned PreflateStatisticsCounter::BlockPrediction::checkDefaultModels() const {
  const bool onlyDynamicHuff =
      sumArray(blockType) == blockType[PreflateTokenBlock::DYNAMIC_HUFF];
  const bool neverEobMispredicted = sumArray(EOBMisprediction) == EOBMisprediction[0];
  const bool neverNonZeroPadded = sumArray(nonZeroPadding) == nonZeroPadding[0];

  unsigned defaults = 0;
  if (onlyDynamicHuff) {
    ++defaults;
  }
  if (neverEobMispredicted) {
    ++defaults;
  }
  if (neverNonZeroPadded) {
    ++defaults;
  }
  return defaults;
}
// Counts how many of the tree-code histograms have all their events in
// the default bucket: "no" (index 0) for flags, the zero-difference bucket
// for corrections. The replacement histogram is default only when empty.
unsigned PreflateStatisticsCounter::TreeCodePrediction::checkDefaultModels() const {
  unsigned defaults = 0;
  if (sumArray(TCCountMisprediction) == TCCountMisprediction[0]) {
    ++defaults;
  }
  if (sumArray(TCBitlengthCorrection) == TCBitlengthCorrection[3]) {
    ++defaults;
  }
  if (sumArray(LCountMisprediction) == LCountMisprediction[0]) {
    ++defaults;
  }
  if (sumArray(DCountMisprediction) == DCountMisprediction[0]) {
    ++defaults;
  }
  for (unsigned type = 0; type < 4; ++type) {
    if (sumArray(LDTypeMisprediction[type]) == LDTypeMisprediction[type][0]) {
      ++defaults;
    }
  }
  if (sumArray(LDTypeReplacement) == 0) {
    ++defaults;
  }
  if (sumArray(LDRepeatCountCorrection) == LDRepeatCountCorrection[1]) {
    ++defaults;
  }
  if (sumArray(LDBitlengthCorrection) == LDBitlengthCorrection[4]) {
    ++defaults;
  }
  return defaults;
}
// Counts how many of the six token histograms have all their events in
// the default bucket: "no" (index 0) for flags, index 6 for the length
// correction, index 0 for both distance corrections.
unsigned PreflateStatisticsCounter::TokenPrediction::checkDefaultModels() const {
  const bool isDefault[6] = {
    sumArray(LITMisprediction) == LITMisprediction[0],
    sumArray(REFMisprediction) == REFMisprediction[0],
    sumArray(LENCorrection) == LENCorrection[6],
    sumArray(DISTAfterLenCorrection) == DISTAfterLenCorrection[0],
    sumArray(DISTOnlyCorrection) == DISTOnlyCorrection[0],
    sumArray(LEN258IrregularEncoding) == LEN258IrregularEncoding[0],
  };
  unsigned defaults = 0;
  for (unsigned i = 0; i < 6; ++i) {
    if (isDefault[i]) {
      ++defaults;
    }
  }
  return defaults;
}

View File

@@ -0,0 +1,143 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef PREFLATE_STATISTICS_COUNTER_H
#define PREFLATE_STATISTICS_COUNTER_H
#include <algorithm>
// Histograms of prediction outcomes, grouped into block, tree-code and
// token statistics. Each inc* call bumps exactly one bucket.
//
// Every counter starts at zero (default member initializers), so a freshly
// constructed object is usable without an external memset.
// Differences are clamped with a private helper instead of unqualified
// min / max, which only compile where the Windows min/max macros are defined.
struct PreflateStatisticsCounter {
private:
  // Clamps value into [lo, hi] and returns its zero-based bucket index
  // (value - lo after clamping).
  static unsigned clampedIndex(const int value, const int lo, const int hi) {
    const int clamped = value < lo ? lo : (value > hi ? hi : value);
    return static_cast<unsigned>(clamped - lo);
  }

public:
  struct BlockPrediction {
  public:
    // bt indexes blockType directly and must be 0, 1 or 2.
    void incBlockType(const unsigned bt) {
      blockType[bt]++;
    }
    void incEOBPredictionWrong(const bool mispredicted) {
      EOBMisprediction[mispredicted]++;
    }
    void incNonZeroPadding(const bool nonzeropadding) {
      nonZeroPadding[nonzeropadding]++;
    }
    // Number of histograms in this group.
    static unsigned totalModels() {
      return 3;
    }
    unsigned checkDefaultModels() const;
    void print();

  private:
    unsigned blockType[3] = {}; // stored, dynamic huff, static huff
    unsigned EOBMisprediction[2] = {}; // no, yes
    unsigned nonZeroPadding[2] = {}; // no, yes
    friend struct PreflateBlockPredictionModel;
  };

  struct TreeCodePrediction {
  public:
    void incTreeCodeCountPredictionWrong(const bool mispredicted) {
      TCCountMisprediction[mispredicted]++;
    }
    // Differences beyond +/-3 are counted in the outermost buckets.
    void incTreeCodeLengthDiffToPrediction(const int len_diff) {
      TCBitlengthCorrection[clampedIndex(len_diff, -3, 3)]++;
    }
    void incLiteralCountPredictionWrong(const bool mispredicted) {
      LCountMisprediction[mispredicted]++;
    }
    void incDistanceCountPredictionWrong(const bool mispredicted) {
      DCountMisprediction[mispredicted]++;
    }
    // codetype indexes a 4-element array directly and must be < 4.
    void incLDCodeTypePredictionWrong(const unsigned codetype, const bool mispredicted) {
      LDTypeMisprediction[codetype][mispredicted]++;
    }
    // replacement_codetype indexes a 4-element array directly and must be < 4.
    void incLDCodeTypeReplacement(const unsigned replacement_codetype) {
      LDTypeReplacement[replacement_codetype]++;
    }
    // Differences beyond +/-1 are counted in the outermost buckets.
    void incLDCodeRepeatDiffToPrediction(const int len_diff) {
      LDRepeatCountCorrection[clampedIndex(len_diff, -1, 1)]++;
    }
    // Differences beyond +/-4 are counted in the outermost buckets.
    void incLDCodeLengthDiffToPrediction(const int len_diff) {
      LDBitlengthCorrection[clampedIndex(len_diff, -4, 4)]++;
    }
    // Number of histograms in this group.
    static unsigned totalModels() {
      return 11;
    }
    unsigned checkDefaultModels() const;
    void print();

  private:
    unsigned TCCountMisprediction[2] = {}; // no, yes
    unsigned TCBitlengthCorrection[7] = {}; // -x, -2, -1, 0, +1, +2, +x
    unsigned LCountMisprediction[2] = {}; // no, yes
    unsigned DCountMisprediction[2] = {}; // no, yes
    unsigned LDTypeMisprediction[4][2] = {}; // types: BL,REP,REPZS,REPZL; no, yes
    unsigned LDTypeReplacement[4] = {}; // replacement type: BL,REP,REPZS,REPZL
    unsigned LDRepeatCountCorrection[3] = {}; // -x, 0, +x
    unsigned LDBitlengthCorrection[9] = {}; // -x, -3, -2, -1, 0, +1, +2, +3, +x
    friend struct PreflateTreeCodePredictionModel;
  };

  struct TokenPrediction {
  public:
    void incLiteralPredictionWrong(const bool mispredicted) {
      LITMisprediction[mispredicted]++;
    }
    void incReferencePredictionWrong(const bool mispredicted) {
      REFMisprediction[mispredicted]++;
    }
    // Differences beyond +/-6 are counted in the outermost buckets.
    void incLengthDiffToPrediction(const int len_diff) {
      LENCorrection[clampedIndex(len_diff, -6, 6)]++;
    }
    void incIrregularLength258Encoding(const bool irregular) {
      LEN258IrregularEncoding[irregular]++;
    }
    // Distance differences are counted in buckets 0..3; anything above 3
    // goes into the last bucket. Negative input is counted in bucket 0
    // instead of indexing before the array.
    void incDistanceDiffToPredictionAfterIncorrectLengthPrediction(const int len_diff) {
      DISTAfterLenCorrection[clampedIndex(len_diff, 0, 3)]++;
    }
    void incDistanceDiffToPredictionAfterCorrectLengthPrediction(const int len_diff) {
      DISTOnlyCorrection[clampedIndex(len_diff, 0, 3)]++;
    }
    // Number of histograms in this group.
    static unsigned totalModels() {
      return 6;
    }
    unsigned checkDefaultModels() const;
    void print();

  private:
    unsigned LITMisprediction[2] = {}; // no, yes
    unsigned REFMisprediction[2] = {}; // no, yes
    unsigned LENCorrection[13] = {}; // -x, -5, -4, -3, -2, -1, 0, +1, +2, +3, +4, +5, +x (bytes)
    unsigned LEN258IrregularEncoding[2] = {}; // no, yes
    unsigned DISTAfterLenCorrection[4] = {}; // +0, +1, +2, +x (hops)
    unsigned DISTOnlyCorrection[4] = {}; // +0, +1, +2, +x (hops)
    friend struct PreflateTokenPredictionModel;
  };

public:
  PreflateStatisticsCounter() {}

  BlockPrediction block;
  TreeCodePrediction treecode;
  TokenPrediction token;

  void print();
};
#endif /* PREFLATE_STATISTICS_COUNTER_H */

View File

@@ -0,0 +1,44 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include "preflate_token.h"
// Compares two token blocks.
// The block types must match. Stored blocks are then considered equal
// without looking further; uncompressedLen is not compared. For huffman
// blocks the tokens are compared by len and dist only, and dynamic huffman
// blocks additionally need matching ncode / nlen / ndist and tree codes.
bool isEqual(const PreflateTokenBlock& b1, const PreflateTokenBlock& b2) {
  if (b1.type != b2.type) {
    return false;
  }
  if (b1.type == PreflateTokenBlock::STORED) {
    return true;
  }

  if (b1.type == PreflateTokenBlock::DYNAMIC_HUFF) {
    const bool sameCounts = b1.ncode == b2.ncode
                            && b1.nlen == b2.nlen
                            && b1.ndist == b2.ndist;
    if (!sameCounts) {
      return false;
    }
    if (b1.treecodes != b2.treecodes) {
      return false;
    }
  }

  if (b1.tokens.size() != b2.tokens.size()) {
    return false;
  }
  const unsigned tokenCount = b1.tokens.size();
  unsigned idx = 0;
  while (idx < tokenCount) {
    const bool sameLen = b1.tokens[idx].len == b2.tokens[idx].len;
    const bool sameDist = b1.tokens[idx].dist == b2.tokens[idx].dist;
    if (!sameLen || !sameDist) {
      return false;
    }
    ++idx;
  }
  return true;
}

View File

@@ -0,0 +1,87 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef PREFLATE_TOKEN_H
#define PREFLATE_TOKEN_H
#include <stdint.h>
#include <vector>
/* A single token of a deflate stream.
 * len is 0 for "no token", 1 for a literal, and the match length (>= 3)
 * for a back-reference; dist is the match distance (0 for non-references). */
struct PreflateToken {
  /* Tag enums: each one only selects a constructor overload. */
  enum typeLit { LITERAL };
  enum typeRef { REFERENCE };
  enum typeNon { NONE };

  unsigned short len : 9;
  unsigned short irregular258 : 1;
  unsigned short dist;

  /* Empty placeholder token. */
  PreflateToken(typeNon)
    : len(0), irregular258(0), dist(0) {}

  /* Literal token. */
  PreflateToken(typeLit)
    : len(1), irregular258(0), dist(0) {}

  /* Back-reference of length l at distance d; irregular258_ flags the
   * alternative encoding of length 258. */
  PreflateToken(typeRef, unsigned short l, unsigned short d, bool irregular258_ = false)
    : len(l), irregular258(irregular258_), dist(d) {}
};
/* One deflate block in token form.
 * Every scalar member is zero-initialised by all constructors so that code
 * reading a field the producer did not set (e.g. isEqual() comparing
 * nlen/ndist/ncode) never observes an indeterminate value. */
struct PreflateTokenBlock {
  enum Type {
    STORED, DYNAMIC_HUFF, STATIC_HUFF
  };
  /* Tag selecting the stored-block constructor. */
  enum StoredBlockType {
    STORED_X
  };
  /* Tag selecting the Huffman-block constructor. */
  enum HuffBlockType {
    DYNAMIC_HUFF_X, STATIC_HUFF_X
  };

  Type type;
  uint64_t uncompressedStartPos;
  uint64_t uncompressedLen;
  int32_t contextLen; // prefix size required to handle all references
  unsigned short nlen, ndist, ncode;
  uint8_t paddingBitCount, paddingBits;
  std::vector<unsigned char> treecodes;
  std::vector<PreflateToken> tokens;

  /* Default: an empty STORED block. */
  PreflateTokenBlock()
    : type(STORED)
    , uncompressedStartPos(0)
    , uncompressedLen(0)
    , contextLen(0)
    , nlen(0), ndist(0), ncode(0)
    , paddingBitCount(0), paddingBits(0) {}

  /* STORED block covering len_ uncompressed bytes. */
  PreflateTokenBlock(StoredBlockType, int len_)
    : type(STORED)
    , uncompressedStartPos(0)
    , uncompressedLen(len_)
    , contextLen(0)
    , nlen(0), ndist(0), ncode(0)
    , paddingBitCount(0), paddingBits(0) {}

  /* Huffman block; t chooses between dynamic and static trees. */
  PreflateTokenBlock(HuffBlockType t)
    : type(t == DYNAMIC_HUFF_X ? DYNAMIC_HUFF : STATIC_HUFF)
    , uncompressedStartPos(0)
    , uncompressedLen(0)
    , contextLen(0)
    , nlen(0), ndist(0), ncode(0)
    , paddingBitCount(0), paddingBits(0) {}

  /* Records the three tree-size fields of a dynamic Huffman header. */
  void setHuffLengths(int nlen_, int ndist_, int ncode_) {
    nlen = nlen_;
    ndist = ndist_;
    ncode = ncode_;
  }
  /* Appends one entry to the tree code sequence. */
  void addTreeCode(int code) {
    treecodes.push_back(code);
  }
  /* Appends one token to the block. */
  void addToken(const PreflateToken& token) {
    tokens.push_back(token);
  }
};
/* Returns true when both blocks have the same type and, for Huffman blocks,
 * the same token (len, dist) sequence; dynamic blocks must also agree on
 * their tree sizes and tree codes. */
bool isEqual(const PreflateTokenBlock&, const PreflateTokenBlock&);
#endif /* PREFLATE_TOKEN_H */

View File

@@ -0,0 +1,510 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include <algorithm>
#include "preflate_constants.h"
#include "preflate_statistical_model.h"
#include "preflate_token_predictor.h"
#include "support/bit_helper.h"
/* Builds a predictor over the uncompressed data `dump`, configured from
 * `params_`, and advances the hash and sequence chains by `offset` bytes.
 *
 * NOTE(review): `state` is declared (and therefore constructed) before `hash`
 * and `seq`, yet receives both as arguments; presumably it only stores
 * references to them — confirm in PreflatePredictorState.
 *
 * currentTokenCount is now initialised to 0: predictEOB() reads it and is
 * publicly callable before analyzeBlock()/decodeBlock() reset it. */
PreflateTokenPredictor::PreflateTokenPredictor(
    const PreflateParameters& params_,
    const std::vector<unsigned char>& dump,
    const size_t offset)
  : state(hash, seq, params_.config(), params_.windowBits, params_.memLevel)
  , hash(dump, params_.memLevel)
  , seq(dump)
  , params(params_)
  , predictionFailure(false)
  , fast(params_.isFastCompressor())
  , prevLen(0)
  , pendingToken(PreflateToken::NONE)
  , currentTokenCount(0)
  , emptyBlockAtEnd(false) {
  // Prime the running hash with the first two input bytes when available.
  if (state.availableInputSize() >= 2) {
    hash.updateRunningHash(state.inputCursor()[0]);
    hash.updateRunningHash(state.inputCursor()[1]);
    seq.updateSeq(2);
  }
  // Advance both chains by `offset` bytes.
  hash.updateHash(offset);
  seq.updateSeq(offset);
}
/* Predicts an end-of-block: true when the input is exhausted or the current
 * block already holds state.maxTokenCount tokens. */
bool PreflateTokenPredictor::predictEOB() {
  if (state.availableInputSize() == 0) {
    return true;
  }
  return currentTokenCount == state.maxTokenCount;
}
/* Advances the hash and sequence chains past token t.
 * In fast mode, tokens longer than state.lazyMatchLength() go through
 * hash.skipHash(); every other token goes through hash.updateHash(). */
void PreflateTokenPredictor::commitToken(const PreflateToken& t) {
  const bool useSkip = fast && t.len > state.lazyMatchLength();
  if (useSkip) {
    hash.skipHash(t.len);
  } else {
    hash.updateHash(t.len);
  }
  seq.updateSeq(t.len);
}
/* Distance threshold compared against match.dist for length-3 matches in
 * predictToken(). */
# define TOO_FAR 4096
/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
/* Predicts the token at the current input position.
 * Yields a LITERAL when the input position is 0, when fewer than MIN_MATCH
 * bytes remain, when no match of at least MIN_MATCH is found, when a length-3
 * match is farther away than TOO_FAR, or (non-fast mode) when the match at
 * the next position is longer than the current one. Otherwise yields the
 * match that was found.
 * Side effects: past the first early-out, prevLen and pendingToken are always
 * cleared; they are set again only in the lazy branch when
 * params.zlibCompatible is true. */
PreflateToken PreflateTokenPredictor::predictToken() {
if (state.currentInputPos() == 0 || state.availableInputSize() < PreflateConstants::MIN_MATCH) {
return PreflateToken(PreflateToken::LITERAL);
}
PreflateToken match(PreflateToken::NONE);
// NOTE: this local shadows the member `hash`; the member is reached via
// this->hash further down.
unsigned hash = state.calculateHash();
if (pendingToken.len > 1) {
// A match deferred by the previous call is reused instead of searching again.
match = pendingToken;
} else {
unsigned head = state.getCurrentHashHead(hash);
// Non-fast mode uses the sequence-chain search when seq is valid at this
// position; otherwise the hash-chain search is used.
if (!fast && seq.valid(state.currentInputPos())) {
match = state.seqMatch(state.currentInputPos(), head, prevLen,
params.veryFarMatchesDetected,
params.matchesToStartDetected,
params.zlibCompatible ? 0 : (1 << params.log2OfMaxChainDepthM1));
} else {
match = state.match(head, prevLen, 0,
params.veryFarMatchesDetected,
params.matchesToStartDetected,
params.zlibCompatible ? 0 : (1 << params.log2OfMaxChainDepthM1));
}
}
// The deferred state has been consumed; reset it for the next call.
prevLen = 0;
pendingToken = PreflateToken(PreflateToken::NONE);
if (match.len < PreflateConstants::MIN_MATCH) {
return PreflateToken(PreflateToken::LITERAL);
}
// Fast mode performs no lazy evaluation: the first match is final.
if (fast) {
return match;
}
if (match.len == 3 && match.dist > TOO_FAR) {
return PreflateToken(PreflateToken::LITERAL);
}
// Lazy evaluation: look for a longer match starting one byte later.
if (match.len < state.lazyMatchLength() && state.availableInputSize() >= (unsigned)match.len + 2) {
PreflateToken matchNext(PreflateToken::NONE);
unsigned hashNext = state.calculateHashNext();
unsigned headNext = state.getCurrentHashHead(hashNext);
// `fast` is known to be false here (fast mode returned above).
if (!fast && seq.valid(state.currentInputPos() + 1)) {
matchNext = state.seqMatch(state.currentInputPos() + 1, headNext, match.len,
params.veryFarMatchesDetected,
params.matchesToStartDetected,
params.zlibCompatible ? 0 : (2 << params.log2OfMaxChainDepthM1));
} else {
matchNext = state.match(headNext, match.len, 1,
params.veryFarMatchesDetected,
params.matchesToStartDetected,
params.zlibCompatible ? 0 : (2 << params.log2OfMaxChainDepthM1));
// When the masked hashes of the current and the next position coincide,
// count how many bytes following the cursor repeat the byte at the cursor
// and, if that run beats both matches, use it as a distance-1 match.
if (((hashNext ^ hash) & this->hash.hashMask) == 0) {
unsigned maxSize = min(state.availableInputSize() - 1, (unsigned)PreflateConstants::MAX_MATCH);
unsigned rle = 0;
const unsigned char *c = state.inputCursor();
unsigned char b = c[0];
while (rle < maxSize && c[1 + rle] == b) {
++rle;
}
if (rle > match.len && rle >= matchNext.len) {
matchNext.len = rle;
matchNext.dist = 1;
}
}
}
if (matchNext.len > match.len) {
// The later match wins: emit a literal now.
prevLen = match.len;
pendingToken = matchNext;
// Only in zlib-compatible mode is the later match remembered for the next
// call; otherwise the deferred state is discarded again.
if (!params.zlibCompatible) {
prevLen = 0;
pendingToken = PreflateToken(PreflateToken::NONE);
}
return PreflateToken(PreflateToken::LITERAL);
}
}
return match;
}
/* Searches for a reference at the current position after a literal was
 * predicted although a reference is required.
 * On success stores the match in `token` and returns true. Returns false when
 * the position is 0, fewer than MIN_MATCH bytes remain, or no match of at
 * least MIN_MATCH is found. Past the first check, prevLen and pendingToken
 * are cleared in every case. */
bool PreflateTokenPredictor::repredictReference(PreflateToken& token) {
  const bool atStart = state.currentInputPos() == 0;
  if (atStart || state.availableInputSize() < PreflateConstants::MIN_MATCH) {
    return false;
  }

  unsigned curHash = state.calculateHash();
  unsigned chainHead = state.getCurrentHashHead(curHash);
  PreflateToken found = state.match(chainHead, /*prevLen*/0, 0,
                                    params.veryFarMatchesDetected,
                                    params.matchesToStartDetected,
                                    (2 << params.log2OfMaxChainDepthM1));

  // Any deferred lazy-match state is dropped.
  prevLen = 0;
  pendingToken = PreflateToken(PreflateToken::NONE);

  if (found.len < PreflateConstants::MIN_MATCH) {
    return false;
  }
  token = found;
  return true;
}
/* Returns state.rematchInfo() for `token`, evaluated from the hash chain
 * head of the current position. */
PreflateRematchInfo PreflateTokenPredictor::repredictMatch(const PreflateToken& token) {
  unsigned curHash = state.calculateHash();
  unsigned chainHead = state.getCurrentHashHead(curHash);
  return state.rematchInfo(chainHead, token);
}
/* Forwards to state.hopMatch(token, hops) and returns its result as the
 * recalculated distance. */
unsigned PreflateTokenPredictor::recalculateDistance(const PreflateToken& token, const unsigned hops) {
  const unsigned distance = state.hopMatch(token, hops);
  return distance;
}
/* Compares the predictor's output against the real tokens of `block` and
 * records the differences in analysisResults[blockno].
 * Blocks must be analysed in order: the call is ignored unless blockno equals
 * the number of results collected so far, and after any prediction failure.
 * Per-token flags written to tokenInfo:
 *   bits 0-1: 0 = literal predicted correctly, 2 = reference predicted but
 *             literal found, 1 = reference predicted and found,
 *             3 = literal predicted but reference found
 *   +4:  length mispredicted (correctives: predicted len, len diff, hops - 1)
 *   +8:  only the distance mispredicted (correctives: hops - 1)
 *   +16: target length is 258; +32: its irregular258 flag is set
 * Sets predictionFailure and returns early when a required reference cannot
 * be found or the rematch depth is >= 0xffff. */
void PreflateTokenPredictor::analyzeBlock(
const unsigned blockno,
const PreflateTokenBlock& block) {
// Per-block prediction state is reset even when the block is then ignored.
currentTokenCount = 0;
prevLen = 0;
pendingToken = PreflateToken(PreflateToken::NONE);
if (blockno != analysisResults.size() || predictionFailure) {
return;
}
analysisResults.push_back(BlockAnalysisResult());
BlockAnalysisResult& analysis = analysisResults[blockno];
analysis.type = block.type;
analysis.tokenCount = block.tokens.size();
analysis.tokenInfo.resize(analysis.tokenCount);
analysis.blockSizePredicted = true;
analysis.inputEOF = false;
if (analysis.type == PreflateTokenBlock::STORED) {
// Stored blocks carry no tokens: tokenCount holds the byte length instead,
// and the chains are simply advanced over the raw bytes.
analysis.tokenCount = block.uncompressedLen;
hash.updateHash(block.uncompressedLen);
seq.updateSeq(block.uncompressedLen);
analysis.inputEOF = state.availableInputSize() == 0;
analysis.paddingBits = block.paddingBits;
analysis.paddingCounts = block.paddingBitCount;
return;
}
for (unsigned i = 0, n = block.tokens.size(); i < n; ++i) {
PreflateToken targetToken = block.tokens[i];
//if (blockno == 0 && i == 0x6dd) {
// puts("hi");
//}
// An end-of-block predicted while real tokens remain means the block size
// was mispredicted.
if (predictEOB()) {
analysis.blockSizePredicted = false;
}
PreflateToken predictedToken = predictToken();
#ifdef _DEBUG
// printf("B%dT%d: TGT(%d,%d) -> PRD(%d,%d)\n", blockno, i, targetToken.len, targetToken.dist, predictedToken.len, predictedToken.dist);
#endif
if (targetToken.len == 1) {
if (predictedToken.len > 1) {
analysis.tokenInfo[currentTokenCount] = 2; // badly predicted LIT
} else {
analysis.tokenInfo[currentTokenCount] = 0; // perfectly predicted LIT
}
} else {
if (predictedToken.len == 1) {
analysis.tokenInfo[currentTokenCount] = 3; // badly predicted REF
// A reference is required: search again, replacing predictedToken.
if (!repredictReference(predictedToken)) {
predictionFailure = true;
return;
}
} else {
analysis.tokenInfo[currentTokenCount] = 1; // well predicted REF
}
PreflateRematchInfo rematch;
if (predictedToken.len != targetToken.len) {
analysis.tokenInfo[currentTokenCount] += 4; // bad LEN prediction, adds two corrective actions
analysis.correctives.push_back(predictedToken.len);
analysis.correctives.push_back(targetToken.len - predictedToken.len);
rematch = repredictMatch(targetToken);
if (rematch.requestedMatchDepth >= 0xffff) {
predictionFailure = true;
return;
}
analysis.correctives.push_back(rematch.condensedHops - 1);
} else {
if (targetToken.dist != predictedToken.dist) {
analysis.tokenInfo[currentTokenCount] += 8; // bad DIST ONLY prediction, adds one corrective action
rematch = repredictMatch(targetToken);
if (rematch.requestedMatchDepth >= 0xffff) {
predictionFailure = true;
return;
}
analysis.correctives.push_back(rematch.condensedHops - 1);
}
}
}
if (targetToken.len == 258) {
analysis.tokenInfo[currentTokenCount] += 16;
if (targetToken.irregular258) {
analysis.tokenInfo[currentTokenCount] += 32;
}
}
// The real token (not the prediction) drives the predictor state forward.
commitToken(targetToken);
++currentTokenCount;
}
// The block ended although no end-of-block is predicted here.
if (!predictEOB()) {
analysis.blockSizePredicted = false;
}
analysis.inputEOF = state.availableInputSize() == 0;
}
/* Writes the analysis result of block `blockno` to `codec`.
 * STORED blocks: length (16 bits), a non-zero-padding flag and, if set, the
 * bit length of the padding value followed by its bits below the top one.
 * Huffman blocks: an EOB-misprediction flag (plus the explicit token count
 * when set), then one prediction flag per token and, for references, the
 * length / distance corrections and the irregular-258 flag. */
void PreflateTokenPredictor::encodeBlock(
    PreflatePredictionEncoder* codec,
    const unsigned blockno) {
  BlockAnalysisResult& res = analysisResults[blockno];
  codec->encodeBlockType(res.type);

  if (res.type == PreflateTokenBlock::STORED) {
    codec->encodeValue(res.tokenCount, 16);
    bool pad = res.paddingBits != 0;
    codec->encodeNonZeroPadding(pad);
    if (!pad) {
      return;
    }
    unsigned bitsToSave = bitLength(res.paddingBits);
    codec->encodeValue(bitsToSave, 3);
    // The top bit is implied by the bit length; only the lower bits are stored.
    if (bitsToSave > 1) {
      codec->encodeValue(res.paddingBits & ((1 << (bitsToSave - 1)) - 1), bitsToSave - 1);
    }
    return;
  }

  const bool sizeMispredicted = !res.blockSizePredicted;
  codec->encodeEOBMisprediction(sizeMispredicted);
  if (sizeMispredicted) {
    unsigned blocksizeBits = bitLength(res.tokenCount);
    codec->encodeValue(blocksizeBits, 5);
    if (blocksizeBits >= 2) {
      codec->encodeValue(res.tokenCount, blocksizeBits);
    }
  }

  unsigned nextCorrective = 0;
  const unsigned total = res.tokenCount;
  for (unsigned idx = 0; idx < total; ++idx) {
    const unsigned char flags = res.tokenInfo[idx];
    const unsigned kind = flags & 3;

    // Literal outcomes need nothing beyond the prediction flag.
    if (kind == 0) { // well predicted LIT
      codec->encodeLiteralPredictionWrong(false);
      continue;
    }
    if (kind == 2) { // badly predicted LIT
      codec->encodeReferencePredictionWrong(true);
      continue;
    }

    if (kind == 1) { // well predicted REF
      codec->encodeReferencePredictionWrong(false);
    } else { // badly predicted REF
      codec->encodeLiteralPredictionWrong(true);
    }

    if (flags & 4) {
      int pred = res.correctives[nextCorrective++];
      int diff = res.correctives[nextCorrective++];
      int hops = res.correctives[nextCorrective++];
      codec->encodeLenCorrection(pred, pred + diff);
      codec->encodeDistAfterLenCorrection(hops);
    } else {
      codec->encodeLenCorrection(3, 3);
      int hops = 0;
      if (flags & 8) {
        hops = res.correctives[nextCorrective++];
      }
      codec->encodeDistOnlyCorrection(hops);
    }

    if (flags & 16) {
      codec->encodeIrregularLen258((flags & 32) != 0);
    }
  }
}
/* Encodes the end-of-stream marker for block `blockno`.
 * When the input was exhausted at the end of the block, one bit is written
 * (1 = more blocks follow, 0 = this is the last block). When input remains,
 * nothing is written, and a block flagged as last is a prediction failure. */
void PreflateTokenPredictor::encodeEOF(
    PreflatePredictionEncoder* codec,
    const unsigned blockno,
    const bool lastBlock) {
  BlockAnalysisResult& res = analysisResults[blockno];
  if (!res.inputEOF) {
    // If we still have input left, this shouldn't be the last block
    if (lastBlock) {
      predictionFailure = true;
    }
    return;
  }
  codec->encodeValue(!lastBlock, 1);
}
/* Feeds the analysis result of block `blockno` into the statistics counters
 * of `model`, walking tokenInfo and correctives in the same order as
 * encodeBlock() does. */
void PreflateTokenPredictor::updateCounters(
    PreflateStatisticsCounter* model,
    const unsigned blockno) {
  BlockAnalysisResult& res = analysisResults[blockno];
  model->block.incBlockType(res.type);
  if (res.type == PreflateTokenBlock::STORED) {
    model->block.incNonZeroPadding(res.paddingBits != 0);
    return;
  }
  model->block.incEOBPredictionWrong(!res.blockSizePredicted);

  unsigned nextCorrective = 0;
  const unsigned total = res.tokenCount;
  for (unsigned idx = 0; idx < total; ++idx) {
    const unsigned char flags = res.tokenInfo[idx];
    const unsigned kind = flags & 3;

    if (kind == 0) { // well predicted LIT
      model->token.incLiteralPredictionWrong(false);
      continue;
    }
    if (kind == 2) { // badly predicted LIT
      model->token.incReferencePredictionWrong(true);
      continue;
    }

    if (kind == 1) { // well predicted REF
      model->token.incReferencePredictionWrong(false);
    } else { // badly predicted REF
      model->token.incLiteralPredictionWrong(true);
    }

    if (flags & 4) {
      ++nextCorrective; // the predicted length is not needed for the statistics
      int diff = res.correctives[nextCorrective++];
      int hops = res.correctives[nextCorrective++];
      model->token.incLengthDiffToPrediction(diff);
      model->token.incDistanceDiffToPredictionAfterIncorrectLengthPrediction(hops);
    } else {
      model->token.incLengthDiffToPrediction(0);
      int hops = 0;
      if (flags & 8) {
        hops = res.correctives[nextCorrective++];
      }
      model->token.incDistanceDiffToPredictionAfterCorrectLengthPrediction(hops);
    }

    if (flags & 16) {
      model->token.incIrregularLength258Encoding((flags & 32) != 0);
    }
  }
}
/* Rebuilds one token block by running the predictor and applying the
 * corrections read from `codec`; the counterpart of encodeBlock().
 * On an unrecoverable mismatch predictionFailure is set and a
 * default-constructed block is returned. */
PreflateTokenBlock PreflateTokenPredictor::decodeBlock(
PreflatePredictionDecoder* codec) {
PreflateTokenBlock block;
currentTokenCount = 0;
prevLen = 0;
pendingToken = PreflateToken(PreflateToken::NONE);
unsigned blocksize = 0;
bool checkEOB = true;
unsigned bt = codec->decodeBlockType();
// NOTE(review): there is no default case; any other value of bt leaves
// block.type at its constructor default and continues below.
switch (bt) {
case PreflateTokenBlock::STORED:
block.type = PreflateTokenBlock::STORED;
block.uncompressedLen = codec->decodeValue(16);
block.paddingBits = 0;
block.paddingBitCount = 0;
if (codec->decodeNonZeroPadding()) {
// The encoder stored the bit length of the padding value and then its
// bits below the top one; the top bit is re-added here.
block.paddingBitCount = codec->decodeValue(3);
if (block.paddingBitCount > 0) {
block.paddingBits = (1 << (block.paddingBitCount - 1)) + codec->decodeValue(block.paddingBitCount - 1);
} else {
block.paddingBits = 0;
}
}
// Stored bytes carry no tokens; only the chains are advanced.
hash.updateHash(block.uncompressedLen);
seq.updateSeq(block.uncompressedLen);
return block;
case PreflateTokenBlock::STATIC_HUFF:
block.type = PreflateTokenBlock::STATIC_HUFF;
break;
case PreflateTokenBlock::DYNAMIC_HUFF:
block.type = PreflateTokenBlock::DYNAMIC_HUFF;
break;
}
if (codec->decodeEOBMisprediction()) {
// Explicit token count: a 5-bit bit length, then the count itself
// (bit lengths 0 and 1 equal the count and are not followed by more bits).
unsigned blocksizeBits = codec->decodeValue(5);
if (blocksizeBits >= 2) {
blocksize = codec->decodeValue(blocksizeBits);
} else {
blocksize = blocksizeBits;
}
block.tokens.reserve(blocksize);
checkEOB = false;
} else {
// NOTE(review): presumably 1 << (6 + memLevel) matches state.maxTokenCount
// — confirm against PreflatePredictorState.
block.tokens.reserve(1 << (6 + params.memLevel));
}
// Either run until the predictor itself predicts end-of-block, or until
// the explicit token count has been reached.
while ((checkEOB && !predictEOB())
|| (!checkEOB && currentTokenCount < blocksize)) {
PreflateToken predictedToken = predictToken();
// printf("P(%d,%d)\n", predictedToken.len, predictedToken.dist);
if (predictedToken.len == 1) {
unsigned notok = codec->decodeLiteralPredictionWrong();
if (!notok) {
// Literal predicted and confirmed.
block.tokens.push_back(predictedToken);
commitToken(predictedToken);
++currentTokenCount;
continue;
}
// A reference is required instead: search again.
if (!repredictReference(predictedToken)) {
predictionFailure = true;
return PreflateTokenBlock();
}
} else {
unsigned notok = codec->decodeReferencePredictionWrong();
if (notok) {
// Reference predicted but the stream holds a literal.
predictedToken.len = 1;
predictedToken.dist = 0;
block.tokens.push_back(predictedToken);
commitToken(predictedToken);
++currentTokenCount;
continue;
}
}
unsigned newLen = codec->decodeLenCorrection(predictedToken.len);
if (newLen != predictedToken.len) {
// Length corrected: take the first match of the new length, then apply
// the hop count if there is one.
unsigned hops = codec->decodeDistAfterLenCorrection();
predictedToken.len = newLen;
predictedToken.dist = state.firstMatch(predictedToken.len);
if (hops) {
predictedToken.dist = recalculateDistance(predictedToken, hops);
}
if (predictedToken.len < 3 || predictedToken.len > 258
|| predictedToken.dist == 0) {
predictionFailure = true;
return PreflateTokenBlock();
}
} else {
// Length correct: the distance may still need a hop correction.
unsigned hops = codec->decodeDistOnlyCorrection();
if (hops) {
predictedToken.dist = recalculateDistance(predictedToken, hops);
if (predictedToken.dist == 0) {
predictionFailure = true;
return PreflateTokenBlock();
}
}
}
if (predictedToken.len == 258) {
predictedToken.irregular258 = codec->decodeIrregularLen258();
}
block.tokens.push_back(predictedToken);
commitToken(predictedToken);
++currentTokenCount;
}
return block;
}
/* Counterpart of encodeEOF(): while input remains, returns false without
 * reading anything; once the input is exhausted, reads one bit and returns
 * true when that bit is 0. */
bool PreflateTokenPredictor::decodeEOF(PreflatePredictionDecoder* codec) {
  if (state.availableInputSize() != 0) {
    return false;
  }
  return codec->decodeValue(1) == 0;
}
/* True when no input bytes remain. */
bool PreflateTokenPredictor::inputEOF() {
  const bool exhausted = state.availableInputSize() == 0;
  return exhausted;
}

View File

@@ -0,0 +1,76 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef PREFLATE_TOKEN_PREDICTOR_H
#define PREFLATE_TOKEN_PREDICTOR_H
#include <vector>
#include "preflate_parameter_estimator.h"
#include "preflate_predictor_state.h"
#include "preflate_statistical_codec.h"
struct PreflateStatisticalModel;
struct PreflateStatisticalCodec;
/* Predicts the token stream of deflate blocks from the uncompressed data and
 * records / replays the differences to the real stream.
 * NOTE: the member order matters — the constructor passes `hash` and `seq`
 * to `state`, which is declared (and therefore constructed) first. */
struct PreflateTokenPredictor {
PreflatePredictorState state;
PreflateHashChainExt hash;
PreflateSeqChain seq;
PreflateParameters params;
// Set when prediction cannot be reconciled with the real stream.
bool predictionFailure;
// Cached params.isFastCompressor().
bool fast;
// Lazy-match state carried from one predictToken() call to the next.
unsigned prevLen;
PreflateToken pendingToken;
// Number of tokens committed in the block currently being processed.
unsigned currentTokenCount;
bool emptyBlockAtEnd;
// Outcome of analyzeBlock() for one block.
struct BlockAnalysisResult {
PreflateTokenBlock::Type type;
// Token count for Huffman blocks; byte length for STORED blocks.
unsigned tokenCount;
bool blockSizePredicted;
// True when the input was exhausted at the end of the block.
bool inputEOF;
bool lastBlock;
uint8_t paddingBits, paddingCounts;
// Per-token flags: bits 0-1 prediction outcome, 4 = length corrected,
// 8 = distance-only corrected, 16 = length 258, 32 = irregular 258.
std::vector<unsigned char> tokenInfo;
// Correction values in the order the flags in tokenInfo require them.
std::vector<signed> correctives;
};
std::vector<BlockAnalysisResult> analysisResults;
PreflateTokenPredictor(const PreflateParameters& params,
const std::vector<unsigned char>& uncompressed,
const size_t offset);
// Encoding side: analyse a block, then count and/or encode the result.
void analyzeBlock(const unsigned blockno,
const PreflateTokenBlock& block);
void updateCounters(PreflateStatisticsCounter*,
const unsigned blockno);
void encodeBlock(PreflatePredictionEncoder*,
const unsigned blockno);
void encodeEOF(PreflatePredictionEncoder*,
const unsigned blockno,
const bool lastBlock);
// Decoding side: rebuild a block from predictions plus corrections.
PreflateTokenBlock decodeBlock(PreflatePredictionDecoder*);
bool decodeEOF(PreflatePredictionDecoder*);
bool inputEOF();
// Prediction primitives shared by both sides.
bool predictEOB();
PreflateToken predictToken();
bool repredictReference(PreflateToken& token);
PreflateRematchInfo repredictMatch(const PreflateToken&);
unsigned recalculateDistance(const PreflateToken&, const unsigned hops);
void commitToken(const PreflateToken&);
};
#endif /* PREFLATE_TOKEN_PREDICTOR_H */

View File

@@ -0,0 +1,647 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include <algorithm>
#include <string.h>
#include "preflate_constants.h"
#include "preflate_statistical_codec.h"
#include "preflate_statistical_model.h"
#include "preflate_tree_predictor.h"
/* Creates a tree predictor reading from `dump`, with the input already
 * advanced by `off` bytes and no prediction failure recorded. */
PreflateTreePredictor::PreflateTreePredictor(
    const std::vector<unsigned char>& dump,
    const size_t off)
  : input(dump), predictionFailure(false)
{
  input.advance(off);
}
/* Heap entry used while building a Huffman tree: a frequency together with
 * the index of the symbol or internal node it belongs to. */
struct FreqIdxPair {
unsigned freq;
unsigned idx;
};
/* Parent link recorded while building a Huffman tree: node `idx` is a child
 * of node `parent`. */
struct TreeNode {
unsigned parent;
unsigned idx;
};
/* ===========================================================================
 * Compares two subtrees, using the tree depth as tie breaker when
 * the subtrees have equal frequency. This minimizes the worst case length.
 * Returns true when p1 orders before p2; with equal frequency and equal
 * depth the result is also true.
 */
bool pq_smaller(const FreqIdxPair& p1, const FreqIdxPair& p2, const unsigned char* nodeDepth) {
  if (p1.freq != p2.freq) {
    return p1.freq < p2.freq;
  }
  return nodeDepth[p1.idx] <= nodeDepth[p2.idx];
}
/* ===========================================================================
 * Restores the heap property of ptr[0..len) below position `index`: the
 * entry at `index` sinks, swapping places with its smaller child, until it
 * orders before both children (each father smaller than its two sons).
 */
void pq_downheap(FreqIdxPair* ptr, const unsigned index, const unsigned len, const unsigned char* depth) {
  const FreqIdxPair sinking = ptr[index];
  unsigned hole = index;
  for (unsigned child = hole * 2 + 1; child < len; child = hole * 2 + 1) {
    /* Pick the smaller of the two children. */
    const unsigned sibling = child + 1;
    if (sibling < len && pq_smaller(ptr[sibling], ptr[child], depth)) {
      child = sibling;
    }
    /* Done once the sinking entry orders before both children. */
    if (pq_smaller(sinking, ptr[child], depth)) {
      break;
    }
    /* Move the child up and continue one level further down. */
    ptr[hole] = ptr[child];
    hole = child;
  }
  ptr[hole] = sinking;
}
/* Turns ptr[0..len) into a min-heap ordered by pq_smaller().
 * An empty range is a no-op: without the guard, `len - 1` would wrap around
 * for len == 0 and the loop would sift entries far outside the array. */
void pq_makeheap(FreqIdxPair* ptr, const unsigned len, const unsigned char* depth) {
  if (len == 0) {
    return;
  }
  /* Sift down every position from (len - 1) / 2 back to the root. */
  for (unsigned n = (len - 1) / 2 + 1; n > 0; n--) {
    pq_downheap(ptr, n - 1, len, depth);
  }
}
/* Removes and returns the smallest entry of the heap ptr[0..len).
 * `len` is decremented; the former last entry is moved to the root and
 * sifted down. The caller must pass len >= 1. */
FreqIdxPair pq_remove(FreqIdxPair* ptr, unsigned& len, const unsigned char* depth) {
  const FreqIdxPair smallest = ptr[0];
  len -= 1;
  ptr[0] = ptr[len];
  pq_downheap(ptr, 0, len, depth);
  return smallest;
}
/* Computes Huffman code lengths for `symCount` symbols from their
 * frequencies, limited to `maxBits` bits per code.
 *   symBitLen  - receives one bit length per symbol (0 for unused symbols)
 *   symFreq    - frequency per symbol
 *   minMaxCode - lower bound for the returned code count
 * Returns max(minMaxCode, highest used symbol index + 1).
 * NOTE(review): the structure appears to follow zlib's build_tree /
 * gen_bitlen — confirm before relying on bit-exact equivalence.
 * The local arrays are sized for LITLEN_CODE_COUNT symbols, so symCount must
 * not exceed that. */
unsigned PreflateTreePredictor::calcBitLengths(
unsigned char* symBitLen,
const unsigned* symFreq,
const unsigned symCount,
const unsigned maxBits,
const unsigned minMaxCode) {
FreqIdxPair toSort[PreflateConstants::LITLEN_CODE_COUNT];
TreeNode nodes[PreflateConstants::LITLEN_CODE_COUNT * 2 + 1];
unsigned char nodeBitLen[PreflateConstants::LITLEN_CODE_COUNT * 2 + 1];
unsigned char nodeDepth[PreflateConstants::LITLEN_CODE_COUNT * 2 + 1];
memset(nodeBitLen, 0, sizeof(nodeBitLen));
memset(nodeDepth, 0, sizeof(nodeDepth));
// Internal nodes are numbered from symCount upwards.
unsigned maxCode = 0, len = 0, nodeCount = 0, nodeId = symCount;
// Collect all used symbols; maxCode ends up as the highest used index.
for (unsigned i = 0; i < symCount; ++i) {
if (symFreq[i]) {
toSort[len++] = FreqIdxPair {symFreq[i], maxCode = i};
}
}
if (len < 2) {
// Fewer than two used symbols: force two codes of length 1 — the used
// symbol (or symbol 0) plus either the next index (when maxCode < 2) or
// symbol 0.
memset(symBitLen, 0, symCount);
symBitLen[maxCode] = 1;
symBitLen[maxCode < 2 ? ++maxCode : 0] = 1;
return max(minMaxCode, maxCode + 1);
}
pq_makeheap(toSort, len, nodeDepth);
// Repeatedly join the two least frequent subtrees under a new node.
while (len > 1) {
FreqIdxPair least1 = pq_remove(toSort, len, nodeDepth);
FreqIdxPair least2 = toSort[0];
toSort[0] = FreqIdxPair {least1.freq + least2.freq, nodeId};
nodes[nodeCount++] = TreeNode {nodeId, least1.idx};
nodes[nodeCount++] = TreeNode {nodeId, least2.idx};
nodeDepth[nodeId] = max(nodeDepth[least1.idx], nodeDepth[least2.idx]) + 1;
// note? original code put new entry at top of heap, and moved it downwards
// while push_heap pushes it upwards
pq_downheap(toSort, 0, len, nodeDepth);
nodeId++;
}
unsigned overflow = 0;
unsigned bl_count[16];
memset(bl_count, 0, sizeof(bl_count));
unsigned orgNodeCount = nodeCount;
// Walk the links from the last one recorded to the first, so every parent
// length is known before its children; lengths are clamped to maxBits.
// The two children of a node are recorded together, so `overflow` is
// always even.
while (nodeCount-- > 0) {
unsigned char newLen = nodeBitLen[nodes[nodeCount].parent] + 1;
if (newLen > maxBits) {
newLen = maxBits;
++overflow;
}
unsigned idx = nodes[nodeCount].idx;
nodeBitLen[idx] = newLen;
// Only leaves (real symbols) are counted per bit length.
if (idx < symCount) {
bl_count[newLen]++;
}
}
if (overflow) {
unsigned bits;
// Rebalance the per-length counts until no code is over-long.
do {
for (bits = maxBits - 1; bl_count[bits] == 0; bits--) {
}
bl_count[bits]--; /* move one leaf down the tree */
bl_count[bits + 1] += 2; /* move one overflow item as its brother */
bl_count[maxBits]--;
/* The brother of the overflow item also moves one step up,
* but this does not affect bl_count[max_length]
*/
overflow -= 2;
} while (overflow > 0);
// Reassign lengths to the leaves in recorded order, longest lengths first.
for (bits = maxBits, nodeCount = orgNodeCount; nodeCount > 0; ) {
--nodeCount;
unsigned idx = nodes[nodeCount].idx;
if (idx >= symCount) {
continue;
}
while (bl_count[bits] == 0) {
bits--;
}
nodeBitLen[idx] = bits;
bl_count[bits]--;
}
}
memcpy(symBitLen, nodeBitLen, symCount);
return max(minMaxCode, maxCode + 1);
}
/* Predicts how the bit length at symBitLen[0] is written in the code-length
 * sequence:
 *   TCT_REPZL - a run of at least 11 zero lengths starts here
 *   TCT_REPZS - a run of 3..10 zero lengths starts here
 *   TCT_REP   - not the first entry, equal to the previous length, and at
 *               least 3 equal lengths start here
 *   TCT_BITS  - anything else (a plain bit length)
 * symCount limits how far the scan may look ahead; symBitLen[-1] is read
 * only when `first` is false. */
TreeCodeType PreflateTreePredictor::predictCodeType(const unsigned char* symBitLen,
                                                    const unsigned symCount,
                                                    const bool first) {
  const unsigned char lead = symBitLen[0];

  if (lead == 0) {
    const unsigned limit = symCount < 11u ? symCount : 11u;
    unsigned run = 1;
    while (run < limit && symBitLen[run] == 0) {
      ++run;
    }
    if (run >= 11) {
      return TCT_REPZL;
    }
    return run >= 3 ? TCT_REPZS : TCT_BITS;
  }

  if (first || lead != symBitLen[-1]) {
    return TCT_BITS;
  }

  const unsigned limit = symCount < 3u ? symCount : 3u;
  unsigned run = 1;
  while (run < limit && symBitLen[run] == lead) {
    ++run;
  }
  return run >= 3 ? TCT_REP : TCT_BITS;
}
/* Predicts the payload belonging to a code of the given `type`:
 *   TCT_BITS  - the bit length itself (symBitLen[0])
 *   TCT_REP   - repeat count, starting at 3, extended while lengths equal
 *               symBitLen[0], capped at min(symCount, 6)
 *   TCT_REPZS - zero-run length, starting at 3, capped at min(symCount, 10)
 *   TCT_REPZL - zero-run length, starting at 11, capped at min(symCount, 138)
 * Any other type value is treated like TCT_BITS. `first` is unused. */
unsigned char PreflateTreePredictor::predictCodeData(const unsigned char* symBitLen,
                                                     const TreeCodeType type,
                                                     const unsigned symCount,
                                                     const bool first) {
  const unsigned char lead = symBitLen[0];
  if (type != TCT_REP && type != TCT_REPZS && type != TCT_REPZL) {
    return lead;
  }

  const bool repeatsLead = type == TCT_REP;
  const unsigned char wanted = repeatsLead ? lead : 0;
  const unsigned cap = repeatsLead ? 6u : (type == TCT_REPZS ? 10u : 138u);
  const unsigned limit = symCount < cap ? symCount : cap;

  unsigned run = (type == TCT_REPZL) ? 11 : 3;
  while (run < limit && symBitLen[run] == wanted) {
    ++run;
  }
  return static_cast<unsigned char>(run);
}
/* Walks the real code-length sequence (`targetCodes`, `targetCodeSize`
 * bytes) side by side with the predicted bit lengths (`symBitLen`: symLCount
 * literal/length entries followed by symDCount distance entries) and records
 * per code:
 *   tokenInfo:   predicted type in bits 0-1, +4 when the type was wrong,
 *                +8 when a repeat count was wrong
 *   correctives: the real type (if wrong), then either the predicted repeat
 *                count (plus the real one if wrong) or the predicted bit
 *                length and its difference to the real one
 * `frequencies` receives the usage count of each code-length symbol and must
 * hold CODETREE_CODE_COUNT entries. A 0xff terminator is appended to
 * tokenInfo once the whole sequence has been consumed.
 * Sets predictionFailure when a repeat code lacks its payload byte or the
 * sequence does not cover exactly symLCount + symDCount entries. */
void PreflateTreePredictor::predictLDTrees(
BlockAnalysisResult& analysis,
unsigned* frequencies,
const unsigned char* symBitLen,
const unsigned symLCount,
const unsigned symDCount,
const unsigned char* targetCodes,
const unsigned targetCodeSize) {
memset(frequencies, 0, sizeof(unsigned) * PreflateConstants::CODETREE_CODE_COUNT);
const unsigned char* ptr = symBitLen;
const unsigned char* code = targetCodes;
unsigned codeSize = targetCodeSize;
// count1: entries left in the current tree; count2: entries of the tree
// that follows (the distance tree while the literal tree is processed).
unsigned count1 = symLCount;
unsigned count2 = symDCount;
bool first = true;
while (codeSize > 0) {
TreeCodeType targetTreeCodeType;
switch (code[0]) {
case 16: targetTreeCodeType = TCT_REP; break;
case 17: targetTreeCodeType = TCT_REPZS; break;
case 18: targetTreeCodeType = TCT_REPZL; break;
default: targetTreeCodeType = TCT_BITS; break;
}
// Repeat codes occupy two bytes: the code and its count.
if (codeSize < 2 && targetTreeCodeType != TCT_BITS) {
predictionFailure = true;
return;
}
TreeCodeType predictedTreeCodeType = predictCodeType(ptr, count1, first);
unsigned char info = predictedTreeCodeType | ((targetTreeCodeType != predictedTreeCodeType) << 2);
if (targetTreeCodeType != predictedTreeCodeType) {
analysis.correctives.push_back(targetTreeCodeType);
}
// For TCT_BITS the payload is the code byte itself, otherwise the next byte.
unsigned char targetTreeCodeData = code[targetTreeCodeType != TCT_BITS];
unsigned l = 1 + (targetTreeCodeType != TCT_BITS);
code += l;
codeSize -= l;
// The payload is predicted for the REAL type, not the predicted one.
unsigned char predictedTreeCodeData = predictCodeData(ptr, targetTreeCodeType, count1, first);
first = false;
if (targetTreeCodeType != TCT_BITS) {
analysis.correctives.push_back(predictedTreeCodeData);
if (targetTreeCodeData != predictedTreeCodeData) {
info |= 8;
analysis.correctives.push_back(targetTreeCodeData);
}
} else {
analysis.correctives.push_back(predictedTreeCodeData);
analysis.correctives.push_back(targetTreeCodeData - predictedTreeCodeData);
}
if (targetTreeCodeType != TCT_BITS) {
// Assumes TCT_REP/TCT_REPZS/TCT_REPZL map to symbols 16/17/18 via +15
// — TODO confirm against the TreeCodeType enum values.
frequencies[targetTreeCodeType + 15]++;
l = targetTreeCodeData;
} else {
frequencies[targetTreeCodeData]++;
l = 1;
}
// Advance by the number of bit lengths this code covers.
ptr += l;
if (count1 > l) {
count1 -= l;
} else {
// The current tree is used up (or overrun): continue with the next
// tree's entries, and treat the next code as the first of its tree.
count1 += count2;
count2 = 0;
first = true;
if (count1 >= l) {
count1 -= l;
} else {
predictionFailure = true;
return;
}
}
analysis.tokenInfo.push_back(info);
}
analysis.tokenInfo.push_back(0xff);
if (count1 + count2 != 0) {
predictionFailure = true;
}
}
void PreflateTreePredictor::collectTokenStatistics(
unsigned Lcodes[],
unsigned Dcodes[],
unsigned& Lcount,
unsigned& Dcount,
const PreflateTokenBlock& block) {
memset(Lcodes, 0, sizeof(unsigned) * PreflateConstants::LITLEN_CODE_COUNT);
memset(Dcodes, 0, sizeof(unsigned) * PreflateConstants::DIST_CODE_COUNT);
Lcount = 0;
Dcount = 0;
for (unsigned i = 0, n = block.tokens.size(); i < n; ++i) {
PreflateToken targetToken = block.tokens[i];
if (targetToken.len == 1) {
Lcodes[input.curChar()]++;
Lcount++;
input.advance(1);
} else {
Lcodes[PreflateConstants::NONLEN_CODE_COUNT + PreflateConstants::LCode(targetToken.len)]++;
Lcount++;
Dcodes[PreflateConstants::DCode(targetToken.dist)]++;
Dcount++;
input.advance(targetToken.len);
}
}
Lcodes[256] = 1;
}
/* Computes literal/length bit lengths (at most 15 bits) from `Lcodes` and
 * returns the code count, which is at least NONLEN_CODE_COUNT. */
unsigned PreflateTreePredictor::buildLBitlenghs(
    unsigned char bitLengths[],
    unsigned Lcodes[]) {
  const unsigned maxCodeBits = 15;
  const unsigned codeCount = calcBitLengths(bitLengths, Lcodes,
                                            PreflateConstants::LITLEN_CODE_COUNT,
                                            maxCodeBits,
                                            PreflateConstants::NONLEN_CODE_COUNT);
  return codeCount;
}
/* Computes distance bit lengths (at most 15 bits) from `Dcodes` and returns
 * the code count, with no lower bound applied. */
unsigned PreflateTreePredictor::buildDBitlenghs(
    unsigned char bitLengths[],
    unsigned Dcodes[]) {
  const unsigned maxCodeBits = 15;
  const unsigned codeCount = calcBitLengths(bitLengths, Dcodes,
                                            PreflateConstants::DIST_CODE_COUNT,
                                            maxCodeBits,
                                            0);
  return codeCount;
}
/* Computes the bit lengths (at most 7 bits) of the code-length alphabet from
 * `BLfreqs` into `simpleCodeTree`, and returns how many entries are needed
 * in treeCodeOrderTable order: trailing zero-length entries are dropped, but
 * never fewer than 4 entries are reported. */
unsigned PreflateTreePredictor::buildTCBitlengths(
    unsigned char (&simpleCodeTree)[PreflateConstants::CODETREE_CODE_COUNT],
    unsigned (&BLfreqs)[PreflateConstants::CODETREE_CODE_COUNT]) {
  memset(simpleCodeTree, 0, sizeof(simpleCodeTree));
  calcBitLengths(simpleCodeTree, BLfreqs, PreflateConstants::CODETREE_CODE_COUNT, 7, 0);

  unsigned needed = PreflateConstants::CODETREE_CODE_COUNT;
  for (; needed > 4; --needed) {
    if (simpleCodeTree[PreflateConstants::treeCodeOrderTable[needed - 1]] != 0) {
      break;
    }
  }
  return needed;
}
void PreflateTreePredictor::analyzeBlock(
const unsigned blockno,
const PreflateTokenBlock& block) {
if (blockno != analysisResults.size() || predictionFailure) {
return;
}
analysisResults.push_back(BlockAnalysisResult());
BlockAnalysisResult& analysis = analysisResults[blockno];
analysis.blockType = block.type;
if (analysis.blockType != PreflateTokenBlock::DYNAMIC_HUFF) {
return;
}
unsigned Lcodes[PreflateConstants::LITLEN_CODE_COUNT], Dcodes[PreflateConstants::DIST_CODE_COUNT];
unsigned Lcount = 0, Dcount = 0;
collectTokenStatistics(Lcodes, Dcodes, Lcount, Dcount, block);
unsigned char bitLengths[PreflateConstants::LITLENDIST_CODE_COUNT];
memset(bitLengths, 0, sizeof(bitLengths));
unsigned predictedLTreeSize = buildLBitlenghs(bitLengths, Lcodes);
analysis.tokenInfo.push_back(predictedLTreeSize != block.nlen);
if (predictedLTreeSize != block.nlen) {
analysis.correctives.push_back(block.nlen);
}
predictedLTreeSize = block.nlen;
unsigned predictedDTreeSize = buildDBitlenghs(bitLengths + predictedLTreeSize, Dcodes);
analysis.tokenInfo.push_back(predictedDTreeSize != block.ndist);
if (predictedDTreeSize != block.ndist) {
analysis.correctives.push_back(block.ndist);
}
predictedDTreeSize = block.ndist;
unsigned BLfreqs[PreflateConstants::CODETREE_CODE_COUNT];
const unsigned char* targetCodes = &block.treecodes[0];
unsigned targetCodeSize = block.treecodes.size();
predictLDTrees(analysis, BLfreqs, bitLengths, predictedLTreeSize, predictedDTreeSize, targetCodes + block.ncode, targetCodeSize - block.ncode);
unsigned char simpleCodeTree[PreflateConstants::CODETREE_CODE_COUNT];
unsigned predictedCTreeSize = buildTCBitlengths(simpleCodeTree, BLfreqs);
analysis.tokenInfo.push_back(block.ncode);
analysis.tokenInfo.push_back(predictedCTreeSize != block.ncode);
predictedCTreeSize = block.ncode;
for (unsigned i = 0; i < predictedCTreeSize; ++i) {
unsigned predictedBL = simpleCodeTree[PreflateConstants::treeCodeOrderTable[i]];
analysis.correctives.push_back(predictedBL);
analysis.correctives.push_back(targetCodes[i] - predictedBL);
}
}
/* Writes the tree analysis of block `blockno` to `codec`; does nothing for
 * blocks that are not DYNAMIC_HUFF.
 * Order: literal-count misprediction (plus the count minus
 * NONLEN_CODE_COUNT, 5 bits), distance-count misprediction (plus the count,
 * 5 bits), one type / payload correction per code-length code up to the 0xff
 * terminator, the tree-code-count misprediction (plus the count minus 4,
 * 4 bits), and finally one bit-length correction per tree code. */
void PreflateTreePredictor::encodeBlock(
    PreflatePredictionEncoder* codec,
    const unsigned blockno) {
  BlockAnalysisResult& res = analysisResults[blockno];
  if (res.blockType != PreflateTokenBlock::DYNAMIC_HUFF) {
    return;
  }

  unsigned nextInfo = 0;
  unsigned nextCorrective = 0;

  const unsigned char litCountWrong = res.tokenInfo[nextInfo++];
  codec->encodeLiteralCountMisprediction(litCountWrong);
  if (litCountWrong) {
    codec->encodeValue(res.correctives[nextCorrective++] - PreflateConstants::NONLEN_CODE_COUNT, 5);
  }

  const unsigned char distCountWrong = res.tokenInfo[nextInfo++];
  codec->encodeDistanceCountMisprediction(distCountWrong);
  if (distCountWrong) {
    codec->encodeValue(res.correctives[nextCorrective++], 5);
  }

  // One entry per code-length code, terminated by 0xff.
  for (unsigned char flags = res.tokenInfo[nextInfo++];
       flags != 0xff;
       flags = res.tokenInfo[nextInfo++]) {
    unsigned predictedType = (flags & 3);
    unsigned actualType = predictedType;
    if (flags & 4) {
      actualType = res.correctives[nextCorrective++];
    }
    codec->encodeLDTypeCorrection(predictedType, actualType);

    if (actualType == TCT_BITS) {
      unsigned bl_pred = res.correctives[nextCorrective++];
      int bl_diff = res.correctives[nextCorrective++];
      codec->encodeLDBitLengthCorrection(bl_pred, bl_pred + bl_diff);
      continue;
    }

    unsigned predRepeat = res.correctives[nextCorrective++];
    unsigned actualRepeat = predRepeat;
    if (flags & 8) {
      actualRepeat = res.correctives[nextCorrective++];
    }
    codec->encodeRepeatCountCorrection(predRepeat, actualRepeat, actualType);
  }

  unsigned blcount = res.tokenInfo[nextInfo++];
  const unsigned char treeCountWrong = res.tokenInfo[nextInfo++];
  codec->encodeTreeCodeCountMisprediction(treeCountWrong);
  if (treeCountWrong) {
    codec->encodeValue(blcount - 4, 4);
  }
  for (unsigned i = 0; i < blcount; ++i) {
    int bl_pred = res.correctives[nextCorrective++];
    int bl_diff = res.correctives[nextCorrective++];
    codec->encodeTreeCodeBitLengthCorrection(bl_pred, bl_pred + bl_diff);
  }
}
// Feeds the analysis stored for block `blockno` into the statistics counters
// of `model->treecode`. Blocks that are not dynamic Huffman blocks are
// ignored. The tokenInfo/correctives streams are walked in the same order as
// in encodeBlock(); values that are not counted are skipped.
void PreflateTreePredictor::updateCounters(
    PreflateStatisticsCounter* model,
    const unsigned blockno) {
  BlockAnalysisResult& result = analysisResults[blockno];
  if (result.blockType != PreflateTokenBlock::DYNAMIC_HUFF) {
    return;
  }

  unsigned flagIdx = 0;
  unsigned fixIdx = 0;

  // Literal/length code count; the corrected count itself is not counted.
  unsigned char flag = result.tokenInfo[flagIdx++];
  model->treecode.incLiteralCountPredictionWrong(flag);
  if (flag) {
    ++fixIdx;
  }

  // Distance code count; likewise only the hit/miss is counted.
  flag = result.tokenInfo[flagIdx++];
  model->treecode.incDistanceCountPredictionWrong(flag);
  if (flag) {
    ++fixIdx;
  }

  // One entry per tree code until the 0xff terminator.
  for (;;) {
    flag = result.tokenInfo[flagIdx++];
    if (flag == 0xff) {
      break;
    }
    unsigned type = (flag & 3);
    model->treecode.incLDCodeTypePredictionWrong(type, (flag & 4) != 0);
    if (flag & 4) {
      unsigned replacement = result.correctives[fixIdx++];
      model->treecode.incLDCodeTypeReplacement(replacement);
      type = replacement;
    }

    if (type == TCT_BITS) {
      // Skip the predicted bit length, count only the difference.
      ++fixIdx;
      int lenDiff = result.correctives[fixIdx++];
      model->treecode.incLDCodeLengthDiffToPrediction(lenDiff);
    } else {
      unsigned predictedRepeat = result.correctives[fixIdx++];
      if (flag & 8) {
        unsigned actualRepeat = result.correctives[fixIdx++];
        model->treecode.incLDCodeRepeatDiffToPrediction(actualRepeat - predictedRepeat);
      } else {
        model->treecode.incLDCodeRepeatDiffToPrediction(0);
      }
    }
  }

  // Tree code count flag, then the bit length difference of each tree code.
  unsigned treeCodeCount = result.tokenInfo[flagIdx++];
  flag = result.tokenInfo[flagIdx++];
  model->treecode.incTreeCodeCountPredictionWrong(flag);
  for (unsigned n = 0; n < treeCodeCount; ++n) {
    // Skip the predicted bit length, count only the difference.
    ++fixIdx;
    int lenDiff = result.correctives[fixIdx++];
    model->treecode.incTreeCodeLengthDiffToPrediction(lenDiff);
  }
}
// Rebuilds the sequence of tree codes for the literal/length and distance
// bit lengths from the predicted bit lengths plus the corrections read from
// `codec`.
//
// codec          - source of the type / repeat count / bit length corrections.
// frequencies    - out: cleared (CODETREE_CODE_COUNT entries), then
//                  incremented once per emitted code; repeat codes are
//                  counted at index type + 15, plain bit lengths at the bit
//                  length value.
// targetCodes    - out: emitted codes. A repeat code takes two bytes
//                  (type + 15, then the repeat count), a bit length one byte.
// targetCodeSize - capacity of targetCodes in bytes.
// symBitLen      - predicted bit lengths; symLCount entries are processed
//                  first, followed by symDCount entries.
//
// Returns the number of bytes written to targetCodes, or 0 if the member
// predictionFailure is set. This function sets predictionFailure when
// targetCodes would overflow or when a run is longer than the symbols left;
// it never clears the flag.
unsigned PreflateTreePredictor::reconstructLDTrees(
PreflatePredictionDecoder* codec,
unsigned* frequencies,
unsigned char* targetCodes,
const unsigned targetCodeSize,
const unsigned char* symBitLen,
const unsigned symLCount,
const unsigned symDCount) {
memset(frequencies, 0, sizeof(unsigned) * PreflateConstants::CODETREE_CODE_COUNT);
const unsigned char* ptr = symBitLen;
unsigned osize = 0;
// count1: symbols left in the section being processed,
// count2: symbols of the following section (0 once it has been merged in).
unsigned count1 = symLCount;
unsigned count2 = symDCount;
bool first = true;
while (count1 + count2 > 0) {
// Predict the code type, then let the decoder confirm or replace it.
TreeCodeType predictedTreeCodeType = predictCodeType(ptr, count1, first);
unsigned newType = codec->decodeLDTypeCorrection(predictedTreeCodeType);
// Map the decoded value back to the enum; any other value leaves the
// prediction unchanged.
switch (newType) {
case TCT_BITS:
predictedTreeCodeType = TCT_BITS;
break;
case TCT_REP:
predictedTreeCodeType = TCT_REP;
break;
case TCT_REPZS:
predictedTreeCodeType = TCT_REPZS;
break;
case TCT_REPZL:
predictedTreeCodeType = TCT_REPZL;
break;
}
// Predict the payload for the final type (repeat count or bit length)
// and apply the decoded correction.
unsigned char predictedTreeCodeData = predictCodeData(ptr, predictedTreeCodeType, count1, first);
first = false;
if (predictedTreeCodeType != TCT_BITS) {
predictedTreeCodeData = codec->decodeRepeatCountCorrection(predictedTreeCodeData, predictedTreeCodeType);
} else {
predictedTreeCodeData = codec->decodeLDBitLengthCorrection(predictedTreeCodeData);;
}
// l: number of symbols covered by this code.
unsigned l;
if (predictedTreeCodeType != TCT_BITS) {
frequencies[predictedTreeCodeType + 15]++;
l = predictedTreeCodeData;
// A repeat code needs two output bytes.
if (osize + 2 > targetCodeSize) {
predictionFailure = true;
break;
}
targetCodes[osize++] = predictedTreeCodeType + 15;
targetCodes[osize++] = predictedTreeCodeData;
} else {
frequencies[predictedTreeCodeData]++;
l = 1;
if (osize >= targetCodeSize) {
predictionFailure = true;
break;
}
targetCodes[osize++] = predictedTreeCodeData;
}
ptr += l;
if (count1 > l) {
count1 -= l;
} else {
// The code reaches (or passes) the end of the current section: merge the
// following section in and restart the "first" state for it.
count1 += count2;
count2 = 0;
first = true;
if (count1 >= l) {
count1 -= l;
} else {
// The run covers more symbols than remain in total.
predictionFailure = true;
break;
}
}
}
// Leaving the loop early (failure) leaves symbols unprocessed.
if (count1 + count2 != 0) {
predictionFailure = true;
}
return predictionFailure ? 0 : osize;
}
// Reconstructs the dynamic Huffman header fields of `block` (nlen, ndist,
// ncode and treecodes) from the block's token statistics plus the
// corrections read from `codec`.
//
// Returns true on success and for every block that is not a dynamic Huffman
// block (nothing is decoded for those); returns false if predictionFailure
// is set after rebuilding the literal/distance tree codes.
bool PreflateTreePredictor::decodeBlock(
    PreflateTokenBlock& block,
    PreflatePredictionDecoder* codec) {
  if (block.type != PreflateTokenBlock::DYNAMIC_HUFF) {
    return true;
  }

  // Symbol frequencies of the tokens in this block.
  unsigned Lcodes[PreflateConstants::LITLEN_CODE_COUNT];
  unsigned Dcodes[PreflateConstants::DIST_CODE_COUNT];
  unsigned Lcount = 0;
  unsigned Dcount = 0;
  collectTokenStatistics(Lcodes, Dcodes, Lcount, Dcount, block);

  unsigned char bitLengths[PreflateConstants::LITLENDIST_CODE_COUNT];
  memset(bitLengths, 0, sizeof(bitLengths));

  // Literal/length tree size: prediction, optionally overridden.
  unsigned litLenTreeSize = buildLBitlenghs(bitLengths, Lcodes);
  if (codec->decodeLiteralCountMisprediction()) {
    litLenTreeSize = codec->decodeValue(5) + PreflateConstants::NONLEN_CODE_COUNT;
  }
  block.nlen = litLenTreeSize;

  // Distance tree size; its bit lengths are stored right after the
  // literal/length ones.
  unsigned distTreeSize = buildDBitlenghs(bitLengths + litLenTreeSize, Dcodes);
  if (codec->decodeDistanceCountMisprediction()) {
    distTreeSize = codec->decodeValue(5);
  }
  block.ndist = distTreeSize;

  // Tree codes describing both bit length sequences.
  unsigned BLfreqs[PreflateConstants::CODETREE_CODE_COUNT];
  unsigned char compressedLDtrees[PreflateConstants::LITLENDIST_CODE_COUNT];
  unsigned ldTreeCodeSize = reconstructLDTrees(
      codec, BLfreqs, compressedLDtrees, PreflateConstants::LITLENDIST_CODE_COUNT,
      bitLengths, litLenTreeSize, distTreeSize);
  if (predictionFailure) {
    return false;
  }

  // Code tree size: prediction, optionally overridden.
  unsigned char simpleCodeTree[PreflateConstants::CODETREE_CODE_COUNT];
  unsigned codeTreeSize = buildTCBitlengths(simpleCodeTree, BLfreqs);
  if (codec->decodeTreeCodeCountMisprediction()) {
    codeTreeSize = codec->decodeValue(4) + 4;
  }
  block.ncode = codeTreeSize;

  // Correct each predicted code tree bit length, in treeCodeOrderTable order.
  unsigned char shuffledCodeTree[PreflateConstants::CODETREE_CODE_COUNT];
  for (unsigned pos = 0; pos < codeTreeSize; ++pos) {
    unsigned predictedBL = simpleCodeTree[PreflateConstants::treeCodeOrderTable[pos]];
    shuffledCodeTree[pos] = codec->decodeTreeCodeBitLengthCorrection(predictedBL);
  }

  // treecodes = code tree bit lengths followed by the L/D tree codes.
  block.treecodes.reserve(codeTreeSize + ldTreeCodeSize);
  block.treecodes.insert(block.treecodes.end(),
                         shuffledCodeTree, shuffledCodeTree + codeTreeSize);
  block.treecodes.insert(block.treecodes.end(),
                         compressedLDtrees, compressedLDtrees + ldTreeCodeSize);
  return true;
}

View File

@@ -0,0 +1,99 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef PREFLATE_TREE_PREDICTOR_H
#define PREFLATE_TREE_PREDICTOR_H
#include <vector>
#include "preflate_constants.h"
#include "preflate_input.h"
#include "preflate_parameter_estimator.h"
struct PreflateStatisticsCounter;
struct PreflatePredictionDecoder;
struct PreflatePredictionEncoder;
// Kind of a single tree code. The numeric values matter: tokenInfo stores a
// type in two bits, and repeat types are written out as (type + 15).
// TCT_BITS is a plain bit length; the other three are repeat codes
// (presumably DEFLATE code length symbols 16, 17 and 18 -- confirm).
enum TreeCodeType {
TCT_BITS = 0, TCT_REP = 1, TCT_REPZS = 2, TCT_REPZL = 3
};
// Predicts the Huffman tree description of dynamic Huffman blocks and
// records / replays the differences between prediction and actual data.
struct PreflateTreePredictor {
PreflateInput input;
// Set when reconstructing a block's tree codes failed; checked by
// decodeBlock().
bool predictionFailure;
// Per-block analysis record, consumed by encodeBlock() and updateCounters().
struct BlockAnalysisResult {
PreflateTokenBlock::Type blockType;
// Flag stream (one byte per decision); the tree code section is terminated
// by a 0xff byte.
std::vector<unsigned char> tokenInfo;
// Value stream read in lockstep with tokenInfo (predicted values,
// replacements and differences).
std::vector<signed> correctives;
};
// Indexed by block number.
std::vector<BlockAnalysisResult> analysisResults;
// NOTE(review): the helpers below are defined outside this excerpt; the
// remarks on them are inferred from their call sites only.
// Called with the literal/length and distance frequency arrays and counters
// to fill from `block`.
void collectTokenStatistics(
unsigned LcodeFrequencies[],
unsigned DcodeFrequencies[],
unsigned& Lcount,
unsigned& Dcount,
const PreflateTokenBlock& block);
// Return value is used as the predicted literal/length tree size.
unsigned buildLBitlenghs(
unsigned char bitLengths[],
unsigned Lcodes[]);
// Return value is used as the predicted distance tree size.
unsigned buildDBitlenghs(
unsigned char bitLengths[],
unsigned Dcodes[]);
// Return value is used as the predicted code tree size.
unsigned buildTCBitlengths(
unsigned char (&bitLengths)[PreflateConstants::CODETREE_CODE_COUNT],
unsigned (&BLfreqs)[PreflateConstants::CODETREE_CODE_COUNT]);
unsigned calcBitLengths(unsigned char* symBitLen,
const unsigned* symFreq,
const unsigned symCount,
const unsigned maxBits,
const unsigned minMaxCode);
// Predicts the type of the next tree code at symBitLen.
TreeCodeType predictCodeType(const unsigned char* symBitLen,
const unsigned symCount,
const bool first);
// Predicts the payload (bit length or repeat count) for a code of `type`.
unsigned char predictCodeData(const unsigned char* symBitLen,
const TreeCodeType type,
const unsigned symCount,
const bool first);
void predictLDTrees(BlockAnalysisResult& analysis,
unsigned* frequencies,
const unsigned char* symBitLen,
const unsigned symLCount,
const unsigned symDCount,
const unsigned char* targetCodes,
const unsigned targetCodeSize);
// Decoder-side rebuild of the literal/distance tree codes: returns the number
// of bytes written to targetCodes, or 0 when predictionFailure is set.
unsigned reconstructLDTrees(PreflatePredictionDecoder* codec,
unsigned* frequencies,
unsigned char* targetCodes,
unsigned targetCodeSize,
const unsigned char* symBitLen,
const unsigned symLCount,
const unsigned symDCount);
PreflateTreePredictor(const std::vector<unsigned char>& dump, const size_t offset);
// Fills analysisResults[blockno] for `block`.
void analyzeBlock(const unsigned blockno,
const PreflateTokenBlock& block);
// Adds the recorded analysis of block `blockno` to the statistics counters.
void updateCounters(PreflateStatisticsCounter*,
const unsigned blockno);
// Writes the recorded analysis of block `blockno` to the encoder.
void encodeBlock(PreflatePredictionEncoder*,
const unsigned blockno);
// Rebuilds nlen/ndist/ncode/treecodes of `block` from the decoder; returns
// false on prediction failure.
bool decodeBlock(PreflateTokenBlock& block, PreflatePredictionDecoder*);
};
#endif /* PREFLATE_TREE_PREDICTOR_H */

View File

@@ -0,0 +1,232 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include <algorithm>
#include "arithmetic_coder.h"
#include "array_helper.h"
#include "bit_helper.h"
// Lookup table used by _needsNormalization(): the entry is selected by
// _low >> 29 and tested against bit (_high >> 29).
// With 31-bit _low/_high the index is the top two bits:
//   low 00 -> 0x33: normalize when high starts with 00 or 01
//   low 01 -> 0x77: normalize when high starts with 00, 01 or 10
//   low 1x -> 0xff: always normalize
// Entries 4..7 repeat entries 0..3.
const uint8_t ArithmeticCodecBase::_normCheckLUT[8] = {
0x33, 0x77, 0xff, 0xff, 0x33, 0x77, 0xff, 0xff
};
// Starts with the full 31-bit coding interval [0, 0x7fffffff].
ArithmeticCodecBase::ArithmeticCodecBase()
  : _low(0u),
    _high(0x7fffffffu) {
}
// Binds the encoder to its output bit stream; no bits are pending yet.
ArithmeticEncoder::ArithmeticEncoder(BitOutputStream& bos)
  : _bos(bos),
    _e3cnt(0u) {
}
// Writes all pending bits counted in _e3cnt, taking them from the low bits
// of `w`, in chunks of at most 16 bits. Leaves _e3cnt at 0.
void ArithmeticEncoder::_writeE3(const unsigned w) {
  for (uint32_t chunk = 0; _e3cnt > 0; _e3cnt -= chunk) {
    chunk = min(_e3cnt, 16u);
    _bos.put(w, chunk);
  }
}
// Terminates the current code word and resets the interval to its initial
// full range.
void ArithmeticEncoder::flush() {
  const bool lowInFirstQuarter = _low < 0x20000000;
  if (lowInFirstQuarter) {
    // case a.): write bit 0, then bit 1, then the pending bits as ones
    _bos.put(2, 2);
    _writeE3(~0u);
  } else {
    // otherwise a single 1 bit is written
    _bos.put(1, 1);
  }
  _high = 0x7fffffff;
  _low = 0;
}
// Renormalizes the coding interval [_low, _high] (31-bit values): writes the
// leading bits that are already decided, counts undecided middle bits in
// _e3cnt, and shifts both bounds left accordingly.
void ArithmeticEncoder::_normalize() {
#ifdef _DEBUG
_ASSERT(_low <= _high && _high < 0x80000000);
#endif
// write determined bits
// this is the case if _low features 1 bits
// or _high features 0 bits
// lh has a 1 exactly where _low is 0 and _high is 1.
uint32_t lh = ~_low & _high;
if ((lh & 0x40000000) == 0) {
// The top bit is decided: emit it, followed by the pending bits, which are
// written with the opposite value (w - 1 is all ones for w == 0 and 0 for
// w == 1).
unsigned w = (_low & 0x40000000) != 0;
_bos.put(w, 1);
_writeE3(w - 1);
if ((lh & 0x20000000) == 0) {
// More bits are decided: l counts the leading zero bits of lh below the
// top bit (the "+ 3" stops the count at the end of the word).
unsigned l = bitLeadingZeroes((lh << 2) + 3);
// Emit those l bits of _low, at most 16 bits per call.
if (l <= 16) {
_bos.putReverse(_low >> (30 - l), l);
} else {
_bos.putReverse(_low >> (30 - 16), 16);
_bos.putReverse(_low >> (30 - l), l - 16);
}
// Shift out l + 1 bits; _low is refilled with zeros, _high with ones.
_low = (_low << (l + 1)) & 0x7fffffff;
_high = (((_high + 1) << (l + 1)) - 1) & 0x7fffffff;
} else {
// Only the top bit was decided.
_low = (_low << 1) & 0x7fffffff;
_high = ((_high << 1) + 1) & 0x7fffffff;
}
}
// count undetermined bits
// lh has a 0 exactly where _low is 1 and _high is 0.
lh = ~_low | _high;
if ((lh & 0x20000000) == 0) {
// low starts with 01, high starts with 10
unsigned l = bitLeadingZeroes((lh << 2) + 3);
// Remember l pending bits; they are written by _writeE3() once the next
// decided bit is known.
_e3cnt += l;
// Drop the l bits below the top bit, keeping _low's top bit 0 and _high's
// top bit 1.
_low = (_low << l) & 0x3fffffff;
_high = ((((_high + 1) << l) - 1) & 0x3fffffff)
| 0x40000000;
}
#ifdef _DEBUG
_ASSERT(_low <= _high && _high < 0x80000000);
#endif
}
// Binds the decoder to its input and preloads the 31-bit code value: the
// upper 16 bits are read first, then the lower 15 bits.
ArithmeticDecoder::ArithmeticDecoder(BitInputStream& bis)
  : _bis(bis),
    _value(0) {
  const uint32_t upper16 = _bis.getReverse(16);
  const uint32_t lower15 = _bis.getReverse(15);
  _value = (upper16 << 15) | lower15;
}
// Decoder counterpart of the encoder's normalization: applies the same
// shifts to _low and _high and refills _value with the same number of bits
// from the input stream.
void ArithmeticDecoder::_normalize() {
#ifdef _DEBUG
_ASSERT(_low <= _value && _value <= _high && _high < 0x80000000);
#endif
// skip determined bits
// this is the case if _low features 1 bits
// or _high features 0 bits
// lh has a 1 exactly where _low is 0 and _high is 1.
uint32_t lh = ~_low & _high;
if ((lh & 0x40000000) == 0) {
//unsigned w = (_low & 0x40000000) != 0;
if ((lh & 0x20000000) == 0) {
// l further leading bits are decided, so l + 1 bits are shifted out.
unsigned l = bitLeadingZeroes((lh << 2) + 3);
_low = (_low << (l + 1)) & 0x7fffffff;
_high = (((_high + 1) << (l + 1)) - 1) & 0x7fffffff;
// Pull l + 1 fresh bits into _value, at most 16 bits per read.
if (l <= 15) {
_value = ((_value << (l + 1)) + _bis.getReverse(l + 1)) & 0x7fffffff;
} else {
_value = ((_value << 16) + _bis.getReverse(16)) & 0x7fffffff;
_value = ((_value << (l - 15)) + _bis.getReverse(l - 15)) & 0x7fffffff;
}
} else {
// Only the top bit is decided: shift everything by one bit.
_low = (_low << 1) & 0x7fffffff;
_high = ((_high << 1) + 1) & 0x7fffffff;
_value = ((_value << 1) + _bis.get(1)) & 0x7fffffff;
}
}
// count undetermined bits
// lh has a 0 exactly where _low is 1 and _high is 0.
lh = ~_low | _high;
if ((lh & 0x20000000) == 0) {
// low starts with 01, high starts with 10
unsigned l = bitLeadingZeroes((lh << 2) + 3);
_low = (_low << l) & 0x3fffffff;
_high = ((((_high + 1) << l) - 1) & 0x3fffffff)
| 0x40000000;
// Shift l bits into _value as well, then subtract 0x40000000 and mask to
// 31 bits.
if (l <= 16) {
_value = (((_value << l) + _bis.getReverse(l)) -0x40000000) & 0x7fffffff;
} else {
_value = ((_value << 16) + _bis.getReverse(16));
_value = (((_value << (l - 16)) + _bis.getReverse(l - 16)) - 0x40000000) & 0x7fffffff;
}
}
#ifdef _DEBUG
_ASSERT(_low <= _value && _value <= _high && _high < 0x80000000);
#endif
}
// Checks whether a model is "fixed", i.e. at most one of its N symbols has
// a non-zero count, and if so sets it up as a degenerate single-symbol model.
//
// bounds - in: N symbol counts; must have room for N + 1 entries.
//          out (fixed case only): the count of the single used symbol is
//          cleared and bounds[N] is set to 1 << 16.
// ids    - out (fixed case only): ids[N - 1] receives the index of the used
//          symbol, or N if no symbol has a non-zero count. N entries.
// rids   - out (fixed case only): rids[used symbol] = N - 1. N entries.
// N      - number of symbols, expected to be at least 1.
//
// Returns false, leaving all arrays untouched, as soon as a second symbol
// with a non-zero count is found; returns true otherwise.
bool modelCheckFixed(unsigned bounds[], unsigned short ids[], unsigned short rids[],
                     const unsigned N) {
  unsigned idx = N;  // N means "no used symbol found yet"
  for (unsigned i = 0; i < N; ++i) {
    if (bounds[i]) {
      if (idx != N) {
        return false;
      }
      idx = i;
    }
  }
  ids[N - 1] = idx;
  // When every count is zero idx is still N. rids has only N entries, so
  // writing rids[N] would run past the end of the array; there is no used
  // symbol to register in that case.
  if (idx < N) {
    rids[idx] = N - 1;
    bounds[idx] = 0;
  }
  bounds[N] = 1 << 16;
  return true;
}
// Sorts the N counts in `bounds` into ascending order (ties keep ascending
// symbol order) and records the permutation.
//
// bounds - in: counts per symbol; out: the counts in sorted order.
// ids    - out: ids[pos] is the symbol placed at sorted position pos.
// rids   - out: rids[symbol] is the sorted position of that symbol.
// backup - scratch space of N entries; holds the original counts afterwards.
void modelSortBounds(unsigned bounds[], unsigned short ids[], unsigned short rids[],
                     unsigned backup[], const unsigned N) {
  // Start from the identity permutation and keep a copy of the counts.
  for (unsigned sym = 0; sym < N; ++sym) {
    backup[sym] = bounds[sym];
    ids[sym] = sym;
  }
  // Order symbols by count first, symbol index second.
  std::sort(ids, ids + N, [backup](unsigned lhs, unsigned rhs) {
    const unsigned lhsCount = backup[lhs];
    const unsigned rhsCount = backup[rhs];
    return lhsCount < rhsCount || (lhsCount == rhsCount && lhs < rhs);
  });
  // Apply the permutation and build its inverse.
  for (unsigned pos = 0; pos < N; ++pos) {
    const unsigned short sym = ids[pos];
    rids[sym] = pos;
    bounds[pos] = backup[sym];
  }
}
// Converts the N symbol counts in bounds[0..N-1] in place into N + 1
// cumulative interval bounds on a scale of 1 << 16: afterwards symbol i owns
// [bounds[i], bounds[i + 1]). `bounds` must have room for N + 1 entries.
void modelRecreateBounds(unsigned bounds[], const unsigned N) {
// acc: running sum of the counts handled so far,
// prev: original count of symbol i (saved before its slot is overwritten).
unsigned sum = sumArray(bounds, N), acc, prev;
prev = bounds[0];
bounds[0] = acc = 0;
for (unsigned i = 0; i < N; ++i) {
if (prev) {
acc += prev;
// Save the next symbol's count; bounds[i + 1] is overwritten below.
prev = bounds[i + 1];
// Ideal upper bound (acc scaled to 16 bits) minus the current lower bound.
int diff = (((uint64_t)acc) << 16) / sum - bounds[i];
unsigned diff_bits = bitLength(diff);
// Keep only the k most significant bits of a positive width.
const unsigned k = 5;
if (diff > 0 && diff_bits > k) {
diff = diff & (((1 << k) - 1) << (diff_bits - k));
}
bounds[i + 1] = bounds[i] + diff;
// A symbol with a non-zero count always gets a width of at least 1.
if (bounds[i + 1] <= bounds[i]) {
bounds[i + 1] = bounds[i] + 1;
}
} else {
// Unused symbol: empty interval.
prev = bounds[i + 1];
bounds[i + 1] = bounds[i];
}
}
// Pin the end of the last interval to the full scale.
if (bounds[N] > 0) {
bounds[N] = 1 << 16;
}
}
// Builds the coding table from the two symbol counts in bounds[0..1].
//
// If at most one symbol has a non-zero count the model is marked as fixed
// (encode() then writes nothing): ids[1] names the used symbol and the
// bounds are reset to {0, 0, 1 << 16}.
// Otherwise the symbols are ordered by ascending count, ids/rids record the
// permutation and its inverse, and the counts are turned into cumulative
// bounds by modelRecreateBounds().
void ACFixedScaleBinaryModel::build() {
  if (bounds[0] == 0 || bounds[1] == 0) {
    _fixed = true;
    ids[1] = bounds[0] == 0;
    rids[ids[1]] = 1;
    bounds[1] = bounds[0] = 0;
    bounds[2] = 1 << 16;
    return;
  }
  // Both symbols occur. The constructors do not initialize _fixed, so it has
  // to be cleared here; otherwise encode() would test an indeterminate flag.
  // ACFixedScaleModel::build() likewise assigns _fixed on both paths.
  _fixed = false;
  ids[0] = 0;
  ids[1] = 1;
  if (bounds[1] < bounds[0]) {
    std::swap(ids[0], ids[1]);
    std::swap(bounds[0], bounds[1]);
  }
  rids[ids[0]] = 0;
  rids[ids[1]] = 1;
  modelRecreateBounds(bounds, 2);
}

View File

@@ -0,0 +1,260 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef ARITHMETIC_CODER_H
#define ARITHMETIC_CODER_H
#include <stdint.h>
#include <string.h>
#include "bitstream.h"
#include "const_division.h"
// State shared by the arithmetic encoder and decoder: the current coding
// interval [_low, _high] and the check whether it must be normalized.
class ArithmeticCodecBase {
public:
  ArithmeticCodecBase();

  // Lookup table for the fast normalization check below.
  static const uint8_t _normCheckLUT[8];

  // True if the interval needs normalization. The table row is selected by
  // the top bits of _low, the bit within the row by the top bits of _high.
  bool _needsNormalization() const {
    const uint32_t lowTop = _low >> 29;
    const uint32_t highTop = _high >> 29;
    const int highBit = 1 << highTop;
    return (_normCheckLUT[lowTop] & highBit) != 0;
  }

  // Bounds of the current coding interval.
  uint32_t _low;
  uint32_t _high;
};
// Arithmetic encoder writing to a BitOutputStream. Every encode*() call
// narrows the interval [_low, _high] to the sub-range of the coded symbol.
class ArithmeticEncoder : public ArithmeticCodecBase {
public:
ArithmeticEncoder(BitOutputStream& bos);
// Terminates the current code word and resets the interval.
void flush();
// Encodes a symbol occupying [low, high) out of `scale` equal steps.
void encode(const uint32_t scale, const uint32_t low, const uint32_t high) {
// update steps, low count, high count
uint32_t step = ((_high - _low) + 1) / scale;
// _high must be computed before _low is advanced: both use the old _low.
_high = _low + step * high - 1;
_low += step * low;
_checkNormalize();
}
// Same as encode(), with the scale given as a power of two (1 << shift).
void encodeShiftScale(const uint32_t shift, const uint32_t low, const uint32_t high) {
// update steps, low count, high count
uint32_t step = ((_high - _low) + 1) >> shift;
_high = _low + step * high - 1;
_low += step * low;
_checkNormalize();
}
// Same as encode(), with the division done through a udivider_t.
void encode(const udivider_t<32>& scale, const uint32_t low, const uint32_t high) {
// update steps, low count, high count
uint32_t step = divide((_high - _low) + 1, scale);
_high = _low + step * high - 1;
_low += step * low;
_checkNormalize();
}
// Encodes `value` as one of (1 << bits) equally sized steps; always
// normalizes afterwards.
void encodeBits(const uint32_t value, const uint32_t bits) {
uint32_t step = ((_high - _low) + 1) >> bits;
_low += step * value;
_high = _low + step - 1;
_normalize();
}
private:
// Normalizes only if the fast lookup-table check asks for it.
void _checkNormalize() {
if (_needsNormalization()) {
_normalize();
}
}
void _normalize();
// Writes the pending bits counted in _e3cnt, taken from `w`.
void _writeE3(const unsigned w);
BitOutputStream& _bos;
// arithmetic coding variables
// Number of pending bits whose value is not decided yet.
uint32_t _e3cnt;
};
// Arithmetic decoder reading from a BitInputStream. _value is the window of
// code bits currently compared against the interval [_low, _high].
class ArithmeticDecoder : public ArithmeticCodecBase {
public:
ArithmeticDecoder(BitInputStream& bis);
// Decodes a symbol index in [0, N) from the cumulative `bounds` table
// (N + 1 entries), on a scale of `scale` steps.
unsigned decode(const uint32_t scale, const unsigned bounds[], const unsigned N) {
uint32_t step = ((_high - _low) + 1) / scale;
return _decode(step, bounds, N);
}
// Same as decode(), with the scale given as a power of two (1 << shift).
unsigned decodeShiftScale(const uint32_t shift, const unsigned bounds[], const unsigned N) {
uint32_t step = ((_high - _low) + 1) >> shift;
return _decode(step, bounds, N);
}
// Same as decode(), with the division done through a udivider_t.
unsigned decode(const udivider_t<32>& scale, const unsigned bounds[], const unsigned N) {
uint32_t step = divide((_high - _low) + 1, scale);
return _decode(step, bounds, N);
}
// Two-symbol variants: `bounds` has three entries, the result is 0 or 1.
unsigned decodeBinary(const uint32_t scale, const unsigned bounds[]) {
uint32_t step = ((_high - _low) + 1) / scale;
return _decodeBinary(step, bounds);
}
unsigned decodeBinaryShiftScale(const uint32_t shift, const unsigned bounds[]) {
uint32_t step = ((_high - _low) + 1) >> shift;
return _decodeBinary(step, bounds);
}
unsigned decodeBinary(const udivider_t<32>& scale, const unsigned bounds[]) {
uint32_t step = divide((_high - _low) + 1, scale);
return _decodeBinary(step, bounds);
}
// Decodes a value coded as one of (1 << bits) equally sized steps; always
// normalizes afterwards.
unsigned decodeBits(const uint32_t bits) {
uint32_t step = ((_high - _low) + 1) >> bits;
unsigned result = (_value - _low) / step;
_low += step * result;
_high = _low + step - 1;
_normalize();
return result;
}
private:
// Returns the largest index i in [1, N) with val >= bounds[i], or 0 if
// there is none (linear search from the top).
unsigned _findIndex(const unsigned bounds[],
const unsigned N,
const unsigned val) {
for (unsigned i = N; i > 1; --i) {
if (val >= bounds[i - 1]) {
return i - 1;
}
}
return 0;
}
// Finds the symbol whose range contains _value and narrows the interval to
// that range.
unsigned _decode(const uint32_t step, const unsigned bounds[], const unsigned N) {
uint32_t val = (_value - _low) / step;
unsigned result = _findIndex(bounds, N, val);
// _high must be computed before _low is advanced: both use the old _low.
_high = _low + step * bounds[result + 1] - 1;
_low += step * bounds[result];
_checkNormalize();
return result;
}
// Binary case: a single comparison against the split point bounds[1].
unsigned _decodeBinary(const uint32_t step, const unsigned bounds[]) {
unsigned result = (_value >= _low + bounds[1] * step);
_high = _low + step * bounds[result + 1] - 1;
_low += step * bounds[result];
_checkNormalize();
return result;
}
// Normalizes only if the fast lookup-table check asks for it.
void _checkNormalize() {
if (_needsNormalization()) {
_normalize();
}
}
void _normalize();
BitInputStream& _bis;
// arithmetic coding variables
// Current window of code bits read from _bis.
uint32_t _value;
};
bool modelCheckFixed(unsigned bounds[], unsigned short ids[], unsigned short rids[],
const unsigned N);
void modelSortBounds(unsigned bounds[], unsigned short ids[], unsigned short rids[],
unsigned backup[], const unsigned N);
void modelRecreateBounds(unsigned bounds[], const unsigned N);
// Common data of the fixed-scale models for N symbols.
template <unsigned N>
struct ACModelBase {
  static const unsigned L = N;

  // Two models are equal if all N + 1 bounds match and the symbol ids match
  // at every position i whose upper bound bounds[i + 1] is non-zero.
  bool isEqualTo(const ACModelBase& m) const {
    if (bounds[N] != m.bounds[N]) {
      return false;
    }
    for (unsigned pos = 0; pos < N; ++pos) {
      const bool sameBound = bounds[pos] == m.bounds[pos];
      const bool idMatters = bounds[pos + 1] > 0;
      if (!sameBound || (idMatters && ids[pos] != m.ids[pos])) {
        return false;
      }
    }
    return true;
  }

  // Cumulative interval bounds; symbol position i owns [bounds[i], bounds[i + 1]).
  unsigned bounds[N + 1];
  // ids: position -> symbol, rids: symbol -> position.
  unsigned short ids[N], rids[N];
  // True if the model codes at most one symbol, so nothing is written.
  bool _fixed;
};
// Fixed-scale model for exactly two symbols.
struct ACFixedScaleBinaryModel : public ACModelBase<2> {
  ACFixedScaleBinaryModel() {}

  // Takes the two symbol counts from `arr` and builds the coding table.
  ACFixedScaleBinaryModel(const unsigned(&arr)[2]) {
    this->bounds[0] = arr[0];
    this->bounds[1] = arr[1];
    build();
  }

  void build();

  // Encodes symbol `item` on a 16-bit scale; a fixed model writes nothing.
  void encode(ArithmeticEncoder* encoder, const unsigned item) {
    if (this->_fixed) {
      return;
    }
    const unsigned slot = this->rids[item];
    encoder->encodeShiftScale(16, this->bounds[slot], this->bounds[slot + 1]);
  }
#if 0
unsigned decode(aricoder* codec) {
symbol s;
s.scale = 1 << 16;
unsigned cnt = codec->decode_count(&s);
for (unsigned i = 0; i < N; ++i) {
if (cnt < bounds[i + 1]) {
s.low_count = bounds[i];
s.high_count = bounds[i + 1];
codec->decode(&s);
return ids[i];
}
}
return 0;
}
#endif
};
// Fixed-scale model for N symbols.
template <unsigned N>
struct ACFixedScaleModel : public ACModelBase<N> {
  ACFixedScaleModel() {}

  // Takes the N symbol counts from `arr` and builds the coding table.
  ACFixedScaleModel(const unsigned(&arr)[N]) {
    for (unsigned i = 0; i < N; ++i) {
      this->bounds[i] = arr[i];
    }
    build();
  }

  // Marks the model as fixed if at most one symbol is used; otherwise sorts
  // the counts and converts them into cumulative bounds.
  void build() {
    unsigned backup[N];
    this->_fixed = modelCheckFixed(this->bounds, this->ids, this->rids, N);
    if (this->_fixed) {
      return;
    }
    modelSortBounds(this->bounds, this->ids, this->rids, backup, N);
    modelRecreateBounds(this->bounds, N);
  }

  // Encodes symbol `item` on a 16-bit scale; a fixed model writes nothing.
  void encode(ArithmeticEncoder* encoder, const unsigned item) {
    if (this->_fixed) {
      return;
    }
    const unsigned slot = this->rids[item];
    encoder->encodeShiftScale(16, this->bounds[slot], this->bounds[slot + 1]);
  }
#if 0
unsigned decode(aricoder* codec) {
symbol s;
s.scale = 1 << 16;
unsigned cnt = codec->decode_count(&s);
for (unsigned i = 0; i < N; ++i) {
if (cnt < bounds[i + 1]) {
s.low_count = bounds[i];
s.high_count = bounds[i + 1];
codec->decode(&s);
return this->ids[i];
}
}
return 0;
}
#endif
};
#endif /* ARITHMETIC_CODER_H */

View File

@@ -0,0 +1,24 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include "array_helper.h"
// Returns the sum of the first n entries of `data` (0 for n == 0).
// The addition is plain unsigned arithmetic, so it wraps on overflow.
unsigned sumArray(const unsigned* data, const unsigned n) {
  unsigned total = 0;
  const unsigned* const end = data + n;
  for (const unsigned* p = data; p != end; ++p) {
    total += *p;
  }
  return total;
}

View File

@@ -0,0 +1,25 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef ARRAY_HELPER_H
#define ARRAY_HELPER_H
// Returns the sum of the first n entries of `data`.
unsigned sumArray(const unsigned* data, const unsigned n);

// Convenience overload for arrays of known size: sums all N entries.
template <unsigned N>
inline unsigned sumArray(const unsigned (&data)[N]) {
  return sumArray(&data[0], N);
}
#endif /* ARRAY_HELPER_H */

View File

@@ -0,0 +1,73 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include "bit_helper.h"
// Returns the number of bits needed to represent `value`, i.e. the position
// of its highest set bit counted from 1; 0 for value == 0.
unsigned bitLength(unsigned value) {
  unsigned width = 0;
  for (; value != 0; value >>= 1) {
    ++width;
  }
  return width;
}
// reverse4[v] is the 4-bit value v with its bit order reversed.
static unsigned char reverse4[16] = {
  0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15
};

// Reverses the low 8 bits of `value`; higher bits are ignored.
static unsigned bitReverse8(const unsigned value) {
  const unsigned lowNibble = value & 0x0f;
  const unsigned highNibble = (value >> 4) & 0x0f;
  return (reverse4[lowNibble] << 4) | reverse4[highNibble];
}

// Reverses the low 16 bits of `value`; higher bits are ignored.
static unsigned bitReverse16(const unsigned value) {
  const unsigned lowByte = value & 0xff;
  const unsigned highByte = value >> 8;
  return (bitReverse8(lowByte) << 8) | bitReverse8(highByte);
}

// Reverses all 32 bits of `value`.
static unsigned bitReverse32(const unsigned value) {
  const unsigned lowHalf = value & 0xffff;
  const unsigned highHalf = value >> 16;
  return (bitReverse16(lowHalf) << 16) | bitReverse16(highHalf);
}

// Reverses the order of the lowest `bits` bits of `value` (bits <= 32).
// The smallest of the 8/16/32-bit reversals that covers `bits` is used and
// the result is shifted down to `bits` bits.
unsigned bitReverse(const unsigned value, const unsigned bits) {
  if (bits > 16) {
    return bitReverse32(value) >> (32 - bits);
  }
  if (bits > 8) {
    return bitReverse16(value) >> (16 - bits);
  }
  return bitReverse8(value) >> (8 - bits);
}
// leading4[v] is the number of leading zero bits of the 4-bit value v.
static unsigned char leading4[16] = {
  4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
};

// Returns the number of zero bits above the highest set bit of a 32-bit
// value; 32 for 0.
unsigned bitLeadingZeroes(const unsigned value_) {
  if (!value_) {
    return 32;
  }
  // Skip whole zero nibbles from the top, then look up the last nibble.
  unsigned skipped = 0;
  unsigned shifted = value_;
  for (; (shifted & 0xf0000000) == 0; shifted <<= 4) {
    skipped += 4;
  }
  return skipped + leading4[shifted >> 28];
}
// trailing4[v] is the number of trailing zero bits of the 4-bit value v.
static unsigned char trailing4[16] = {
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};

// Returns the number of zero bits below the lowest set bit of `value_`;
// 32 for 0.
unsigned bitTrailingZeroes(const unsigned value_) {
  if (!value_) {
    return 32;
  }
  // Skip whole zero nibbles from the bottom, then look up the last nibble.
  unsigned skipped = 0;
  unsigned shifted = value_;
  for (; (shifted & 0xf) == 0; shifted >>= 4) {
    skipped += 4;
  }
  return skipped + trailing4[shifted & 0xf];
}

View File

@@ -0,0 +1,23 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef BIT_HELPER_H
#define BIT_HELPER_H
unsigned bitLength(unsigned value);
unsigned bitReverse(const unsigned value, const unsigned bits);
unsigned bitLeadingZeroes(const unsigned value);
unsigned bitTrailingZeroes(const unsigned value);
#endif /* BIT_HELPER_H */

View File

@@ -0,0 +1,176 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include <algorithm>
#include <memory.h>
#include "bitstream.h"
// Wraps `is`. Nothing is read yet: the byte buffer and the bit buffer both
// start out empty and the bit position is 0.
BitInputStream::BitInputStream(InputStream& is)
  : _input(is),
    _bufPos(0),
    _bufSize(0),
    _bufFastLimit(0),
    _eof(false),
    _bits(0),
    _bitsRemaining(0),
    _totalBitPos(0) {
}
// Refills the byte buffer from the input stream. Does nothing while the read
// position is still below the fast limit or once the input is exhausted.
void BitInputStream::_fillBytes() {
  if (_bufPos < _bufFastLimit || _eof) {
    return;
  }
  // Move the unread tail so that it ends right before the refill area.
  const unsigned leftover = _bufSize - _bufPos;
  const unsigned newPos = PRE_BUF_EXTRA - leftover;
  memcpy(_buffer + newPos, _buffer + _bufPos, leftover);
  _bufPos = newPos;
  // Read the next chunk behind the tail.
  _bufSize = PRE_BUF_EXTRA + _input.read(_buffer + PRE_BUF_EXTRA, BUF_SIZE);
  _bufFastLimit = max(_bufPos, _bufSize - PRE_BUF_EXTRA);
  // A short read marks the end of the input.
  _eof = _bufSize != PRE_BUF_EXTRA + BUF_SIZE;
}
// Tops up the bit buffer with whole bytes from the byte buffer until fewer
// than 8 free bits remain (or, near the end of the data, until the byte
// buffer is empty).
void BitInputStream::_fill() {
  if (_bufPos < _bufFastLimit) {
    // Fast path: far enough from the buffer end, no per-byte bounds check.
    while (_bitsRemaining <= BITS - 8) {
      _bits |= ((size_t)_buffer[_bufPos++]) << _bitsRemaining;
      _bitsRemaining += 8;
    }
    return;
  }
  // Near the buffer end: try to refill, then copy with a bounds check.
  if (!_eof) {
    _fillBytes();
  }
  while (_bitsRemaining <= BITS - 8 && _bufPos < _bufSize) {
    _bits |= ((size_t)_buffer[_bufPos++]) << _bitsRemaining;
    _bitsRemaining += 8;
  }
}
// Copies up to `len` bytes from the current position to `output`.
// Returns 0 without copying if the stream is not at a byte boundary;
// otherwise returns the number of bytes copied, which is less than `len`
// if the output accepted fewer bytes or the input ended.
size_t BitInputStream::copyBytesTo(OutputStream& output, const size_t len) {
  if ((_bitsRemaining & 7) != 0) {
    return 0;
  }
  // First drain the bytes already sitting in the bit buffer.
  uint8_t pending[sizeof(_bits)];
  size_t copied = 0;
  for (; _bitsRemaining > 0 && copied < len; ++copied) {
    pending[copied] = _bits & 0xff;
    _bitsRemaining -= 8;
    _bits >>= 8;
    _totalBitPos += 8;
  }
  size_t written = output.write(pending, copied);
  if (written != copied) {
    return written;
  }
  // Then copy straight from the byte buffer, refilling as needed.
  while (copied < len) {
    unsigned todo = min(len - copied, (size_t)(_bufSize - _bufPos));
    written = output.write(_buffer + _bufPos, todo);
    _totalBitPos += 8 * written;
    _bufPos += written;
    copied += written;
    if (written != todo || eof()) {
      return copied;
    }
    _fillBytes();
  }
  return copied;
}
// Skips to the next byte boundary and reads up to `size_` bytes into `data`.
// Returns the number of bytes read, which is less than `size_` only if the
// input ended first.
size_t BitInputStream::getBytes(uint8_t* data, const size_t size_) {
  skipToByte();
  size_t left = size_;
  // First drain the bytes already sitting in the bit buffer.
  for (; _bitsRemaining > 0 && left > 0; --left) {
    *data++ = _bits & 0xff;
    _bitsRemaining -= 8;
    _bits >>= 8;
    _totalBitPos += 8;
  }
  // Then copy straight from the byte buffer, refilling as needed.
  while (left > 0) {
    unsigned todo = min(left, (size_t)(_bufSize - _bufPos));
    memcpy(data, _buffer + _bufPos, todo);
    data += todo;
    _totalBitPos += 8 * todo;
    _bufPos += todo;
    left -= todo;
    if (eof()) {
      return size_ - left;
    }
    _fillBytes();
  }
  return size_;
}
// Reads a variable-length integer. The first group uses the bits left up to
// the next byte boundary of the bit buffer (1..8 bits), every further group
// 8 bits. In each group the top bit is a continuation flag and the bits
// below it carry payload, least significant group first.
uint64_t BitInputStream::getVLI() {
// result: accumulated payload, o: accumulated offset (each continuation
// adds one unit at that group's weight), s: shift of the next payload.
uint64_t result = 0, o = 0;
unsigned s = 0, c;
// Width of the first group: 1..8 bits.
unsigned bitsRemaining = ((_bitsRemaining - 1) & 7) + 1;
// Value of the continuation flag within a group.
unsigned limit = 1 << (bitsRemaining - 1);
while ((c = get(bitsRemaining)) >= limit) {
// Continuation flag set: add the payload bits and advance.
result += ((uint64_t)(c & (limit - 1))) << s;
s += (bitsRemaining - 1);
o = (o + 1) << (bitsRemaining - 1);
bitsRemaining = 8;
limit = 128;
}
// Last group: its whole value is payload.
return result + o + (((uint64_t)c) << s);
}
// Wraps `output`; the byte buffer and the bit buffer both start out empty.
BitOutputStream::BitOutputStream(OutputStream& output)
  : _output(output),
    _bufPos(0),
    _bits(0),
    _bitPos(0) {
}
// Moves all complete bytes from the bit buffer into the byte buffer and, once
// at least BUF_SIZE bytes have piled up, writes one BUF_SIZE chunk to the
// output. Up to 7 bits stay behind in the bit buffer.
void BitOutputStream::_flush() {
  for (; _bitPos >= 8; _bitPos -= 8) {
    _buffer[_bufPos++] = _bits & 0xff;
    _bits >>= 8;
  }
  if (_bufPos < BUF_SIZE) {
    return;
  }
  _output.write(_buffer, BUF_SIZE);
  // Move the bytes beyond the written chunk to the front of the buffer.
  const unsigned overhang = _bufPos - BUF_SIZE;
  memcpy(_buffer, _buffer + BUF_SIZE, overhang);
  _bufPos = overhang;
}
// Writes everything buffered so far to the output. A trailing partial byte
// is written as a full byte, so the stream is byte aligned afterwards.
void BitOutputStream::flush() {
  _flush();
  if (_bitPos != 0) {
    // Fewer than 8 bits are left: emit them as one byte.
    _buffer[_bufPos++] = _bits & 0xff;
    _bitPos = 0;
    _bits = 0;
  }
  _output.write(_buffer, _bufPos);
  _bufPos = 0;
}
// Writes `size` raw bytes from `data`. All buffered bits are flushed first,
// so the bytes are passed to the output directly, behind everything written
// before.
void BitOutputStream::putBytes(
    const uint8_t* data,
    const size_t size) {
  flush();
  _output.write(data, size);
}
// Writes `size_` as a variable-length integer. The first group fills the
// bits left up to the next byte boundary of the bit buffer (1..8 bits),
// every further group uses 8 bits. In each group the top bit is a
// continuation flag and the bits below it carry payload, least significant
// group first.
void BitOutputStream::putVLI(const uint64_t size_) {
uint64_t size = size_;
// Width of the first group: 1..8 bits.
unsigned bitsRemaining = 8 - (_bitPos & 7);
// Value of the continuation flag within a group.
unsigned limit = 1 << (bitsRemaining - 1);
while (size >= limit) {
// Does not fit into this group: write the low payload bits with the
// continuation flag set (put() keeps only the low bitsRemaining bits).
put(size | limit, bitsRemaining);
// The "- 1" removes the values already covered by shorter encodings.
size = (size >> (bitsRemaining - 1)) - 1;
bitsRemaining = 8;
limit = 128;
}
// Last group: continuation flag clear.
put(size, bitsRemaining);
}

View File

@@ -0,0 +1,130 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef BITSTREAM_H
#define BITSTREAM_H
#include <algorithm>
#include "bit_helper.h"
#include "stream.h"
// Huffman decoder for little endian
// Bit-level reader on top of an InputStream. Bits are consumed starting with
// the least significant bit of each byte.
//
// For all functions taking a bit count n: n must be smaller than the width
// of size_t in bits.
class BitInputStream {
public:
  BitInputStream(InputStream&);

  // True once the input is exhausted and every buffered bit was consumed.
  bool eof() const {
    return _eof && _bufPos == _bufSize && !_bitsRemaining;
  }
  // Number of bits consumed so far.
  size_t bitPos() const {
    return _totalBitPos;
  }
  // Returns the next n bits without consuming them, refilling the bit buffer
  // if it holds fewer than n bits.
  size_t peek(const unsigned n) {
    if (_bitsRemaining < n) {
      _fill();
    }
    // The mask is built in size_t: an int mask (1 << n) is undefined or
    // wrong from n == 31 on although the bit buffer is wider.
    return _bits & ((static_cast<size_t>(1) << n) - 1);
  }
  // Consumes n bits; the bit counter never drops below zero.
  void skip(const unsigned n) {
    _bitsRemaining -= min(n, _bitsRemaining);
    _bits >>= n;
    _totalBitPos += n;
  }
  // Reads and consumes n bits.
  size_t get(const unsigned n) {
    size_t v = peek(n);
    skip(n);
    return v;
  }
  // Reads n bits and returns them in reversed bit order.
  size_t getReverse(const unsigned n) {
    return bitReverse(get(n), n);
  }
  // Drops the bits up to the next byte boundary of the bit buffer.
  void skipToByte() {
    skip(_bitsRemaining & 7);
  }
  // True if the bits up to the next byte boundary are all zero.
  bool checkLastBitsOfByteAreZero() {
    return peek(_bitsRemaining & 7) == 0;
  }
  // Makes sure the bit buffer was refilled if it holds fewer than n bits;
  // to be followed by fastPeek()/fastGet().
  void fastFill(const unsigned n) {
    if (_bitsRemaining < n) {
      _fill();
    }
  }
  // Like peek(), but never refills the bit buffer.
  size_t fastPeek(const unsigned n) {
    return _bits & ((static_cast<size_t>(1) << n) - 1);
  }
  // Like get(), but never refills the bit buffer.
  size_t fastGet(const unsigned n) {
    size_t v = fastPeek(n);
    skip(n);
    return v;
  }
  size_t copyBytesTo(OutputStream& output, const size_t len);
  size_t getBytes(uint8_t* data, const size_t size);
  uint64_t getVLI();

private:
  void _fillBytes();
  void _fill();

  enum { BUF_SIZE = 1024, PRE_BUF_EXTRA = 16, BITS = sizeof(size_t)*8 };

  InputStream& _input;
  // Byte buffer: PRE_BUF_EXTRA bytes of carry-over followed by one chunk.
  unsigned char _buffer[PRE_BUF_EXTRA + BUF_SIZE];
  unsigned _bufPos, _bufSize, _bufFastLimit;
  bool _eof;
  // Bit buffer and the number of valid bits in it.
  size_t _bits;
  unsigned _bitsRemaining;
  size_t _totalBitPos;
};
// Bit-level writer on top of an OutputStream. Bits are collected in a
// size_t-wide accumulator (_bits), lowest bit first, and moved to a byte
// buffer by _flush().
class BitOutputStream {
public:
  BitOutputStream(OutputStream&);

  // Appends the low n bits of `value`. The accumulator is drained first when
  // the new bits would reach its width; after draining fewer than 8 bits are
  // pending, so n must not exceed BITS - 8.
  void put(const size_t value, const unsigned n) {
    if (_bitPos + n >= BITS) {
      _flush();
    }
    // Build the mask in size_t: `1 << n` is an int shift and overflows for
    // n >= 31 even though the accumulator can be 64 bits wide.
    _bits |= (value & ((((size_t)1) << n) - 1)) << _bitPos;
    _bitPos += n;
  }

  // Appends the low n bits of `value` in reversed bit order.
  void putReverse(const size_t value, const unsigned n) {
    put(bitReverse(value, n), n);
  }

  // Rounds the bit position up to the next multiple of 8; the skipped bits
  // keep whatever the accumulator holds there (zero after put/flush).
  void fillByte() {
    _bitPos = (_bitPos + 7) & ~7;
  }

  void flush();

  // Number of bits currently pending in the accumulator.
  unsigned bitPos() const {
    return _bitPos;
  }

  void putBytes(const uint8_t* data, const size_t size);
  void putVLI(const uint64_t size);

private:
  void _flush();

  enum {
    BUF_SIZE = 1024, BUF_EXTRA = 64, BITS = sizeof(size_t) * 8
  };

  OutputStream& _output;
  unsigned char _buffer[BUF_SIZE + BUF_EXTRA];
  unsigned _bufPos;   // bytes currently held in _buffer
  size_t _bits;       // bit accumulator, first written bit is bit 0
  unsigned _bitPos;   // number of pending bits in _bits
};
#endif /* BITSTREAM_H */

View File

@@ -0,0 +1,115 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include "bit_helper.h"
#include "const_division.h"
// Based on "N-Bit Unsigned Division Via N-Bit Multiply-Add"
// by Robison
// Builds the multiply-add constants for dividing N-bit unsigned values by `d`.
// For a power of two (this test is also true for d == 0) both magic values
// are all ones; otherwise either (t + 1, 0) or (t, t) is chosen, where
// t = floor(2^(N + shift) / d) and shift = bitLength(d) - 1.
template <unsigned N>
udivider_t<N> build_udivider(const typename divider_uint_t<N>::type d) {
  typedef typename divider_uint_t<N * 2>::type T1;
  typedef typename divider_uint_t<N>::type T2;
  udivider_t<N> result;
  result.shift = bitLength(d) - 1;
  if ((d & (d - 1)) == 0) {
    result.magic1 = result.magic2 = ~(T2)0;
  } else {
    // Shift in the unsigned N-bit type: `1 << 31` would overflow a signed int
    // when d >= 2^31.
    const T2 shm = (T2)(((T2)1) << result.shift);
    const T2 t = (T2)((((T1)shm) << N) / d);
    // The remainder test needs t * d + d modulo 2^N. Do the arithmetic in the
    // unsigned double-width type so that N = 16 does not run through signed
    // int (where the sum can exceed INT_MAX), then truncate to N bits.
    const T2 r = (T2)(((T1)t) * d + d);
    if (r <= shm) {
      result.magic1 = t + 1;
      result.magic2 = 0;
    } else {
      result.magic1 = t;
      result.magic2 = t;
    }
  }
  return result;
}
// Non-template entry points for 16-bit and 32-bit divisors.
udivider_t<16> build_udivider_16(const uint16_t d) {
  return build_udivider<16>(d);
}
udivider_t<32> build_udivider_32(const uint32_t d) {
  return build_udivider<32>(d);
}
// Compact variant of build_udivider: a single magic value plus a control
// byte. The low bits of ctrl hold the shift (bitLength(d) - 1); bit 7 is set
// when the magic value must also be added after the multiplication.
template <unsigned N>
ucdivider_t<N> build_ucdivider(const typename divider_uint_t<N>::type d) {
  typedef typename divider_uint_t<N * 2>::type T1;
  typedef typename divider_uint_t<N>::type T2;
  ucdivider_t<N> result;
  result.ctrl = bitLength(d) - 1;
  if ((d & (d - 1)) == 0) {
    result.magic = ~(T2)0;
    result.ctrl |= 0x80;
  } else {
    // ctrl still holds only the shift here. Shift in the unsigned N-bit type:
    // `1 << 31` would overflow a signed int when d >= 2^31.
    const T2 shm = (T2)(((T2)1) << result.ctrl);
    const T2 t = (T2)((((T1)shm) << N) / d);
    // t * d + d modulo 2^N, computed in the unsigned double-width type so
    // that N = 16 does not run through (overflowing) signed int arithmetic.
    const T2 r = (T2)(((T1)t) * d + d);
    if (r <= shm) {
      result.magic = t + 1;
    } else {
      result.magic = t;
      result.ctrl |= 0x80;
    }
  }
  return result;
}
// Non-template entry points for 16-bit and 32-bit divisors.
ucdivider_t<16> build_ucdivider_16(const uint16_t d) {
  return build_ucdivider<16>(d);
}
ucdivider_t<32> build_ucdivider_32(const uint32_t d) {
  return build_ucdivider<32>(d);
}
// Builds the constants for dividing N-bit signed values by `d_`: the unsigned
// divider of |d_| plus a sign field (-1 for a negative divisor, 0 otherwise).
template <unsigned N>
sdivider_t<N> build_sdivider(const typename divider_int_t<N>::type d_) {
  typedef typename divider_uint_t<N>::type T2;
  sdivider_t<N> result;
  // Take the magnitude in the unsigned type: `-d_` overflows for the most
  // negative 32-bit value, whereas unsigned negation wraps to 2^(N-1).
  const T2 magnitude = d_ < 0 ? (T2)(0 - (T2)d_) : (T2)d_;
  udivider_t<N> uresult = build_udivider<N>(magnitude);
  result.magic1 = uresult.magic1;
  result.magic2 = uresult.magic2;
  result.shift = uresult.shift;
  result.sign = d_ < 0 ? -1 : 0;
  return result;
}
// Non-template entry points for 16-bit and 32-bit divisors.
sdivider_t<16> build_sdivider_16(const int16_t d) {
  return build_sdivider<16>(d);
}
sdivider_t<32> build_sdivider_32(const int32_t d) {
  return build_sdivider<32>(d);
}
// Compact signed divider: the compact unsigned divider of |d_| with bit 6 of
// ctrl set when the divisor is negative.
template <unsigned N>
scdivider_t<N> build_scdivider(const typename divider_int_t<N>::type d_) {
  typedef typename divider_uint_t<N>::type T2;
  scdivider_t<N> result;
  // Take the magnitude in the unsigned type: `-d_` overflows for the most
  // negative 32-bit value, whereas unsigned negation wraps to 2^(N-1).
  const T2 magnitude = d_ < 0 ? (T2)(0 - (T2)d_) : (T2)d_;
  ucdivider_t<N> uresult = build_ucdivider<N>(magnitude);
  result.magic = uresult.magic;
  result.ctrl = uresult.ctrl;
  if (d_ < 0) {
    result.ctrl |= 0x40;
  }
  return result;
}
// Non-template entry points for 16-bit and 32-bit divisors.
scdivider_t<16> build_scdivider_16(const int16_t d) {
  return build_scdivider<16>(d);
}
scdivider_t<32> build_scdivider_32(const int32_t d) {
  return build_scdivider<32>(d);
}

View File

@@ -0,0 +1,170 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef CONST_DIVISION_H
#define CONST_DIVISION_H
#include <stdint.h>
// Maps a bit width N to the signed integer type of that width.
// Only the widths specialized below (16, 32) are usable.
template <unsigned N> struct divider_int_t;
// Maps a bit width N to the unsigned integer type of that width.
// Only the widths specialized below (16, 32, 64) are usable.
template <unsigned N> struct divider_uint_t;
template <> struct divider_int_t<16> {
typedef int16_t type;
};
template <> struct divider_int_t<32> {
typedef int32_t type;
};
template <> struct divider_uint_t<16> {
typedef uint16_t type;
};
template <> struct divider_uint_t<32> {
typedef uint32_t type;
};
// The 64-bit type serves as the double-width intermediate (N * 2) for N = 32.
template <> struct divider_uint_t<64> {
typedef uint64_t type;
};
// Precomputed constants for unsigned division by a fixed N-bit divisor:
// quotient = ((dividend * magic1 + magic2) >> N) >> shift.
template <unsigned N>
struct udivider_t {
typename divider_uint_t<N>::type magic1; // factor
typename divider_uint_t<N>::type magic2; // addend
uint8_t shift;
};
// Compact unsigned divider: one magic value used as the factor and, when
// bit 7 of ctrl is set, also as the addend.
template <unsigned N>
struct ucdivider_t {
typename divider_uint_t<N>::type magic; // factor/addend
uint8_t ctrl; // bits 0..3/4/5 - shift, bit 7 - add required
};
// If it wasn't for +/-1, the signed dividers wouldn't
// need the add-term (magic2), as they could just
// use a factor (magic1) with one more bit precision.
template <unsigned N>
struct sdivider_t {
typename divider_uint_t<N>::type magic1; // factor
typename divider_uint_t<N>::type magic2; // addend
uint8_t shift;
int8_t sign; // -1 if negative, 0 otherwise
};
// Compact signed divider: as ucdivider_t, with bit 6 of ctrl marking a
// negative divisor.
template <unsigned N>
struct scdivider_t {
typename divider_uint_t<N>::type magic;
uint8_t ctrl; // bits 0..3/4/5 - shift, bit 6 - negative, bit 7 - add required
};
// Divider construction functions (defined in the matching .cpp file).
// Naming: u = unsigned, s = signed, c = compact (single magic + ctrl byte);
// the suffix is the divisor width in bits.
udivider_t<16> build_udivider_16(const uint16_t d);
udivider_t<32> build_udivider_32(const uint32_t d);
ucdivider_t<16> build_ucdivider_16(const uint16_t d);
ucdivider_t<32> build_ucdivider_32(const uint32_t d);
sdivider_t<16> build_sdivider_16(const int16_t d);
sdivider_t<32> build_sdivider_32(const int32_t d);
scdivider_t<16> build_scdivider_16(const int16_t d);
scdivider_t<32> build_scdivider_32(const int32_t d);
// Unsigned division of an N1-bit dividend by a precomputed N2-bit divider.
// The multiply-add runs in the 2*N1-bit type; the result is the part above
// the low N2 bits, shifted right by divisor.shift.
template <unsigned N1, unsigned N2>
inline typename divider_uint_t<N1>::type
divide_template(const typename divider_uint_t<N1>::type dividend,
const udivider_t<N2>& divisor) {
typedef typename divider_uint_t<N1 * 2>::type T1;
typedef typename divider_uint_t<N1>::type T2;
T1 t = ((T1)dividend) * divisor.magic1 + divisor.magic2;
T2 u = (T2)(t >> N2);
return u >> divisor.shift;
}
// Same as above for the compact divider: the magic value is the factor and,
// when ctrl bit 7 is set, also the addend; the shift is the low bits of ctrl.
template <unsigned N1, unsigned N2>
inline typename divider_uint_t<N1>::type
divide_template(const typename divider_uint_t<N1>::type dividend,
const ucdivider_t<N2>& divisor) {
typedef typename divider_uint_t<N1 * 2>::type T1;
typedef typename divider_uint_t<N1>::type T2;
T1 t = ((T1)dividend) * divisor.magic
+ (divisor.ctrl & 0x80 ? divisor.magic : 0);
T2 u = (T2)(t >> N2);
// N2 - 1 masks off the flag bits and keeps only the shift amount.
return u >> (divisor.ctrl & (N2 - 1));
}
// Signed division: divides the magnitude of the dividend with the unsigned
// scheme, then applies the combined sign of dividend and divisor.
template <unsigned N1, unsigned N2>
inline typename divider_int_t<N1>::type
divide_template(const typename divider_int_t<N1>::type dividend,
const sdivider_t<N2>& divisor) {
typedef typename divider_uint_t<N1 * 2>::type T1;
typedef typename divider_uint_t<N1>::type T2;
// s is all ones for a negative dividend, zero otherwise.
T2 s = dividend < 0 ? -1 : 0;
// (x ^ s) - s negates x when s is all ones and leaves it unchanged when s
// is zero, i.e. it yields |dividend| here.
T1 t = ((T1)(T2)((dividend ^ s) - s)) * divisor.magic1
+ divisor.magic2;
T2 u = (T2)(t >> N2) >> divisor.shift;
// Result is negative exactly when dividend and divisor signs differ.
s ^= divisor.sign;
return (u ^ s) - s;
}
// Signed division with the compact divider: ctrl bit 7 selects the addend,
// ctrl bit 6 marks a negative divisor.
template <unsigned N1, unsigned N2>
inline typename divider_int_t<N1>::type
divide_template(const typename divider_int_t<N1>::type dividend,
const scdivider_t<N2>& divisor) {
typedef typename divider_uint_t<N1 * 2>::type T1;
typedef typename divider_uint_t<N1>::type T2;
// s is all ones for a negative dividend, zero otherwise.
T2 s = dividend < 0 ? -1 : 0;
T1 t = ((T1)(T2)((dividend ^ s) - s)) * divisor.magic
+ (divisor.ctrl & 0x80 ? divisor.magic : 0);
T2 u = (T2)(t >> N2) >> (divisor.ctrl & (N2 - 1));
// Flip the sign mask when the divisor is negative.
s ^= (divisor.ctrl & 0x40 ? -1 : 0);
return (u ^ s) - s;
}
// Overload set that selects the divide_template instantiation from the
// dividend type and the divider type: <dividend bits, divider bits>.
// Unsigned dividend, full unsigned divider.
inline uint16_t divide(const uint16_t dividend, const udivider_t<16>& divisor) {
return divide_template<16, 16>(dividend, divisor);
}
inline uint32_t divide(const uint32_t dividend, const udivider_t<16>& divisor) {
return divide_template<32, 16>(dividend, divisor);
}
inline uint32_t divide(const uint32_t dividend, const udivider_t<32>& divisor) {
return divide_template<32, 32>(dividend, divisor);
}
// Unsigned dividend, compact unsigned divider.
inline uint16_t divide(const uint16_t dividend, const ucdivider_t<16>& divisor) {
return divide_template<16, 16>(dividend, divisor);
}
inline uint32_t divide(const uint32_t dividend, const ucdivider_t<16>& divisor) {
return divide_template<32, 16>(dividend, divisor);
}
inline uint32_t divide(const uint32_t dividend, const ucdivider_t<32>& divisor) {
return divide_template<32, 32>(dividend, divisor);
}
// Signed dividend, full signed divider.
inline int16_t divide(const int16_t dividend, const sdivider_t<16>& divisor) {
return divide_template<16, 16>(dividend, divisor);
}
inline int32_t divide(const int32_t dividend, const sdivider_t<16>& divisor) {
return divide_template<32, 16>(dividend, divisor);
}
inline int32_t divide(const int32_t dividend, const sdivider_t<32>& divisor) {
return divide_template<32, 32>(dividend, divisor);
}
// Signed dividend, compact signed divider.
inline int16_t divide(const int16_t dividend, const scdivider_t<16>& divisor) {
return divide_template<16, 16>(dividend, divisor);
}
inline int32_t divide(const int32_t dividend, const scdivider_t<16>& divisor) {
return divide_template<32, 16>(dividend, divisor);
}
inline int32_t divide(const int32_t dividend, const scdivider_t<32>& divisor) {
return divide_template<32, 32>(dividend, divisor);
}
#endif /* CONST_DIVISION_H */

View File

@@ -0,0 +1,41 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include <algorithm>
#include <string.h>
#include <stdio.h>
#include "filestream.h"
// Wraps an already opened FILE*; the stream does not close it.
FileStream::FileStream(FILE* f)
  : _f(f)
{
}

// Reports the end-of-file indicator of the underlying FILE.
bool FileStream::eof() const {
  return feof(_f) != 0;
}

// Reads up to `size` bytes into `buffer`; returns the number of bytes read.
size_t FileStream::read(unsigned char* buffer, const size_t size) {
  const size_t got = fread(buffer, 1, size, _f);
  return got;
}

// Writes `size` bytes from `buffer`; returns the number of bytes written.
size_t FileStream::write(const unsigned char* buffer, const size_t size) {
  const size_t put = fwrite(buffer, 1, size, _f);
  return put;
}

// Current file position as reported by _ftelli64.
uint64_t FileStream::tell() const {
  return (uint64_t)_ftelli64(_f);
}

// Moves to absolute position `newPos` and returns the position the file had
// before the call. The result of the seek itself is not checked.
uint64_t FileStream::seek(const uint64_t newPos) {
  const uint64_t previous = tell();
  _fseeki64(_f, newPos, SEEK_SET);
  return previous;
}

View File

@@ -0,0 +1,38 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef FILESTREAM_H
#define FILESTREAM_H
#include <stdint.h>
#include <vector>
#include "stream.h"
// Seekable input/output stream backed by a C FILE*.
// The FILE* is supplied by the caller; this class declares no destructor,
// so it does not close the file.
class FileStream : public SeekableInputOutputStream {
public:
FileStream(FILE* f);
virtual bool eof() const;
virtual size_t read(unsigned char* buffer, const size_t size);
virtual size_t write(const unsigned char* buffer, const size_t size);
virtual uint64_t tell() const;
// Returns the position before the seek.
virtual uint64_t seek(const uint64_t newPos);
private:
FILE* _f;
};
#endif /* FILESTREAM_H */

View File

@@ -0,0 +1,111 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include <algorithm>
#include "huffman_decoder.h"
#include "huffman_helper.h"
#include "bit_helper.h"
// Builds the decoding tables for the given symbol bit lengths. When the
// lengths are rejected, a one-entry error table is installed instead and
// error() reports true.
HuffmanDecoder::HuffmanDecoder(
    const unsigned char* symbolBitLengths,
    const size_t symbolCount,
    const bool disableZeroBitSymbols,
    const unsigned char maxBitsPerTable)
  : _error(false)
{
  const bool built = _constructTables(symbolBitLengths, symbolCount,
                                      disableZeroBitSymbols, maxBitsPerTable);
  if (!built) {
    _constructErrorTable();
  }
}
// Continues decoding in the secondary tables after the root table returned a
// link. The bits used for the root lookup are consumed first; then each table
// is consulted in turn until an entry with a symbol (non-negative) is found.
size_t HuffmanDecoder::_decodeDeeper(
    BitInputStream& bis,
    const size_t tableId_) const {
  bis.skip(_table0.peekBits);
  size_t current = tableId_;
  for (;;) {
    const Table& sub = _tables[current];
    const size_t index = bis.peek(sub.peekBits);
    const signed short entry = sub.lookup[index];
    if (entry >= 0) {
      // Low 4 bits: bits to consume in this table; the rest: the symbol.
      bis.skip(entry & 0xf);
      return entry >> 4;
    }
    // Negative entry: complement of the next table's index.
    bis.skip(sub.peekBits);
    current = ~entry;
  }
}
// Builds the root lookup table and any secondary tables.
// Returns false when maxBitsPerTable is outside 1..15 or when
// HuffmanHelper::countSymbols rejects the bit lengths.
//
// Lookup entry encoding: a non-negative entry is (symbol << 4) | bitsToSkip;
// a negative entry is the bitwise complement of an index into _tables.
// Bit lengths are handled with an offset of one (l = length + 1), matching
// the indexing of nextCode as filled by countSymbols.
bool HuffmanDecoder::_constructTables(
const unsigned char* symbolBitLengths,
const size_t symbolCount,
const bool disableZeroBitSymbols,
const unsigned char maxBitsPerTable
) {
if (maxBitsPerTable < 1 || maxBitsPerTable > 15) {
return false;
}
unsigned nextCode[HuffmanHelper::MAX_BL + 2];
unsigned char minLength, maxLength;
if (!HuffmanHelper::countSymbols(nextCode, minLength, maxLength,
symbolBitLengths, symbolCount,
disableZeroBitSymbols)) {
return false;
}
// maxLength carries the +1 offset, so maxLength - 1 is the longest code.
_table0.peekBits = min((unsigned char)(maxLength - 1), maxBitsPerTable);
_table0.lookup.resize(1 << _table0.peekBits);
std::fill(_table0.lookup.begin(), _table0.lookup.end(), 0);
// Smallest offset length that still gets a table entry.
unsigned char minL = disableZeroBitSymbols ? 2 : 1;
for (unsigned i = 0; i < symbolCount; ++i) {
unsigned char l = (unsigned char)(symbolBitLengths[i] + 1);
if (l < minL) {
continue;
}
// k: code bits still to be placed; maxK: longest code minus the bits
// already covered by the tables walked so far.
unsigned char k = l - 1, maxK = maxLength - 1;
unsigned code = bitReverse(nextCode[l]++, k);
Table* t = &_table0;
// Code is longer than the current table's index: descend, creating the
// secondary table on first use.
while (k > t->peekBits) {
k -= t->peekBits;
maxK -= t->peekBits;
unsigned subbits = code & ((1 << t->peekBits) - 1);
code >>= t->peekBits;
signed short v = t->lookup[subbits];
if (v >= 0) {
unsigned newTableId = _tables.size();
// Store the link before push_back: t may point into _tables and
// would be invalidated by a reallocation.
t->lookup[subbits] = ~newTableId;
_tables.push_back(Table());
t = &_tables[newTableId];
t->peekBits = min(maxK, maxBitsPerTable);
t->lookup.resize(1 << t->peekBits);
std::fill(t->lookup.begin(), t->lookup.end(), 0);
} else {
t = &_tables[~v];
}
}
// Replicate the entry at every index whose low k bits equal the code.
do {
t->lookup[code] = (i << 4) | k;
code += 1 << k;
} while (code < t->lookup.size());
}
return true;
}
// Marks the decoder as failed and installs a root table with zero peek bits
// and a single zero entry, so that decode() still has a valid table to read.
void HuffmanDecoder::_constructErrorTable() {
  _table0.lookup.assign(1, (signed short)0);
  _table0.peekBits = 0;
  _error = true;
}

View File

@@ -0,0 +1,62 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HUFFMAN_DECODER_H
#define HUFFMAN_DECODER_H
#include <vector>
#include "bitstream.h"
// Huffman decoder
// Table-driven Huffman decoder. A root table is indexed with the next
// peekBits bits of the input; entries either carry a symbol or link to a
// secondary table.
class HuffmanDecoder {
public:
  HuffmanDecoder(const unsigned char* symbolBitLengths,
                 const size_t symbolCount,
                 const bool disableZeroBitSymbols,
                 const unsigned char maxBitsPerTable);

  // True when table construction failed and the error table is in use.
  bool error() const {
    return _error;
  }

  // Decodes one symbol from `bis`.
  size_t decode(BitInputStream& bis) const {
    const size_t index = bis.peek(_table0.peekBits);
    const signed short entry = _table0.lookup[index];
    if (entry < 0) {
      // Negative entry: complement of a secondary table index.
      return _decodeDeeper(bis, ~entry);
    }
    // Low 4 bits: code length to consume; remaining bits: the symbol.
    bis.skip(entry & 0xf);
    return entry >> 4;
  }

private:
  size_t _decodeDeeper(BitInputStream& bis, const size_t tableId) const;
  bool _constructTables(const unsigned char* symbolBitLengths,
                        const size_t symbolCount,
                        const bool disableZeroBitSymbols,
                        const unsigned char maxBitsPerTable);
  void _constructErrorTable();

private:
  struct Table {
    unsigned char peekBits;            // number of bits used as index
    std::vector<signed short> lookup;  // 1 << peekBits entries
  };

  Table _table0;               // root table
  std::vector<Table> _tables;  // secondary tables, addressed by link entries
  bool _error;
};
#endif /* HUFFMAN_DECODER_H */

View File

@@ -0,0 +1,65 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include <algorithm>
#include "huffman_encoder.h"
#include "huffman_helper.h"
#include "bit_helper.h"
// Builds the per-symbol code table for the given bit lengths. When the
// lengths are rejected, an all-zero table is installed instead and error()
// reports true.
HuffmanEncoder::HuffmanEncoder(
    const unsigned char* symbolBitLengths,
    const unsigned symbolCount,
    const bool disableZeroBitSymbols)
  : _error(false)
{
  const bool built = _constructTables(symbolBitLengths, symbolCount,
                                      disableZeroBitSymbols);
  if (!built) {
    _constructErrorTable(symbolCount);
  }
}
// Fills _lookup with one packed entry per symbol: (code << 5) | codeLength,
// where the code is stored bit-reversed. Symbols whose offset length is below
// the minimum get the entry 0. Returns false when
// HuffmanHelper::countSymbols rejects the lengths.
bool HuffmanEncoder::_constructTables(
    const unsigned char* symbolBitLengths,
    const unsigned symbolCount,
    const bool disableZeroBitSymbols) {
  unsigned nextCode[HuffmanHelper::MAX_BL + 2];
  unsigned char minLength, maxLength;
  const bool valid = HuffmanHelper::countSymbols(nextCode, minLength, maxLength,
                                                 symbolBitLengths, symbolCount,
                                                 disableZeroBitSymbols);
  if (!valid) {
    return false;
  }
  // Lengths are handled with an offset of one, matching nextCode's indexing.
  const unsigned char lowestUsed = disableZeroBitSymbols ? 2 : 1;
  _lookup.resize(symbolCount);
  for (unsigned sym = 0; sym != symbolCount; ++sym) {
    const unsigned char offsetLen = (unsigned char)(symbolBitLengths[sym] + 1);
    if (offsetLen >= lowestUsed) {
      const unsigned char bits = offsetLen - 1;
      const unsigned code = bitReverse(nextCode[offsetLen]++, bits);
      _lookup[sym] = (code << 5) | bits;
    } else {
      _lookup[sym] = 0;
    }
  }
  return true;
}
// Marks the encoder as failed and installs `symbolCount` zero entries, so
// encode() stays in bounds and emits zero bits for every symbol.
void HuffmanEncoder::_constructErrorTable(
    const unsigned symbolCount) {
  _lookup.assign(symbolCount, 0u);
  _error = true;
}

View File

@@ -0,0 +1,48 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HUFFMAN_ENCODER_H
#define HUFFMAN_ENCODER_H
#include <vector>
#include "bitstream.h"
// Huffman encoder: maps each symbol to its code and writes it to a
// BitOutputStream.
class HuffmanEncoder {
public:
  HuffmanEncoder(const unsigned char* symbolBitLengths,
                 const unsigned symbolCount,
                 const bool disableZeroBitSymbols);

  // True when table construction failed and the all-zero table is in use.
  bool error() const {
    return _error;
  }

  // Writes the code of `symbol` to `bos`. `symbol` must be below the symbol
  // count given at construction; it is not range-checked here.
  void encode(BitOutputStream& bos, const unsigned symbol) const {
    const unsigned packed = _lookup[symbol];
    const unsigned bitCount = packed & 0x1f;  // low 5 bits: code length
    const unsigned codeBits = packed >> 5;    // remaining bits: the code
    bos.put(codeBits, bitCount);
  }

private:
  bool _constructTables(const unsigned char* symbolBitLengths,
                        const unsigned symbolCount,
                        const bool disableZeroBitSymbols);
  void _constructErrorTable(const unsigned symbolCount);

private:
  std::vector<unsigned> _lookup;  // per symbol: (code << 5) | codeLength
  bool _error;
};
#endif /* HUFFMAN_ENCODER_H */

View File

@@ -0,0 +1,75 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include <algorithm>
#include <string.h>
#include "huffman_helper.h"
#include "bit_helper.h"
// Validates a set of symbol bit lengths and computes the first code of every
// length.
//
// All lengths are handled with an offset of one: index l = bitLength + 1.
// On success nextCode[l] holds the first code for each l in
// minLength..maxLength (both offset by one as well); other entries of
// nextCode are left untouched.
//
// Returns false when symbolCount is outside 1..1023, when a length exceeds
// MAX_BL, when no symbol has an offset length of at least 1, or when the
// codes do not fill the code space exactly. A single symbol on a single
// length is accepted without the code-space check.
// minLength/maxLength may already have been modified when false is returned.
bool HuffmanHelper::countSymbols(
unsigned(&nextCode)[MAX_BL + 2],
unsigned char& minLength,
unsigned char& maxLength,
const unsigned char* symbolBitLengths,
const unsigned symbolCount,
const bool disableZeroBitSymbols
) {
if (symbolCount < 1 || symbolCount >= 1024) {
return false;
}
unsigned short blCount[MAX_BL + 2];
// Count symbol frequencies
memset(blCount, 0, sizeof(blCount));
for (unsigned i = 0; i < symbolCount; ++i) {
// A stored length of 255 wraps to 0 here and lands in blCount[0].
unsigned char l = (unsigned char)(symbolBitLengths[i] + 1);
if (l > MAX_BL + 1) {
return false;
}
blCount[l]++;
}
// Smallest and largest offset length in use; index 0 is not considered.
for (minLength = 1; minLength <= MAX_BL + 1; ++minLength) {
if (blCount[minLength]) {
break;
}
}
for (maxLength = MAX_BL + 1; maxLength >= minLength; --maxLength) {
if (blCount[maxLength]) {
break;
}
}
// Both loops ran to completion: no usable length found.
if (minLength > maxLength) {
return false;
}
// Remove deleted symbols
blCount[0] = 0;
if (disableZeroBitSymbols) {
blCount[1] = 0;
}
// Calculate start codes
unsigned code = 0;
for (unsigned i = minLength; i <= maxLength; ++i) {
code = (code + blCount[i - 1]) << 1;
nextCode[i] = code;
}
if (minLength == maxLength && blCount[maxLength] == 1) {
return true;
}
// Check that we don't have holes
return nextCode[maxLength] + blCount[maxLength] == (unsigned)(1 << (maxLength - 1));
}

View File

@@ -0,0 +1,34 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HUFFMAN_HELPER_H
#define HUFFMAN_HELPER_H
#include <vector>
// Helper shared by the Huffman encoder and decoder
class HuffmanHelper {
public:
enum {
// Largest bit length accepted by countSymbols.
MAX_BL = 25
};
// Validates the bit lengths and fills nextCode with the first code per
// length, indexed by bit length + 1 (hence the MAX_BL + 2 array size).
// minLength/maxLength receive the smallest/largest used index.
// Returns false when the lengths are rejected.
static bool countSymbols(unsigned(&nextCode)[MAX_BL + 2],
unsigned char& minLength,
unsigned char& maxLength,
const unsigned char* symbolBitLengths,
const unsigned symbolCount,
const bool disableZeroBitSymbols);
};
#endif /* HUFFMAN_HELPER_H */

View File

@@ -0,0 +1,57 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include <algorithm>
#include <string.h>
#include "memstream.h"
// Creates an empty stream positioned at 0.
MemStream::MemStream() : _pos(0) {}

// Creates a stream holding a copy of `content`, positioned at 0.
MemStream::MemStream(const std::vector<uint8_t>& content)
  : _data(content)
  , _pos(0) {}

// Creates a stream holding a copy of content[off, off + sz), clipped to the
// end of `content`, positioned at 0. An offset at or past the end yields an
// empty stream.
MemStream::MemStream(const std::vector<uint8_t>& content, const size_t off, const size_t sz)
  : _pos(0) {
  // Clip offset and length separately: computing `off + sz` first can wrap
  // around for large sz and would then produce an empty stream.
  const size_t start = min(off, content.size());
  const size_t len = min(sz, content.size() - start);
  // Range assign instead of memcpy: never forms a source pointer beyond the
  // end of `content` and never passes a null pointer for an empty copy.
  _data.assign(content.begin() + start, content.begin() + start + len);
}
// True when the position is exactly at the end of the stored data.
bool MemStream::eof() const {
  return _data.size() == _pos;
}

// Copies up to `size` bytes from the current position into `buffer`,
// advances the position and returns the number of bytes copied.
size_t MemStream::read(unsigned char* buffer, const size_t size) {
  const size_t toCopy = min(size, _data.size() - _pos);
  const unsigned char* from = _data.data() + _pos;
  memcpy(buffer, from, toCopy);
  _pos += toCopy;
  return toCopy;
}

// Writes `size` bytes at the current position, overwriting existing data and
// growing the storage when the write runs past the end. Always returns `size`.
size_t MemStream::write(const unsigned char* buffer, const size_t size) {
  const size_t room = _data.size() - _pos;
  if (size > room) {
    _data.resize(_pos + size);
  }
  unsigned char* to = _data.data() + _pos;
  memcpy(to, buffer, size);
  _pos += size;
  return size;
}

// Current position in bytes from the start of the data.
uint64_t MemStream::tell() const {
  return _pos;
}

// Moves to `newPos`, clamped to the end of the data, and returns the
// position before the call.
uint64_t MemStream::seek(const uint64_t newPos) {
  const size_t previous = _pos;
  _pos = min(newPos, (uint64_t)_data.size());
  return previous;
}

View File

@@ -0,0 +1,52 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef MEMSTREAM_H
#define MEMSTREAM_H
#include <stdint.h>
#include <vector>
#include "stream.h"
// Seekable in-memory stream backed by a std::vector<uint8_t>.
// Invariant relied on by read()/write(): _pos <= _data.size().
class MemStream : public SeekableInputOutputStream {
public:
  MemStream();
  MemStream(const std::vector<uint8_t>& content);
  MemStream(const std::vector<uint8_t>& content, const size_t off, const size_t sz);

  virtual bool eof() const;
  virtual size_t read(unsigned char* buffer, const size_t size);
  virtual size_t write(const unsigned char* buffer, const size_t size);
  virtual uint64_t tell() const;
  // Returns the position before the seek.
  virtual uint64_t seek(const uint64_t newPos);

  // Replaces the stored bytes with a copy of `content`. The position is kept
  // unless it would lie beyond the new end, in which case it is moved to the
  // end; otherwise read() would compute a wrapped-around remaining size.
  void replaceData(const std::vector<uint8_t>& content) {
    _data = content;
    if (_pos > _data.size()) {
      _pos = _data.size();
    }
  }

  // Read-only access to the stored bytes.
  const std::vector<uint8_t>& data() const {
    return _data;
  }

  // Moves the stored bytes out of the stream. The stream is left empty and
  // positioned at 0 so that it stays safe to use afterwards.
  std::vector<uint8_t> extractData() {
    std::vector<uint8_t> taken(std::move(_data));
    _data.clear();
    _pos = 0;
    return taken;
  }

private:
  std::vector<uint8_t> _data;
  size_t _pos;
};
#endif /* MEMSTREAM_H */

View File

@@ -0,0 +1,30 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include <algorithm>
#include "outputcachestream.h"
// Creates an empty cache in front of `os`; the cache starts at position 0.
OutputCacheStream::OutputCacheStream(OutputStream& os)
  : _os(os)
  , _cacheStartPos(0) {}

// Does not flush: bytes still in the cache are dropped unless flush() was
// called before.
OutputCacheStream::~OutputCacheStream() {
}

// Writes the cached bytes in front of absolute position `newStartPos` to the
// underlying stream and removes from the cache as many bytes as the stream
// accepted. Positions beyond the cache end flush the whole cache.
void OutputCacheStream::flushUpTo(const uint64_t newStartPos) {
  // A position before the cache start has nothing to flush. Without this
  // guard the unsigned subtraction below wraps around and the entire cache
  // would be written out.
  if (newStartPos < _cacheStartPos) {
    return;
  }
  size_t toWrite = min(newStartPos - _cacheStartPos, (uint64_t)_cache.size());
  size_t written = _os.write(_cache.data(), toWrite);
  _cacheStartPos += written;
  _cache.erase(_cache.begin(), _cache.begin() + written);
}

View File

@@ -0,0 +1,67 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef OUTPUTCACHESTREAM_H
#define OUTPUTCACHESTREAM_H
#include <algorithm>
#include <vector>
#include "stream.h"
// Output stream that keeps written bytes in memory until they are flushed to
// the underlying stream. Cached bytes are addressed by absolute positions
// counted from the first byte ever written.
class OutputCacheStream : public OutputStream {
public:
  OutputCacheStream(OutputStream& os);
  virtual ~OutputCacheStream();

  // Appends `size` bytes to the cache; nothing reaches the underlying stream
  // here. Always returns `size`.
  size_t write(const unsigned char* buffer, const size_t size) {
    const unsigned char* const last = buffer + size;
    _cache.insert(_cache.end(), buffer, last);
    return size;
  }

  // Makes room for `len` more bytes, growing the capacity by at least half
  // of its current value when it has to grow at all.
  void reserve(const size_t len) {
    const size_t cap = _cache.capacity();
    if (_cache.size() + len <= cap) {
      return;
    }
    _cache.reserve(cap + max(cap >> 1, len));
  }

  // Flushes everything that is currently cached.
  void flush() {
    flushUpTo(cacheEndPos());
  }

  void flushUpTo(const uint64_t newStartPos);

  // Absolute position of the first cached byte.
  uint64_t cacheStartPos() const {
    return _cacheStartPos;
  }

  // Absolute position one past the last cached byte.
  uint64_t cacheEndPos() const {
    return _cacheStartPos + _cache.size();
  }

  // Pointer to the cached byte at absolute position `pos`; no range check.
  const unsigned char* cacheData(const uint64_t pos) const {
    const ptrdiff_t offset = (ptrdiff_t)(pos - _cacheStartPos);
    return _cache.data() + offset;
  }

  // Pointer one past the last cached byte.
  const unsigned char* cacheEnd() const {
    return _cache.data() + _cache.size();
  }

  // Number of bytes currently cached.
  const size_t cacheSize() const {
    return _cache.size();
  }

private:
  OutputStream& _os;
  std::vector<unsigned char> _cache;
  uint64_t _cacheStartPos;
};
#endif /* OUTPUTCACHESTREAM_H */

View File

@@ -0,0 +1,51 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef STREAM_H
#define STREAM_H
#include <stdint.h>
// Byte source interface.
class InputStream {
public:
  virtual ~InputStream() = default;
  // True when no more data can be read.
  virtual bool eof() const = 0;
  // Reads up to `size` bytes into `buffer`; returns the number of bytes read.
  virtual size_t read(unsigned char* buffer, const size_t size) = 0;
};

// Byte sink interface.
class OutputStream {
public:
  virtual ~OutputStream() = default;
  // Writes `size` bytes from `buffer`; returns the number of bytes written.
  virtual size_t write(const unsigned char* buffer, const size_t size) = 0;
};

// Positioning interface.
class SeekableStream {
public:
  virtual ~SeekableStream() = default;
  // Current position.
  virtual uint64_t tell() const = 0;
  // Moves to `newPos`. The implementations in this code base return the
  // position before the call.
  virtual uint64_t seek(const uint64_t newPos) = 0;
};

// Readable and seekable.
class SeekableInputStream
  : public InputStream
  , public SeekableStream {};

// Readable, writable and seekable.
class SeekableInputOutputStream
  : public SeekableInputStream
  , public OutputStream {};
#endif /* STREAM_H */

View File

@@ -0,0 +1,181 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include <stdio.h>
#include "array_helper.h"
#include "bit_helper.h"
#include "bitstream.h"
#include "const_division.h"
#include "huffman_decoder.h"
#include "huffman_encoder.h"
#include "huffman_helper.h"
#include "memstream.h"
#include "outputcachestream.h"
#include "stream.h"
bool support_self_tests() {
unsigned arr[] = {1,2,3,4,5};
if (sumArray(arr) != 15
|| sumArray(arr, sizeof(arr) / sizeof(arr[0])) != 15) {
printf("sumArray failed\n");
return false;
}
if (bitLength(0) != 0
|| bitLength(15) != 4
|| bitLength(0xffffffff) != 32) {
printf("bitLength failed\n");
return false;
}
if (bitReverse(1, 3) != 4
|| bitReverse(0x12345678, 32) != 0x1e6a2c48
|| bitReverse(0xfedcba90, 32) != 0x095d3b7f) {
printf("bitReverse failed\n");
return false;
}
MemStream mem;
mem.write((const uint8_t*)"Hello", 5);
if (mem.tell() != 5 || !mem.eof()) {
printf("MemStream/1 failed\n");
return false;
}
mem.write((const uint8_t*)"!", 1);
uint8_t tmp[5], tmp2[2];
if (mem.read(tmp, 5) != 0) {
printf("MemStream/2 failed\n");
return false;
}
if (mem.seek(0) != 6) {
printf("MemStream/3 failed\n");
return false;
}
if (mem.tell() != 0) {
printf("MemStream/4 failed\n");
return false;
}
if (mem.read(tmp, 5) != 5 || tmp[0] != 'H' || tmp[4] != 'o') {
printf("MemStream/5 failed\n");
return false;
}
if (mem.read(tmp2, 2) != 1 || tmp2[0] != '!') {
printf("MemStream/6 failed\n");
return false;
}
if (!mem.eof()) {
printf("MemStream/7 failed\n");
return false;
}
mem.seek(0);
{
BitOutputStream bos(mem);
for (unsigned i = 0; i <= HuffmanHelper::MAX_BL; ++i) {
bos.put(i, i);
}
bos.flush();
}
mem.seek(0);
{
BitInputStream bis(mem);
for (unsigned i = 0; i <= HuffmanHelper::MAX_BL; ++i) {
if (bis.get(i) != i) {
printf("BitStreams failed\n");
return false;
}
}
}
unsigned char lengths[] = {
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,
17,18,19,20,21,22,23,24,25,25
};
unsigned count = sizeof(lengths) / sizeof(lengths[0]);
HuffmanEncoder henc(lengths, count, false);
HuffmanDecoder hdec(lengths, count, false, 7);
if (henc.error() || hdec.error()) {
printf("HuffmanEncoder failed\n");
return false;
}
mem.seek(0);
{
BitOutputStream bos(mem);
for (unsigned i = 0; i < count; ++i) {
henc.encode(bos, i);
}
bos.flush();
}
mem.seek(0);
{
BitInputStream bis(mem);
for (unsigned i = 0; i < count; ++i) {
if (hdec.decode(bis) != i) {
printf("HuffmanDecoder failed\n");
return false;
}
}
}
uint16_t divtest16[] = {1, 3, 5, 7, 9, 11, 13, 17, 32767};
for (int i = 0, n = sizeof(divtest16) / sizeof(divtest16[0]); i < n; ++i) {
udivider_t<16> du = build_udivider_16(divtest16[i]);
ucdivider_t<16> duc = build_ucdivider_16(divtest16[i]);
sdivider_t<16> ds = build_sdivider_16(divtest16[i]);
scdivider_t<16> dsc = build_scdivider_16(divtest16[i]);
for (int k = 0; k < 65536; ++k) {
uint16_t c1 = divide((uint16_t)k, du);
uint16_t c2 = divide((uint16_t)k, duc);
uint16_t r = k / divtest16[i];
if (c1 != r || c2 != r) {
printf("16bit divider/1 failed\n");
return false;
}
int16_t d1 = divide((int16_t)(k - 32768), ds);
int16_t d2 = divide((int16_t)(k - 32768), dsc);
int16_t s = ((int16_t)(k - 32768)) / (int16_t)divtest16[i];
if (d1 != s || d2 != s) {
printf("16bit divider/2 failed\n");
return false;
}
}
}
uint32_t divtest32[] = {1, 3, 5, 7, 9, 11, 13, 17, 0x7fff, 0x7fffffff};
for (int i = 0, n = sizeof(divtest32) / sizeof(divtest32[0]); i < n; ++i) {
udivider_t<32> du = build_udivider_32(divtest32[i]);
ucdivider_t<32> duc = build_ucdivider_32(divtest32[i]);
sdivider_t<32> ds = build_sdivider_32(divtest32[i]);
scdivider_t<32> dsc = build_scdivider_32(divtest32[i]);
for (int k = 0; k < 65536; ++k) {
uint32_t c1 = divide(((uint32_t)k)* 65536, du);
uint32_t c2 = divide(((uint32_t)k) * 65536, duc);
uint32_t r = (((uint32_t)k) * 65536) / divtest32[i];
if (c1 != r || c2 != r) {
printf("32bit divider/1 failed\n");
return false;
}
int32_t d1 = divide((int32_t)(k - 32768) * 65536, ds);
int32_t d2 = divide((int32_t)(k - 32768) * 65536, dsc);
int32_t s = ((int32_t)(k - 32768)) * 65536 / (int32_t)divtest32[i];
if (d1 != s || d2 != s) {
printf("32bit divider/2 failed\n");
return false;
}
}
}
return true;
}

View File

@@ -0,0 +1,20 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef SUPPORT_TESTS_H
#define SUPPORT_TESTS_H
// Runs the self tests of the support code (array/bit helpers, MemStream,
// bit streams, Huffman coding, constant dividers). Prints the name of the
// first failing check to stdout and returns false; returns true if all pass.
bool support_self_tests();
#endif /* SUPPORT_TESTS_H */

View File

@@ -0,0 +1,58 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include "task_pool.h"
#include <memory>
/*TaskPool globalTaskPool;
TaskPool::TaskPool()
: _state(INIT)
, _threadLimit(max(1u, std::thread::hardware_concurrency()) - 1) {
}
void TaskPool::_init() {
_state = RUN;
std::function<void(void)> workerLoop = [this] {
for (;;) {
std::function<void()> task;
{
std::unique_lock<std::mutex> lock(this->_mutex);
this->_condition.wait(lock,
[this] { return this->_state == FINISH || !this->_tasks.empty(); });
if (this->_state == FINISH) {
return;
}
task = std::move(this->_tasks.front());
this->_tasks.pop();
}
task();
}
};
for (unsigned i = 0, n = max((size_t)1, _threadLimit); i < n; ++i) {
_workers.emplace_back(workerLoop);
}
}
TaskPool::~TaskPool() {
_state = FINISH;
_condition.notify_all();
for (auto& thr : _workers) {
if (thr.joinable()) {
thr.join();
}
}
} */

View File

@@ -0,0 +1,70 @@
/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
// #ifndef TASK_POOL_H
// #define TASK_POOL_H
#include <condition_variable>
#include <functional>
#include <future>
#include <memory>
#include <mutex>
#include <queue>
#include <thread>
#include <vector>
/* class TaskPool {
public:
TaskPool();
~TaskPool();
template<class F, class... Args>
auto addTask(F&& f, Args&&... args)
-> std::future<typename std::result_of<F(Args...)>::type> {
using R = typename std::result_of<F(Args...)>::type;
auto task = std::make_shared<std::packaged_task<R()>>(
std::bind(std::forward<F>(f), std::forward<Args>(args)...));
if (_state == INIT) {
_init();
}
std::future<R> res = task->get_future();
{
std::unique_lock<std::mutex> lock(_mutex);
_tasks.emplace([task]() { (*task)(); });
}
_condition.notify_one();
return res;
}
size_t extraThreadCount() const {
return _threadLimit;
}
private:
enum State { INIT, RUN, FINISH };
void _init();
State _state;
size_t _threadLimit;
std::vector<std::thread> _workers;
std::mutex _mutex;
std::condition_variable _condition;
std::queue<std::function<void()>> _tasks;
};
extern TaskPool globalTaskPool;
#endif */ /* TASK_POOL_H */