xtool/contrib/preflate/preflate_checker.cpp

290 lines
11 KiB
C++

/* Copyright 2018 Dirk Steinke
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pch.h"
#include <stdio.h>
#include <string.h>
#include "preflate_block_decoder.h"
#include "preflate_block_reencoder.h"
#include "preflate_checker.h"
#include "preflate_parameter_estimator.h"
#include "preflate_statistical_model.h"
#include "preflate_token_predictor.h"
#include "preflate_tree_predictor.h"
#include "support/bitstream.h"
#include "support/memstream.h"
#include "support/outputcachestream.h"
#include <algorithm>
#include <chrono>
bool preflate_checker(const std::vector<unsigned char>& deflate_raw) {
printf("Checking raw deflate file of size %d\n", (int)deflate_raw.size());
MemStream decIn(deflate_raw);
MemStream decUnc;
BitInputStream decInBits(decIn);
OutputCacheStream decOutCache(decUnc);
std::vector<PreflateTokenBlock> blocks;
auto ts_start = std::chrono::steady_clock::now();
PreflateBlockDecoder bdec(decInBits, decOutCache);
if (bdec.status() != PreflateBlockDecoder::OK) {
return false;
}
bool last;
unsigned i = 0;
do {
PreflateTokenBlock newBlock;
bool ok = bdec.readBlock(newBlock, last);
if (!ok) {
printf("inflating error (preflate)\n");
return false;
}
blocks.push_back(newBlock);
++i;
} while (!last);
uint8_t remaining_bit_count = (8 - decInBits.bitPos()) & 7;
uint8_t remaining_bits = decInBits.get(remaining_bit_count);
decOutCache.flush();
std::vector<unsigned char> unpacked_output = decUnc.extractData();
auto ts_end = std::chrono::steady_clock::now();
printf("Unpacked data has size %d\n", (int)unpacked_output.size());
printf("Unpacking took %g seconds\n", std::chrono::duration<double>(ts_end - ts_start).count());
// Encode
PreflateParameters paramsE = estimatePreflateParameters(unpacked_output, 0, blocks);
printf("prediction parameters: w %d, c %d, m %d, zlib %d, farL3M %d, very far M %d, M2S %d, log2CD %d\n",
paramsE.windowBits, paramsE.compLevel, paramsE.memLevel,
paramsE.zlibCompatible, paramsE.farLen3MatchesDetected,
paramsE.veryFarMatchesDetected, paramsE.matchesToStartDetected,
paramsE.log2OfMaxChainDepthM1);
ts_start = std::chrono::steady_clock::now();
PreflateStatisticsCounter counterE;
memset(&counterE, 0, sizeof(counterE));
PreflateTokenPredictor tokenPredictorE(paramsE, unpacked_output, 0);
PreflateTreePredictor treePredictorE(unpacked_output, 0);
for (unsigned i = 0, n = blocks.size(); i < n; ++i) {
tokenPredictorE.analyzeBlock(i, blocks[i]);
if (tokenPredictorE.predictionFailure) {
printf("block %d: compress failed token prediction\n", i);
return false;
}
treePredictorE.analyzeBlock(i, blocks[i]);
if (treePredictorE.predictionFailure) {
printf("block %d: compress failed tree prediction\n", i);
return false;
}
tokenPredictorE.updateCounters(&counterE, i);
treePredictorE.updateCounters(&counterE, i);
}
counterE.block.incNonZeroPadding(remaining_bits != 0);
ts_end = std::chrono::steady_clock::now();
printf("Prediction took %g seconds\n", std::chrono::duration<double>(ts_end - ts_start).count());
counterE.print();
ts_start = std::chrono::steady_clock::now();
PreflateMetaEncoder codecE;
if (codecE.error()) {
return false;
}
PreflatePredictionEncoder pcodecE;
unsigned modelId = codecE.addModel(counterE, paramsE);
if (!codecE.beginMetaBlockWithModel(pcodecE, modelId)) {
return false;
}
for (unsigned i = 0, n = blocks.size(); i < n; ++i) {
tokenPredictorE.encodeBlock(&pcodecE, i);
if (tokenPredictorE.predictionFailure) {
printf("block %d: compress failed token encoding\n", i);
return false;
}
treePredictorE.encodeBlock(&pcodecE, i);
if (treePredictorE.predictionFailure) {
printf("block %d: compress failed tree encoding\n", i);
return false;
}
tokenPredictorE.encodeEOF(&pcodecE, i, i + 1 == blocks.size());
}
pcodecE.encodeNonZeroPadding(remaining_bits != 0);
if (remaining_bits != 0) {
unsigned bitsToSave = bitLength(remaining_bits);
pcodecE.encodeValue(bitsToSave, 3);
if (bitsToSave > 1) {
pcodecE.encodeValue(remaining_bits & ((1 << (bitsToSave - 1)) - 1), bitsToSave - 1);
}
}
if (!codecE.endMetaBlock(pcodecE, unpacked_output.size())) {
return false;
}
std::vector<unsigned char> preflate_diff = codecE.finish();
ts_end = std::chrono::steady_clock::now();
printf("Prediction diff has size %d\n", (int)preflate_diff.size());
printf("Encoding diff took %g seconds\n", std::chrono::duration<double>(ts_end - ts_start).count());
// Decode
ts_start = std::chrono::steady_clock::now();
PreflateMetaDecoder codecD(preflate_diff, unpacked_output.size());
PreflatePredictionDecoder pcodecD;
PreflateParameters paramsD;
if (codecD.error() || codecD.metaBlockCount() != 1) {
return false;
}
if (!codecD.beginMetaBlock(pcodecD, paramsD, 0)) {
return false;
}
PreflateTokenPredictor tokenPredictorD(paramsD, unpacked_output, 0);
PreflateTreePredictor treePredictorD(unpacked_output, 0);
MemStream mem;
BitOutputStream bos(mem);
std::vector<PreflateTokenBlock> dblocks;
unsigned blockno = 0;
bool eof = true;
do {
PreflateTokenBlock block = tokenPredictorD.decodeBlock(&pcodecD);
if (tokenPredictorD.predictionFailure) {
printf("block %d: token uncompress failed\n", blockno);
return false;
}
if (!treePredictorD.decodeBlock(block, &pcodecD)) {
printf("block %d: tree uncompress failed\n", blockno);
return false;
}
if (treePredictorD.predictionFailure) {
printf("block %d: tree uncompress failed\n", blockno);
return false;
}
eof = tokenPredictorD.decodeEOF(&pcodecD);
dblocks.push_back(block);
++blockno;
} while (!eof);
ts_end = std::chrono::steady_clock::now();
printf("Decoding diff and reprediction took %g seconds\n", std::chrono::duration<double>(ts_end - ts_start).count());
if (paramsD.windowBits != paramsE.windowBits) {
printf("parameter decoding failed: windowBits mismatch\n");
return false;
}
if (paramsD.memLevel != paramsE.memLevel) {
printf("parameter decoding failed: memLevel mismatch\n");
return false;
}
if (paramsD.compLevel != paramsE.compLevel) {
printf("parameter decoding failed: compLevel mismatch\n");
return false;
}
if (paramsD.zlibCompatible != paramsE.zlibCompatible) {
printf("parameter decoding failed: zlib compatible flag mismatch\n");
return false;
}
if (!paramsD.zlibCompatible && (0
// || paramsD.farLen3MatchesDetected != paramsE.farLen3MatchesDetected
|| paramsD.veryFarMatchesDetected != paramsE.veryFarMatchesDetected
|| paramsD.matchesToStartDetected != paramsE.matchesToStartDetected
|| paramsD.log2OfMaxChainDepthM1 != paramsE.log2OfMaxChainDepthM1)) {
printf("parameter decoding failed: non-zlib flag mismatch\n");
return false;
}
if (!isEqual(pcodecD, pcodecE)) {
printf("decoded model differs from original\n");
return false;
}
for (size_t blockno = 0, n = min(blocks.size(), dblocks.size()); blockno < n; ++blockno) {
if (dblocks[blockno].type != blocks[blockno].type) {
printf("block %zu: type differs: org %d, new %d\n", blockno, blocks[blockno].type, dblocks[blockno].type);
return false;
}
for (unsigned i = 0, n = min(dblocks[blockno].tokens.size(), blocks[blockno].tokens.size()); i < n; ++i) {
PreflateToken orgToken = blocks[blockno].tokens[i];
PreflateToken newToken = dblocks[blockno].tokens[i];
if (newToken.len != orgToken.len || newToken.dist != orgToken.dist) {
printf("block %zu: generated token %d differs: org(%d,%d), new(%d,%d)\n",
blockno, i, orgToken.len, orgToken.dist, newToken.len, newToken.dist);
return false;
}
}
if (dblocks[blockno].tokens.size() != blocks[blockno].tokens.size()) {
printf("block %zu: differing token count: org %d, new %d\n",
blockno, (int)blocks[blockno].tokens.size(), (int)dblocks[blockno].tokens.size());
return false;
}
if (dblocks[blockno].type == PreflateTokenBlock::DYNAMIC_HUFF) {
if (dblocks[blockno].nlen != blocks[blockno].nlen) {
printf("block %zu: literal/len count differs: org %d, new %d\n",
blockno, blocks[blockno].nlen, dblocks[blockno].nlen);
return false;
}
if (dblocks[blockno].ndist != blocks[blockno].ndist) {
printf("block %zu: dist count differs: org %d, new %d\n",
blockno, blocks[blockno].ndist, dblocks[blockno].ndist);
return false;
}
if (dblocks[blockno].ncode != blocks[blockno].ncode) {
printf("block %zu: tree code count differs: org %d, new %d\n",
blockno, blocks[blockno].ncode, dblocks[blockno].ncode);
return false;
}
if (dblocks[blockno].treecodes != blocks[blockno].treecodes) {
printf("block %zu: generated tree codes differs\n", blockno);
return false;
}
}
}
ts_start = std::chrono::steady_clock::now();
PreflateBlockReencoder deflater(bos, unpacked_output, 0);
for (size_t i = 0; i < dblocks.size(); ++i) {
deflater.writeBlock(dblocks[i], i + 1 == dblocks.size());
}
bool non_zero_bits = pcodecD.decodeNonZeroPadding();
if (non_zero_bits) {
unsigned bitsToLoad = pcodecD.decodeValue(3);
unsigned padding = 0;
if (bitsToLoad > 0) {
padding = (1 << (bitsToLoad - 1)) + pcodecD.decodeValue(bitsToLoad - 1);
}
bos.put(padding, bitsToLoad);
}
if (!codecD.endMetaBlock(pcodecD)) {
return false;
}
deflater.flush();
std::vector<unsigned char> deflate_raw_out = mem.extractData();
ts_end = std::chrono::steady_clock::now();
printf("Reencoding deflate stream took %g seconds\n", std::chrono::duration<double>(ts_end - ts_start).count());
for (unsigned i = 0, n = min(deflate_raw.size(), deflate_raw_out.size()); i < n; ++i) {
if (deflate_raw[i] != deflate_raw_out[i]) {
printf("created deflate stream differs at offset %d\n", i);
return false;
}
}
if (deflate_raw.size() != deflate_raw_out.size()) {
printf("created deflate streams differs in size: org %d, new %d\n",
(int)deflate_raw.size(), (int)deflate_raw_out.size());
return false;
}
printf("Success\n");
return true;
}