CodeCommitsIssuesPull requestsActionsInsightsSecurity
54003

Branches

Tags

  • No tags available.
0Branches0Tags
Go to file
Add file
Code

Clone

HTTPS

Download ZIP

utils/compressor/main.cpp

123lines · modepreview

#include <iostream>

#include <boost/program_options.hpp>

#include <DB/IO/WriteBufferFromFileDescriptor.h>
#include <DB/IO/ReadBufferFromFileDescriptor.h>
#include <DB/IO/CompressedWriteBuffer.h>
#include <DB/IO/CompressedReadBuffer.h>
#include <DB/IO/WriteHelpers.h>
#include <DB/IO/copyData.h>


/// Выводит размеры разжатых и сжатых блоков для сжатого файла.
void stat(DB::ReadBuffer & in, DB::WriteBuffer & out)
{
	while (!in.eof())
	{
		char header[COMPRESSED_BLOCK_HEADER_SIZE];

		in.ignore(16);	/// checksum
		in.readStrict(header, COMPRESSED_BLOCK_HEADER_SIZE);

		size_t size_compressed = 0;
		memcpy(&size_compressed, &header[1], 4);	/// little endian

		if (size_compressed > DBMS_MAX_COMPRESSED_SIZE)
			throw DB::Exception("Too large size_compressed. Most likely corrupted data.", DB::ErrorCodes::TOO_LARGE_SIZE_COMPRESSED);

		size_t size_decompressed = 0;
		memcpy(&size_compressed, &header[5], 4);	/// little endian

		DB::writeText(size_decompressed, out);
		DB::writeChar('\t', out);
		DB::writeText(size_compressed, out);
		DB::writeChar('\n', out);

		in.ignore(size_compressed - COMPRESSED_BLOCK_HEADER_SIZE);
	}
}


int main(int argc, char ** argv)
{
	boost::program_options::options_description desc("Allowed options");
	desc.add_options()
		("help,h", "produce help message")
		("d,decompress", "decompress")
		("block-size,b", boost::program_options::value<unsigned>()->default_value(DBMS_DEFAULT_BUFFER_SIZE), "compress in blocks of specified size")
		("hc", "use LZ4HC instead of LZ4")
	#ifdef USE_QUICKLZ
		("qlz", "use QuickLZ (level 1) instead of LZ4")
	#endif
		("zstd", "use ZSTD instead of LZ4")
		("stat", "print block statistics of compressed data")
	;

	boost::program_options::variables_map options;
	boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options);

	if (options.count("help"))
	{
		std::cout << "Usage: " << argv[0] << " [options] < in > out" << std::endl;
		std::cout << desc << std::endl;
		return 1;
	}

	try
	{
		bool decompress = options.count("d");

	#ifdef USE_QUICKLZ
		bool use_qlz = options.count("qlz");
	#else
		bool use_qlz = false;
	#endif

		bool use_lz4hc = options.count("hc");
		bool use_zstd = options.count("zstd");
		bool stat_mode = options.count("stat");
		unsigned block_size = options["block-size"].as<unsigned>();

		DB::CompressionMethod method = DB::CompressionMethod::LZ4;

		if (use_qlz)
			method = DB::CompressionMethod::QuickLZ;
		else if (use_lz4hc)
			method = DB::CompressionMethod::LZ4HC;
		else if (use_zstd)
			method = DB::CompressionMethod::ZSTD;

		DB::ReadBufferFromFileDescriptor rb(STDIN_FILENO);
		DB::WriteBufferFromFileDescriptor wb(STDOUT_FILENO);

		if (stat_mode)
		{
			/// Вывести статистику для сжатого файла.
			stat(rb, wb);
		}
		else if (decompress)
		{
			/// Разжатие
			DB::CompressedReadBuffer from(rb);
			DB::copyData(from, wb);
		}
		else
		{
			/// Сжатие
			DB::CompressedWriteBuffer to(wb, method, block_size);
			DB::copyData(rb, to);
		}
	}
	catch (const DB::Exception & e)
	{
		std::cerr << e.what() << ", " << e.message() << std::endl
			<< std::endl
			<< "Stack trace:" << std::endl
			<< e.getStackTrace().toString()
			<< std::endl;
		throw;
	}

    return 0;
}