// Copyright 2005 and onwards Google Inc. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // A light-weight compression algorithm. It is designed for speed of // compression and decompression, rather than for the utmost in space // savings. // // For getting better compression ratios when you are compressing data // with long repeated sequences or compressing data that is similar to // other data, while still compressing fast, you might look at first // using BMDiff and then compressing the output of BMDiff with // Snappy. 
#ifndef UTIL_SNAPPY_SNAPPY_H__ #define UTIL_SNAPPY_SNAPPY_H__ #include <stddef.h> #include <string> #include "snappy-stubs-public.h" namespace snappy { class Source; class Sink; // ------------------------------------------------------------------------ // Generic compression/decompression routines. // ------------------------------------------------------------------------ // Compress the bytes read from "*source" and append to "*sink". Return the // number of bytes written. size_t Compress(Source* source, Sink* sink); bool GetUncompressedLength(Source* source, uint32* result); // ------------------------------------------------------------------------ // Higher-level string based routines (should be sufficient for most users) // ------------------------------------------------------------------------ // Sets "*output" to the compressed version of "input[0,input_length-1]". // Original contents of *output are lost. // // REQUIRES: "input[]" is not an alias of "*output". size_t Compress(const char* input, size_t input_length, string* output); // Decompresses "compressed[0,compressed_length-1]" to "*uncompressed". // Original contents of "*uncompressed" are lost. // // REQUIRES: "compressed[]" is not an alias of "*uncompressed". // // returns false if the message is corrupted and could not be decompressed bool Uncompress(const char* compressed, size_t compressed_length, string* uncompressed); // ------------------------------------------------------------------------ // Lower-level character array based routines. May be useful for // efficiency reasons in certain circumstances. // ------------------------------------------------------------------------ // REQUIRES: "compressed" must point to an area of memory that is at // least "MaxCompressedLength(input_length)" bytes in length. // // Takes the data stored in "input[0..input_length]" and stores // it in the array pointed to by "compressed". // // "*compressed_length" is set to the length of the compressed output. 
// // Example: // char* output = new char[snappy::MaxCompressedLength(input_length)]; // size_t output_length; // RawCompress(input, input_length, output, &output_length); // ... Process(output, output_length) ... // delete [] output; void RawCompress(const char* input, size_t input_length, char* compressed, size_t* compressed_length); // Given data in "compressed[0..compressed_length-1]" generated by // calling the Snappy::Compress routine, this routine // stores the uncompressed data to // uncompressed[0..GetUncompressedLength(compressed)-1] // returns false if the message is corrupted and could not be decrypted bool RawUncompress(const char* compressed, size_t compressed_length, char* uncompressed); // Given data from the byte source 'compressed' generated by calling // the Snappy::Compress routine, this routine stores the uncompressed // data to // uncompressed[0..GetUncompressedLength(compressed,compressed_length)-1] // returns false if the message is corrupted and could not be decrypted bool RawUncompress(Source* compressed, char* uncompressed); // Returns the maximal size of the compressed representation of // input data that is "source_bytes" bytes in length; size_t MaxCompressedLength(size_t source_bytes); // REQUIRES: "compressed[]" was produced by RawCompress() or Compress() // Returns true and stores the length of the uncompressed data in // *result normally. Returns false on parsing error. // This operation takes O(1) time. bool GetUncompressedLength(const char* compressed, size_t compressed_length, size_t* result); // Returns true iff the contents of "compressed[]" can be uncompressed // successfully. Does not return the uncompressed data. Takes // time proportional to compressed_length, but is usually at least // a factor of four faster than actual decompression. 
bool IsValidCompressedBuffer(const char* compressed, size_t compressed_length); // *** DO NOT CHANGE THE VALUE OF kBlockSize *** // // New Compression code chops up the input into blocks of at most // the following size. This ensures that back-references in the // output never cross kBlockSize block boundaries. This can be // helpful in implementing blocked decompression. However the // decompression code should not rely on this guarantee since older // compression code may not obey it. static const int kBlockLog = 15; static const size_t kBlockSize = 1 << kBlockLog; static const int kMaxHashTableBits = 14; static const size_t kMaxHashTableSize = 1 << kMaxHashTableBits; } // end namespace snappy #endif // UTIL_SNAPPY_SNAPPY_H__
/*
 * Copyright 2011 Martin Gieseking <martin.gieseking@uos.de>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Plain C interface (a wrapper around the C++ implementation).
 */

#ifndef UTIL_SNAPPY_OPENSOURCE_SNAPPY_C_H_
#define UTIL_SNAPPY_OPENSOURCE_SNAPPY_C_H_

#ifdef __cplusplus
extern "C" {
#endif

#include <stddef.h>

/*
 * Return values; see the documentation for each function to know
 * what each can return.
 *
 * No comma follows the last enumerator: a trailing comma is rejected by
 * strict C90 / C++98 compilers, and this header is meant to be plain C.
 */
typedef enum {
  SNAPPY_OK = 0,
  SNAPPY_INVALID_INPUT = 1,
  SNAPPY_BUFFER_TOO_SMALL = 2
} snappy_status;

/*
 * Takes the data stored in "input[0..input_length-1]" and stores
 * it in the array pointed to by "compressed".
 *
 * <compressed_length> signals the space available in "compressed".
 * If it is not at least equal to "snappy_max_compressed_length(input_length)",
 * SNAPPY_BUFFER_TOO_SMALL is returned. After successful compression,
 * <compressed_length> contains the true length of the compressed output,
 * and SNAPPY_OK is returned.
 *
 * Example:
 *   size_t output_length = snappy_max_compressed_length(input_length);
 *   char* output = (char*)malloc(output_length);
 *   if (snappy_compress(input, input_length, output, &output_length)
 *       == SNAPPY_OK) {
 *     ... Process(output, output_length) ...
 *   }
 *   free(output);
 */
snappy_status snappy_compress(const char* input,
                              size_t input_length,
                              char* compressed,
                              size_t* compressed_length);

/*
 * Given data in "compressed[0..compressed_length-1]" generated by
 * calling the snappy_compress routine, this routine stores
 * the uncompressed data to
 *   uncompressed[0..uncompressed_length-1].
 * Returns failure (a value not equal to SNAPPY_OK) if the message
 * is corrupted and could not be decompressed.
 *
 * <uncompressed_length> signals the space available in "uncompressed".
 * If it is not at least equal to the value returned by
 * snappy_uncompressed_length for this stream, SNAPPY_BUFFER_TOO_SMALL
 * is returned. After successful decompression, <uncompressed_length>
 * contains the true length of the decompressed output.
 *
 * Example:
 *   size_t output_length;
 *   if (snappy_uncompressed_length(input, input_length, &output_length)
 *       != SNAPPY_OK) {
 *     ... fail ...
 *   }
 *   char* output = (char*)malloc(output_length);
 *   if (snappy_uncompress(input, input_length, output, &output_length)
 *       == SNAPPY_OK) {
 *     ... Process(output, output_length) ...
 *   }
 *   free(output);
 */
snappy_status snappy_uncompress(const char* compressed,
                                size_t compressed_length,
                                char* uncompressed,
                                size_t* uncompressed_length);

/*
 * Returns the maximal size of the compressed representation of
 * input data that is "source_length" bytes in length.
 */
size_t snappy_max_compressed_length(size_t source_length);

/*
 * REQUIRES: "compressed[]" was produced by snappy_compress()
 * Returns SNAPPY_OK and stores the length of the uncompressed data in
 * *result normally. Returns SNAPPY_INVALID_INPUT on parsing error.
 * This operation takes O(1) time.
 */
snappy_status snappy_uncompressed_length(const char* compressed,
                                         size_t compressed_length,
                                         size_t* result);

/*
 * Checks whether the contents of "compressed[]" can be uncompressed
 * successfully. Returns SNAPPY_OK if so, or SNAPPY_INVALID_INPUT if not.
 * Does not return the uncompressed data.
 * Takes time proportional to compressed_length, but is usually at least a
 * factor of four faster than actual decompression.
 */
snappy_status snappy_validate_compressed_buffer(const char* compressed,
                                                size_t compressed_length);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  /* UTIL_SNAPPY_OPENSOURCE_SNAPPY_C_H_ */
替换完成后,Hadoop在运行时会自动识别并使用新的Snappy压缩库,从而实现对Snappy压缩格式的支持。 在实际应用中,可以通过修改Hadoop配置文件(如`core-site.xml`)来指定默认的压缩算法为Snappy。例如,可以添加...
Snappy则是一种高效的压缩和解压缩库,常用于提升大数据系统中的I/O性能。本文将深入探讨如何配置和安装Hadoop HBase以及集成Snappy,以优化大数据处理的效率。 首先,我们需要理解Hadoop的环境配置。Hadoop的配置...
一、Snappy压缩库介绍 Snappy是由Google开发的一种快速、轻量级的压缩和解压缩库,主要设计目标是提供高吞吐量而不是最小化的压缩比。它在处理大量数据时表现优秀,广泛应用于大数据环境,如Hadoop和HBase。Snappy...
Snappy是一种由Google开发的高效、快速的压缩和解压缩算法,它在Hadoop中被用作数据压缩的选项之一。然而,对于Hadoop用户来说,直接使用Snappy功能需要对应的Java Archive (JAR) 文件,即hadoop-snappy的jar包。 ...
对于Hadoop而言,压缩是提升数据传输效率和存储利用率的关键技术之一,而Snappy正是Hadoop中广泛使用的高效压缩算法。 Snappy是由Google开发的一种快速的、无损的数据压缩算法,其主要设计目标是在保证较高压缩比的...
Snappy是由Google开发的一种高效的数据压缩库,广泛应用于大数据领域,尤其在Hadoop生态系统中,因其速度快、解压开销低而受到青睐。 Snappy在Hadoop中的应用主要体现在以下几个方面: 1. **HDFS数据压缩**:...
这个“hadoop编译后的包”特别之处在于它支持Snappy压缩,这是一个由Google开发的高效数据压缩库,通常用于提高数据存储和传输效率。 在Hadoop中,数据是以Block的形式存储在HDFS(Hadoop Distributed File System...
* bzip2:一个完全免费、不受专利限制的高质量数据压缩库,能够对数据进行高效的压缩和解压缩。 * doboz:能够快速解压缩的压缩库,适用于需要快速解压缩的应用场景。 * PhysicsFS:对各种归档提供抽象访问的库,主要...
6. **HDFS**:Hadoop的核心组件之一,负责数据的存储和分发,支持高容错性和高吞吐量的数据访问。 7. **MapReduce**:Hadoop的计算模型,通过将任务分解为映射和化简阶段来处理大数据。 8. **YARN**:资源管理框架,...
Hadoop的原生库(Native Libraries)是其核心组件之一,对于优化性能、提高数据处理速度至关重要。本文将深入探讨Hadoop原生包,特别是针对64位Linux系统编译生成的Hadoop+Snappy native包。 Hadoop原生库是一组C++...
Snappy是一种高效的压缩和解压缩库,常用于Hadoop生态系统中,以提高数据传输和存储效率。 现在我们详细探讨一下相关知识点: 1. **Hadoop CDH5**:CDH5是Hadoop的一个企业级分发版本,包含了多个关键组件,如...
此外,Hadoop社区还发展了其他兼容Hadoop的压缩库,如Snappy和lz4,它们也提供良好的性能和压缩效率,用户可以根据实际需求进行选择。 在Hadoop-gpl-compression-0.1.0这个特定的压缩包中,很可能包含了实现Hadoop...
Snappy 是 Google 开发的高速压缩库,具有极高的压缩和解压缩速度,尤其适合大规模数据处理场景。 在Hadoop中,不同的压缩格式对应不同的编码/解码器,例如 DEFLATE 对应 DefaultCodec,Gzip 对应 GzipCodec,Bzip2...
**TsFile:时间序列数据的高效存储解决方案** TsFile是一种专为时间序列数据(Time Series Data)设计的列式存储格式,它具有高度...同时,开源社区的支持和丰富的集成选项,使得TsFile成为值得信赖的首选技术之一。
- `libhadoop-lzo.so`、`libhadoop-snappy.so`等:针对不同压缩算法(如LZO和Snappy)的本地库,用于提高数据压缩和解压缩的速度。 - 其他库文件,如`libprotobuf.so`、`libz.so`等,依赖于protobuf和Zlib等第三方库...
Snappy是Google开发的一种压缩算法,旨在兼顾速度和压缩比。它在许多Google项目中被广泛使用,因为它能够在保持相对良好的压缩效果的同时,提供快速的压缩和解压缩速度。 为了进行性能测试,作者选择了一个常见的、...
- **压缩与编码**:Parquet 支持多种压缩算法(如 GZIP、SNAPPY 和 LZO)和数据编码方式(如 RLE、Bit-Packing),以减少存储空间。 - **Schema 元数据**:每个 Parquet 文件包含元数据,描述了文件中的数据结构,...