/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.compressors.lz4;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;

import org.apache.commons.compress.compressors.CompressorOutputStream;
import org.apache.commons.compress.utils.ByteUtils;

/**
 * CompressorOutputStream for the LZ4 frame format.
 *
 * <p>Based on the "spec" in the version "1.5.1 (31/03/2015)"</p>
 *
 * <p>Data is collected in a buffer of one block size and each full
 * buffer is compressed and written as one block of the frame. A block
 * whose compressed form would be larger than its raw data is stored
 * uncompressed.</p>
 *
 * @see <a href="http://lz4.github.io/lz4/lz4_Frame_format.html">LZ4 Frame Format Description</a>
 * @since 1.14
 * @NotThreadSafe
 */
public class FramedLZ4CompressorOutputStream extends CompressorOutputStream {

    /**
     * The block sizes supported by the format.
     */
    public enum BlockSize {
        /** Block size of 64K */
        K64(64 * 1024, 4),
        /** Block size of 256K */
        K256(256 * 1024, 5),
        /** Block size of 1M */
        M1(1024 * 1024, 6),
        /** Block size of 4M */
        M4(4096 * 1024, 7);

        private final int size, index;

        BlockSize(final int size, final int index) {
            this.size = size;
            this.index = index;
        }

        /** @return the value written into the BD byte of the frame descriptor for this block size */
        int getIndex() {
            return index;
        }

        /** @return the block size in bytes */
        int getSize() {
            return size;
        }
    }

    /**
     * Parameters of the LZ4 frame format.
     */
    public static class Parameters {
        /**
         * The default parameters of 4M block size, enabled content
         * checksum, disabled block checksums and independent blocks.
         *
         * <p>This matches the defaults of the lz4 command line utility.</p>
         */
        public static final Parameters DEFAULT = new Parameters(BlockSize.M4, true, false, false);

        private final BlockSize blockSize;
        private final boolean withContentChecksum, withBlockChecksum, withBlockDependency;

        private final org.apache.commons.compress.compressors.lz77support.Parameters lz77params;

        /**
         * Sets up a custom block size for the LZ4 stream but
         * otherwise uses the defaults of enabled content checksum,
         * disabled block checksums and independent blocks.
         * @param blockSize the size of a single block.
         */
        public Parameters(final BlockSize blockSize) {
            this(blockSize, true, false, false);
        }

        /**
         * Sets up custom parameters for the LZ4 stream.
         * @param blockSize the size of a single block.
         * @param withContentChecksum whether to write a content checksum
         * @param withBlockChecksum whether to write a block checksum.
         * Note that block checksums are not supported by the lz4
         * command line utility
         * @param withBlockDependency whether a block may depend on
         * the content of a previous block. Enabling this may improve
         * compression ratio but makes it impossible to decompress the
         * output in parallel.
         */
        public Parameters(final BlockSize blockSize, final boolean withContentChecksum, final boolean withBlockChecksum,
            final boolean withBlockDependency) {
            this(blockSize, withContentChecksum, withBlockChecksum, withBlockDependency,
                 BlockLZ4CompressorOutputStream.createParameterBuilder().build());
        }

        /**
         * Sets up custom parameters for the LZ4 stream.
         * @param blockSize the size of a single block.
         * @param withContentChecksum whether to write a content checksum
         * @param withBlockChecksum whether to write a block checksum.
         * Note that block checksums are not supported by the lz4
         * command line utility
         * @param withBlockDependency whether a block may depend on
         * the content of a previous block. Enabling this may improve
         * compression ratio but makes it impossible to decompress the
         * output in parallel.
         * @param lz77params parameters used to fine-tune compression,
         * in particular to balance compression ratio vs compression
         * speed.
         */
        public Parameters(final BlockSize blockSize, final boolean withContentChecksum, final boolean withBlockChecksum,
            final boolean withBlockDependency,
            final org.apache.commons.compress.compressors.lz77support.Parameters lz77params) {
            this.blockSize = blockSize;
            this.withContentChecksum = withContentChecksum;
            this.withBlockChecksum = withBlockChecksum;
            this.withBlockDependency = withBlockDependency;
            this.lz77params = lz77params;
        }

        /**
         * Sets up a custom block size for the LZ4 stream but
         * otherwise uses the defaults of enabled content checksum,
         * disabled block checksums and independent blocks.
         * @param blockSize the size of a single block.
         * @param lz77params parameters used to fine-tune compression,
         * in particular to balance compression ratio vs compression
         * speed.
         */
        public Parameters(final BlockSize blockSize,
            final org.apache.commons.compress.compressors.lz77support.Parameters lz77params) {
            this(blockSize, true, false, false, lz77params);
        }

        @Override
        public String toString() {
            return "LZ4 Parameters with BlockSize " + blockSize + ", withContentChecksum " + withContentChecksum
                + ", withBlockChecksum " + withBlockChecksum + ", withBlockDependency " + withBlockDependency;
        }
    }

    // four zero bytes, written as the last "block size" of the frame
    private static final byte[] END_MARK = new byte[4];

    // used in one-arg write method
    private final byte[] oneByte = new byte[1];
    // collects the uncompressed data of the block currently being assembled
    private final byte[] blockData;
    private final OutputStream out;
    private final Parameters params;

    private boolean finished;
    // number of valid bytes inside blockData
    private int currentIndex;

    // used for frame header checksum and content checksum, if requested
    private final XXHash32 contentHash = new XXHash32();
    // used for block checksum, if requested
    private final XXHash32 blockHash;

    // only created if the config requires block dependency
    private final byte[] blockDependencyBuffer;

    // number of valid bytes at the end of blockDependencyBuffer
    private int collectedBlockDependencyBytes;

    /**
     * Constructs a new output stream that compresses data using the
     * LZ4 frame format using the default block size of 4MB.
     * @param out the OutputStream to which to write the compressed data
     * @throws IOException if writing the signature fails
     */
    public FramedLZ4CompressorOutputStream(final OutputStream out) throws IOException {
        this(out, Parameters.DEFAULT);
    }

    /**
     * Constructs a new output stream that compresses data using the
     * LZ4 frame format using the given block size.
     *
     * <p>The frame signature and the frame descriptor are written
     * immediately.</p>
     *
     * @param out the OutputStream to which to write the compressed data
     * @param params the parameters to use
     * @throws IOException if writing the signature fails
     */
    public FramedLZ4CompressorOutputStream(final OutputStream out, final Parameters params) throws IOException {
        this.params = params;
        this.out = out;
        blockData = new byte[params.blockSize.getSize()];
        blockHash = params.withBlockChecksum ? new XXHash32() : null;
        blockDependencyBuffer = params.withBlockDependency
            ? new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE]
            : null;
        out.write(FramedLZ4CompressorInputStream.LZ4_SIGNATURE);
        writeFrameDescriptor();
    }

    /**
     * Remembers the most recent data so it can be used to prefill the
     * compressor of the next block.
     *
     * <p>The buffer is right-aligned: its last
     * {@code collectedBlockDependencyBytes} bytes always hold the
     * newest data seen so far.</p>
     *
     * @param b array holding the data of the block just written
     * @param off offset of the first byte of the block inside {@code b}
     * @param len number of bytes of the block
     */
    private void appendToBlockDependencyBuffer(final byte[] b, int off, int len) {
        final int capacity = blockDependencyBuffer.length;
        if (len > capacity) {
            // The block is bigger than the window: only its trailing
            // bytes can serve as history for the next block, so skip
            // the leading surplus rather than keeping the block's head.
            off += len - capacity;
            len = capacity;
        }
        if (len > 0) {
            final int keep = capacity - len;
            if (keep > 0) {
                // move last keep bytes towards the start of the buffer
                System.arraycopy(blockDependencyBuffer, len, blockDependencyBuffer, 0, keep);
            }
            // append new data
            System.arraycopy(b, off, blockDependencyBuffer, keep, len);
            collectedBlockDependencyBytes = Math.min(collectedBlockDependencyBytes + len, capacity);
        }
    }

    /**
     * Finishes the frame and closes the underlying stream; the
     * underlying stream is closed even if finishing fails.
     *
     * @throws IOException if an error occurs
     */
    @Override
    public void close() throws IOException {
        try {
            finish();
        } finally {
            out.close();
        }
    }

    /**
     * Compresses all remaining data and writes it to the stream,
     * doesn't close the underlying stream.
     *
     * <p>Calling this method more than once has no further effect.</p>
     *
     * @throws IOException if an error occurs
     */
    public void finish() throws IOException {
        if (!finished) {
            // no-op when nothing is buffered
            flushBlock();
            writeTrailer();
            finished = true;
        }
    }

    /**
     * Writes the currently buffered data as a single block and
     * empties the buffer; does nothing if the buffer is empty.
     *
     * <p>The data is compressed into memory first. If the compressed
     * form is larger than the raw data the raw data is written
     * instead and the block size is flagged as uncompressed. The
     * optional block checksum covers the bytes actually written.</p>
     *
     * @throws IOException if compressing or writing fails
     */
    private void flushBlock() throws IOException {
        if (currentIndex == 0) {
            // Never emit an empty block: a block size of zero consists of
            // the very same four bytes as the end mark of the frame.
            return;
        }
        final boolean withBlockDependency = params.withBlockDependency;
        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
        try (BlockLZ4CompressorOutputStream o = new BlockLZ4CompressorOutputStream(baos, params.lz77params)) {
            if (withBlockDependency) {
                // valid history lives at the end of the dependency buffer
                o.prefill(blockDependencyBuffer, blockDependencyBuffer.length - collectedBlockDependencyBytes,
                    collectedBlockDependencyBytes);
            }
            o.write(blockData, 0, currentIndex);
        }
        if (withBlockDependency) {
            appendToBlockDependencyBuffer(blockData, 0, currentIndex);
        }
        final byte[] b = baos.toByteArray();
        if (b.length > currentIndex) { // compression increased size, maybe beyond blocksize
            ByteUtils.toLittleEndian(out, currentIndex | FramedLZ4CompressorInputStream.UNCOMPRESSED_FLAG_MASK,
                4);
            out.write(blockData, 0, currentIndex);
            if (params.withBlockChecksum) {
                blockHash.update(blockData, 0, currentIndex);
            }
        } else {
            ByteUtils.toLittleEndian(out, b.length, 4);
            out.write(b);
            if (params.withBlockChecksum) {
                blockHash.update(b, 0, b.length);
            }
        }
        if (params.withBlockChecksum) {
            ByteUtils.toLittleEndian(out, blockHash.getValue(), 4);
            blockHash.reset();
        }
        currentIndex = 0;
    }

    /**
     * Buffers the given data, writing a block whenever the data would
     * not fit into the current block anymore.
     *
     * @param data array holding the data to write
     * @param off offset of the first byte to write
     * @param len number of bytes to write
     * @throws IOException if writing a block fails
     */
    @Override
    public void write(final byte[] data, int off, int len) throws IOException {
        if (params.withContentChecksum) {
            contentHash.update(data, off, len);
        }
        final int blockDataLength = blockData.length;
        if (currentIndex + len > blockDataLength) {
            // data doesn't fit: write what has been collected so far
            // (flushBlock ignores an empty buffer) ...
            flushBlock();
            // ... then emit full blocks while more than one block remains
            while (len > blockDataLength) {
                System.arraycopy(data, off, blockData, 0, blockDataLength);
                off += blockDataLength;
                len -= blockDataLength;
                currentIndex = blockDataLength;
                flushBlock();
            }
        }
        // the remainder (at most one block) stays buffered
        System.arraycopy(data, off, blockData, currentIndex, len);
        currentIndex += len;
    }

    /**
     * Writes a single byte.
     *
     * @param b the byte to write, only the low eight bits are used
     * @throws IOException if writing a block fails
     */
    @Override
    public void write(final int b) throws IOException {
        oneByte[0] = (byte) (b & 0xff);
        write(oneByte);
    }

    /**
     * Writes the FLG byte, the BD byte and the header checksum byte
     * of the frame descriptor.
     *
     * @throws IOException if writing fails
     */
    private void writeFrameDescriptor() throws IOException {
        int flags = FramedLZ4CompressorInputStream.SUPPORTED_VERSION;
        if (!params.withBlockDependency) {
            flags |= FramedLZ4CompressorInputStream.BLOCK_INDEPENDENCE_MASK;
        }
        if (params.withContentChecksum) {
            flags |= FramedLZ4CompressorInputStream.CONTENT_CHECKSUM_MASK;
        }
        if (params.withBlockChecksum) {
            flags |= FramedLZ4CompressorInputStream.BLOCK_CHECKSUM_MASK;
        }
        out.write(flags);
        contentHash.update(flags);
        final int bd = (params.blockSize.getIndex() << 4) & FramedLZ4CompressorInputStream.BLOCK_MAX_SIZE_MASK;
        out.write(bd);
        contentHash.update(bd);
        // header checksum is the second byte of the hash over FLG and BD
        out.write((int) ((contentHash.getValue() >> 8) & 0xff));
        // the same hash instance is reused for the content checksum
        contentHash.reset();
    }

    /**
     * Writes the end mark and, if enabled, the content checksum.
     *
     * @throws IOException if writing fails
     */
    private void writeTrailer() throws IOException {
        out.write(END_MARK);
        if (params.withContentChecksum) {
            ByteUtils.toLittleEndian(out, contentHash.getValue(), 4);
        }
    }

}