001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.compressors.lz4; 020 021import java.io.IOException; 022import java.io.InputStream; 023import java.util.Arrays; 024 025import org.apache.commons.compress.compressors.CompressorInputStream; 026import org.apache.commons.compress.utils.BoundedInputStream; 027import org.apache.commons.compress.utils.ByteUtils; 028import org.apache.commons.compress.utils.ChecksumCalculatingInputStream; 029import org.apache.commons.compress.utils.CountingInputStream; 030import org.apache.commons.compress.utils.IOUtils; 031import org.apache.commons.compress.utils.InputStreamStatistics; 032 033/** 034 * CompressorInputStream for the LZ4 frame format. 035 * 036 * <p>Based on the "spec" in the version "1.5.1 (31/03/2015)"</p> 037 * 038 * @see <a href="http://lz4.github.io/lz4/lz4_Frame_format.html">LZ4 Frame Format Description</a> 039 * @since 1.14 040 * @NotThreadSafe 041 */ 042public class FramedLZ4CompressorInputStream extends CompressorInputStream 043 implements InputStreamStatistics { 044 045 // used by FramedLZ4CompressorOutputStream as well 046 static final byte[] LZ4_SIGNATURE = { //NOSONAR 047 4, 0x22, 0x4d, 0x18 048 }; 049 private static final byte[] SKIPPABLE_FRAME_TRAILER = { 050 0x2a, 0x4d, 0x18 051 }; 052 private static final byte SKIPPABLE_FRAME_PREFIX_BYTE_MASK = 0x50; 053 054 static final int VERSION_MASK = 0xC0; 055 static final int SUPPORTED_VERSION = 0x40; 056 static final int BLOCK_INDEPENDENCE_MASK = 0x20; 057 static final int BLOCK_CHECKSUM_MASK = 0x10; 058 static final int CONTENT_SIZE_MASK = 0x08; 059 static final int CONTENT_CHECKSUM_MASK = 0x04; 060 static final int BLOCK_MAX_SIZE_MASK = 0x70; 061 static final int UNCOMPRESSED_FLAG_MASK = 0x80000000; 062 063 private static boolean isSkippableFrameSignature(final byte[] b) { 064 if ((b[0] & SKIPPABLE_FRAME_PREFIX_BYTE_MASK) != SKIPPABLE_FRAME_PREFIX_BYTE_MASK) { 065 return false; 066 } 067 for (int i = 1; i < 4; i++) { 068 if (b[i] != SKIPPABLE_FRAME_TRAILER[i - 1]) { 069 return false; 070 } 071 } 072 return true; 073 } 074 075 /** 076 * Checks if the signature matches what is expected for a .lz4 file. 077 * 078 * <p>.lz4 files start with a four byte signature.</p> 079 * 080 * @param signature the bytes to check 081 * @param length the number of bytes to check 082 * @return true if this is a .sz stream, false otherwise 083 */ 084 public static boolean matches(final byte[] signature, final int length) { 085 086 if (length < LZ4_SIGNATURE.length) { 087 return false; 088 } 089 090 byte[] shortenedSig = signature; 091 if (signature.length > LZ4_SIGNATURE.length) { 092 shortenedSig = Arrays.copyOf(signature, LZ4_SIGNATURE.length); 093 } 094 095 return Arrays.equals(shortenedSig, LZ4_SIGNATURE); 096 } 097 098 // used in no-arg read method 099 private final byte[] oneByte = new byte[1]; 100 private final ByteUtils.ByteSupplier supplier = this::readOneByte; 101 102 private final CountingInputStream inputStream; 103 private final boolean decompressConcatenated; 104 private boolean expectBlockChecksum; 105 private boolean expectBlockDependency; 106 107 private boolean expectContentSize; 108 private boolean expectContentChecksum; 109 110 private InputStream currentBlock; 111 112 private boolean endReached, inUncompressed; 113 114 // used for frame header checksum and content checksum, if present 115 private final XXHash32 contentHash = new XXHash32(); 116 117 // used for block checksum, if present 118 private final XXHash32 blockHash = new XXHash32(); 119 120 // only created if the frame doesn't set the block independence flag 121 private byte[] blockDependencyBuffer; 122 123 /** 124 * Creates a new input stream that decompresses streams compressed 125 * using the LZ4 frame format and stops after decompressing the 126 * first frame. 127 * @param in the InputStream from which to read the compressed data 128 * @throws IOException if reading fails 129 */ 130 public FramedLZ4CompressorInputStream(final InputStream in) throws IOException { 131 this(in, false); 132 } 133 134 /** 135 * Creates a new input stream that decompresses streams compressed 136 * using the LZ4 frame format. 137 * @param in the InputStream from which to read the compressed data 138 * @param decompressConcatenated if true, decompress until the end 139 * of the input; if false, stop after the first LZ4 frame 140 * and leave the input position to point to the next byte 141 * after the frame stream 142 * @throws IOException if reading fails 143 */ 144 public FramedLZ4CompressorInputStream(final InputStream in, final boolean decompressConcatenated) throws IOException { 145 this.inputStream = new CountingInputStream(in); 146 this.decompressConcatenated = decompressConcatenated; 147 init(true); 148 } 149 150 private void appendToBlockDependencyBuffer(final byte[] b, final int off, int len) { 151 len = Math.min(len, blockDependencyBuffer.length); 152 if (len > 0) { 153 final int keep = blockDependencyBuffer.length - len; 154 if (keep > 0) { 155 // move last keep bytes towards the start of the buffer 156 System.arraycopy(blockDependencyBuffer, len, blockDependencyBuffer, 0, keep); 157 } 158 // append new data 159 System.arraycopy(b, off, blockDependencyBuffer, keep, len); 160 } 161 } 162 163 /** {@inheritDoc} */ 164 @Override 165 public void close() throws IOException { 166 try { 167 if (currentBlock != null) { 168 currentBlock.close(); 169 currentBlock = null; 170 } 171 } finally { 172 inputStream.close(); 173 } 174 } 175 176 /** 177 * @since 1.17 178 */ 179 @Override 180 public long getCompressedCount() { 181 return inputStream.getBytesRead(); 182 } 183 184 private void init(final boolean firstFrame) throws IOException { 185 if (readSignature(firstFrame)) { 186 readFrameDescriptor(); 187 nextBlock(); 188 } 189 } 190 191 private void maybeFinishCurrentBlock() throws IOException { 192 if (currentBlock != null) { 193 currentBlock.close(); 194 currentBlock = null; 195 if (expectBlockChecksum) { 196 verifyChecksum(blockHash, "block"); 197 blockHash.reset(); 198 } 199 } 200 } 201 202 private void nextBlock() throws IOException { 203 maybeFinishCurrentBlock(); 204 final long len = ByteUtils.fromLittleEndian(supplier, 4); 205 final boolean uncompressed = (len & UNCOMPRESSED_FLAG_MASK) != 0; 206 final int realLen = (int) (len & (~UNCOMPRESSED_FLAG_MASK)); 207 if (realLen == 0) { 208 verifyContentChecksum(); 209 if (!decompressConcatenated) { 210 endReached = true; 211 } else { 212 init(false); 213 } 214 return; 215 } 216 InputStream capped = new BoundedInputStream(inputStream, realLen); 217 if (expectBlockChecksum) { 218 capped = new ChecksumCalculatingInputStream(blockHash, capped); 219 } 220 if (uncompressed) { 221 inUncompressed = true; 222 currentBlock = capped; 223 } else { 224 inUncompressed = false; 225 final BlockLZ4CompressorInputStream s = new BlockLZ4CompressorInputStream(capped); 226 if (expectBlockDependency) { 227 s.prefill(blockDependencyBuffer); 228 } 229 currentBlock = s; 230 } 231 } 232 233 /** {@inheritDoc} */ 234 @Override 235 public int read() throws IOException { 236 return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF; 237 } 238 239 /** {@inheritDoc} */ 240 @Override 241 public int read(final byte[] b, final int off, final int len) throws IOException { 242 if (len == 0) { 243 return 0; 244 } 245 if (endReached) { 246 return -1; 247 } 248 int r = readOnce(b, off, len); 249 if (r == -1) { 250 nextBlock(); 251 if (!endReached) { 252 r = readOnce(b, off, len); 253 } 254 } 255 if (r != -1) { 256 if (expectBlockDependency) { 257 appendToBlockDependencyBuffer(b, off, r); 258 } 259 if (expectContentChecksum) { 260 contentHash.update(b, off, r); 261 } 262 } 263 return r; 264 } 265 266 private void readFrameDescriptor() throws IOException { 267 final int flags = readOneByte(); 268 if (flags == -1) { 269 throw new IOException("Premature end of stream while reading frame flags"); 270 } 271 contentHash.update(flags); 272 if ((flags & VERSION_MASK) != SUPPORTED_VERSION) { 273 throw new IOException("Unsupported version " + (flags >> 6)); 274 } 275 expectBlockDependency = (flags & BLOCK_INDEPENDENCE_MASK) == 0; 276 if (expectBlockDependency) { 277 if (blockDependencyBuffer == null) { 278 blockDependencyBuffer = new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE]; 279 } 280 } else { 281 blockDependencyBuffer = null; 282 } 283 expectBlockChecksum = (flags & BLOCK_CHECKSUM_MASK) != 0; 284 expectContentSize = (flags & CONTENT_SIZE_MASK) != 0; 285 expectContentChecksum = (flags & CONTENT_CHECKSUM_MASK) != 0; 286 final int bdByte = readOneByte(); 287 if (bdByte == -1) { // max size is irrelevant for this implementation 288 throw new IOException("Premature end of stream while reading frame BD byte"); 289 } 290 contentHash.update(bdByte); 291 if (expectContentSize) { // for now, we don't care, contains the uncompressed size 292 final byte[] contentSize = new byte[8]; 293 final int skipped = IOUtils.readFully(inputStream, contentSize); 294 count(skipped); 295 if (8 != skipped) { 296 throw new IOException("Premature end of stream while reading content size"); 297 } 298 contentHash.update(contentSize, 0, contentSize.length); 299 } 300 final int headerHash = readOneByte(); 301 if (headerHash == -1) { // partial hash of header. 302 throw new IOException("Premature end of stream while reading frame header checksum"); 303 } 304 final int expectedHash = (int) ((contentHash.getValue() >> 8) & 0xff); 305 contentHash.reset(); 306 if (headerHash != expectedHash) { 307 throw new IOException("Frame header checksum mismatch"); 308 } 309 } 310 311 private int readOnce(final byte[] b, final int off, final int len) throws IOException { 312 if (inUncompressed) { 313 final int cnt = currentBlock.read(b, off, len); 314 count(cnt); 315 return cnt; 316 } 317 final BlockLZ4CompressorInputStream l = (BlockLZ4CompressorInputStream) currentBlock; 318 final long before = l.getBytesRead(); 319 final int cnt = currentBlock.read(b, off, len); 320 count(l.getBytesRead() - before); 321 return cnt; 322 } 323 324 private int readOneByte() throws IOException { 325 final int b = inputStream.read(); 326 if (b != -1) { 327 count(1); 328 return b & 0xFF; 329 } 330 return -1; 331 } 332 333 private boolean readSignature(final boolean firstFrame) throws IOException { 334 final String garbageMessage = firstFrame ? "Not a LZ4 frame stream" : "LZ4 frame stream followed by garbage"; 335 final byte[] b = new byte[4]; 336 int read = IOUtils.readFully(inputStream, b); 337 count(read); 338 if (0 == read && !firstFrame) { 339 // good LZ4 frame and nothing after it 340 endReached = true; 341 return false; 342 } 343 if (4 != read) { 344 throw new IOException(garbageMessage); 345 } 346 347 read = skipSkippableFrame(b); 348 if (0 == read && !firstFrame) { 349 // good LZ4 frame with only some skippable frames after it 350 endReached = true; 351 return false; 352 } 353 if (4 != read || !matches(b, 4)) { 354 throw new IOException(garbageMessage); 355 } 356 return true; 357 } 358 359 /** 360 * Skips over the contents of a skippable frame as well as 361 * skippable frames following it. 362 * 363 * <p>It then tries to read four more bytes which are supposed to 364 * hold an LZ4 signature and returns the number of bytes read 365 * while storing the bytes in the given array.</p> 366 */ 367 private int skipSkippableFrame(final byte[] b) throws IOException { 368 int read = 4; 369 while (read == 4 && isSkippableFrameSignature(b)) { 370 final long len = ByteUtils.fromLittleEndian(supplier, 4); 371 if (len < 0) { 372 throw new IOException("Found illegal skippable frame with negative size"); 373 } 374 final long skipped = IOUtils.skip(inputStream, len); 375 count(skipped); 376 if (len != skipped) { 377 throw new IOException("Premature end of stream while skipping frame"); 378 } 379 read = IOUtils.readFully(inputStream, b); 380 count(read); 381 } 382 return read; 383 } 384 385 private void verifyChecksum(final XXHash32 hash, final String kind) throws IOException { 386 final byte[] checksum = new byte[4]; 387 final int read = IOUtils.readFully(inputStream, checksum); 388 count(read); 389 if (4 != read) { 390 throw new IOException("Premature end of stream while reading " + kind + " checksum"); 391 } 392 final long expectedHash = hash.getValue(); 393 if (expectedHash != ByteUtils.fromLittleEndian(checksum)) { 394 throw new IOException(kind + " checksum mismatch."); 395 } 396 } 397 398 private void verifyContentChecksum() throws IOException { 399 if (expectContentChecksum) { 400 verifyChecksum(contentHash, "content"); 401 } 402 contentHash.reset(); 403 } 404}