001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.compress.archivers.zip; 018 019import java.io.BufferedInputStream; 020import java.io.ByteArrayInputStream; 021import java.io.Closeable; 022import java.io.EOFException; 023import java.io.File; 024import java.io.IOException; 025import java.io.InputStream; 026import java.io.SequenceInputStream; 027import java.nio.ByteBuffer; 028import java.nio.channels.FileChannel; 029import java.nio.channels.SeekableByteChannel; 030import java.nio.file.Files; 031import java.nio.file.Path; 032import java.nio.file.StandardOpenOption; 033import java.util.Arrays; 034import java.util.Collections; 035import java.util.Comparator; 036import java.util.EnumSet; 037import java.util.Enumeration; 038import java.util.HashMap; 039import java.util.LinkedList; 040import java.util.List; 041import java.util.Map; 042import java.util.zip.Inflater; 043import java.util.zip.ZipException; 044 045import org.apache.commons.compress.archivers.EntryStreamOffsets; 046import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; 047import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream; 048import 
org.apache.commons.compress.utils.BoundedArchiveInputStream; 049import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream; 050import org.apache.commons.compress.utils.CountingInputStream; 051import org.apache.commons.compress.utils.IOUtils; 052import org.apache.commons.compress.utils.InputStreamStatistics; 053 054/** 055 * Replacement for {@code java.util.zip.ZipFile}. 056 * 057 * <p>This class adds support for file name encodings other than UTF-8 058 * (which is required to work on ZIP files created by native ZIP tools) 059 * and is able to skip a preamble like the one found in 060 * self-extracting archives. Furthermore it returns instances of 061 * {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry} 062 * instead of {@code java.util.zip.ZipEntry}.</p> 063 * 064 * <p>It doesn't extend {@code java.util.zip.ZipFile} as it would 065 * have to reimplement all methods anyway. Like 066 * {@code java.util.zip.ZipFile}, it uses SeekableByteChannel under the 067 * covers and supports compressed and uncompressed entries. As of 068 * Apache Commons Compress 1.3 it also transparently supports Zip64 069 * extensions and thus individual entries and archives larger than 4 070 * GB or with more than 65536 entries.</p> 071 * 072 * <p>The method signatures mimic the ones of 073 * {@code java.util.zip.ZipFile}, with a couple of exceptions:</p> 074 * 075 * <ul> 076 * <li>There is no getName method.</li> 077 * <li>entries has been renamed to getEntries.</li> 078 * <li>getEntries and getEntry return 079 * {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry} 080 * instances.</li> 081 * <li>close is allowed to throw IOException.</li> 082 * </ul> 083 */ 084public class ZipFile implements Closeable { 085 /** 086 * Lock-free implementation of BoundedInputStream. The 087 * implementation uses positioned reads on the underlying archive 088 * file channel and therefore performs significantly faster in 089 * a concurrent environment. 
090 */ 091 private class BoundedFileChannelInputStream extends BoundedArchiveInputStream { 092 private final FileChannel archive; 093 094 BoundedFileChannelInputStream(final long start, final long remaining) { 095 super(start, remaining); 096 archive = (FileChannel) ZipFile.this.archive; 097 } 098 099 @Override 100 protected int read(final long pos, final ByteBuffer buf) throws IOException { 101 final int read = archive.read(buf, pos); 102 buf.flip(); 103 return read; 104 } 105 } 106 /** 107 * Extends ZipArchiveEntry to store the offset within the archive. 108 */ 109 private static class Entry extends ZipArchiveEntry { 110 111 Entry() { 112 } 113 114 @Override 115 public boolean equals(final Object other) { 116 if (super.equals(other)) { 117 // super.equals would return false if other were not an Entry 118 final Entry otherEntry = (Entry) other; 119 return getLocalHeaderOffset() 120 == otherEntry.getLocalHeaderOffset() 121 && super.getDataOffset() 122 == otherEntry.getDataOffset() 123 && super.getDiskNumberStart() 124 == otherEntry.getDiskNumberStart(); 125 } 126 return false; 127 } 128 129 @Override 130 public int hashCode() { 131 return 3 * super.hashCode() 132 + (int) getLocalHeaderOffset()+(int)(getLocalHeaderOffset()>>32); 133 } 134 } 135 private static final class NameAndComment { 136 private final byte[] name; 137 private final byte[] comment; 138 private NameAndComment(final byte[] name, final byte[] comment) { 139 this.name = name; 140 this.comment = comment; 141 } 142 } 143 private static class StoredStatisticsStream extends CountingInputStream implements InputStreamStatistics { 144 StoredStatisticsStream(final InputStream in) { 145 super(in); 146 } 147 148 @Override 149 public long getCompressedCount() { 150 return super.getBytesRead(); 151 } 152 153 @Override 154 public long getUncompressedCount() { 155 return getCompressedCount(); 156 } 157 } 158 private static final int HASH_SIZE = 509; 159 static final int NIBLET_MASK = 0x0f; 160 static final int 
BYTE_SHIFT = 8; 161 private static final int POS_0 = 0; 162 163 private static final int POS_1 = 1; 164 165 private static final int POS_2 = 2; 166 167 private static final int POS_3 = 3; 168 169 private static final byte[] ONE_ZERO_BYTE = new byte[1]; 170 171 /** 172 * Length of a "central directory" entry structure without file 173 * name, extra fields or comment. 174 */ 175 private static final int CFH_LEN = 176 /* version made by */ ZipConstants.SHORT 177 /* version needed to extract */ + ZipConstants.SHORT 178 /* general purpose bit flag */ + ZipConstants.SHORT 179 /* compression method */ + ZipConstants.SHORT 180 /* last mod file time */ + ZipConstants.SHORT 181 /* last mod file date */ + ZipConstants.SHORT 182 /* crc-32 */ + ZipConstants.WORD 183 /* compressed size */ + ZipConstants.WORD 184 /* uncompressed size */ + ZipConstants.WORD 185 /* file name length */ + ZipConstants. SHORT 186 /* extra field length */ + ZipConstants.SHORT 187 /* file comment length */ + ZipConstants.SHORT 188 /* disk number start */ + ZipConstants.SHORT 189 /* internal file attributes */ + ZipConstants.SHORT 190 /* external file attributes */ + ZipConstants.WORD 191 /* relative offset of local header */ + ZipConstants.WORD; 192 193 private static final long CFH_SIG = 194 ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG); 195 196 /** 197 * Length of the "End of central directory record" - which is 198 * supposed to be the last structure of the archive - without file 199 * comment. 
200 */ 201 static final int MIN_EOCD_SIZE = 202 /* end of central dir signature */ ZipConstants.WORD 203 /* number of this disk */ + ZipConstants.SHORT 204 /* number of the disk with the */ 205 /* start of the central directory */ + ZipConstants.SHORT 206 /* total number of entries in */ 207 /* the central dir on this disk */ + ZipConstants.SHORT 208 /* total number of entries in */ 209 /* the central dir */ + ZipConstants.SHORT 210 /* size of the central directory */ + ZipConstants.WORD 211 /* offset of start of central */ 212 /* directory with respect to */ 213 /* the starting disk number */ + ZipConstants.WORD 214 /* ZIP file comment length */ + ZipConstants.SHORT; 215 216 /** 217 * Maximum length of the "End of central directory record" with a 218 * file comment. 219 */ 220 private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE 221 /* maximum length of ZIP file comment */ + ZipConstants.ZIP64_MAGIC_SHORT; 222 223 /** 224 * Offset of the field that holds the location of the length of 225 * the central directory inside the "End of central directory 226 * record" relative to the start of the "End of central directory 227 * record". 228 */ 229 private static final int CFD_LENGTH_OFFSET = 230 /* end of central dir signature */ ZipConstants.WORD 231 /* number of this disk */ + ZipConstants.SHORT 232 /* number of the disk with the */ 233 /* start of the central directory */ + ZipConstants.SHORT 234 /* total number of entries in */ 235 /* the central dir on this disk */ + ZipConstants.SHORT 236 /* total number of entries in */ 237 /* the central dir */ + ZipConstants.SHORT; 238 239 /** 240 * Offset of the field that holds the disk number of the first 241 * central directory entry inside the "End of central directory 242 * record" relative to the start of the "End of central directory 243 * record". 
244 */ 245 private static final int CFD_DISK_OFFSET = 246 /* end of central dir signature */ ZipConstants.WORD 247 /* number of this disk */ + ZipConstants.SHORT; 248 /** 249 * Offset of the field that holds the location of the first 250 * central directory entry inside the "End of central directory 251 * record" relative to the "number of the disk with the start 252 * of the central directory". 253 */ 254 private static final int CFD_LOCATOR_RELATIVE_OFFSET = 255 /* total number of entries in */ 256 /* the central dir on this disk */ + ZipConstants.SHORT 257 /* total number of entries in */ 258 /* the central dir */ + ZipConstants.SHORT 259 /* size of the central directory */ + ZipConstants.WORD; 260 /** 261 * Length of the "Zip64 end of central directory locator" - which 262 * should be right in front of the "end of central directory 263 * record" if one is present at all. 264 */ 265 private static final int ZIP64_EOCDL_LENGTH = 266 /* zip64 end of central dir locator sig */ ZipConstants.WORD 267 /* number of the disk with the start */ 268 /* start of the zip64 end of */ 269 /* central directory */ + ZipConstants.WORD 270 /* relative offset of the zip64 */ 271 /* end of central directory record */ + ZipConstants.DWORD 272 /* total number of disks */ + ZipConstants.WORD; 273 /** 274 * Offset of the field that holds the location of the "Zip64 end 275 * of central directory record" inside the "Zip64 end of central 276 * directory locator" relative to the start of the "Zip64 end of 277 * central directory locator". 
278 */ 279 private static final int ZIP64_EOCDL_LOCATOR_OFFSET = 280 /* zip64 end of central dir locator sig */ ZipConstants.WORD 281 /* number of the disk with the start */ 282 /* start of the zip64 end of */ 283 /* central directory */ + ZipConstants.WORD; 284 /** 285 * Offset of the field that holds the location of the first 286 * central directory entry inside the "Zip64 end of central 287 * directory record" relative to the start of the "Zip64 end of 288 * central directory record". 289 */ 290 private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET = 291 /* zip64 end of central dir */ 292 /* signature */ ZipConstants.WORD 293 /* size of zip64 end of central */ 294 /* directory record */ + ZipConstants.DWORD 295 /* version made by */ + ZipConstants.SHORT 296 /* version needed to extract */ + ZipConstants.SHORT 297 /* number of this disk */ + ZipConstants.WORD 298 /* number of the disk with the */ 299 /* start of the central directory */ + ZipConstants.WORD 300 /* total number of entries in the */ 301 /* central directory on this disk */ + ZipConstants.DWORD 302 /* total number of entries in the */ 303 /* central directory */ + ZipConstants.DWORD 304 /* size of the central directory */ + ZipConstants.DWORD; 305 /** 306 * Offset of the field that holds the disk number of the first 307 * central directory entry inside the "Zip64 end of central 308 * directory record" relative to the start of the "Zip64 end of 309 * central directory record". 
310 */ 311 private static final int ZIP64_EOCD_CFD_DISK_OFFSET = 312 /* zip64 end of central dir */ 313 /* signature */ ZipConstants.WORD 314 /* size of zip64 end of central */ 315 /* directory record */ + ZipConstants.DWORD 316 /* version made by */ + ZipConstants.SHORT 317 /* version needed to extract */ + ZipConstants.SHORT 318 /* number of this disk */ + ZipConstants.WORD; 319 /** 320 * Offset of the field that holds the location of the first 321 * central directory entry inside the "Zip64 end of central 322 * directory record" relative to the "number of the disk 323 * with the start of the central directory". 324 */ 325 private static final int ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET = 326 /* total number of entries in the */ 327 /* central directory on this disk */ ZipConstants.DWORD 328 /* total number of entries in the */ 329 /* central directory */ + ZipConstants.DWORD 330 /* size of the central directory */ + ZipConstants.DWORD; 331 /** 332 * Number of bytes in local file header up to the "length of 333 * file name" entry. 334 */ 335 private static final long LFH_OFFSET_FOR_FILENAME_LENGTH = 336 /* local file header signature */ ZipConstants.WORD 337 /* version needed to extract */ + ZipConstants.SHORT 338 /* general purpose bit flag */ + ZipConstants.SHORT 339 /* compression method */ + ZipConstants.SHORT 340 /* last mod file time */ + ZipConstants.SHORT 341 /* last mod file date */ + ZipConstants.SHORT 342 /* crc-32 */ + ZipConstants.WORD 343 /* compressed size */ + ZipConstants.WORD 344 /* uncompressed size */ + (long) ZipConstants.WORD; 345 346 /** 347 * Compares two ZipArchiveEntries based on their offset within the archive. 
348 * 349 * <p>Won't return any meaningful results if one of the entries 350 * isn't part of the archive at all.</p> 351 * 352 * @since 1.1 353 */ 354 private static final Comparator<ZipArchiveEntry> offsetComparator = 355 Comparator.comparingLong(ZipArchiveEntry::getDiskNumberStart) 356 .thenComparingLong(ZipArchiveEntry::getLocalHeaderOffset); 357 358 /** 359 * Closes a ZIP file quietly; throwing no IOException, does nothing 360 * on null input. 361 * @param zipFile file to close, can be null 362 */ 363 public static void closeQuietly(final ZipFile zipFile) { 364 IOUtils.closeQuietly(zipFile); 365 } 366 367 /** 368 * List of entries in the order they appear inside the central 369 * directory. 370 */ 371 private final List<ZipArchiveEntry> entries = new LinkedList<>(); 372 373 /** 374 * Maps String to list of ZipArchiveEntrys, name -> actual entries. 375 */ 376 private final Map<String, LinkedList<ZipArchiveEntry>> nameMap = new HashMap<>(HASH_SIZE); 377 378 /** 379 * The encoding to use for file names and the file comment. 380 * 381 * <p>For a list of possible values see <a 382 * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>. 383 * Defaults to UTF-8.</p> 384 */ 385 private final String encoding; 386 387 /** 388 * The ZIP encoding to use for file names and the file comment. 389 */ 390 private final ZipEncoding zipEncoding; 391 392 /** 393 * File name of actual source. 394 */ 395 private final String archiveName; 396 397 /** 398 * The actual data source. 399 */ 400 private final SeekableByteChannel archive; 401 402 /** 403 * Whether to look for and use Unicode extra fields. 404 */ 405 private final boolean useUnicodeExtraFields; 406 407 /** 408 * Whether the file is closed. 
409 */ 410 private volatile boolean closed = true; 411 412 /** 413 * Whether the ZIP archive is a split ZIP archive 414 */ 415 private final boolean isSplitZipArchive; 416 417 // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection) 418 private final byte[] dwordBuf = new byte[ZipConstants.DWORD]; 419 420 private final byte[] wordBuf = new byte[ZipConstants.WORD]; 421 422 private final byte[] cfhBuf = new byte[CFH_LEN]; 423 424 private final byte[] shortBuf = new byte[ZipConstants.SHORT]; 425 426 private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf); 427 428 private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf); 429 430 private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf); 431 432 private final ByteBuffer shortBbuf = ByteBuffer.wrap(shortBuf); 433 434 private long centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset; 435 436 private long centralDirectoryStartOffset; 437 438 private long firstLocalFileHeaderOffset; 439 440 /** 441 * Opens the given file for reading, assuming "UTF8" for file names. 442 * 443 * @param f the archive. 444 * 445 * @throws IOException if an error occurs while reading the file. 446 */ 447 public ZipFile(final File f) throws IOException { 448 this(f, ZipEncodingHelper.UTF8); 449 } 450 451 /** 452 * Opens the given file for reading, assuming the specified 453 * encoding for file names and scanning for Unicode extra fields. 454 * 455 * @param f the archive. 456 * @param encoding the encoding to use for file names, use null 457 * for the platform's default encoding 458 * 459 * @throws IOException if an error occurs while reading the file. 460 */ 461 public ZipFile(final File f, final String encoding) throws IOException { 462 this(f.toPath(), encoding, true); 463 } 464 465 /** 466 * Opens the given file for reading, assuming the specified 467 * encoding for file names. 468 * 469 * @param f the archive. 
470 * @param encoding the encoding to use for file names, use null 471 * for the platform's default encoding 472 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 473 * Extra Fields (if present) to set the file names. 474 * 475 * @throws IOException if an error occurs while reading the file. 476 */ 477 public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields) 478 throws IOException { 479 this(f.toPath(), encoding, useUnicodeExtraFields, false); 480 } 481 482 /** 483 * Opens the given file for reading, assuming the specified 484 * encoding for file names. 485 * 486 * <p>By default the central directory record and all local file headers of the archive will be read immediately 487 * which may take a considerable amount of time when the archive is big. The {@code ignoreLocalFileHeader} parameter 488 * can be set to {@code true} which restricts parsing to the central directory. Unfortunately the local file header 489 * may contain information not present inside of the central directory which will not be available when the argument 490 * is set to {@code true}. This includes the content of the Unicode extra field, so setting {@code 491 * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.</p> 492 * 493 * @param f the archive. 494 * @param encoding the encoding to use for file names, use null 495 * for the platform's default encoding 496 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 497 * Extra Fields (if present) to set the file names. 498 * @param ignoreLocalFileHeader whether to ignore information 499 * stored inside the local file header (see the notes in this method's javadoc) 500 * 501 * @throws IOException if an error occurs while reading the file. 
502 * @since 1.19 503 */ 504 public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields, 505 final boolean ignoreLocalFileHeader) 506 throws IOException { 507 this(Files.newByteChannel(f.toPath(), EnumSet.of(StandardOpenOption.READ)), 508 f.getAbsolutePath(), encoding, useUnicodeExtraFields, true, ignoreLocalFileHeader); 509 } 510 511 /** 512 * Opens the given path for reading, assuming "UTF8" for file names. 513 * 514 * @param path path to the archive. 515 * @throws IOException if an error occurs while reading the file. 516 * @since 1.22 517 */ 518 public ZipFile(final Path path) throws IOException { 519 this(path, ZipEncodingHelper.UTF8); 520 } 521 522 /** 523 * Opens the given path for reading, assuming the specified 524 * encoding for file names and scanning for Unicode extra fields. 525 * 526 * @param path path to the archive. 527 * @param encoding the encoding to use for file names, use null 528 * for the platform's default encoding 529 * @throws IOException if an error occurs while reading the file. 530 * @since 1.22 531 */ 532 public ZipFile(final Path path, final String encoding) throws IOException { 533 this(path, encoding, true); 534 } 535 536 537 /** 538 * Opens the given path for reading, assuming the specified 539 * encoding for file names. 540 * 541 * @param path path to the archive. 542 * @param encoding the encoding to use for file names, use null 543 * for the platform's default encoding 544 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 545 * Extra Fields (if present) to set the file names. 546 * @throws IOException if an error occurs while reading the file. 547 * @since 1.22 548 */ 549 public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields) 550 throws IOException { 551 this(path, encoding, useUnicodeExtraFields, false); 552 } 553 554 /** 555 * Opens the given path for reading, assuming the specified 556 * encoding for file names. 
557 * <p>By default the central directory record and all local file headers of the archive will be read immediately 558 * which may take a considerable amount of time when the archive is big. The {@code ignoreLocalFileHeader} parameter 559 * can be set to {@code true} which restricts parsing to the central directory. Unfortunately the local file header 560 * may contain information not present inside of the central directory which will not be available when the argument 561 * is set to {@code true}. This includes the content of the Unicode extra field, so setting {@code 562 * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.</p> 563 * 564 * @param path path to the archive. 565 * @param encoding the encoding to use for file names, use null 566 * for the platform's default encoding 567 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 568 * Extra Fields (if present) to set the file names. 569 * @param ignoreLocalFileHeader whether to ignore information 570 * stored inside the local file header (see the notes in this method's javadoc) 571 * @throws IOException if an error occurs while reading the file. 572 * @since 1.22 573 */ 574 public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields, 575 final boolean ignoreLocalFileHeader) 576 throws IOException { 577 this(Files.newByteChannel(path, EnumSet.of(StandardOpenOption.READ)), 578 path.toAbsolutePath().toString(), encoding, useUnicodeExtraFields, 579 true, ignoreLocalFileHeader); 580 } 581 582 /** 583 * Opens the given channel for reading, assuming "UTF8" for file names. 584 * 585 * <p>{@link 586 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 587 * allows you to read from an in-memory archive.</p> 588 * 589 * @param channel the archive. 590 * 591 * @throws IOException if an error occurs while reading the file. 
592 * @since 1.13 593 */ 594 public ZipFile(final SeekableByteChannel channel) 595 throws IOException { 596 this(channel, "unknown archive", ZipEncodingHelper.UTF8, true); 597 } 598 599 /** 600 * Opens the given channel for reading, assuming the specified 601 * encoding for file names. 602 * 603 * <p>{@link 604 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 605 * allows you to read from an in-memory archive.</p> 606 * 607 * @param channel the archive. 608 * @param encoding the encoding to use for file names, use null 609 * for the platform's default encoding 610 * 611 * @throws IOException if an error occurs while reading the file. 612 * @since 1.13 613 */ 614 public ZipFile(final SeekableByteChannel channel, final String encoding) 615 throws IOException { 616 this(channel, "unknown archive", encoding, true); 617 } 618 619 /** 620 * Opens the given channel for reading, assuming the specified 621 * encoding for file names. 622 * 623 * <p>{@link 624 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 625 * allows you to read from an in-memory archive.</p> 626 * 627 * @param channel the archive. 628 * @param archiveName name of the archive, used for error messages only. 629 * @param encoding the encoding to use for file names, use null 630 * for the platform's default encoding 631 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 632 * Extra Fields (if present) to set the file names. 633 * 634 * @throws IOException if an error occurs while reading the file. 635 * @since 1.13 636 */ 637 public ZipFile(final SeekableByteChannel channel, final String archiveName, 638 final String encoding, final boolean useUnicodeExtraFields) 639 throws IOException { 640 this(channel, archiveName, encoding, useUnicodeExtraFields, false, false); 641 } 642 643 /** 644 * Opens the given channel for reading, assuming the specified 645 * encoding for file names. 
646 * 647 * <p>{@link 648 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 649 * allows you to read from an in-memory archive.</p> 650 * 651 * <p>By default the central directory record and all local file headers of the archive will be read immediately 652 * which may take a considerable amount of time when the archive is big. The {@code ignoreLocalFileHeader} parameter 653 * can be set to {@code true} which restricts parsing to the central directory. Unfortunately the local file header 654 * may contain information not present inside of the central directory which will not be available when the argument 655 * is set to {@code true}. This includes the content of the Unicode extra field, so setting {@code 656 * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.</p> 657 * 658 * @param channel the archive. 659 * @param archiveName name of the archive, used for error messages only. 660 * @param encoding the encoding to use for file names, use null 661 * for the platform's default encoding 662 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 663 * Extra Fields (if present) to set the file names. 664 * @param ignoreLocalFileHeader whether to ignore information 665 * stored inside the local file header (see the notes in this method's javadoc) 666 * 667 * @throws IOException if an error occurs while reading the file. 
668 * @since 1.19 669 */ 670 public ZipFile(final SeekableByteChannel channel, final String archiveName, 671 final String encoding, final boolean useUnicodeExtraFields, 672 final boolean ignoreLocalFileHeader) 673 throws IOException { 674 this(channel, archiveName, encoding, useUnicodeExtraFields, false, ignoreLocalFileHeader); 675 } 676 677 private ZipFile(final SeekableByteChannel channel, final String archiveName, 678 final String encoding, final boolean useUnicodeExtraFields, 679 final boolean closeOnError, final boolean ignoreLocalFileHeader) 680 throws IOException { 681 isSplitZipArchive = (channel instanceof ZipSplitReadOnlySeekableByteChannel); 682 683 this.archiveName = archiveName; 684 this.encoding = encoding; 685 this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 686 this.useUnicodeExtraFields = useUnicodeExtraFields; 687 archive = channel; 688 boolean success = false; 689 try { 690 final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = 691 populateFromCentralDirectory(); 692 if (!ignoreLocalFileHeader) { 693 resolveLocalFileHeaderData(entriesWithoutUTF8Flag); 694 } 695 fillNameMap(); 696 success = true; 697 } catch (final IOException e) { 698 throw new IOException("Error on ZipFile " + archiveName, e); 699 } finally { 700 closed = !success; 701 if (!success && closeOnError) { 702 IOUtils.closeQuietly(archive); 703 } 704 } 705 } 706 707 /** 708 * Opens the given file for reading, assuming "UTF8". 709 * 710 * @param name name of the archive. 711 * 712 * @throws IOException if an error occurs while reading the file. 713 */ 714 public ZipFile(final String name) throws IOException { 715 this(new File(name).toPath(), ZipEncodingHelper.UTF8); 716 } 717 718 /** 719 * Opens the given file for reading, assuming the specified 720 * encoding for file names, scanning unicode extra fields. 721 * 722 * @param name name of the archive. 
723 * @param encoding the encoding to use for file names, use null 724 * for the platform's default encoding 725 * 726 * @throws IOException if an error occurs while reading the file. 727 */ 728 public ZipFile(final String name, final String encoding) throws IOException { 729 this(new File(name).toPath(), encoding, true); 730 } 731 732 /** 733 * Whether this class is able to read the given entry. 734 * 735 * <p>May return false if it is set up to use encryption or a 736 * compression method that hasn't been implemented yet.</p> 737 * @since 1.1 738 * @param ze the entry 739 * @return whether this class is able to read the given entry. 740 */ 741 public boolean canReadEntryData(final ZipArchiveEntry ze) { 742 return ZipUtil.canHandleEntryData(ze); 743 } 744 745 /** 746 * Closes the archive. 747 * @throws IOException if an error occurs closing the archive. 748 */ 749 @Override 750 public void close() throws IOException { 751 // this flag is only written here and read in finalize() which 752 // can never be run in parallel. 753 // no synchronization needed. 754 closed = true; 755 756 archive.close(); 757 } 758 759 /** 760 * Transfer selected entries from this ZIP file to a given #ZipArchiveOutputStream. 761 * Compression and all other attributes will be as in this file. 
762 * <p>This method transfers entries based on the central directory of the ZIP file.</p> 763 * 764 * @param target The zipArchiveOutputStream to write the entries to 765 * @param predicate A predicate that selects which entries to write 766 * @throws IOException on error 767 */ 768 public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate) 769 throws IOException { 770 final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder(); 771 while (src.hasMoreElements()) { 772 final ZipArchiveEntry entry = src.nextElement(); 773 if (predicate.test( entry)) { 774 target.addRawArchiveEntry(entry, getRawInputStream(entry)); 775 } 776 } 777 } 778 779 /** 780 * Creates new BoundedInputStream, according to implementation of 781 * underlying archive channel. 782 */ 783 private BoundedArchiveInputStream createBoundedInputStream(final long start, final long remaining) { 784 if (start < 0 || remaining < 0 || start + remaining < start) { 785 throw new IllegalArgumentException("Corrupted archive, stream boundaries" 786 + " are out of range"); 787 } 788 return archive instanceof FileChannel ? 789 new BoundedFileChannelInputStream(start, remaining) : 790 new BoundedSeekableByteChannelInputStream(start, remaining, archive); 791 } 792 793 private void fillNameMap() { 794 entries.forEach(ze -> { 795 // entries are filled in populateFromCentralDirectory and 796 // never modified 797 final String name = ze.getName(); 798 final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.computeIfAbsent(name, k -> new LinkedList<>()); 799 entriesOfThatName.addLast(ze); 800 }); 801 } 802 803 /** 804 * Ensures that the close method of this ZIP file is called when 805 * there are no more references to it. 
806 * @see #close() 807 */ 808 @Override 809 protected void finalize() throws Throwable { 810 try { 811 if (!closed) { 812 close(); 813 } 814 } finally { 815 super.finalize(); 816 } 817 } 818 819 /** 820 * Gets an InputStream for reading the content before the first local file header. 821 * 822 * @return null if there is no content before the first local file header. 823 * Otherwise, returns a stream to read the content before the first local file header. 824 * @since 1.23 825 */ 826 public InputStream getContentBeforeFirstLocalFileHeader() { 827 return firstLocalFileHeaderOffset == 0 828 ? null : createBoundedInputStream(0, firstLocalFileHeaderOffset); 829 } 830 831 private long getDataOffset(final ZipArchiveEntry ze) throws IOException { 832 final long s = ze.getDataOffset(); 833 if (s == EntryStreamOffsets.OFFSET_UNKNOWN) { 834 setDataOffset(ze); 835 return ze.getDataOffset(); 836 } 837 return s; 838 } 839 840 /** 841 * Gets the encoding to use for file names and the file comment. 842 * 843 * @return null if using the platform's default character encoding. 844 */ 845 public String getEncoding() { 846 return encoding; 847 } 848 849 /** 850 * Gets all entries. 851 * 852 * <p>Entries will be returned in the same order they appear 853 * within the archive's central directory.</p> 854 * 855 * @return all entries as {@link ZipArchiveEntry} instances 856 */ 857 public Enumeration<ZipArchiveEntry> getEntries() { 858 return Collections.enumeration(entries); 859 } 860 861 /** 862 * Gets all named entries in the same order they appear within 863 * the archive's central directory. 864 * 865 * @param name name of the entry. 866 * @return the Iterable<ZipArchiveEntry> corresponding to the 867 * given name 868 * @since 1.6 869 */ 870 public Iterable<ZipArchiveEntry> getEntries(final String name) { 871 return nameMap.getOrDefault(name, ZipArchiveEntry.EMPTY_LINKED_LIST); 872 } 873 874 /** 875 * Gets all entries in physical order. 
876 * 877 * <p>Entries will be returned in the same order their contents 878 * appear within the archive.</p> 879 * 880 * @return all entries as {@link ZipArchiveEntry} instances 881 * 882 * @since 1.1 883 */ 884 public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() { 885 final ZipArchiveEntry[] allEntries = entries.toArray(ZipArchiveEntry.EMPTY_ARRAY); 886 Arrays.sort(allEntries, offsetComparator); 887 return Collections.enumeration(Arrays.asList(allEntries)); 888 } 889 890 /** 891 * Gets all named entries in the same order their contents 892 * appear within the archive. 893 * 894 * @param name name of the entry. 895 * @return the Iterable<ZipArchiveEntry> corresponding to the 896 * given name 897 * @since 1.6 898 */ 899 public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) { 900 ZipArchiveEntry[] entriesOfThatName = ZipArchiveEntry.EMPTY_ARRAY; 901 final LinkedList<ZipArchiveEntry> linkedList = nameMap.get(name); 902 if (linkedList != null) { 903 entriesOfThatName = linkedList.toArray(entriesOfThatName); 904 Arrays.sort(entriesOfThatName, offsetComparator); 905 } 906 return Arrays.asList(entriesOfThatName); 907 } 908 909 /** 910 * Gets a named entry or {@code null} if no entry by 911 * that name exists. 912 * 913 * <p>If multiple entries with the same name exist the first entry 914 * in the archive's central directory by that name is 915 * returned.</p> 916 * 917 * @param name name of the entry. 918 * @return the ZipArchiveEntry corresponding to the given name - or 919 * {@code null} if not present. 920 */ 921 public ZipArchiveEntry getEntry(final String name) { 922 final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 923 return entriesOfThatName != null ? entriesOfThatName.getFirst() : null; 924 } 925 926 /** 927 * Gets the offset of the first local file header in the file. 
928 * 929 * @return the length of the content before the first local file header 930 * @since 1.23 931 */ 932 public long getFirstLocalFileHeaderOffset() { 933 return firstLocalFileHeaderOffset; 934 } 935 936 /** 937 * Gets an InputStream for reading the contents of the given entry. 938 * 939 * @param zipEntry the entry to get the stream for. 940 * @return a stream to read the entry from. The returned stream 941 * implements {@link InputStreamStatistics}. 942 * @throws IOException if unable to create an input stream from the zipEntry. 943 */ 944 public InputStream getInputStream(final ZipArchiveEntry zipEntry) 945 throws IOException { 946 if (!(zipEntry instanceof Entry)) { 947 return null; 948 } 949 // cast validity is checked just above 950 ZipUtil.checkRequestedFeatures(zipEntry); 951 952 // doesn't get closed if the method is not supported - which 953 // should never happen because of the checkRequestedFeatures 954 // call above 955 final InputStream is = new BufferedInputStream(getRawInputStream(zipEntry)); //NOSONAR 956 switch (ZipMethod.getMethodByCode(zipEntry.getMethod())) { 957 case STORED: 958 return new StoredStatisticsStream(is); 959 case UNSHRINKING: 960 return new UnshrinkingInputStream(is); 961 case IMPLODING: 962 try { 963 return new ExplodingInputStream(zipEntry.getGeneralPurposeBit().getSlidingDictionarySize(), 964 zipEntry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), is); 965 } catch (final IllegalArgumentException ex) { 966 throw new IOException("bad IMPLODE data", ex); 967 } 968 case DEFLATED: 969 final Inflater inflater = new Inflater(true); 970 // Inflater with nowrap=true has this odd contract for a zero padding 971 // byte following the data stream; this used to be zlib's requirement 972 // and has been fixed a long time ago, but the contract persists so 973 // we comply. 
974 // https://docs.oracle.com/javase/7/docs/api/java/util/zip/Inflater.html#Inflater(boolean) 975 return new InflaterInputStreamWithStatistics(new SequenceInputStream(is, new ByteArrayInputStream(ONE_ZERO_BYTE)), 976 inflater) { 977 @Override 978 public void close() throws IOException { 979 try { 980 super.close(); 981 } finally { 982 inflater.end(); 983 } 984 } 985 }; 986 case BZIP2: 987 return new BZip2CompressorInputStream(is); 988 case ENHANCED_DEFLATED: 989 return new Deflate64CompressorInputStream(is); 990 case AES_ENCRYPTED: 991 case EXPANDING_LEVEL_1: 992 case EXPANDING_LEVEL_2: 993 case EXPANDING_LEVEL_3: 994 case EXPANDING_LEVEL_4: 995 case JPEG: 996 case LZMA: 997 case PKWARE_IMPLODING: 998 case PPMD: 999 case TOKENIZATION: 1000 case UNKNOWN: 1001 case WAVPACK: 1002 case XZ: 1003 default: 1004 throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(zipEntry.getMethod()), zipEntry); 1005 } 1006 } 1007 1008 /** 1009 * Gets the raw stream of the archive entry (compressed form). 1010 * 1011 * <p>This method does not relate to how/if we understand the payload in the 1012 * stream, since we really only intend to move it on to somewhere else.</p> 1013 * 1014 * <p>Since version 1.22, this method will make an attempt to read the entry's data 1015 * stream offset, even if the {@code ignoreLocalFileHeader} parameter was {@code true} 1016 * in the constructor. An IOException can also be thrown from the body of the method 1017 * if this lookup fails for some reason.</p> 1018 * 1019 * @param ze The entry to get the stream for 1020 * @return The raw input stream containing (possibly) compressed data. 1021 * @since 1.11 1022 * @throws IOException if there is a problem reading data offset (added in version 1.22). 
1023 */ 1024 public InputStream getRawInputStream(final ZipArchiveEntry ze) throws IOException { 1025 if (!(ze instanceof Entry)) { 1026 return null; 1027 } 1028 1029 final long start = getDataOffset(ze); 1030 if (start == EntryStreamOffsets.OFFSET_UNKNOWN) { 1031 return null; 1032 } 1033 return createBoundedInputStream(start, ze.getCompressedSize()); 1034 } 1035 1036 /** 1037 * Gets the entry's content as a String if isUnixSymlink() 1038 * returns true for it, otherwise returns null. 1039 * <p>This method assumes the symbolic link's file name uses the 1040 * same encoding that as been specified for this ZipFile.</p> 1041 * 1042 * @param entry ZipArchiveEntry object that represents the symbolic link 1043 * @return entry's content as a String 1044 * @throws IOException problem with content's input stream 1045 * @since 1.5 1046 */ 1047 public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException { 1048 if (entry != null && entry.isUnixSymlink()) { 1049 try (InputStream in = getInputStream(entry)) { 1050 return zipEncoding.decode(IOUtils.toByteArray(in)); 1051 } 1052 } 1053 return null; 1054 } 1055 1056 /** 1057 * Reads the central directory of the given archive and populates 1058 * the internal tables with ZipArchiveEntry instances. 1059 * 1060 * <p>The ZipArchiveEntrys will know all data that can be obtained from 1061 * the central directory alone, but not the data that requires the 1062 * local file header or additional data to be read.</p> 1063 * 1064 * @return a map of zip entries that didn't have the language 1065 * encoding flag set when read. 
1066 */ 1067 private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory() 1068 throws IOException { 1069 final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag = 1070 new HashMap<>(); 1071 1072 positionAtCentralDirectory(); 1073 centralDirectoryStartOffset = archive.position(); 1074 1075 wordBbuf.rewind(); 1076 IOUtils.readFully(archive, wordBbuf); 1077 long sig = ZipLong.getValue(wordBuf); 1078 1079 if (sig != CFH_SIG && startsWithLocalFileHeader()) { 1080 throw new IOException("Central directory is empty, can't expand" 1081 + " corrupt archive."); 1082 } 1083 1084 while (sig == CFH_SIG) { 1085 readCentralDirectoryEntry(noUTF8Flag); 1086 wordBbuf.rewind(); 1087 IOUtils.readFully(archive, wordBbuf); 1088 sig = ZipLong.getValue(wordBuf); 1089 } 1090 return noUTF8Flag; 1091 } 1092 1093 /** 1094 * Searches for either the "Zip64 end of central directory 1095 * locator" or the "End of central dir record", parses 1096 * it and positions the stream at the first central directory 1097 * record. 1098 */ 1099 private void positionAtCentralDirectory() 1100 throws IOException { 1101 positionAtEndOfCentralDirectoryRecord(); 1102 boolean found = false; 1103 final boolean searchedForZip64EOCD = 1104 archive.position() > ZIP64_EOCDL_LENGTH; 1105 if (searchedForZip64EOCD) { 1106 archive.position(archive.position() - ZIP64_EOCDL_LENGTH); 1107 wordBbuf.rewind(); 1108 IOUtils.readFully(archive, wordBbuf); 1109 found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG, 1110 wordBuf); 1111 } 1112 if (!found) { 1113 // not a ZIP64 archive 1114 if (searchedForZip64EOCD) { 1115 skipBytes(ZIP64_EOCDL_LENGTH - ZipConstants.WORD); 1116 } 1117 positionAtCentralDirectory32(); 1118 } else { 1119 positionAtCentralDirectory64(); 1120 } 1121 } 1122 1123 /** 1124 * Parses the "End of central dir record" and positions 1125 * the stream at the first central directory record. 
1126 * 1127 * Expects stream to be positioned at the beginning of the 1128 * "End of central dir record". 1129 */ 1130 private void positionAtCentralDirectory32() 1131 throws IOException { 1132 final long endOfCentralDirectoryRecordOffset = archive.position(); 1133 if (isSplitZipArchive) { 1134 skipBytes(CFD_DISK_OFFSET); 1135 shortBbuf.rewind(); 1136 IOUtils.readFully(archive, shortBbuf); 1137 centralDirectoryStartDiskNumber = ZipShort.getValue(shortBuf); 1138 1139 skipBytes(CFD_LOCATOR_RELATIVE_OFFSET); 1140 1141 wordBbuf.rewind(); 1142 IOUtils.readFully(archive, wordBbuf); 1143 centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf); 1144 ((ZipSplitReadOnlySeekableByteChannel) archive) 1145 .position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset); 1146 } else { 1147 skipBytes(CFD_LENGTH_OFFSET); 1148 wordBbuf.rewind(); 1149 IOUtils.readFully(archive, wordBbuf); 1150 final long centralDirectoryLength = ZipLong.getValue(wordBuf); 1151 1152 wordBbuf.rewind(); 1153 IOUtils.readFully(archive, wordBbuf); 1154 centralDirectoryStartDiskNumber = 0; 1155 centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf); 1156 1157 firstLocalFileHeaderOffset = Long.max( 1158 endOfCentralDirectoryRecordOffset - centralDirectoryLength - centralDirectoryStartRelativeOffset, 1159 0L); 1160 archive.position(centralDirectoryStartRelativeOffset + firstLocalFileHeaderOffset); 1161 } 1162 } 1163 1164 /** 1165 * Parses the "Zip64 end of central directory locator", 1166 * finds the "Zip64 end of central directory record" using the 1167 * parsed information, parses that and positions the stream at the 1168 * first central directory record. 1169 * 1170 * Expects stream to be positioned right behind the "Zip64 1171 * end of central directory locator"'s signature. 
*/
    private void positionAtCentralDirectory64()
        throws IOException {
        // step 1: use the locator to jump to the "Zip64 end of central directory record"
        if (isSplitZipArchive) {
            wordBbuf.rewind();
            IOUtils.readFully(archive, wordBbuf);
            final long eocdDiskNumber = ZipLong.getValue(wordBuf);

            dwordBbuf.rewind();
            IOUtils.readFully(archive, dwordBbuf);
            final long eocdRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf);
            final ZipSplitReadOnlySeekableByteChannel splitArchive = (ZipSplitReadOnlySeekableByteChannel) archive;
            splitArchive.position(eocdDiskNumber, eocdRelativeOffset);
        } else {
            skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET
                - ZipConstants.WORD /* signature has already been read */);
            dwordBbuf.rewind();
            IOUtils.readFully(archive, dwordBbuf);
            archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
        }

        // step 2: the record must start with its signature
        wordBbuf.rewind();
        IOUtils.readFully(archive, wordBbuf);
        final boolean recordSignatureFound = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_SIG, wordBuf);
        if (!recordSignatureFound) {
            throw new ZipException("Archive's ZIP64 end of central "
                + "directory locator is corrupt.");
        }

        // step 3: read where the central directory starts and go there
        if (isSplitZipArchive) {
            skipBytes(ZIP64_EOCD_CFD_DISK_OFFSET
                - ZipConstants.WORD /* signature has already been read */);
            wordBbuf.rewind();
            IOUtils.readFully(archive, wordBbuf);
            centralDirectoryStartDiskNumber = ZipLong.getValue(wordBuf);

            skipBytes(ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET);

            dwordBbuf.rewind();
            IOUtils.readFully(archive, dwordBbuf);
            centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf);
            final ZipSplitReadOnlySeekableByteChannel splitArchive = (ZipSplitReadOnlySeekableByteChannel) archive;
            splitArchive.position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset);
        } else {
            skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET
                - ZipConstants.WORD /* signature has already been read */);
            dwordBbuf.rewind();
            IOUtils.readFully(archive, dwordBbuf);
            centralDirectoryStartDiskNumber = 0;
            centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf);
            archive.position(centralDirectoryStartRelativeOffset);
        }
    }

    /**
     * Searches for the "End of central dir record" and positions the
     * stream at its start.
     *
     * @throws ZipException if no such record can be found
     * @throws IOException if reading the archive fails
     */
    private void positionAtEndOfCentralDirectoryRecord()
        throws IOException {
        if (!tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE, ZipArchiveOutputStream.EOCD_SIG)) {
            throw new ZipException("Archive is not a ZIP archive");
        }
    }

    /**
     * Reads an individual entry of the central directory, creates an
     * ZipArchiveEntry from it and adds it to the global maps.
     *
     * @param noUTF8Flag map used to collect entries that don't have
     * their UTF-8 flag set and whose name will be set by data read
     * from the local file header later. The current entry may be
     * added to this map.
     * @throws IOException if the archive cannot be read, ends
     * prematurely or holds invalid data
     */
    private void
        readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag)
        throws IOException {
        // the fixed-size part of the header is parsed from cfhBuf, pos tracks the read offset
        cfhBbuf.rewind();
        IOUtils.readFully(archive, cfhBbuf);
        final Entry entry = new Entry();
        int pos = 0;

        // version made by, which also carries the platform
        final int madeBy = ZipShort.getValue(cfhBuf, pos);
        pos += ZipConstants.SHORT;
        entry.setVersionMadeBy(madeBy);
        entry.setPlatform((madeBy >> BYTE_SHIFT) & NIBLET_MASK);

        // version required
        entry.setVersionRequired(ZipShort.getValue(cfhBuf, pos));
        pos += ZipConstants.SHORT;

        // general purpose flags; they decide which encoding is used for name and comment
        final GeneralPurposeBit flags = GeneralPurposeBit.parse(cfhBuf, pos);
        final boolean utf8Names = flags.usesUTF8ForNames();
        final ZipEncoding entryEncoding;
        if (utf8Names) {
            entryEncoding = ZipEncodingHelper.UTF8_ZIP_ENCODING;
            entry.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
        } else {
            entryEncoding = zipEncoding;
        }
        entry.setGeneralPurposeBit(flags);
        entry.setRawFlag(ZipShort.getValue(cfhBuf, pos));
        pos += ZipConstants.SHORT;

        // compression method
        //noinspection MagicConstant
        entry.setMethod(ZipShort.getValue(cfhBuf, pos));
        pos += ZipConstants.SHORT;

        // modification time, converted from DOS time
        entry.setTime(ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, pos)));
        pos += ZipConstants.WORD;

        // CRC
        entry.setCrc(ZipLong.getValue(cfhBuf, pos));
        pos += ZipConstants.WORD;

        // compressed size
        final long compressedSize = ZipLong.getValue(cfhBuf, pos);
        if (compressedSize < 0) {
            throw new IOException("broken archive, entry with negative compressed size");
        }
        entry.setCompressedSize(compressedSize);
        pos += ZipConstants.WORD;

        // uncompressed size
        final long uncompressedSize = ZipLong.getValue(cfhBuf, pos);
        if (uncompressedSize < 0) {
            throw new IOException("broken archive, entry with negative size");
        }
        entry.setSize(uncompressedSize);
        pos += ZipConstants.WORD;

        // lengths of the variable-size parts that follow the fixed-size header
        final int fileNameLen = ZipShort.getValue(cfhBuf, pos);
        pos += ZipConstants.SHORT;
        if (fileNameLen < 0) {
            throw new IOException("broken archive, entry with negative fileNameLen");
        }

        final int extraLen = ZipShort.getValue(cfhBuf, pos);
        pos += ZipConstants.SHORT;
        if (extraLen < 0) {
            throw new IOException("broken archive, entry with negative extraLen");
        }

        final int commentLen = ZipShort.getValue(cfhBuf, pos);
        pos += ZipConstants.SHORT;
        if (commentLen < 0) {
            throw new IOException("broken archive, entry with negative commentLen");
        }

        entry.setDiskNumberStart(ZipShort.getValue(cfhBuf, pos));
        pos += ZipConstants.SHORT;

        entry.setInternalAttributes(ZipShort.getValue(cfhBuf, pos));
        pos += ZipConstants.SHORT;

        entry.setExternalAttributes(ZipLong.getValue(cfhBuf, pos));
        pos += ZipConstants.WORD;

        // the name follows the fixed-size header in the archive
        final byte[] rawName = readRangeOrThrow(fileNameLen);
        entry.setName(entryEncoding.decode(rawName), rawName);

        // LFH offset,
        entry.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, pos) + firstLocalFileHeaderOffset);
        // data offset will be filled later
        entries.add(entry);

        final byte[] cdExtraData = readRangeOrThrow(extraLen);
        try {
            entry.setCentralDirectoryExtra(cdExtraData);
        } catch (final RuntimeException ex) {
            final ZipException invalidExtra = new ZipException("Invalid extra data in entry " + entry.getName());
            invalidExtra.initCause(ex);
            throw invalidExtra;
        }

        setSizesAndOffsetFromZip64Extra(entry);
        sanityCheckLFHOffset(entry);

        final byte[] rawComment = readRangeOrThrow(commentLen);
        entry.setComment(entryEncoding.decode(rawComment));

        // keep the raw bytes around so name and comment can be revisited later
        if (!utf8Names && useUnicodeExtraFields) {
            noUTF8Flag.put(entry, new NameAndComment(rawName, rawComment));
        }

        entry.setStreamContiguous(true);
    }

    /**
     * Reads {@code length} bytes from the archive's current position.
     *
     * @param length the number of bytes to read
     * @return the bytes read
     * @throws EOFException if fewer than {@code length} bytes were read
     * @throws IOException if reading fails
     */
    private byte[] readRangeOrThrow(final int length) throws IOException {
        final byte[] data = IOUtils.readRange(archive, length);
        if (data.length < length) {
            throw new EOFException();
        }
        return data;
    }

    /**
     * Walks through all recorded entries and adds the data available
     * from the local file header.
1369 * 1370 * <p>Also records the offsets for the data to read from the 1371 * entries.</p> 1372 */ 1373 private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment> 1374 entriesWithoutUTF8Flag) 1375 throws IOException { 1376 for (final ZipArchiveEntry zipArchiveEntry : entries) { 1377 // entries are filled in populateFromCentralDirectory and 1378 // never modified 1379 final Entry ze = (Entry) zipArchiveEntry; 1380 final int[] lens = setDataOffset(ze); 1381 final int fileNameLen = lens[0]; 1382 final int extraFieldLen = lens[1]; 1383 skipBytes(fileNameLen); 1384 final byte[] localExtraData = IOUtils.readRange(archive, extraFieldLen); 1385 if (localExtraData.length < extraFieldLen) { 1386 throw new EOFException(); 1387 } 1388 try { 1389 ze.setExtra(localExtraData); 1390 } catch (final RuntimeException ex) { 1391 final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName()); 1392 z.initCause(ex); 1393 throw z; 1394 } 1395 1396 if (entriesWithoutUTF8Flag.containsKey(ze)) { 1397 final NameAndComment nc = entriesWithoutUTF8Flag.get(ze); 1398 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name, 1399 nc.comment); 1400 } 1401 } 1402 } 1403 1404 private void sanityCheckLFHOffset(final ZipArchiveEntry ze) throws IOException { 1405 if (ze.getDiskNumberStart() < 0) { 1406 throw new IOException("broken archive, entry with negative disk number"); 1407 } 1408 if (ze.getLocalHeaderOffset() < 0) { 1409 throw new IOException("broken archive, entry with negative local file header offset"); 1410 } 1411 if (isSplitZipArchive) { 1412 if (ze.getDiskNumberStart() > centralDirectoryStartDiskNumber) { 1413 throw new IOException("local file header for " + ze.getName() + " starts on a later disk than central directory"); 1414 } 1415 if (ze.getDiskNumberStart() == centralDirectoryStartDiskNumber 1416 && ze.getLocalHeaderOffset() > centralDirectoryStartRelativeOffset) { 1417 throw new IOException("local file header for " + ze.getName() + " starts 
after central directory"); 1418 } 1419 } else if (ze.getLocalHeaderOffset() > centralDirectoryStartOffset) { 1420 throw new IOException("local file header for " + ze.getName() + " starts after central directory"); 1421 } 1422 } 1423 1424 private int[] setDataOffset(final ZipArchiveEntry ze) throws IOException { 1425 long offset = ze.getLocalHeaderOffset(); 1426 if (isSplitZipArchive) { 1427 ((ZipSplitReadOnlySeekableByteChannel) archive) 1428 .position(ze.getDiskNumberStart(), offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 1429 // the offset should be updated to the global offset 1430 offset = archive.position() - LFH_OFFSET_FOR_FILENAME_LENGTH; 1431 } else { 1432 archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 1433 } 1434 wordBbuf.rewind(); 1435 IOUtils.readFully(archive, wordBbuf); 1436 wordBbuf.flip(); 1437 wordBbuf.get(shortBuf); 1438 final int fileNameLen = ZipShort.getValue(shortBuf); 1439 wordBbuf.get(shortBuf); 1440 final int extraFieldLen = ZipShort.getValue(shortBuf); 1441 ze.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH 1442 + ZipConstants.SHORT + ZipConstants.SHORT + fileNameLen + extraFieldLen); 1443 if (ze.getDataOffset() + ze.getCompressedSize() > centralDirectoryStartOffset) { 1444 throw new IOException("data for " + ze.getName() + " overlaps with central directory."); 1445 } 1446 return new int[] { fileNameLen, extraFieldLen }; 1447 } 1448 1449 /** 1450 * If the entry holds a Zip64 extended information extra field, 1451 * read sizes from there if the entry's sizes are set to 1452 * 0xFFFFFFFFF, do the same for the offset of the local file 1453 * header. 
1454 * 1455 * <p>Ensures the Zip64 extra either knows both compressed and 1456 * uncompressed size or neither of both as the internal logic in 1457 * ExtraFieldUtils forces the field to create local header data 1458 * even if they are never used - and here a field with only one 1459 * size would be invalid.</p> 1460 */ 1461 private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry ze) 1462 throws IOException { 1463 final ZipExtraField extra = 1464 ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID); 1465 if (extra != null && !(extra instanceof Zip64ExtendedInformationExtraField)) { 1466 throw new ZipException("archive contains unparseable zip64 extra field"); 1467 } 1468 final Zip64ExtendedInformationExtraField z64 = 1469 (Zip64ExtendedInformationExtraField) extra; 1470 if (z64 != null) { 1471 final boolean hasUncompressedSize = ze.getSize() == ZipConstants.ZIP64_MAGIC; 1472 final boolean hasCompressedSize = ze.getCompressedSize() == ZipConstants.ZIP64_MAGIC; 1473 final boolean hasRelativeHeaderOffset = 1474 ze.getLocalHeaderOffset() == ZipConstants.ZIP64_MAGIC; 1475 final boolean hasDiskStart = ze.getDiskNumberStart() == ZipConstants.ZIP64_MAGIC_SHORT; 1476 z64.reparseCentralDirectoryData(hasUncompressedSize, 1477 hasCompressedSize, 1478 hasRelativeHeaderOffset, 1479 hasDiskStart); 1480 1481 if (hasUncompressedSize) { 1482 final long size = z64.getSize().getLongValue(); 1483 if (size < 0) { 1484 throw new IOException("broken archive, entry with negative size"); 1485 } 1486 ze.setSize(size); 1487 } else if (hasCompressedSize) { 1488 z64.setSize(new ZipEightByteInteger(ze.getSize())); 1489 } 1490 1491 if (hasCompressedSize) { 1492 final long size = z64.getCompressedSize().getLongValue(); 1493 if (size < 0) { 1494 throw new IOException("broken archive, entry with negative compressed size"); 1495 } 1496 ze.setCompressedSize(size); 1497 } else if (hasUncompressedSize) { 1498 z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize())); 
1499 } 1500 1501 if (hasRelativeHeaderOffset) { 1502 ze.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue()); 1503 } 1504 1505 if (hasDiskStart) { 1506 ze.setDiskNumberStart(z64.getDiskStartNumber().getValue()); 1507 } 1508 } 1509 } 1510 1511 /** 1512 * Skips the given number of bytes or throws an EOFException if 1513 * skipping failed. 1514 */ 1515 private void skipBytes(final int count) throws IOException { 1516 final long currentPosition = archive.position(); 1517 final long newPosition = currentPosition + count; 1518 if (newPosition > archive.size()) { 1519 throw new EOFException(); 1520 } 1521 archive.position(newPosition); 1522 } 1523 1524 /** 1525 * Checks whether the archive starts with an LFH. If it doesn't, 1526 * it may be an empty archive. 1527 */ 1528 private boolean startsWithLocalFileHeader() throws IOException { 1529 archive.position(firstLocalFileHeaderOffset); 1530 wordBbuf.rewind(); 1531 IOUtils.readFully(archive, wordBbuf); 1532 return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG); 1533 } 1534 1535 /** 1536 * Searches the archive backwards from minDistance to maxDistance 1537 * for the given signature, positions the RandomaccessFile right 1538 * at the signature if it has been found. 
*/
    private boolean tryToLocateSignature(final long minDistanceFromEnd,
                                         final long maxDistanceFromEnd,
                                         final byte[] sig) throws IOException {
        final long lowestCandidate = Math.max(0L, archive.size() - maxDistanceFromEnd);
        // lowestCandidate is never negative, so a negative start simply skips the loop
        for (long candidate = archive.size() - minDistanceFromEnd; candidate >= lowestCandidate; candidate--) {
            archive.position(candidate);
            try {
                wordBbuf.rewind();
                IOUtils.readFully(archive, wordBbuf);
                wordBbuf.flip();
            } catch (final EOFException ex) { // NOSONAR
                // less than a full word available: give up without repositioning
                return false;
            }
            // compare byte by byte, stopping at the first mismatch
            final boolean matches = wordBbuf.get() == sig[POS_0]
                && wordBbuf.get() == sig[POS_1]
                && wordBbuf.get() == sig[POS_2]
                && wordBbuf.get() == sig[POS_3];
            if (matches) {
                // reading moved the position, go back to the start of the signature
                archive.position(candidate);
                return true;
            }
        }
        return false;
    }
}