001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers; 020 021import java.io.ByteArrayInputStream; 022import java.io.IOException; 023import java.io.InputStream; 024import java.io.OutputStream; 025import java.security.AccessController; 026import java.security.PrivilegedAction; 027import java.util.Collections; 028import java.util.Locale; 029import java.util.ServiceLoader; 030import java.util.Set; 031import java.util.SortedMap; 032import java.util.TreeMap; 033 034import org.apache.commons.compress.archivers.ar.ArArchiveInputStream; 035import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream; 036import org.apache.commons.compress.archivers.arj.ArjArchiveInputStream; 037import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream; 038import org.apache.commons.compress.archivers.cpio.CpioArchiveOutputStream; 039import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream; 040import org.apache.commons.compress.archivers.jar.JarArchiveInputStream; 041import org.apache.commons.compress.archivers.jar.JarArchiveOutputStream; 042import org.apache.commons.compress.archivers.sevenz.SevenZFile; 043import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; 044import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; 045import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; 046import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; 047import org.apache.commons.compress.utils.IOUtils; 048import org.apache.commons.compress.utils.Sets; 049 050/** 051 * Factory to create Archive[In|Out]putStreams from names or the first bytes of the InputStream. In order to add other implementations, you should extend 052 * ArchiveStreamFactory and override the appropriate methods (and call their implementation from super of course). 053 * 054 * Compressing a ZIP-File: 055 * 056 * <pre> 057 * final OutputStream out = Files.newOutputStream(output.toPath()); 058 * ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream(ArchiveStreamFactory.ZIP, out); 059 * 060 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test1.xml")); 061 * IOUtils.copy(Files.newInputStream(file1.toPath()), os); 062 * os.closeArchiveEntry(); 063 * 064 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test2.xml")); 065 * IOUtils.copy(Files.newInputStream(file2.toPath()), os); 066 * os.closeArchiveEntry(); 067 * os.close(); 068 * </pre> 069 * 070 * Decompressing a ZIP-File: 071 * 072 * <pre> 073 * final InputStream is = Files.newInputStream(input.toPath()); 074 * ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, is); 075 * ZipArchiveEntry entry = (ZipArchiveEntry) in.getNextEntry(); 076 * OutputStream out = Files.newOutputStream(dir.toPath().resolve(entry.getName())); 077 * IOUtils.copy(in, out); 078 * out.close(); 079 * in.close(); 080 * </pre> 081 * 082 * @Immutable provided that the deprecated method setEntryEncoding is not used. 083 * @ThreadSafe even if the deprecated method setEntryEncoding is used 084 */ 085public class ArchiveStreamFactory implements ArchiveStreamProvider { 086 087 private static final int TAR_HEADER_SIZE = 512; 088 089 private static final int DUMP_SIGNATURE_SIZE = 32; 090 091 private static final int SIGNATURE_SIZE = 12; 092 093 /** 094 * The singleton instance using the platform default encoding. 095 * 096 * @since 1.21 097 */ 098 public static final ArchiveStreamFactory DEFAULT = new ArchiveStreamFactory(); 099 100 /** 101 * Constant (value {@value}) used to identify the APK archive format. 102 * <p> 103 * APK file extensions are .apk, .xapk, .apks, .apkm 104 * </p> 105 * 106 * @since 1.22 107 */ 108 public static final String APK = "apk"; 109 110 /** 111 * Constant (value {@value}) used to identify the XAPK archive format. 112 * <p> 113 * APK file extensions are .apk, .xapk, .apks, .apkm 114 * </p> 115 * 116 * @since 1.22 117 */ 118 public static final String XAPK = "xapk"; 119 120 /** 121 * Constant (value {@value}) used to identify the APKS archive format. 122 * <p> 123 * APK file extensions are .apk, .xapk, .apks, .apkm 124 * </p> 125 * 126 * @since 1.22 127 */ 128 public static final String APKS = "apks"; 129 130 /** 131 * Constant (value {@value}) used to identify the APKM archive format. 132 * <p> 133 * APK file extensions are .apk, .xapk, .apks, .apkm 134 * </p> 135 * 136 * @since 1.22 137 */ 138 public static final String APKM = "apkm"; 139 140 /** 141 * Constant (value {@value}) used to identify the AR archive format. 142 * 143 * @since 1.1 144 */ 145 public static final String AR = "ar"; 146 147 /** 148 * Constant (value {@value}) used to identify the ARJ archive format. Not supported as an output stream type. 149 * 150 * @since 1.6 151 */ 152 public static final String ARJ = "arj"; 153 154 /** 155 * Constant (value {@value}) used to identify the CPIO archive format. 156 * 157 * @since 1.1 158 */ 159 public static final String CPIO = "cpio"; 160 161 /** 162 * Constant (value {@value}) used to identify the Unix DUMP archive format. Not supported as an output stream type. 163 * 164 * @since 1.3 165 */ 166 public static final String DUMP = "dump"; 167 168 /** 169 * Constant (value {@value}) used to identify the JAR archive format. 170 * 171 * @since 1.1 172 */ 173 public static final String JAR = "jar"; 174 175 /** 176 * Constant used to identify the TAR archive format. 177 * 178 * @since 1.1 179 */ 180 public static final String TAR = "tar"; 181 182 /** 183 * Constant (value {@value}) used to identify the ZIP archive format. 184 * 185 * @since 1.1 186 */ 187 public static final String ZIP = "zip"; 188 189 /** 190 * Constant (value {@value}) used to identify the 7z archive format. 191 * 192 * @since 1.8 193 */ 194 public static final String SEVEN_Z = "7z"; 195 196 private static Iterable<ArchiveStreamProvider> archiveStreamProviderIterable() { 197 return ServiceLoader.load(ArchiveStreamProvider.class, ClassLoader.getSystemClassLoader()); 198 } 199 200 /** 201 * Try to determine the type of Archiver 202 * 203 * @param in input stream 204 * @return type of archiver if found 205 * @throws ArchiveException if an archiver cannot be detected in the stream 206 * @since 1.14 207 */ 208 public static String detect(final InputStream in) throws ArchiveException { 209 if (in == null) { 210 throw new IllegalArgumentException("Stream must not be null."); 211 } 212 213 if (!in.markSupported()) { 214 throw new IllegalArgumentException("Mark is not supported."); 215 } 216 217 final byte[] signature = new byte[SIGNATURE_SIZE]; 218 in.mark(signature.length); 219 int signatureLength = -1; 220 try { 221 signatureLength = IOUtils.readFully(in, signature); 222 in.reset(); 223 } catch (final IOException e) { 224 throw new ArchiveException("IOException while reading signature.", e); 225 } 226 227 if (ZipArchiveInputStream.matches(signature, signatureLength)) { 228 return ZIP; 229 } 230 if (JarArchiveInputStream.matches(signature, signatureLength)) { 231 return JAR; 232 } 233 if (ArArchiveInputStream.matches(signature, signatureLength)) { 234 return AR; 235 } 236 if (CpioArchiveInputStream.matches(signature, signatureLength)) { 237 return CPIO; 238 } 239 if (ArjArchiveInputStream.matches(signature, signatureLength)) { 240 return ARJ; 241 } 242 if (SevenZFile.matches(signature, signatureLength)) { 243 return SEVEN_Z; 244 } 245 246 // Dump needs a bigger buffer to check the signature; 247 final byte[] dumpsig = new byte[DUMP_SIGNATURE_SIZE]; 248 in.mark(dumpsig.length); 249 try { 250 signatureLength = IOUtils.readFully(in, dumpsig); 251 in.reset(); 252 } catch (final IOException e) { 253 throw new ArchiveException("IOException while reading dump signature", e); 254 } 255 if (DumpArchiveInputStream.matches(dumpsig, signatureLength)) { 256 return DUMP; 257 } 258 259 // Tar needs an even bigger buffer to check the signature; read the first block 260 final byte[] tarHeader = new byte[TAR_HEADER_SIZE]; 261 in.mark(tarHeader.length); 262 try { 263 signatureLength = IOUtils.readFully(in, tarHeader); 264 in.reset(); 265 } catch (final IOException e) { 266 throw new ArchiveException("IOException while reading tar signature", e); 267 } 268 if (TarArchiveInputStream.matches(tarHeader, signatureLength)) { 269 return TAR; 270 } 271 272 // COMPRESS-117 - improve auto-recognition 273 if (signatureLength >= TAR_HEADER_SIZE) { 274 try (TarArchiveInputStream tais = new TarArchiveInputStream(new ByteArrayInputStream(tarHeader))) { 275 // COMPRESS-191 - verify the header checksum 276 if (tais.getNextEntry().isCheckSumOK()) { 277 return TAR; 278 } 279 } catch (final Exception e) { // NOPMD NOSONAR 280 // can generate IllegalArgumentException as well 281 // as IOException 282 // autodetection, simply not a TAR 283 // ignored 284 } 285 } 286 throw new ArchiveException("No Archiver found for the stream signature"); 287 } 288 289 /** 290 * Constructs a new sorted map from input stream provider names to provider objects. 291 * 292 * <p> 293 * The map returned by this method will have one entry for each provider for which support is available in the current Java virtual machine. If two or more 294 * supported provider have the same name then the resulting map will contain just one of them; which one it will contain is not specified. 295 * </p> 296 * 297 * <p> 298 * The invocation of this method, and the subsequent use of the resulting map, may cause time-consuming disk or network I/O operations to occur. This method 299 * is provided for applications that need to enumerate all of the available providers, for example to allow user provider selection. 300 * </p> 301 * 302 * <p> 303 * This method may return different results at different times if new providers are dynamically made available to the current Java virtual machine. 304 * </p> 305 * 306 * @return An immutable, map from names to provider objects 307 * @since 1.13 308 */ 309 public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveInputStreamProviders() { 310 return AccessController.doPrivileged((PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>) () -> { 311 final TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>(); 312 putAll(DEFAULT.getInputStreamArchiveNames(), DEFAULT, map); 313 archiveStreamProviderIterable().forEach(provider -> putAll(provider.getInputStreamArchiveNames(), provider, map)); 314 return map; 315 }); 316 } 317 318 /** 319 * Constructs a new sorted map from output stream provider names to provider objects. 320 * 321 * <p> 322 * The map returned by this method will have one entry for each provider for which support is available in the current Java virtual machine. If two or more 323 * supported provider have the same name then the resulting map will contain just one of them; which one it will contain is not specified. 324 * </p> 325 * 326 * <p> 327 * The invocation of this method, and the subsequent use of the resulting map, may cause time-consuming disk or network I/O operations to occur. This method 328 * is provided for applications that need to enumerate all of the available providers, for example to allow user provider selection. 329 * </p> 330 * 331 * <p> 332 * This method may return different results at different times if new providers are dynamically made available to the current Java virtual machine. 333 * </p> 334 * 335 * @return An immutable, map from names to provider objects 336 * @since 1.13 337 */ 338 public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveOutputStreamProviders() { 339 return AccessController.doPrivileged((PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>) () -> { 340 final TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>(); 341 putAll(DEFAULT.getOutputStreamArchiveNames(), DEFAULT, map); 342 archiveStreamProviderIterable().forEach(provider -> putAll(provider.getOutputStreamArchiveNames(), provider, map)); 343 return map; 344 }); 345 } 346 347 static void putAll(final Set<String> names, final ArchiveStreamProvider provider, final TreeMap<String, ArchiveStreamProvider> map) { 348 names.forEach(name -> map.put(toKey(name), provider)); 349 } 350 351 private static String toKey(final String name) { 352 return name.toUpperCase(Locale.ROOT); 353 } 354 355 /** 356 * Entry encoding, null for the platform default. 357 */ 358 private final String encoding; 359 360 /** 361 * Entry encoding, null for the default. 362 */ 363 private volatile String entryEncoding; 364 365 private SortedMap<String, ArchiveStreamProvider> archiveInputStreamProviders; 366 367 private SortedMap<String, ArchiveStreamProvider> archiveOutputStreamProviders; 368 369 /** 370 * Create an instance using the platform default encoding. 371 */ 372 public ArchiveStreamFactory() { 373 this(null); 374 } 375 376 /** 377 * Create an instance using the specified encoding. 378 * 379 * @param encoding the encoding to be used. 380 * 381 * @since 1.10 382 */ 383 public ArchiveStreamFactory(final String encoding) { 384 this.encoding = encoding; 385 // Also set the original field so can continue to use it. 386 this.entryEncoding = encoding; 387 } 388 389 /** 390 * Create an archive input stream from an input stream, autodetecting the archive type from the first few bytes of the stream. The InputStream must support 391 * marks, like BufferedInputStream. 392 * 393 * @param <I> The {@link ArchiveInputStream} type. 394 * @param in the input stream 395 * @return the archive input stream 396 * @throws ArchiveException if the archiver name is not known 397 * @throws StreamingNotSupportedException if the format cannot be read from a stream 398 * @throws IllegalArgumentException if the stream is null or does not support mark 399 */ 400 public <I extends ArchiveInputStream<? extends ArchiveEntry>> I createArchiveInputStream(final InputStream in) throws ArchiveException { 401 return createArchiveInputStream(detect(in), in); 402 } 403 404 /** 405 * Creates an archive input stream from an archiver name and an input stream. 406 * 407 * @param <I> The {@link ArchiveInputStream} type. 408 * @param archiverName the archive name, i.e. {@value #AR}, {@value #ARJ}, {@value #ZIP}, {@value #TAR}, {@value #JAR}, {@value #CPIO}, {@value #DUMP} or 409 * {@value #SEVEN_Z} 410 * @param in the input stream 411 * @return the archive input stream 412 * @throws ArchiveException if the archiver name is not known 413 * @throws StreamingNotSupportedException if the format cannot be read from a stream 414 * @throws IllegalArgumentException if the archiver name or stream is null 415 */ 416 public <I extends ArchiveInputStream<? extends ArchiveEntry>> I createArchiveInputStream(final String archiverName, final InputStream in) 417 throws ArchiveException { 418 return createArchiveInputStream(archiverName, in, entryEncoding); 419 } 420 421 @SuppressWarnings("unchecked") 422 @Override 423 public <I extends ArchiveInputStream<? extends ArchiveEntry>> I createArchiveInputStream(final String archiverName, final InputStream in, 424 final String actualEncoding) throws ArchiveException { 425 426 if (archiverName == null) { 427 throw new IllegalArgumentException("Archiver name must not be null."); 428 } 429 430 if (in == null) { 431 throw new IllegalArgumentException("InputStream must not be null."); 432 } 433 434 if (AR.equalsIgnoreCase(archiverName)) { 435 return (I) new ArArchiveInputStream(in); 436 } 437 if (ARJ.equalsIgnoreCase(archiverName)) { 438 if (actualEncoding != null) { 439 return (I) new ArjArchiveInputStream(in, actualEncoding); 440 } 441 return (I) new ArjArchiveInputStream(in); 442 } 443 if (ZIP.equalsIgnoreCase(archiverName)) { 444 if (actualEncoding != null) { 445 return (I) new ZipArchiveInputStream(in, actualEncoding); 446 } 447 return (I) new ZipArchiveInputStream(in); 448 } 449 if (TAR.equalsIgnoreCase(archiverName)) { 450 if (actualEncoding != null) { 451 return (I) new TarArchiveInputStream(in, actualEncoding); 452 } 453 return (I) new TarArchiveInputStream(in); 454 } 455 if (JAR.equalsIgnoreCase(archiverName) || APK.equalsIgnoreCase(archiverName)) { 456 if (actualEncoding != null) { 457 return (I) new JarArchiveInputStream(in, actualEncoding); 458 } 459 return (I) new JarArchiveInputStream(in); 460 } 461 if (CPIO.equalsIgnoreCase(archiverName)) { 462 if (actualEncoding != null) { 463 return (I) new CpioArchiveInputStream(in, actualEncoding); 464 } 465 return (I) new CpioArchiveInputStream(in); 466 } 467 if (DUMP.equalsIgnoreCase(archiverName)) { 468 if (actualEncoding != null) { 469 return (I) new DumpArchiveInputStream(in, actualEncoding); 470 } 471 return (I) new DumpArchiveInputStream(in); 472 } 473 if (SEVEN_Z.equalsIgnoreCase(archiverName)) { 474 throw new StreamingNotSupportedException(SEVEN_Z); 475 } 476 477 final ArchiveStreamProvider archiveStreamProvider = getArchiveInputStreamProviders().get(toKey(archiverName)); 478 if (archiveStreamProvider != null) { 479 return archiveStreamProvider.createArchiveInputStream(archiverName, in, actualEncoding); 480 } 481 482 throw new ArchiveException("Archiver: " + archiverName + " not found."); 483 } 484 485 /** 486 * Creates an archive output stream from an archiver name and an output stream. 487 * 488 * @param <O> The {@link ArchiveOutputStream} type. 489 * @param archiverName the archive name, i.e. {@value #AR}, {@value #ZIP}, {@value #TAR}, {@value #JAR} or {@value #CPIO} 490 * @param out the output stream 491 * @return the archive output stream 492 * @throws ArchiveException if the archiver name is not known 493 * @throws StreamingNotSupportedException if the format cannot be written to a stream 494 * @throws IllegalArgumentException if the archiver name or stream is null 495 */ 496 public <O extends ArchiveOutputStream<? extends ArchiveEntry>> O createArchiveOutputStream(final String archiverName, final OutputStream out) 497 throws ArchiveException { 498 return createArchiveOutputStream(archiverName, out, entryEncoding); 499 } 500 501 @SuppressWarnings("unchecked") 502 @Override 503 public <O extends ArchiveOutputStream<? extends ArchiveEntry>> O createArchiveOutputStream(final String archiverName, final OutputStream out, 504 final String actualEncoding) throws ArchiveException { 505 if (archiverName == null) { 506 throw new IllegalArgumentException("Archiver name must not be null."); 507 } 508 if (out == null) { 509 throw new IllegalArgumentException("OutputStream must not be null."); 510 } 511 512 if (AR.equalsIgnoreCase(archiverName)) { 513 return (O) new ArArchiveOutputStream(out); 514 } 515 if (ZIP.equalsIgnoreCase(archiverName)) { 516 final ZipArchiveOutputStream zip = new ZipArchiveOutputStream(out); 517 if (actualEncoding != null) { 518 zip.setEncoding(actualEncoding); 519 } 520 return (O) zip; 521 } 522 if (TAR.equalsIgnoreCase(archiverName)) { 523 if (actualEncoding != null) { 524 return (O) new TarArchiveOutputStream(out, actualEncoding); 525 } 526 return (O) new TarArchiveOutputStream(out); 527 } 528 if (JAR.equalsIgnoreCase(archiverName)) { 529 if (actualEncoding != null) { 530 return (O) new JarArchiveOutputStream(out, actualEncoding); 531 } 532 return (O) new JarArchiveOutputStream(out); 533 } 534 if (CPIO.equalsIgnoreCase(archiverName)) { 535 if (actualEncoding != null) { 536 return (O) new CpioArchiveOutputStream(out, actualEncoding); 537 } 538 return (O) new CpioArchiveOutputStream(out); 539 } 540 if (SEVEN_Z.equalsIgnoreCase(archiverName)) { 541 throw new StreamingNotSupportedException(SEVEN_Z); 542 } 543 544 final ArchiveStreamProvider archiveStreamProvider = getArchiveOutputStreamProviders().get(toKey(archiverName)); 545 if (archiveStreamProvider != null) { 546 return archiveStreamProvider.createArchiveOutputStream(archiverName, out, actualEncoding); 547 } 548 549 throw new ArchiveException("Archiver: " + archiverName + " not found."); 550 } 551 552 public SortedMap<String, ArchiveStreamProvider> getArchiveInputStreamProviders() { 553 if (archiveInputStreamProviders == null) { 554 archiveInputStreamProviders = Collections.unmodifiableSortedMap(findAvailableArchiveInputStreamProviders()); 555 } 556 return archiveInputStreamProviders; 557 } 558 559 public SortedMap<String, ArchiveStreamProvider> getArchiveOutputStreamProviders() { 560 if (archiveOutputStreamProviders == null) { 561 archiveOutputStreamProviders = Collections.unmodifiableSortedMap(findAvailableArchiveOutputStreamProviders()); 562 } 563 return archiveOutputStreamProviders; 564 } 565 566 /** 567 * Gets the encoding to use for arj, jar, ZIP, dump, cpio and tar files, or null for the archiver default. 568 * 569 * @return entry encoding, or null for the archiver default 570 * @since 1.5 571 */ 572 public String getEntryEncoding() { 573 return entryEncoding; 574 } 575 576 @Override 577 public Set<String> getInputStreamArchiveNames() { 578 return Sets.newHashSet(AR, ARJ, ZIP, TAR, JAR, CPIO, DUMP, SEVEN_Z); 579 } 580 581 @Override 582 public Set<String> getOutputStreamArchiveNames() { 583 return Sets.newHashSet(AR, ZIP, TAR, JAR, CPIO, SEVEN_Z); 584 } 585 586 /** 587 * Sets the encoding to use for arj, jar, ZIP, dump, cpio and tar files. Use null for the archiver default. 588 * 589 * @param entryEncoding the entry encoding, null uses the archiver default. 590 * @since 1.5 591 * @deprecated 1.10 use {@link #ArchiveStreamFactory(String)} to specify the encoding 592 * @throws IllegalStateException if the constructor {@link #ArchiveStreamFactory(String)} was used to specify the factory encoding. 593 */ 594 @Deprecated 595 public void setEntryEncoding(final String entryEncoding) { 596 // Note: this does not detect new ArchiveStreamFactory(null) but that does not set the encoding anyway 597 if (encoding != null) { 598 throw new IllegalStateException("Cannot overide encoding set by the constructor"); 599 } 600 this.entryEncoding = entryEncoding; 601 } 602 603}