001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers;
020
021import java.io.ByteArrayInputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.io.OutputStream;
025import java.security.AccessController;
026import java.security.PrivilegedAction;
027import java.util.Collections;
028import java.util.Locale;
029import java.util.ServiceLoader;
030import java.util.Set;
031import java.util.SortedMap;
032import java.util.TreeMap;
033
034import org.apache.commons.compress.archivers.ar.ArArchiveInputStream;
035import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream;
036import org.apache.commons.compress.archivers.arj.ArjArchiveInputStream;
037import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream;
038import org.apache.commons.compress.archivers.cpio.CpioArchiveOutputStream;
039import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream;
040import org.apache.commons.compress.archivers.jar.JarArchiveInputStream;
041import org.apache.commons.compress.archivers.jar.JarArchiveOutputStream;
042import org.apache.commons.compress.archivers.sevenz.SevenZFile;
043import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
044import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
045import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
046import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
047import org.apache.commons.compress.utils.IOUtils;
048import org.apache.commons.compress.utils.Sets;
049
050/**
051 * Factory to create Archive[In|Out]putStreams from names or the first bytes of the InputStream. In order to add other implementations, you should extend
052 * ArchiveStreamFactory and override the appropriate methods (and call their implementation from super of course).
053 *
054 * Compressing a ZIP-File:
055 *
056 * <pre>
057 * final OutputStream out = Files.newOutputStream(output.toPath());
058 * ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream(ArchiveStreamFactory.ZIP, out);
059 *
060 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test1.xml"));
061 * IOUtils.copy(Files.newInputStream(file1.toPath()), os);
062 * os.closeArchiveEntry();
063 *
064 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test2.xml"));
065 * IOUtils.copy(Files.newInputStream(file2.toPath()), os);
066 * os.closeArchiveEntry();
067 * os.close();
068 * </pre>
069 *
070 * Decompressing a ZIP-File:
071 *
072 * <pre>
073 * final InputStream is = Files.newInputStream(input.toPath());
074 * ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, is);
075 * ZipArchiveEntry entry = (ZipArchiveEntry) in.getNextEntry();
076 * OutputStream out = Files.newOutputStream(dir.toPath().resolve(entry.getName()));
077 * IOUtils.copy(in, out);
078 * out.close();
079 * in.close();
080 * </pre>
081 * 
082 * @Immutable provided that the deprecated method setEntryEncoding is not used.
083 * @ThreadSafe even if the deprecated method setEntryEncoding is used
084 */
085public class ArchiveStreamFactory implements ArchiveStreamProvider {
086
087    private static final int TAR_HEADER_SIZE = 512;
088
089    private static final int DUMP_SIGNATURE_SIZE = 32;
090
091    private static final int SIGNATURE_SIZE = 12;
092
093    /**
094     * The singleton instance using the platform default encoding.
095     * 
096     * @since 1.21
097     */
098    public static final ArchiveStreamFactory DEFAULT = new ArchiveStreamFactory();
099
100    /**
101     * Constant (value {@value}) used to identify the APK archive format.
102     * <p>
103     * APK file extensions are .apk, .xapk, .apks, .apkm
104     * </p>
105     *
106     * @since 1.22
107     */
108    public static final String APK = "apk";
109
110    /**
111     * Constant (value {@value}) used to identify the XAPK archive format.
112     * <p>
113     * APK file extensions are .apk, .xapk, .apks, .apkm
114     * </p>
115     *
116     * @since 1.22
117     */
118    public static final String XAPK = "xapk";
119
120    /**
121     * Constant (value {@value}) used to identify the APKS archive format.
122     * <p>
123     * APK file extensions are .apk, .xapk, .apks, .apkm
124     * </p>
125     *
126     * @since 1.22
127     */
128    public static final String APKS = "apks";
129
130    /**
131     * Constant (value {@value}) used to identify the APKM archive format.
132     * <p>
133     * APK file extensions are .apk, .xapk, .apks, .apkm
134     * </p>
135     *
136     * @since 1.22
137     */
138    public static final String APKM = "apkm";
139
140    /**
141     * Constant (value {@value}) used to identify the AR archive format.
142     * 
143     * @since 1.1
144     */
145    public static final String AR = "ar";
146
147    /**
148     * Constant (value {@value}) used to identify the ARJ archive format. Not supported as an output stream type.
149     * 
150     * @since 1.6
151     */
152    public static final String ARJ = "arj";
153
154    /**
155     * Constant (value {@value}) used to identify the CPIO archive format.
156     * 
157     * @since 1.1
158     */
159    public static final String CPIO = "cpio";
160
161    /**
162     * Constant (value {@value}) used to identify the Unix DUMP archive format. Not supported as an output stream type.
163     * 
164     * @since 1.3
165     */
166    public static final String DUMP = "dump";
167
168    /**
169     * Constant (value {@value}) used to identify the JAR archive format.
170     * 
171     * @since 1.1
172     */
173    public static final String JAR = "jar";
174
175    /**
176     * Constant used to identify the TAR archive format.
177     * 
178     * @since 1.1
179     */
180    public static final String TAR = "tar";
181
182    /**
183     * Constant (value {@value}) used to identify the ZIP archive format.
184     * 
185     * @since 1.1
186     */
187    public static final String ZIP = "zip";
188
189    /**
190     * Constant (value {@value}) used to identify the 7z archive format.
191     * 
192     * @since 1.8
193     */
194    public static final String SEVEN_Z = "7z";
195
196    private static Iterable<ArchiveStreamProvider> archiveStreamProviderIterable() {
197        return ServiceLoader.load(ArchiveStreamProvider.class, ClassLoader.getSystemClassLoader());
198    }
199
200    /**
201     * Try to determine the type of Archiver
202     * 
203     * @param in input stream
204     * @return type of archiver if found
205     * @throws ArchiveException if an archiver cannot be detected in the stream
206     * @since 1.14
207     */
208    public static String detect(final InputStream in) throws ArchiveException {
209        if (in == null) {
210            throw new IllegalArgumentException("Stream must not be null.");
211        }
212
213        if (!in.markSupported()) {
214            throw new IllegalArgumentException("Mark is not supported.");
215        }
216
217        final byte[] signature = new byte[SIGNATURE_SIZE];
218        in.mark(signature.length);
219        int signatureLength = -1;
220        try {
221            signatureLength = IOUtils.readFully(in, signature);
222            in.reset();
223        } catch (final IOException e) {
224            throw new ArchiveException("IOException while reading signature.", e);
225        }
226
227        if (ZipArchiveInputStream.matches(signature, signatureLength)) {
228            return ZIP;
229        }
230        if (JarArchiveInputStream.matches(signature, signatureLength)) {
231            return JAR;
232        }
233        if (ArArchiveInputStream.matches(signature, signatureLength)) {
234            return AR;
235        }
236        if (CpioArchiveInputStream.matches(signature, signatureLength)) {
237            return CPIO;
238        }
239        if (ArjArchiveInputStream.matches(signature, signatureLength)) {
240            return ARJ;
241        }
242        if (SevenZFile.matches(signature, signatureLength)) {
243            return SEVEN_Z;
244        }
245
246        // Dump needs a bigger buffer to check the signature;
247        final byte[] dumpsig = new byte[DUMP_SIGNATURE_SIZE];
248        in.mark(dumpsig.length);
249        try {
250            signatureLength = IOUtils.readFully(in, dumpsig);
251            in.reset();
252        } catch (final IOException e) {
253            throw new ArchiveException("IOException while reading dump signature", e);
254        }
255        if (DumpArchiveInputStream.matches(dumpsig, signatureLength)) {
256            return DUMP;
257        }
258
259        // Tar needs an even bigger buffer to check the signature; read the first block
260        final byte[] tarHeader = new byte[TAR_HEADER_SIZE];
261        in.mark(tarHeader.length);
262        try {
263            signatureLength = IOUtils.readFully(in, tarHeader);
264            in.reset();
265        } catch (final IOException e) {
266            throw new ArchiveException("IOException while reading tar signature", e);
267        }
268        if (TarArchiveInputStream.matches(tarHeader, signatureLength)) {
269            return TAR;
270        }
271
272        // COMPRESS-117 - improve auto-recognition
273        if (signatureLength >= TAR_HEADER_SIZE) {
274            try (TarArchiveInputStream tais = new TarArchiveInputStream(new ByteArrayInputStream(tarHeader))) {
275                // COMPRESS-191 - verify the header checksum
276                if (tais.getNextEntry().isCheckSumOK()) {
277                    return TAR;
278                }
279            } catch (final Exception e) { // NOPMD NOSONAR
280                // can generate IllegalArgumentException as well
281                // as IOException
282                // autodetection, simply not a TAR
283                // ignored
284            }
285        }
286        throw new ArchiveException("No Archiver found for the stream signature");
287    }
288
289    /**
290     * Constructs a new sorted map from input stream provider names to provider objects.
291     *
292     * <p>
293     * The map returned by this method will have one entry for each provider for which support is available in the current Java virtual machine. If two or more
294     * supported provider have the same name then the resulting map will contain just one of them; which one it will contain is not specified.
295     * </p>
296     *
297     * <p>
298     * The invocation of this method, and the subsequent use of the resulting map, may cause time-consuming disk or network I/O operations to occur. This method
299     * is provided for applications that need to enumerate all of the available providers, for example to allow user provider selection.
300     * </p>
301     *
302     * <p>
303     * This method may return different results at different times if new providers are dynamically made available to the current Java virtual machine.
304     * </p>
305     *
306     * @return An immutable, map from names to provider objects
307     * @since 1.13
308     */
309    public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveInputStreamProviders() {
310        return AccessController.doPrivileged((PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>) () -> {
311            final TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>();
312            putAll(DEFAULT.getInputStreamArchiveNames(), DEFAULT, map);
313            archiveStreamProviderIterable().forEach(provider -> putAll(provider.getInputStreamArchiveNames(), provider, map));
314            return map;
315        });
316    }
317
318    /**
319     * Constructs a new sorted map from output stream provider names to provider objects.
320     *
321     * <p>
322     * The map returned by this method will have one entry for each provider for which support is available in the current Java virtual machine. If two or more
323     * supported provider have the same name then the resulting map will contain just one of them; which one it will contain is not specified.
324     * </p>
325     *
326     * <p>
327     * The invocation of this method, and the subsequent use of the resulting map, may cause time-consuming disk or network I/O operations to occur. This method
328     * is provided for applications that need to enumerate all of the available providers, for example to allow user provider selection.
329     * </p>
330     *
331     * <p>
332     * This method may return different results at different times if new providers are dynamically made available to the current Java virtual machine.
333     * </p>
334     *
335     * @return An immutable, map from names to provider objects
336     * @since 1.13
337     */
338    public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveOutputStreamProviders() {
339        return AccessController.doPrivileged((PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>) () -> {
340            final TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>();
341            putAll(DEFAULT.getOutputStreamArchiveNames(), DEFAULT, map);
342            archiveStreamProviderIterable().forEach(provider -> putAll(provider.getOutputStreamArchiveNames(), provider, map));
343            return map;
344        });
345    }
346
347    static void putAll(final Set<String> names, final ArchiveStreamProvider provider, final TreeMap<String, ArchiveStreamProvider> map) {
348        names.forEach(name -> map.put(toKey(name), provider));
349    }
350
351    private static String toKey(final String name) {
352        return name.toUpperCase(Locale.ROOT);
353    }
354
355    /**
356     * Entry encoding, null for the platform default.
357     */
358    private final String encoding;
359
360    /**
361     * Entry encoding, null for the default.
362     */
363    private volatile String entryEncoding;
364
365    private SortedMap<String, ArchiveStreamProvider> archiveInputStreamProviders;
366
367    private SortedMap<String, ArchiveStreamProvider> archiveOutputStreamProviders;
368
369    /**
370     * Create an instance using the platform default encoding.
371     */
372    public ArchiveStreamFactory() {
373        this(null);
374    }
375
376    /**
377     * Create an instance using the specified encoding.
378     *
379     * @param encoding the encoding to be used.
380     *
381     * @since 1.10
382     */
383    public ArchiveStreamFactory(final String encoding) {
384        this.encoding = encoding;
385        // Also set the original field so can continue to use it.
386        this.entryEncoding = encoding;
387    }
388
389    /**
390     * Create an archive input stream from an input stream, autodetecting the archive type from the first few bytes of the stream. The InputStream must support
391     * marks, like BufferedInputStream.
392     *
393     * @param <I> The {@link ArchiveInputStream} type.
394     * @param in  the input stream
395     * @return the archive input stream
396     * @throws ArchiveException               if the archiver name is not known
397     * @throws StreamingNotSupportedException if the format cannot be read from a stream
398     * @throws IllegalArgumentException       if the stream is null or does not support mark
399     */
400    public <I extends ArchiveInputStream<? extends ArchiveEntry>> I createArchiveInputStream(final InputStream in) throws ArchiveException {
401        return createArchiveInputStream(detect(in), in);
402    }
403
404    /**
405     * Creates an archive input stream from an archiver name and an input stream.
406     *
407     * @param <I>          The {@link ArchiveInputStream} type.
408     * @param archiverName the archive name, i.e. {@value #AR}, {@value #ARJ}, {@value #ZIP}, {@value #TAR}, {@value #JAR}, {@value #CPIO}, {@value #DUMP} or
409     *                     {@value #SEVEN_Z}
410     * @param in           the input stream
411     * @return the archive input stream
412     * @throws ArchiveException               if the archiver name is not known
413     * @throws StreamingNotSupportedException if the format cannot be read from a stream
414     * @throws IllegalArgumentException       if the archiver name or stream is null
415     */
416    public <I extends ArchiveInputStream<? extends ArchiveEntry>> I createArchiveInputStream(final String archiverName, final InputStream in)
417            throws ArchiveException {
418        return createArchiveInputStream(archiverName, in, entryEncoding);
419    }
420
421    @SuppressWarnings("unchecked")
422    @Override
423    public <I extends ArchiveInputStream<? extends ArchiveEntry>> I createArchiveInputStream(final String archiverName, final InputStream in,
424            final String actualEncoding) throws ArchiveException {
425
426        if (archiverName == null) {
427            throw new IllegalArgumentException("Archiver name must not be null.");
428        }
429
430        if (in == null) {
431            throw new IllegalArgumentException("InputStream must not be null.");
432        }
433
434        if (AR.equalsIgnoreCase(archiverName)) {
435            return (I) new ArArchiveInputStream(in);
436        }
437        if (ARJ.equalsIgnoreCase(archiverName)) {
438            if (actualEncoding != null) {
439                return (I) new ArjArchiveInputStream(in, actualEncoding);
440            }
441            return (I) new ArjArchiveInputStream(in);
442        }
443        if (ZIP.equalsIgnoreCase(archiverName)) {
444            if (actualEncoding != null) {
445                return (I) new ZipArchiveInputStream(in, actualEncoding);
446            }
447            return (I) new ZipArchiveInputStream(in);
448        }
449        if (TAR.equalsIgnoreCase(archiverName)) {
450            if (actualEncoding != null) {
451                return (I) new TarArchiveInputStream(in, actualEncoding);
452            }
453            return (I) new TarArchiveInputStream(in);
454        }
455        if (JAR.equalsIgnoreCase(archiverName) || APK.equalsIgnoreCase(archiverName)) {
456            if (actualEncoding != null) {
457                return (I) new JarArchiveInputStream(in, actualEncoding);
458            }
459            return (I) new JarArchiveInputStream(in);
460        }
461        if (CPIO.equalsIgnoreCase(archiverName)) {
462            if (actualEncoding != null) {
463                return (I) new CpioArchiveInputStream(in, actualEncoding);
464            }
465            return (I) new CpioArchiveInputStream(in);
466        }
467        if (DUMP.equalsIgnoreCase(archiverName)) {
468            if (actualEncoding != null) {
469                return (I) new DumpArchiveInputStream(in, actualEncoding);
470            }
471            return (I) new DumpArchiveInputStream(in);
472        }
473        if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
474            throw new StreamingNotSupportedException(SEVEN_Z);
475        }
476
477        final ArchiveStreamProvider archiveStreamProvider = getArchiveInputStreamProviders().get(toKey(archiverName));
478        if (archiveStreamProvider != null) {
479            return archiveStreamProvider.createArchiveInputStream(archiverName, in, actualEncoding);
480        }
481
482        throw new ArchiveException("Archiver: " + archiverName + " not found.");
483    }
484
485    /**
486     * Creates an archive output stream from an archiver name and an output stream.
487     *
488     * @param <O>          The {@link ArchiveOutputStream} type.
489     * @param archiverName the archive name, i.e. {@value #AR}, {@value #ZIP}, {@value #TAR}, {@value #JAR} or {@value #CPIO}
490     * @param out          the output stream
491     * @return the archive output stream
492     * @throws ArchiveException               if the archiver name is not known
493     * @throws StreamingNotSupportedException if the format cannot be written to a stream
494     * @throws IllegalArgumentException       if the archiver name or stream is null
495     */
496    public <O extends ArchiveOutputStream<? extends ArchiveEntry>> O createArchiveOutputStream(final String archiverName, final OutputStream out)
497            throws ArchiveException {
498        return createArchiveOutputStream(archiverName, out, entryEncoding);
499    }
500
501    @SuppressWarnings("unchecked")
502    @Override
503    public <O extends ArchiveOutputStream<? extends ArchiveEntry>> O createArchiveOutputStream(final String archiverName, final OutputStream out,
504            final String actualEncoding) throws ArchiveException {
505        if (archiverName == null) {
506            throw new IllegalArgumentException("Archiver name must not be null.");
507        }
508        if (out == null) {
509            throw new IllegalArgumentException("OutputStream must not be null.");
510        }
511
512        if (AR.equalsIgnoreCase(archiverName)) {
513            return (O) new ArArchiveOutputStream(out);
514        }
515        if (ZIP.equalsIgnoreCase(archiverName)) {
516            final ZipArchiveOutputStream zip = new ZipArchiveOutputStream(out);
517            if (actualEncoding != null) {
518                zip.setEncoding(actualEncoding);
519            }
520            return (O) zip;
521        }
522        if (TAR.equalsIgnoreCase(archiverName)) {
523            if (actualEncoding != null) {
524                return (O) new TarArchiveOutputStream(out, actualEncoding);
525            }
526            return (O) new TarArchiveOutputStream(out);
527        }
528        if (JAR.equalsIgnoreCase(archiverName)) {
529            if (actualEncoding != null) {
530                return (O) new JarArchiveOutputStream(out, actualEncoding);
531            }
532            return (O) new JarArchiveOutputStream(out);
533        }
534        if (CPIO.equalsIgnoreCase(archiverName)) {
535            if (actualEncoding != null) {
536                return (O) new CpioArchiveOutputStream(out, actualEncoding);
537            }
538            return (O) new CpioArchiveOutputStream(out);
539        }
540        if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
541            throw new StreamingNotSupportedException(SEVEN_Z);
542        }
543
544        final ArchiveStreamProvider archiveStreamProvider = getArchiveOutputStreamProviders().get(toKey(archiverName));
545        if (archiveStreamProvider != null) {
546            return archiveStreamProvider.createArchiveOutputStream(archiverName, out, actualEncoding);
547        }
548
549        throw new ArchiveException("Archiver: " + archiverName + " not found.");
550    }
551
552    public SortedMap<String, ArchiveStreamProvider> getArchiveInputStreamProviders() {
553        if (archiveInputStreamProviders == null) {
554            archiveInputStreamProviders = Collections.unmodifiableSortedMap(findAvailableArchiveInputStreamProviders());
555        }
556        return archiveInputStreamProviders;
557    }
558
559    public SortedMap<String, ArchiveStreamProvider> getArchiveOutputStreamProviders() {
560        if (archiveOutputStreamProviders == null) {
561            archiveOutputStreamProviders = Collections.unmodifiableSortedMap(findAvailableArchiveOutputStreamProviders());
562        }
563        return archiveOutputStreamProviders;
564    }
565
566    /**
567     * Gets the encoding to use for arj, jar, ZIP, dump, cpio and tar files, or null for the archiver default.
568     *
569     * @return entry encoding, or null for the archiver default
570     * @since 1.5
571     */
572    public String getEntryEncoding() {
573        return entryEncoding;
574    }
575
576    @Override
577    public Set<String> getInputStreamArchiveNames() {
578        return Sets.newHashSet(AR, ARJ, ZIP, TAR, JAR, CPIO, DUMP, SEVEN_Z);
579    }
580
581    @Override
582    public Set<String> getOutputStreamArchiveNames() {
583        return Sets.newHashSet(AR, ZIP, TAR, JAR, CPIO, SEVEN_Z);
584    }
585
586    /**
587     * Sets the encoding to use for arj, jar, ZIP, dump, cpio and tar files. Use null for the archiver default.
588     *
589     * @param entryEncoding the entry encoding, null uses the archiver default.
590     * @since 1.5
591     * @deprecated 1.10 use {@link #ArchiveStreamFactory(String)} to specify the encoding
592     * @throws IllegalStateException if the constructor {@link #ArchiveStreamFactory(String)} was used to specify the factory encoding.
593     */
594    @Deprecated
595    public void setEntryEncoding(final String entryEncoding) {
596        // Note: this does not detect new ArchiveStreamFactory(null) but that does not set the encoding anyway
597        if (encoding != null) {
598            throw new IllegalStateException("Cannot overide encoding set by the constructor");
599        }
600        this.entryEncoding = entryEncoding;
601    }
602
603}