001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 */
017
018package org.apache.commons.compress.archivers.zip;
019
020import java.io.File;
021import java.io.IOException;
022import java.io.Serializable;
023import java.nio.ByteBuffer;
024import java.nio.channels.SeekableByteChannel;
025import java.nio.file.Files;
026import java.nio.file.Path;
027import java.nio.file.StandardOpenOption;
028import java.util.ArrayList;
029import java.util.Arrays;
030import java.util.Comparator;
031import java.util.List;
032import java.util.Objects;
033import java.util.regex.Pattern;
034import java.util.stream.Collectors;
035import java.util.stream.Stream;
036
037import org.apache.commons.compress.archivers.ArchiveStreamFactory;
038import org.apache.commons.compress.utils.FileNameUtils;
039import org.apache.commons.compress.utils.MultiReadOnlySeekableByteChannel;
040
041/**
042 * {@link MultiReadOnlySeekableByteChannel} that knows what a split ZIP archive should look like.
043 *
044 * <p>If you want to read a split archive using {@link ZipFile} then create an instance of this class from the parts of
045 * the archive.</p>
046 *
047 * @since 1.20
048 */
049public class ZipSplitReadOnlySeekableByteChannel extends MultiReadOnlySeekableByteChannel {
050
051    private static class ZipSplitSegmentComparator implements Comparator<Path>, Serializable {
052        private static final long serialVersionUID = 20200123L;
053
054        @Override
055        public int compare(final Path file1, final Path file2) {
056            final String extension1 = FileNameUtils.getExtension(file1);
057            final String extension2 = FileNameUtils.getExtension(file2);
058
059            if (!extension1.startsWith("z")) {
060                return -1;
061            }
062
063            if (!extension2.startsWith("z")) {
064                return 1;
065            }
066
067            final Integer splitSegmentNumber1 = Integer.parseInt(extension1.substring(1));
068            final Integer splitSegmentNumber2 = Integer.parseInt(extension2.substring(1));
069
070            return splitSegmentNumber1.compareTo(splitSegmentNumber2);
071        }
072    }
073    private static final Path[] EMPTY_PATH_ARRAY = {};
074    private static final int ZIP_SPLIT_SIGNATURE_LENGTH = 4;
075
076    /**
077     * Concatenates ZIP split files from the last segment(the extension SHOULD be .zip)
078     *
079     * @param lastSegmentFile the last segment of ZIP split files, note that the extension SHOULD be .zip
080     * @return SeekableByteChannel that concatenates all ZIP split files
081     * @throws IllegalArgumentException if the lastSegmentFile's extension is NOT .zip
082     * @throws IOException if the first channel doesn't seem to hold
083     * the beginning of a split archive
084     */
085    public static SeekableByteChannel buildFromLastSplitSegment(final File lastSegmentFile) throws IOException {
086        return buildFromLastSplitSegment(lastSegmentFile.toPath());
087    }
088
089    /**
090     * Concatenates ZIP split files from the last segment (the extension MUST be .zip)
091     * @param lastSegmentPath the last segment of ZIP split files, note that the extension MUST be .zip
092     * @return SeekableByteChannel that concatenates all ZIP split files
093     * @throws IllegalArgumentException if the lastSegmentPath's extension is NOT .zip
094     * @throws IOException if the first channel doesn't seem to hold
095     * the beginning of a split archive
096     * @since 1.22
097     */
098    public static SeekableByteChannel buildFromLastSplitSegment(final Path lastSegmentPath) throws IOException {
099        final String extension = FileNameUtils.getExtension(lastSegmentPath);
100        if (!extension.equalsIgnoreCase(ArchiveStreamFactory.ZIP)) {
101            throw new IllegalArgumentException("The extension of last ZIP split segment should be .zip");
102        }
103
104        final Path parent = Objects.nonNull(lastSegmentPath.getParent()) ? lastSegmentPath.getParent()
105                : lastSegmentPath.getFileSystem().getPath(".");
106        final String fileBaseName = FileNameUtils.getBaseName(lastSegmentPath);
107        final ArrayList<Path> splitZipSegments;
108
109        // ZIP split segments should be like z01,z02....z(n-1) based on the ZIP specification
110        final Pattern pattern = Pattern.compile(Pattern.quote(fileBaseName) + ".[zZ][0-9]+");
111        try (Stream<Path> walk = Files.walk(parent, 1)) {
112            splitZipSegments = walk
113                    .filter(Files::isRegularFile)
114                    .filter(path -> pattern.matcher(path.getFileName().toString()).matches())
115                    .sorted(new ZipSplitSegmentComparator())
116                    .collect(Collectors.toCollection(ArrayList::new));
117        }
118
119        return forPaths(lastSegmentPath, splitZipSegments);
120    }
121
122    /**
123     * Concatenates the given files.
124     *
125     * @param files the files to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip)
126     *              and these files should be added in correct order (e.g. .z01, .z02... .z99, .zip)
127     * @return SeekableByteChannel that concatenates all provided files
128     * @throws NullPointerException if files is null
129     * @throws IOException          if opening a channel for one of the files fails
130     * @throws IOException if the first channel doesn't seem to hold
131     * the beginning of a split archive
132     */
133    public static SeekableByteChannel forFiles(final File... files) throws IOException {
134        final List<Path> paths = new ArrayList<>();
135        for (final File f : Objects.requireNonNull(files, "files must not be null")) {
136            paths.add(f.toPath());
137        }
138
139        return forPaths(paths.toArray(EMPTY_PATH_ARRAY));
140    }
141
142    /**
143     * Concatenates the given files.
144     *
145     * @param lastSegmentFile the last segment of split ZIP segments, its extension should be .zip
146     * @param files           the files to concatenate except for the last segment,
147     *                        note these files should be added in correct order (e.g. .z01, .z02... .z99)
148     * @return SeekableByteChannel that concatenates all provided files
149     * @throws IOException if the first channel doesn't seem to hold
150     * the beginning of a split archive
151     * @throws NullPointerException if files or lastSegmentFile is null
152     */
153    public static SeekableByteChannel forFiles(final File lastSegmentFile, final Iterable<File> files) throws IOException {
154        Objects.requireNonNull(files, "files");
155        Objects.requireNonNull(lastSegmentFile, "lastSegmentFile");
156
157        final List<Path> filesList = new ArrayList<>();
158        files.forEach(f -> filesList.add(f.toPath()));
159
160        return forPaths(lastSegmentFile.toPath(), filesList);
161    }
162
163    /**
164     * Concatenates the given channels.
165     *
166     * @param channels the channels to concatenate, note that the LAST CHANNEL of channels should be the LAST SEGMENT(.zip)
167     *                 and these channels should be added in correct order (e.g. .z01, .z02... .z99, .zip)
168     * @return SeekableByteChannel that concatenates all provided channels
169     * @throws NullPointerException if channels is null
170     * @throws IOException if reading channels fails
171     */
172    public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel... channels) throws IOException {
173        if (Objects.requireNonNull(channels, "channels must not be null").length == 1) {
174            return channels[0];
175        }
176        return new ZipSplitReadOnlySeekableByteChannel(Arrays.asList(channels));
177    }
178
179    /**
180     * Concatenates the given channels.
181     *
182     * @param lastSegmentChannel channel of the last segment of split ZIP segments, its extension should be .zip
183     * @param channels           the channels to concatenate except for the last segment,
184     *                           note these channels should be added in correct order (e.g. .z01, .z02... .z99)
185     * @return SeekableByteChannel that concatenates all provided channels
186     * @throws NullPointerException if lastSegmentChannel or channels is null
187     * @throws IOException if the first channel doesn't seem to hold
188     * the beginning of a split archive
189     */
190    public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel lastSegmentChannel,
191        final Iterable<SeekableByteChannel> channels) throws IOException {
192        Objects.requireNonNull(channels, "channels");
193        Objects.requireNonNull(lastSegmentChannel, "lastSegmentChannel");
194
195        final List<SeekableByteChannel> channelsList = new ArrayList<>();
196        channels.forEach(channelsList::add);
197        channelsList.add(lastSegmentChannel);
198
199        return forOrderedSeekableByteChannels(channelsList.toArray(new SeekableByteChannel[0]));
200    }
201
202    /**
203     * Concatenates the given file paths.
204     * @param paths the file paths to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip)
205     * and these files should be added in correct order (e.g.: .z01, .z02... .z99, .zip)
206     * @return SeekableByteChannel that concatenates all provided files
207     * @throws NullPointerException if files is null
208     * @throws IOException if opening a channel for one of the files fails
209     * @throws IOException if the first channel doesn't seem to hold
210     * the beginning of a split archive
211     * @since 1.22
212     */
213    public static SeekableByteChannel forPaths(final Path... paths) throws IOException {
214        final List<SeekableByteChannel> channels = new ArrayList<>();
215        for (final Path path : Objects.requireNonNull(paths, "paths must not be null")) {
216            channels.add(Files.newByteChannel(path, StandardOpenOption.READ));
217        }
218        if (channels.size() == 1) {
219            return channels.get(0);
220        }
221        return new ZipSplitReadOnlySeekableByteChannel(channels);
222    }
223
224    /**
225     * Concatenates the given file paths.
226     * @param lastSegmentPath the last segment path of split ZIP segments, its extension must be .zip
227     * @param paths the file paths to concatenate except for the last segment,
228     * note these files should be added in correct order (e.g.: .z01, .z02... .z99)
229     * @return SeekableByteChannel that concatenates all provided files
230     * @throws IOException if the first channel doesn't seem to hold
231     * the beginning of a split archive
232     * @throws NullPointerException if files or lastSegmentPath is null
233     * @since 1.22
234     */
235    public static SeekableByteChannel forPaths(final Path lastSegmentPath, final Iterable<Path> paths) throws IOException {
236        Objects.requireNonNull(paths, "paths");
237        Objects.requireNonNull(lastSegmentPath, "lastSegmentPath");
238
239        final List<Path> filesList = new ArrayList<>();
240        paths.forEach(filesList::add);
241        filesList.add(lastSegmentPath);
242
243        return forPaths(filesList.toArray(EMPTY_PATH_ARRAY));
244    }
245
246    private final ByteBuffer zipSplitSignatureByteBuffer =
247        ByteBuffer.allocate(ZIP_SPLIT_SIGNATURE_LENGTH);
248
249    /**
250     * Concatenates the given channels.
251     *
252     * <p>The channels should be add in ascending order, e.g. z01,
253     * z02, ... z99, ZIP please note that the .zip file is the last
254     * segment and should be added as the last one in the channels</p>
255     *
256     * @param channels the channels to concatenate
257     * @throws NullPointerException if channels is null
258     * @throws IOException if the first channel doesn't seem to hold
259     * the beginning of a split archive
260     */
261    public ZipSplitReadOnlySeekableByteChannel(final List<SeekableByteChannel> channels)
262        throws IOException {
263        super(channels);
264
265        // the first split ZIP segment should begin with ZIP split signature
266        assertSplitSignature(channels);
267    }
268
269    /**
270     * Based on the ZIP specification:
271     *
272     * <p>
273     * 8.5.3 Spanned/Split archives created using PKZIP for Windows
274     * (V2.50 or greater), PKZIP Command Line (V2.50 or greater),
275     * or PKZIP Explorer will include a special spanning
276     * signature as the first 4 bytes of the first segment of
277     * the archive.  This signature (0x08074b50) will be
278     * followed immediately by the local header signature for
279     * the first file in the archive.
280     *
281     * <p>
282     * the first 4 bytes of the first ZIP split segment should be the ZIP split signature(0x08074B50)
283     *
284     * @param channels channels to be validated
285     * @throws IOException
286     */
287    private void assertSplitSignature(final List<SeekableByteChannel> channels)
288        throws IOException {
289        final SeekableByteChannel channel = channels.get(0);
290        // the ZIP split file signature is at the beginning of the first split segment
291        channel.position(0L);
292
293        zipSplitSignatureByteBuffer.rewind();
294        channel.read(zipSplitSignatureByteBuffer);
295        final ZipLong signature = new ZipLong(zipSplitSignatureByteBuffer.array());
296        if (!signature.equals(ZipLong.DD_SIG)) {
297            channel.position(0L);
298            throw new IOException("The first ZIP split segment does not begin with split ZIP file signature");
299        }
300
301        channel.position(0L);
302    }
303}