001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.tar;
020
021import java.io.ByteArrayOutputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.io.UncheckedIOException;
025import java.math.BigInteger;
026import java.nio.ByteBuffer;
027import java.nio.charset.Charset;
028import java.nio.charset.StandardCharsets;
029import java.util.ArrayList;
030import java.util.Collections;
031import java.util.HashMap;
032import java.util.List;
033import java.util.Map;
034
035import org.apache.commons.compress.archivers.zip.ZipEncoding;
036import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
037import org.apache.commons.compress.utils.CharsetNames;
038import org.apache.commons.compress.utils.IOUtils;
039
040/**
041 * This class provides static utility methods to work with byte streams.
042 *
043 * @Immutable
044 */
045// CheckStyle:HideUtilityClassConstructorCheck OFF (bc)
046public class TarUtils {
047
    // Mask that is and-ed with a (signed) byte to obtain its unsigned value in the range 0..255.
    private static final int BYTE_MASK = 255;

    // Encoding used by the overloads of this class that do not take an explicit ZipEncoding.
    // NOTE(review): obtained by asking ZipEncodingHelper for the encoding named null,
    // presumably the platform default - confirm against ZipEncodingHelper.
    static final ZipEncoding DEFAULT_ENCODING = ZipEncodingHelper.getZipEncoding(null);
051
052    /**
053     * Encapsulates the algorithms used up to Commons Compress 1.3 as
054     * ZipEncoding.
055     */
056    static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() {
057        @Override
058        public boolean canEncode(final String name) {
059            return true;
060        }
061
062        @Override
063        public String decode(final byte[] buffer) {
064            final int length = buffer.length;
065            final StringBuilder result = new StringBuilder(length);
066
067            for (final byte b : buffer) {
068                if (b == 0) { // Trailing null
069                    break;
070                }
071                result.append((char) (b & 0xFF)); // Allow for sign-extension
072            }
073
074            return result.toString();
075        }
076
077        @Override
078        public ByteBuffer encode(final String name) {
079            final int length = name.length();
080            final byte[] buf = new byte[length];
081
082            // copy until end of input or output is reached.
083            for (int i = 0; i < length; ++i) {
084                buf[i] = (byte) name.charAt(i);
085            }
086            return ByteBuffer.wrap(buf);
087        }
088    };
089
090    /**
091     * Computes the checksum of a tar entry header.
092     *
093     * @param buf The tar entry's header buffer.
094     * @return The computed checksum.
095     */
096    public static long computeCheckSum(final byte[] buf) {
097        long sum = 0;
098
099        for (final byte element : buf) {
100            sum += BYTE_MASK & element;
101        }
102
103        return sum;
104    }
105
106    // Helper method to generate the exception message
107    private static String exceptionMessage(final byte[] buffer, final int offset,
108            final int length, final int current, final byte currentByte) {
109        // default charset is good enough for an exception message,
110        //
111        // the alternative was to modify parseOctal and
112        // parseOctalOrBinary to receive the ZipEncoding of the
113        // archive (deprecating the existing public methods, of
114        // course) and dealing with the fact that ZipEncoding#decode
115        // can throw an IOException which parseOctal* doesn't declare
116        String string = new String(buffer, offset, length, Charset.defaultCharset());
117
118        string = string.replace("\0", "{NUL}"); // Replace NULs to allow string to be printed
119        return "Invalid byte " + currentByte + " at offset " + (current - offset) + " in '" + string + "' len=" + length;
120    }
121
122    private static void formatBigIntegerBinary(final long value, final byte[] buf,
123                                               final int offset,
124                                               final int length,
125                                               final boolean negative) {
126        final BigInteger val = BigInteger.valueOf(value);
127        final byte[] b = val.toByteArray();
128        final int len = b.length;
129        if (len > length - 1) {
130            throw new IllegalArgumentException("Value " + value +
131                " is too large for " + length + " byte field.");
132        }
133        final int off = offset + length - len;
134        System.arraycopy(b, 0, buf, off, len);
135        final byte fill = (byte) (negative ? 0xff : 0);
136        for (int i = offset + 1; i < off; i++) {
137            buf[i] = fill;
138        }
139    }
140
141    /**
142     * Writes an octal value into a buffer.
143     *
144     * Uses {@link #formatUnsignedOctalString} to format
145     * the value as an octal string with leading zeros.
146     * The converted number is followed by NUL and then space.
147     *
148     * @param value The value to convert
149     * @param buf The destination buffer
150     * @param offset The starting offset into the buffer.
151     * @param length The size of the buffer.
152     * @return The updated value of offset, i.e. offset+length
153     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
154     */
155    public static int formatCheckSumOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
156
157        int idx=length-2; // for NUL and space
158        formatUnsignedOctalString(value, buf, offset, idx);
159
160        buf[offset + idx++]   = 0; // Trailing null
161        buf[offset + idx]     = (byte) ' '; // Trailing space
162
163        return offset + length;
164    }
165
166    private static void formatLongBinary(final long value, final byte[] buf,
167                                         final int offset, final int length,
168                                         final boolean negative) {
169        final int bits = (length - 1) * 8;
170        final long max = 1L << bits;
171        long val = Math.abs(value); // Long.MIN_VALUE stays Long.MIN_VALUE
172        if (val < 0 || val >= max) {
173            throw new IllegalArgumentException("Value " + value +
174                " is too large for " + length + " byte field.");
175        }
176        if (negative) {
177            val ^= max - 1;
178            val++;
179            val |= 0xffL << bits;
180        }
181        for (int i = offset + length - 1; i >= offset; i--) {
182            buf[i] = (byte) val;
183            val >>= 8;
184        }
185    }
186
187    /**
188     * Writes an octal long integer into a buffer.
189     *
190     * Uses {@link #formatUnsignedOctalString} to format
191     * the value as an octal string with leading zeros.
192     * The converted number is followed by a space.
193     *
194     * @param value The value to write as octal
195     * @param buf The destinationbuffer.
196     * @param offset The starting offset into the buffer.
197     * @param length The length of the buffer
198     * @return The updated offset
199     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
200     */
201    public static int formatLongOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
202
203        final int idx=length-1; // For space
204
205        formatUnsignedOctalString(value, buf, offset, idx);
206        buf[offset + idx] = (byte) ' '; // Trailing space
207
208        return offset + length;
209    }
210
211    /**
212     * Writes a long integer into a buffer as an octal string if this
213     * will fit, or as a binary number otherwise.
214     *
215     * Uses {@link #formatUnsignedOctalString} to format
216     * the value as an octal string with leading zeros.
217     * The converted number is followed by a space.
218     *
219     * @param value The value to write into the buffer.
220     * @param buf The destination buffer.
221     * @param offset The starting offset into the buffer.
222     * @param length The length of the buffer.
223     * @return The updated offset.
224     * @throws IllegalArgumentException if the value (and trailer)
225     * will not fit in the buffer.
226     * @since 1.4
227     */
228    public static int formatLongOctalOrBinaryBytes(
229        final long value, final byte[] buf, final int offset, final int length) {
230
231        // Check whether we are dealing with UID/GID or SIZE field
232        final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE;
233
234        final boolean negative = value < 0;
235        if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars
236            return formatLongOctalBytes(value, buf, offset, length);
237        }
238
239        if (length < 9) {
240            formatLongBinary(value, buf, offset, length, negative);
241        } else {
242            formatBigIntegerBinary(value, buf, offset, length, negative);
243        }
244
245        buf[offset] = (byte) (negative ? 0xff : 0x80);
246        return offset + length;
247    }
248
249    /**
250     * Copies a name into a buffer.
251     * Copies characters from the name into the buffer
252     * starting at the specified offset.
253     * If the buffer is longer than the name, the buffer
254     * is filled with trailing NULs.
255     * If the name is longer than the buffer,
256     * the output is truncated.
257     *
258     * @param name The header name from which to copy the characters.
259     * @param buf The buffer where the name is to be stored.
260     * @param offset The starting offset into the buffer
261     * @param length The maximum number of header bytes to copy.
262     * @return The updated offset, i.e. offset + length
263     */
264    public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length) {
265        try {
266            return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING);
267        } catch (final IOException ex) { // NOSONAR
268            try {
269                return formatNameBytes(name, buf, offset, length,
270                                       FALLBACK_ENCODING);
271            } catch (final IOException ex2) {
272                // impossible
273                throw new UncheckedIOException(ex2); //NOSONAR
274            }
275        }
276    }
277
278    /**
279     * Copies a name into a buffer.
280     * Copies characters from the name into the buffer
281     * starting at the specified offset.
282     * If the buffer is longer than the name, the buffer
283     * is filled with trailing NULs.
284     * If the name is longer than the buffer,
285     * the output is truncated.
286     *
287     * @param name The header name from which to copy the characters.
288     * @param buf The buffer where the name is to be stored.
289     * @param offset The starting offset into the buffer
290     * @param length The maximum number of header bytes to copy.
291     * @param encoding name of the encoding to use for file names
292     * @since 1.4
293     * @return The updated offset, i.e. offset + length
294     * @throws IOException on error
295     */
296    public static int formatNameBytes(final String name, final byte[] buf, final int offset,
297                                      final int length,
298                                      final ZipEncoding encoding)
299        throws IOException {
300        int len = name.length();
301        ByteBuffer b = encoding.encode(name);
302        while (b.limit() > length && len > 0) {
303            b = encoding.encode(name.substring(0, --len));
304        }
305        final int limit = b.limit() - b.position();
306        System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit);
307
308        // Pad any remaining output bytes with NUL
309        for (int i = limit; i < length; ++i) {
310            buf[offset + i] = 0;
311        }
312
313        return offset + length;
314    }
315
316    /**
317     * Writes an octal integer into a buffer.
318     *
319     * Uses {@link #formatUnsignedOctalString} to format
320     * the value as an octal string with leading zeros.
321     * The converted number is followed by space and NUL
322     *
323     * @param value The value to write
324     * @param buf The buffer to receive the output
325     * @param offset The starting offset into the buffer
326     * @param length The size of the output buffer
327     * @return The updated offset, i.e. offset+length
328     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
329     */
330    public static int formatOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
331
332        int idx=length-2; // For space and trailing null
333        formatUnsignedOctalString(value, buf, offset, idx);
334
335        buf[offset + idx++] = (byte) ' '; // Trailing space
336        buf[offset + idx]   = 0; // Trailing null
337
338        return offset + length;
339    }
340
341    /**
342     * Fills a buffer with unsigned octal number, padded with leading zeroes.
343     *
344     * @param value number to convert to octal - treated as unsigned
345     * @param buffer destination buffer
346     * @param offset starting offset in buffer
347     * @param length length of buffer to fill
348     * @throws IllegalArgumentException if the value will not fit in the buffer
349     */
350    public static void formatUnsignedOctalString(final long value, final byte[] buffer,
351            final int offset, final int length) {
352        int remaining = length;
353        remaining--;
354        if (value == 0) {
355            buffer[offset + remaining--] = (byte) '0';
356        } else {
357            long val = value;
358            for (; remaining >= 0 && val != 0; --remaining) {
359                // CheckStyle:MagicNumber OFF
360                buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7));
361                val = val >>> 3;
362                // CheckStyle:MagicNumber ON
363            }
364            if (val != 0){
365                throw new IllegalArgumentException
366                (value+"="+Long.toOctalString(value)+ " will not fit in octal number buffer of length "+length);
367            }
368        }
369
370        for (; remaining >= 0; --remaining) { // leading zeros
371            buffer[offset + remaining] = (byte) '0';
372        }
373    }
374
375    private static long parseBinaryBigInteger(final byte[] buffer,
376                                              final int offset,
377                                              final int length,
378                                              final boolean negative) {
379        final byte[] remainder = new byte[length - 1];
380        System.arraycopy(buffer, offset + 1, remainder, 0, length - 1);
381        BigInteger val = new BigInteger(remainder);
382        if (negative) {
383            // 2's complement
384            val = val.add(BigInteger.valueOf(-1)).not();
385        }
386        if (val.bitLength() > 63) {
387            throw new IllegalArgumentException("At offset " + offset + ", "
388                                               + length + " byte binary number"
389                                               + " exceeds maximum signed long"
390                                               + " value");
391        }
392        return negative ? -val.longValue() : val.longValue();
393    }
394
395    private static long parseBinaryLong(final byte[] buffer, final int offset,
396                                        final int length,
397                                        final boolean negative) {
398        if (length >= 9) {
399            throw new IllegalArgumentException("At offset " + offset + ", "
400                                               + length + " byte binary number"
401                                               + " exceeds maximum signed long"
402                                               + " value");
403        }
404        long val = 0;
405        for (int i = 1; i < length; i++) {
406            val = (val << 8) + (buffer[offset + i] & 0xff);
407        }
408        if (negative) {
409            // 2's complement
410            val--;
411            val ^= (long) Math.pow(2.0, (length - 1) * 8.0) - 1;
412        }
413        return negative ? -val : val;
414    }
415
416    /**
417     * Parses a boolean byte from a buffer.
418     * Leading spaces and NUL are ignored.
419     * The buffer may contain trailing spaces or NULs.
420     *
421     * @param buffer The buffer from which to parse.
422     * @param offset The offset into the buffer from which to parse.
423     * @return The boolean value of the bytes.
424     * @throws IllegalArgumentException if an invalid byte is detected.
425     */
426    public static boolean parseBoolean(final byte[] buffer, final int offset) {
427        return buffer[offset] == 1;
428    }
429
430    /**
431     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
432     * GNU.sparse.map
433     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
434     *
435     * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
436     * @return unmodifiable list of sparse headers parsed from sparse map
437     * @throws IOException Corrupted TAR archive.
438     * @since 1.21
439     */
440    protected static List<TarArchiveStructSparse> parseFromPAX01SparseHeaders(final String sparseMap)
441        throws IOException {
442        final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
443        final String[] sparseHeaderStrings = sparseMap.split(",");
444        if (sparseHeaderStrings.length % 2 == 1) {
445            throw new IOException("Corrupted TAR archive. Bad format in GNU.sparse.map PAX Header");
446        }
447
448        for (int i = 0; i < sparseHeaderStrings.length; i += 2) {
449            long sparseOffset;
450            try {
451                sparseOffset = Long.parseLong(sparseHeaderStrings[i]);
452            } catch (final NumberFormatException ex) {
453                throw new IOException("Corrupted TAR archive."
454                    + " Sparse struct offset contains a non-numeric value");
455            }
456            if (sparseOffset < 0) {
457                throw new IOException("Corrupted TAR archive."
458                    + " Sparse struct offset contains negative value");
459            }
460            long sparseNumbytes;
461            try {
462                sparseNumbytes = Long.parseLong(sparseHeaderStrings[i + 1]);
463            } catch (final NumberFormatException ex) {
464                throw new IOException("Corrupted TAR archive."
465                    + " Sparse struct numbytes contains a non-numeric value");
466            }
467            if (sparseNumbytes < 0) {
468                throw new IOException("Corrupted TAR archive."
469                    + " Sparse struct numbytes contains negative value");
470            }
471            sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes));
472        }
473
474        return Collections.unmodifiableList(sparseHeaders);
475    }
476
477    /**
478     * Parses an entry name from a buffer.
479     * Parsing stops when a NUL is found
480     * or the buffer length is reached.
481     *
482     * @param buffer The buffer from which to parse.
483     * @param offset The offset into the buffer from which to parse.
484     * @param length The maximum number of bytes to parse.
485     * @return The entry name.
486     */
487    public static String parseName(final byte[] buffer, final int offset, final int length) {
488        try {
489            return parseName(buffer, offset, length, DEFAULT_ENCODING);
490        } catch (final IOException ex) { // NOSONAR
491            try {
492                return parseName(buffer, offset, length, FALLBACK_ENCODING);
493            } catch (final IOException ex2) {
494                // impossible
495                throw new UncheckedIOException(ex2); //NOSONAR
496            }
497        }
498    }
499
500    /**
501     * Parses an entry name from a buffer.
502     * Parsing stops when a NUL is found
503     * or the buffer length is reached.
504     *
505     * @param buffer The buffer from which to parse.
506     * @param offset The offset into the buffer from which to parse.
507     * @param length The maximum number of bytes to parse.
508     * @param encoding name of the encoding to use for file names
509     * @since 1.4
510     * @return The entry name.
511     * @throws IOException on error
512     */
513    public static String parseName(final byte[] buffer, final int offset,
514                                   final int length,
515                                   final ZipEncoding encoding)
516        throws IOException {
517
518        int len = 0;
519        for (int i = offset; len < length && buffer[i] != 0; i++) {
520            len++;
521        }
522        if (len > 0) {
523            final byte[] b = new byte[len];
524            System.arraycopy(buffer, offset, b, 0, len);
525            return encoding.decode(b);
526        }
527        return "";
528    }
529
530    /**
531     * Parses an octal string from a buffer.
532     *
533     * <p>Leading spaces are ignored.
534     * The buffer must contain a trailing space or NUL,
535     * and may contain an additional trailing space or NUL.</p>
536     *
537     * <p>The input buffer is allowed to contain all NULs,
538     * in which case the method returns 0L
539     * (this allows for missing fields).</p>
540     *
541     * <p>To work-around some tar implementations that insert a
542     * leading NUL this method returns 0 if it detects a leading NUL
543     * since Commons Compress 1.4.</p>
544     *
545     * @param buffer The buffer from which to parse.
546     * @param offset The offset into the buffer from which to parse.
547     * @param length The maximum number of bytes to parse - must be at least 2 bytes.
548     * @return The long value of the octal string.
549     * @throws IllegalArgumentException if the trailing space/NUL is missing or if an invalid byte is detected.
550     */
551    public static long parseOctal(final byte[] buffer, final int offset, final int length) {
552        long result = 0;
553        int end = offset + length;
554        int start = offset;
555
556        if (length < 2) {
557            throw new IllegalArgumentException("Length " + length + " must be at least 2");
558        }
559
560        if (buffer[start] == 0) {
561            return 0L;
562        }
563
564        // Skip leading spaces
565        while (start < end) {
566            if (buffer[start] != ' ') {
567                break;
568            }
569            start++;
570        }
571
572        // Trim all trailing NULs and spaces.
573        // The ustar and POSIX tar specs require a trailing NUL or
574        // space but some implementations use the extra digit for big
575        // sizes/uids/gids ...
576        byte trailer = buffer[end - 1];
577        while (start < end && (trailer == 0 || trailer == ' ')) {
578            end--;
579            trailer = buffer[end - 1];
580        }
581
582        for (; start < end; start++) {
583            final byte currentByte = buffer[start];
584            // CheckStyle:MagicNumber OFF
585            if (currentByte < '0' || currentByte > '7') {
586                throw new IllegalArgumentException(exceptionMessage(buffer, offset, length, start, currentByte));
587            }
588            result = (result << 3) + (currentByte - '0'); // convert from ASCII
589            // CheckStyle:MagicNumber ON
590        }
591
592        return result;
593    }
594
595    /**
596     * Computes the value contained in a byte buffer.  If the most
597     * significant bit of the first byte in the buffer is set, this
598     * bit is ignored and the rest of the buffer is interpreted as a
599     * binary number.  Otherwise, the buffer is interpreted as an
600     * octal number as per the parseOctal function above.
601     *
602     * @param buffer The buffer from which to parse.
603     * @param offset The offset into the buffer from which to parse.
604     * @param length The maximum number of bytes to parse.
605     * @return The long value of the octal or binary string.
606     * @throws IllegalArgumentException if the trailing space/NUL is
607     * missing or an invalid byte is detected in an octal number, or
608     * if a binary number would exceed the size of a signed long
609     * 64-bit integer.
610     * @since 1.4
611     */
612    public static long parseOctalOrBinary(final byte[] buffer, final int offset,
613                                          final int length) {
614
615        if ((buffer[offset] & 0x80) == 0) {
616            return parseOctal(buffer, offset, length);
617        }
618        final boolean negative = buffer[offset] == (byte) 0xff;
619        if (length < 9) {
620            return parseBinaryLong(buffer, offset, length, negative);
621        }
622        return parseBinaryBigInteger(buffer, offset, length, negative);
623    }
624
    /**
     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
     *
     * <p>
     * <em>GNU.sparse.map</em>:
     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
     * </p>
     * <p>
     * Will internally invoke {@link #parseFromPAX01SparseHeaders} and map IOExceptions to an
     * {@link UncheckedIOException}. You should use {@link #parseFromPAX01SparseHeaders} directly instead.
     * </p>
     *
     * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
     * @return sparse headers parsed from sparse map
     * @throws UncheckedIOException if {@link #parseFromPAX01SparseHeaders} throws an IOException
     * @deprecated use #parseFromPAX01SparseHeaders instead
     */
    @Deprecated
    protected static List<TarArchiveStructSparse> parsePAX01SparseHeaders(final String sparseMap) {
        try {
            return parseFromPAX01SparseHeaders(sparseMap);
        } catch (final IOException ex) {
            // rethrow unchecked, keeping the message and the IOException as cause
            throw new UncheckedIOException(ex.getMessage(), ex);
        }
    }
649
650    /**
651     * For PAX Format 1.X:
652     * The sparse map itself is stored in the file data block, preceding the actual file data.
653     * It consists of a series of decimal numbers delimited by newlines. The map is padded with nulls to the nearest block boundary.
654     * The first number gives the number of entries in the map. Following are map entries, each one consisting of two numbers
655     * giving the offset and size of the data block it describes.
656     * @param inputStream parsing source.
657     * @param recordSize The size the TAR header
658     * @return sparse headers
659     * @throws IOException if an I/O error occurs.
660     */
661    protected static List<TarArchiveStructSparse> parsePAX1XSparseHeaders(final InputStream inputStream, final int recordSize) throws IOException {
662        // for 1.X PAX Headers
663        final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
664        long bytesRead = 0;
665
666        long[] readResult = readLineOfNumberForPax1X(inputStream);
667        long sparseHeadersCount = readResult[0];
668        if (sparseHeadersCount < 0) {
669            // overflow while reading number?
670            throw new IOException("Corrupted TAR archive. Negative value in sparse headers block");
671        }
672        bytesRead += readResult[1];
673        while (sparseHeadersCount-- > 0) {
674            readResult = readLineOfNumberForPax1X(inputStream);
675            final long sparseOffset = readResult[0];
676            if (sparseOffset < 0) {
677                throw new IOException("Corrupted TAR archive."
678                    + " Sparse header block offset contains negative value");
679            }
680            bytesRead += readResult[1];
681
682            readResult = readLineOfNumberForPax1X(inputStream);
683            final long sparseNumbytes = readResult[0];
684            if (sparseNumbytes < 0) {
685                throw new IOException("Corrupted TAR archive."
686                    + " Sparse header block numbytes contains negative value");
687            }
688            bytesRead += readResult[1];
689            sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes));
690        }
691
692        // skip the rest of this record data
693        final long bytesToSkip = recordSize - bytesRead % recordSize;
694        IOUtils.skip(inputStream, bytesToSkip);
695        return sparseHeaders;
696    }
697
    /**
     * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes)
     * may appear multiple times, and they look like:
     * <pre>
     * GNU.sparse.size=size
     * GNU.sparse.numblocks=numblocks
     * repeat numblocks times
     *   GNU.sparse.offset=offset
     *   GNU.sparse.numbytes=numbytes
     * end repeat
     * </pre>
     * <p>
     * For PAX Format 0.1, the sparse headers are stored in a single variable: GNU.sparse.map
     * </p>
     * <p>
     * <em>GNU.sparse.map</em>:
     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
     * </p>
     * <p>
     * Delegates to the four-arg version, passing {@code -1} as header size.
     * </p>
     *
     * @param inputStream input stream to read keys and values
     * @param sparseHeaders used in PAX Format 0.0 &amp; 0.1, as it may appear multiple times,
     *                      the sparse headers need to be stored in an array, not a map
     * @param globalPaxHeaders global PAX headers of the tar archive
     * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry.
     * @throws IOException if an I/O error occurs.
     * @deprecated use the four-arg version instead
     */
    @Deprecated
    protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders, final Map<String, String> globalPaxHeaders)
            throws IOException {
        // a negative header size, so no total size is known for the header
        return parsePaxHeaders(inputStream, sparseHeaders, globalPaxHeaders, -1);
    }
730
731    /**
732     * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes)
733     * may appear multi times, and they look like:
734     * <pre>
735     * GNU.sparse.size=size
736     * GNU.sparse.numblocks=numblocks
737     * repeat numblocks times
738     *   GNU.sparse.offset=offset
739     *   GNU.sparse.numbytes=numbytes
740     * end repeat
741     * </pre>
742     * <p>
743     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
744     * </p>
745     * <p>
746     * <em>GNU.sparse.map</em>:
747     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
748     * </p>
749     *
750     * @param inputStream input stream to read keys and values
751     * @param sparseHeaders used in PAX Format 0.0 &amp; 0.1, as it may appear multiple times,
752     *                      the sparse headers need to be stored in an array, not a map
753     * @param globalPaxHeaders global PAX headers of the tar archive
754     * @param headerSize total size of the PAX header, will be ignored if negative
755     * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry.
756     * @throws IOException if an I/O error occurs.
757     * @since 1.21
758     */
759    protected static Map<String, String> parsePaxHeaders(final InputStream inputStream,
760            final List<TarArchiveStructSparse> sparseHeaders, final Map<String, String> globalPaxHeaders,
761            final long headerSize) throws IOException {
762        final Map<String, String> headers = new HashMap<>(globalPaxHeaders);
763        Long offset = null;
764        // Format is "length keyword=value\n";
765        int totalRead = 0;
766        while(true) { // get length
767            int ch;
768            int len = 0;
769            int read = 0;
770            while((ch = inputStream.read()) != -1) {
771                read++;
772                totalRead++;
773                if (ch == '\n') { // blank line in header
774                    break;
775                }
776                if (ch == ' '){ // End of length string
777                    // Get keyword
778                    final ByteArrayOutputStream coll = new ByteArrayOutputStream();
779                    while((ch = inputStream.read()) != -1) {
780                        read++;
781                        totalRead++;
782                        if (totalRead < 0 || headerSize >= 0 && totalRead >= headerSize) {
783                            break;
784                        }
785                        if (ch == '='){ // end of keyword
786                            final String keyword = coll.toString(CharsetNames.UTF_8);
787                            // Get rest of entry
788                            final int restLen = len - read;
789                            if (restLen <= 1) { // only NL
790                                headers.remove(keyword);
791                            } else if (headerSize >= 0 && restLen > headerSize - totalRead) {
792                                throw new IOException("Paxheader value size " + restLen
793                                    + " exceeds size of header record");
794                            } else {
795                                final byte[] rest = IOUtils.readRange(inputStream, restLen);
796                                final int got = rest.length;
797                                if (got != restLen) {
798                                    throw new IOException("Failed to read "
799                                            + "Paxheader. Expected "
800                                            + restLen
801                                            + " bytes, read "
802                                            + got);
803                                }
804                                totalRead += restLen;
805                                // Drop trailing NL
806                                if (rest[restLen - 1] != '\n') {
807                                    throw new IOException("Failed to read Paxheader."
808                                       + "Value should end with a newline");
809                                }
810                                final String value = new String(rest, 0, restLen - 1, StandardCharsets.UTF_8);
811                                headers.put(keyword, value);
812
813                                // for 0.0 PAX Headers
814                                if (keyword.equals(TarGnuSparseKeys.OFFSET)) {
815                                    if (offset != null) {
816                                        // previous GNU.sparse.offset header but no numBytes
817                                        sparseHeaders.add(new TarArchiveStructSparse(offset, 0));
818                                    }
819                                    try {
820                                        offset = Long.valueOf(value);
821                                    } catch (final NumberFormatException ex) {
822                                        throw new IOException("Failed to read Paxheader."
823                                            + TarGnuSparseKeys.OFFSET + " contains a non-numeric value");
824                                    }
825                                    if (offset < 0) {
826                                        throw new IOException("Failed to read Paxheader."
827                                            + TarGnuSparseKeys.OFFSET + " contains negative value");
828                                    }
829                                }
830
831                                // for 0.0 PAX Headers
832                                if (keyword.equals(TarGnuSparseKeys.NUMBYTES)) {
833                                    if (offset == null) {
834                                        throw new IOException("Failed to read Paxheader."
835                                                + TarGnuSparseKeys.OFFSET + " is expected before GNU.sparse.numbytes shows up.");
836                                    }
837                                    long numbytes;
838                                    try {
839                                        numbytes = Long.parseLong(value);
840                                    } catch (final NumberFormatException ex) {
841                                        throw new IOException("Failed to read Paxheader."
842                                            + TarGnuSparseKeys.NUMBYTES + " contains a non-numeric value.");
843                                    }
844                                    if (numbytes < 0) {
845                                        throw new IOException("Failed to read Paxheader."
846                                            + TarGnuSparseKeys.NUMBYTES + " contains negative value");
847                                    }
848                                    sparseHeaders.add(new TarArchiveStructSparse(offset, numbytes));
849                                    offset = null;
850                                }
851                            }
852                            break;
853                        }
854                        coll.write((byte) ch);
855                    }
856                    break; // Processed single header
857                }
858
859                // COMPRESS-530 : throw if we encounter a non-number while reading length
860                if (ch < '0' || ch > '9') {
861                    throw new IOException("Failed to read Paxheader. Encountered a non-number while reading length");
862                }
863
864                len *= 10;
865                len += ch - '0';
866            }
867            if (ch == -1){ // EOF
868                break;
869            }
870        }
871        if (offset != null) {
872            // offset but no numBytes
873            sparseHeaders.add(new TarArchiveStructSparse(offset, 0));
874        }
875        return headers;
876    }
877
878    /**
879     * Parses the content of a PAX 1.0 sparse block.
880     * @since 1.20
881     * @param buffer The buffer from which to parse.
882     * @param offset The offset into the buffer from which to parse.
883     * @return a parsed sparse struct
884     */
885    public static TarArchiveStructSparse parseSparse(final byte[] buffer, final int offset) {
886        final long sparseOffset = parseOctalOrBinary(buffer, offset, TarConstants.SPARSE_OFFSET_LEN);
887        final long sparseNumbytes = parseOctalOrBinary(buffer, offset + TarConstants.SPARSE_OFFSET_LEN, TarConstants.SPARSE_NUMBYTES_LEN);
888
889        return new TarArchiveStructSparse(sparseOffset, sparseNumbytes);
890    }
891
892    /**
893     * For 1.X PAX Format, the sparse headers are stored in the file data block, preceding the actual file data.
894     * It consists of a series of decimal numbers delimited by newlines.
895     *
896     * @param inputStream the input stream of the tar file
897     * @return the decimal number delimited by '\n', and the bytes read from input stream
898     * @throws IOException
899     */
900    private static long[] readLineOfNumberForPax1X(final InputStream inputStream) throws IOException {
901        int number;
902        long result = 0;
903        long bytesRead = 0;
904
905        while ((number = inputStream.read()) != '\n') {
906            bytesRead += 1;
907            if (number == -1) {
908                throw new IOException("Unexpected EOF when reading parse information of 1.X PAX format");
909            }
910            if (number < '0' || number > '9') {
911                throw new IOException("Corrupted TAR archive. Non-numeric value in sparse headers block");
912            }
913            result = result * 10 + (number - '0');
914        }
915        bytesRead += 1;
916
917        return new long[]{result, bytesRead};
918    }
919
920    /**
921     * @since 1.21
922     */
923    static List<TarArchiveStructSparse> readSparseStructs(final byte[] buffer, final int offset, final int entries)
924        throws IOException {
925        final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
926        for (int i = 0; i < entries; i++) {
927            try {
928                final TarArchiveStructSparse sparseHeader =
929                    parseSparse(buffer, offset + i * (TarConstants.SPARSE_OFFSET_LEN + TarConstants.SPARSE_NUMBYTES_LEN));
930
931                if (sparseHeader.getOffset() < 0) {
932                    throw new IOException("Corrupted TAR archive, sparse entry with negative offset");
933                }
934                if (sparseHeader.getNumbytes() < 0) {
935                    throw new IOException("Corrupted TAR archive, sparse entry with negative numbytes");
936                }
937                sparseHeaders.add(sparseHeader);
938            } catch (final IllegalArgumentException ex) {
939                // thrown internally by parseOctalOrBinary
940                throw new IOException("Corrupted TAR archive, sparse entry is invalid", ex);
941            }
942        }
943        return Collections.unmodifiableList(sparseHeaders);
944    }
945
946    /**
947     * Wikipedia <a href="https://en.wikipedia.org/wiki/Tar_(computing)#File_header">says</a>:
948     * <blockquote>
949     * The checksum is calculated by taking the sum of the unsigned byte values
950     * of the header block with the eight checksum bytes taken to be ASCII
951     * spaces (decimal value 32). It is stored as a six digit octal number with
952     * leading zeroes followed by a NUL and then a space. Various
953     * implementations do not adhere to this format. For better compatibility,
954     * ignore leading and trailing whitespace, and get the first six digits. In
955     * addition, some historic tar implementations treated bytes as signed.
956     * Implementations typically calculate the checksum both ways, and treat it
957     * as good if either the signed or unsigned sum matches the included
958     * checksum.
959     * </blockquote>
960     * <p>
961     * The return value of this method should be treated as a best-effort
962     * heuristic rather than an absolute and final truth. The checksum
963     * verification logic may well evolve over time as more special cases
964     * are encountered.
965     * </p>
966     *
967     * @param header tar header
968     * @return whether the checksum is reasonably good
969     * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a>
970     * @since 1.5
971     */
972    public static boolean verifyCheckSum(final byte[] header) {
973        final long storedSum = parseOctal(header, TarConstants.CHKSUM_OFFSET, TarConstants.CHKSUMLEN);
974        long unsignedSum = 0;
975        long signedSum = 0;
976
977        for (int i = 0; i < header.length; i++) {
978            byte b = header[i];
979            if (TarConstants.CHKSUM_OFFSET <= i && i < TarConstants.CHKSUM_OFFSET + TarConstants.CHKSUMLEN) {
980                b = ' ';
981            }
982            unsignedSum += 0xff & b;
983            signedSum += b;
984        }
985        return storedSum == unsignedSum || storedSum == signedSum;
986    }
987
988    /** Prevents instantiation. */
989    private TarUtils(){
990    }
991
992}