001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers.tar; 020 021import java.io.ByteArrayOutputStream; 022import java.io.IOException; 023import java.io.InputStream; 024import java.io.UncheckedIOException; 025import java.math.BigInteger; 026import java.nio.ByteBuffer; 027import java.nio.charset.Charset; 028import java.nio.charset.StandardCharsets; 029import java.util.ArrayList; 030import java.util.Collections; 031import java.util.HashMap; 032import java.util.List; 033import java.util.Map; 034 035import org.apache.commons.compress.archivers.zip.ZipEncoding; 036import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; 037import org.apache.commons.compress.utils.CharsetNames; 038import org.apache.commons.compress.utils.IOUtils; 039 040/** 041 * This class provides static utility methods to work with byte streams. 042 * 043 * @Immutable 044 */ 045// CheckStyle:HideUtilityClassConstructorCheck OFF (bc) 046public class TarUtils { 047 048 private static final int BYTE_MASK = 255; 049 050 static final ZipEncoding DEFAULT_ENCODING = ZipEncodingHelper.getZipEncoding(null); 051 052 /** 053 * Encapsulates the algorithms used up to Commons Compress 1.3 as 054 * ZipEncoding. 055 */ 056 static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() { 057 @Override 058 public boolean canEncode(final String name) { 059 return true; 060 } 061 062 @Override 063 public String decode(final byte[] buffer) { 064 final int length = buffer.length; 065 final StringBuilder result = new StringBuilder(length); 066 067 for (final byte b : buffer) { 068 if (b == 0) { // Trailing null 069 break; 070 } 071 result.append((char) (b & 0xFF)); // Allow for sign-extension 072 } 073 074 return result.toString(); 075 } 076 077 @Override 078 public ByteBuffer encode(final String name) { 079 final int length = name.length(); 080 final byte[] buf = new byte[length]; 081 082 // copy until end of input or output is reached. 083 for (int i = 0; i < length; ++i) { 084 buf[i] = (byte) name.charAt(i); 085 } 086 return ByteBuffer.wrap(buf); 087 } 088 }; 089 090 /** 091 * Computes the checksum of a tar entry header. 092 * 093 * @param buf The tar entry's header buffer. 094 * @return The computed checksum. 095 */ 096 public static long computeCheckSum(final byte[] buf) { 097 long sum = 0; 098 099 for (final byte element : buf) { 100 sum += BYTE_MASK & element; 101 } 102 103 return sum; 104 } 105 106 // Helper method to generate the exception message 107 private static String exceptionMessage(final byte[] buffer, final int offset, 108 final int length, final int current, final byte currentByte) { 109 // default charset is good enough for an exception message, 110 // 111 // the alternative was to modify parseOctal and 112 // parseOctalOrBinary to receive the ZipEncoding of the 113 // archive (deprecating the existing public methods, of 114 // course) and dealing with the fact that ZipEncoding#decode 115 // can throw an IOException which parseOctal* doesn't declare 116 String string = new String(buffer, offset, length, Charset.defaultCharset()); 117 118 string = string.replace("\0", "{NUL}"); // Replace NULs to allow string to be printed 119 return "Invalid byte " + currentByte + " at offset " + (current - offset) + " in '" + string + "' len=" + length; 120 } 121 122 private static void formatBigIntegerBinary(final long value, final byte[] buf, 123 final int offset, 124 final int length, 125 final boolean negative) { 126 final BigInteger val = BigInteger.valueOf(value); 127 final byte[] b = val.toByteArray(); 128 final int len = b.length; 129 if (len > length - 1) { 130 throw new IllegalArgumentException("Value " + value + 131 " is too large for " + length + " byte field."); 132 } 133 final int off = offset + length - len; 134 System.arraycopy(b, 0, buf, off, len); 135 final byte fill = (byte) (negative ? 0xff : 0); 136 for (int i = offset + 1; i < off; i++) { 137 buf[i] = fill; 138 } 139 } 140 141 /** 142 * Writes an octal value into a buffer. 143 * 144 * Uses {@link #formatUnsignedOctalString} to format 145 * the value as an octal string with leading zeros. 146 * The converted number is followed by NUL and then space. 147 * 148 * @param value The value to convert 149 * @param buf The destination buffer 150 * @param offset The starting offset into the buffer. 151 * @param length The size of the buffer. 152 * @return The updated value of offset, i.e. offset+length 153 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 154 */ 155 public static int formatCheckSumOctalBytes(final long value, final byte[] buf, final int offset, final int length) { 156 157 int idx=length-2; // for NUL and space 158 formatUnsignedOctalString(value, buf, offset, idx); 159 160 buf[offset + idx++] = 0; // Trailing null 161 buf[offset + idx] = (byte) ' '; // Trailing space 162 163 return offset + length; 164 } 165 166 private static void formatLongBinary(final long value, final byte[] buf, 167 final int offset, final int length, 168 final boolean negative) { 169 final int bits = (length - 1) * 8; 170 final long max = 1L << bits; 171 long val = Math.abs(value); // Long.MIN_VALUE stays Long.MIN_VALUE 172 if (val < 0 || val >= max) { 173 throw new IllegalArgumentException("Value " + value + 174 " is too large for " + length + " byte field."); 175 } 176 if (negative) { 177 val ^= max - 1; 178 val++; 179 val |= 0xffL << bits; 180 } 181 for (int i = offset + length - 1; i >= offset; i--) { 182 buf[i] = (byte) val; 183 val >>= 8; 184 } 185 } 186 187 /** 188 * Writes an octal long integer into a buffer. 189 * 190 * Uses {@link #formatUnsignedOctalString} to format 191 * the value as an octal string with leading zeros. 192 * The converted number is followed by a space. 193 * 194 * @param value The value to write as octal 195 * @param buf The destinationbuffer. 196 * @param offset The starting offset into the buffer. 197 * @param length The length of the buffer 198 * @return The updated offset 199 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 200 */ 201 public static int formatLongOctalBytes(final long value, final byte[] buf, final int offset, final int length) { 202 203 final int idx=length-1; // For space 204 205 formatUnsignedOctalString(value, buf, offset, idx); 206 buf[offset + idx] = (byte) ' '; // Trailing space 207 208 return offset + length; 209 } 210 211 /** 212 * Writes a long integer into a buffer as an octal string if this 213 * will fit, or as a binary number otherwise. 214 * 215 * Uses {@link #formatUnsignedOctalString} to format 216 * the value as an octal string with leading zeros. 217 * The converted number is followed by a space. 218 * 219 * @param value The value to write into the buffer. 220 * @param buf The destination buffer. 221 * @param offset The starting offset into the buffer. 222 * @param length The length of the buffer. 223 * @return The updated offset. 224 * @throws IllegalArgumentException if the value (and trailer) 225 * will not fit in the buffer. 226 * @since 1.4 227 */ 228 public static int formatLongOctalOrBinaryBytes( 229 final long value, final byte[] buf, final int offset, final int length) { 230 231 // Check whether we are dealing with UID/GID or SIZE field 232 final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE; 233 234 final boolean negative = value < 0; 235 if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars 236 return formatLongOctalBytes(value, buf, offset, length); 237 } 238 239 if (length < 9) { 240 formatLongBinary(value, buf, offset, length, negative); 241 } else { 242 formatBigIntegerBinary(value, buf, offset, length, negative); 243 } 244 245 buf[offset] = (byte) (negative ? 0xff : 0x80); 246 return offset + length; 247 } 248 249 /** 250 * Copies a name into a buffer. 251 * Copies characters from the name into the buffer 252 * starting at the specified offset. 253 * If the buffer is longer than the name, the buffer 254 * is filled with trailing NULs. 255 * If the name is longer than the buffer, 256 * the output is truncated. 257 * 258 * @param name The header name from which to copy the characters. 259 * @param buf The buffer where the name is to be stored. 260 * @param offset The starting offset into the buffer 261 * @param length The maximum number of header bytes to copy. 262 * @return The updated offset, i.e. offset + length 263 */ 264 public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length) { 265 try { 266 return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING); 267 } catch (final IOException ex) { // NOSONAR 268 try { 269 return formatNameBytes(name, buf, offset, length, 270 FALLBACK_ENCODING); 271 } catch (final IOException ex2) { 272 // impossible 273 throw new UncheckedIOException(ex2); //NOSONAR 274 } 275 } 276 } 277 278 /** 279 * Copies a name into a buffer. 280 * Copies characters from the name into the buffer 281 * starting at the specified offset. 282 * If the buffer is longer than the name, the buffer 283 * is filled with trailing NULs. 284 * If the name is longer than the buffer, 285 * the output is truncated. 286 * 287 * @param name The header name from which to copy the characters. 288 * @param buf The buffer where the name is to be stored. 289 * @param offset The starting offset into the buffer 290 * @param length The maximum number of header bytes to copy. 291 * @param encoding name of the encoding to use for file names 292 * @since 1.4 293 * @return The updated offset, i.e. offset + length 294 * @throws IOException on error 295 */ 296 public static int formatNameBytes(final String name, final byte[] buf, final int offset, 297 final int length, 298 final ZipEncoding encoding) 299 throws IOException { 300 int len = name.length(); 301 ByteBuffer b = encoding.encode(name); 302 while (b.limit() > length && len > 0) { 303 b = encoding.encode(name.substring(0, --len)); 304 } 305 final int limit = b.limit() - b.position(); 306 System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit); 307 308 // Pad any remaining output bytes with NUL 309 for (int i = limit; i < length; ++i) { 310 buf[offset + i] = 0; 311 } 312 313 return offset + length; 314 } 315 316 /** 317 * Writes an octal integer into a buffer. 318 * 319 * Uses {@link #formatUnsignedOctalString} to format 320 * the value as an octal string with leading zeros. 321 * The converted number is followed by space and NUL 322 * 323 * @param value The value to write 324 * @param buf The buffer to receive the output 325 * @param offset The starting offset into the buffer 326 * @param length The size of the output buffer 327 * @return The updated offset, i.e. offset+length 328 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 329 */ 330 public static int formatOctalBytes(final long value, final byte[] buf, final int offset, final int length) { 331 332 int idx=length-2; // For space and trailing null 333 formatUnsignedOctalString(value, buf, offset, idx); 334 335 buf[offset + idx++] = (byte) ' '; // Trailing space 336 buf[offset + idx] = 0; // Trailing null 337 338 return offset + length; 339 } 340 341 /** 342 * Fills a buffer with unsigned octal number, padded with leading zeroes. 343 * 344 * @param value number to convert to octal - treated as unsigned 345 * @param buffer destination buffer 346 * @param offset starting offset in buffer 347 * @param length length of buffer to fill 348 * @throws IllegalArgumentException if the value will not fit in the buffer 349 */ 350 public static void formatUnsignedOctalString(final long value, final byte[] buffer, 351 final int offset, final int length) { 352 int remaining = length; 353 remaining--; 354 if (value == 0) { 355 buffer[offset + remaining--] = (byte) '0'; 356 } else { 357 long val = value; 358 for (; remaining >= 0 && val != 0; --remaining) { 359 // CheckStyle:MagicNumber OFF 360 buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7)); 361 val = val >>> 3; 362 // CheckStyle:MagicNumber ON 363 } 364 if (val != 0){ 365 throw new IllegalArgumentException 366 (value+"="+Long.toOctalString(value)+ " will not fit in octal number buffer of length "+length); 367 } 368 } 369 370 for (; remaining >= 0; --remaining) { // leading zeros 371 buffer[offset + remaining] = (byte) '0'; 372 } 373 } 374 375 private static long parseBinaryBigInteger(final byte[] buffer, 376 final int offset, 377 final int length, 378 final boolean negative) { 379 final byte[] remainder = new byte[length - 1]; 380 System.arraycopy(buffer, offset + 1, remainder, 0, length - 1); 381 BigInteger val = new BigInteger(remainder); 382 if (negative) { 383 // 2's complement 384 val = val.add(BigInteger.valueOf(-1)).not(); 385 } 386 if (val.bitLength() > 63) { 387 throw new IllegalArgumentException("At offset " + offset + ", " 388 + length + " byte binary number" 389 + " exceeds maximum signed long" 390 + " value"); 391 } 392 return negative ? -val.longValue() : val.longValue(); 393 } 394 395 private static long parseBinaryLong(final byte[] buffer, final int offset, 396 final int length, 397 final boolean negative) { 398 if (length >= 9) { 399 throw new IllegalArgumentException("At offset " + offset + ", " 400 + length + " byte binary number" 401 + " exceeds maximum signed long" 402 + " value"); 403 } 404 long val = 0; 405 for (int i = 1; i < length; i++) { 406 val = (val << 8) + (buffer[offset + i] & 0xff); 407 } 408 if (negative) { 409 // 2's complement 410 val--; 411 val ^= (long) Math.pow(2.0, (length - 1) * 8.0) - 1; 412 } 413 return negative ? -val : val; 414 } 415 416 /** 417 * Parses a boolean byte from a buffer. 418 * Leading spaces and NUL are ignored. 419 * The buffer may contain trailing spaces or NULs. 420 * 421 * @param buffer The buffer from which to parse. 422 * @param offset The offset into the buffer from which to parse. 423 * @return The boolean value of the bytes. 424 * @throws IllegalArgumentException if an invalid byte is detected. 425 */ 426 public static boolean parseBoolean(final byte[] buffer, final int offset) { 427 return buffer[offset] == 1; 428 } 429 430 /** 431 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map 432 * GNU.sparse.map 433 * Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 434 * 435 * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 436 * @return unmodifiable list of sparse headers parsed from sparse map 437 * @throws IOException Corrupted TAR archive. 438 * @since 1.21 439 */ 440 protected static List<TarArchiveStructSparse> parseFromPAX01SparseHeaders(final String sparseMap) 441 throws IOException { 442 final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>(); 443 final String[] sparseHeaderStrings = sparseMap.split(","); 444 if (sparseHeaderStrings.length % 2 == 1) { 445 throw new IOException("Corrupted TAR archive. Bad format in GNU.sparse.map PAX Header"); 446 } 447 448 for (int i = 0; i < sparseHeaderStrings.length; i += 2) { 449 long sparseOffset; 450 try { 451 sparseOffset = Long.parseLong(sparseHeaderStrings[i]); 452 } catch (final NumberFormatException ex) { 453 throw new IOException("Corrupted TAR archive." 454 + " Sparse struct offset contains a non-numeric value"); 455 } 456 if (sparseOffset < 0) { 457 throw new IOException("Corrupted TAR archive." 458 + " Sparse struct offset contains negative value"); 459 } 460 long sparseNumbytes; 461 try { 462 sparseNumbytes = Long.parseLong(sparseHeaderStrings[i + 1]); 463 } catch (final NumberFormatException ex) { 464 throw new IOException("Corrupted TAR archive." 465 + " Sparse struct numbytes contains a non-numeric value"); 466 } 467 if (sparseNumbytes < 0) { 468 throw new IOException("Corrupted TAR archive." 469 + " Sparse struct numbytes contains negative value"); 470 } 471 sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes)); 472 } 473 474 return Collections.unmodifiableList(sparseHeaders); 475 } 476 477 /** 478 * Parses an entry name from a buffer. 479 * Parsing stops when a NUL is found 480 * or the buffer length is reached. 481 * 482 * @param buffer The buffer from which to parse. 483 * @param offset The offset into the buffer from which to parse. 484 * @param length The maximum number of bytes to parse. 485 * @return The entry name. 486 */ 487 public static String parseName(final byte[] buffer, final int offset, final int length) { 488 try { 489 return parseName(buffer, offset, length, DEFAULT_ENCODING); 490 } catch (final IOException ex) { // NOSONAR 491 try { 492 return parseName(buffer, offset, length, FALLBACK_ENCODING); 493 } catch (final IOException ex2) { 494 // impossible 495 throw new UncheckedIOException(ex2); //NOSONAR 496 } 497 } 498 } 499 500 /** 501 * Parses an entry name from a buffer. 502 * Parsing stops when a NUL is found 503 * or the buffer length is reached. 504 * 505 * @param buffer The buffer from which to parse. 506 * @param offset The offset into the buffer from which to parse. 507 * @param length The maximum number of bytes to parse. 508 * @param encoding name of the encoding to use for file names 509 * @since 1.4 510 * @return The entry name. 511 * @throws IOException on error 512 */ 513 public static String parseName(final byte[] buffer, final int offset, 514 final int length, 515 final ZipEncoding encoding) 516 throws IOException { 517 518 int len = 0; 519 for (int i = offset; len < length && buffer[i] != 0; i++) { 520 len++; 521 } 522 if (len > 0) { 523 final byte[] b = new byte[len]; 524 System.arraycopy(buffer, offset, b, 0, len); 525 return encoding.decode(b); 526 } 527 return ""; 528 } 529 530 /** 531 * Parses an octal string from a buffer. 532 * 533 * <p>Leading spaces are ignored. 534 * The buffer must contain a trailing space or NUL, 535 * and may contain an additional trailing space or NUL.</p> 536 * 537 * <p>The input buffer is allowed to contain all NULs, 538 * in which case the method returns 0L 539 * (this allows for missing fields).</p> 540 * 541 * <p>To work-around some tar implementations that insert a 542 * leading NUL this method returns 0 if it detects a leading NUL 543 * since Commons Compress 1.4.</p> 544 * 545 * @param buffer The buffer from which to parse. 546 * @param offset The offset into the buffer from which to parse. 547 * @param length The maximum number of bytes to parse - must be at least 2 bytes. 548 * @return The long value of the octal string. 549 * @throws IllegalArgumentException if the trailing space/NUL is missing or if an invalid byte is detected. 550 */ 551 public static long parseOctal(final byte[] buffer, final int offset, final int length) { 552 long result = 0; 553 int end = offset + length; 554 int start = offset; 555 556 if (length < 2) { 557 throw new IllegalArgumentException("Length " + length + " must be at least 2"); 558 } 559 560 if (buffer[start] == 0) { 561 return 0L; 562 } 563 564 // Skip leading spaces 565 while (start < end) { 566 if (buffer[start] != ' ') { 567 break; 568 } 569 start++; 570 } 571 572 // Trim all trailing NULs and spaces. 573 // The ustar and POSIX tar specs require a trailing NUL or 574 // space but some implementations use the extra digit for big 575 // sizes/uids/gids ... 576 byte trailer = buffer[end - 1]; 577 while (start < end && (trailer == 0 || trailer == ' ')) { 578 end--; 579 trailer = buffer[end - 1]; 580 } 581 582 for (; start < end; start++) { 583 final byte currentByte = buffer[start]; 584 // CheckStyle:MagicNumber OFF 585 if (currentByte < '0' || currentByte > '7') { 586 throw new IllegalArgumentException(exceptionMessage(buffer, offset, length, start, currentByte)); 587 } 588 result = (result << 3) + (currentByte - '0'); // convert from ASCII 589 // CheckStyle:MagicNumber ON 590 } 591 592 return result; 593 } 594 595 /** 596 * Computes the value contained in a byte buffer. If the most 597 * significant bit of the first byte in the buffer is set, this 598 * bit is ignored and the rest of the buffer is interpreted as a 599 * binary number. Otherwise, the buffer is interpreted as an 600 * octal number as per the parseOctal function above. 601 * 602 * @param buffer The buffer from which to parse. 603 * @param offset The offset into the buffer from which to parse. 604 * @param length The maximum number of bytes to parse. 605 * @return The long value of the octal or binary string. 606 * @throws IllegalArgumentException if the trailing space/NUL is 607 * missing or an invalid byte is detected in an octal number, or 608 * if a binary number would exceed the size of a signed long 609 * 64-bit integer. 610 * @since 1.4 611 */ 612 public static long parseOctalOrBinary(final byte[] buffer, final int offset, 613 final int length) { 614 615 if ((buffer[offset] & 0x80) == 0) { 616 return parseOctal(buffer, offset, length); 617 } 618 final boolean negative = buffer[offset] == (byte) 0xff; 619 if (length < 9) { 620 return parseBinaryLong(buffer, offset, length, negative); 621 } 622 return parseBinaryBigInteger(buffer, offset, length, negative); 623 } 624 625 /** 626 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map 627 * 628 * <p> 629 * <em>GNU.sparse.map</em>: 630 * Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 631 * </p> 632 * <p> 633 * Will internally invoke {@link #parseFromPAX01SparseHeaders} and map IOExceptions to a RzuntimeException, You 634 * should use {@link #parseFromPAX01SparseHeaders} directly instead. 635 * </p> 636 * 637 * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 638 * @return sparse headers parsed from sparse map 639 * @deprecated use #parseFromPAX01SparseHeaders instead 640 */ 641 @Deprecated 642 protected static List<TarArchiveStructSparse> parsePAX01SparseHeaders(final String sparseMap) { 643 try { 644 return parseFromPAX01SparseHeaders(sparseMap); 645 } catch (final IOException ex) { 646 throw new UncheckedIOException(ex.getMessage(), ex); 647 } 648 } 649 650 /** 651 * For PAX Format 1.X: 652 * The sparse map itself is stored in the file data block, preceding the actual file data. 653 * It consists of a series of decimal numbers delimited by newlines. The map is padded with nulls to the nearest block boundary. 654 * The first number gives the number of entries in the map. Following are map entries, each one consisting of two numbers 655 * giving the offset and size of the data block it describes. 656 * @param inputStream parsing source. 657 * @param recordSize The size the TAR header 658 * @return sparse headers 659 * @throws IOException if an I/O error occurs. 660 */ 661 protected static List<TarArchiveStructSparse> parsePAX1XSparseHeaders(final InputStream inputStream, final int recordSize) throws IOException { 662 // for 1.X PAX Headers 663 final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>(); 664 long bytesRead = 0; 665 666 long[] readResult = readLineOfNumberForPax1X(inputStream); 667 long sparseHeadersCount = readResult[0]; 668 if (sparseHeadersCount < 0) { 669 // overflow while reading number? 670 throw new IOException("Corrupted TAR archive. Negative value in sparse headers block"); 671 } 672 bytesRead += readResult[1]; 673 while (sparseHeadersCount-- > 0) { 674 readResult = readLineOfNumberForPax1X(inputStream); 675 final long sparseOffset = readResult[0]; 676 if (sparseOffset < 0) { 677 throw new IOException("Corrupted TAR archive." 678 + " Sparse header block offset contains negative value"); 679 } 680 bytesRead += readResult[1]; 681 682 readResult = readLineOfNumberForPax1X(inputStream); 683 final long sparseNumbytes = readResult[0]; 684 if (sparseNumbytes < 0) { 685 throw new IOException("Corrupted TAR archive." 686 + " Sparse header block numbytes contains negative value"); 687 } 688 bytesRead += readResult[1]; 689 sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes)); 690 } 691 692 // skip the rest of this record data 693 final long bytesToSkip = recordSize - bytesRead % recordSize; 694 IOUtils.skip(inputStream, bytesToSkip); 695 return sparseHeaders; 696 } 697 698 /** 699 * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) 700 * may appear multi times, and they look like: 701 * <pre> 702 * GNU.sparse.size=size 703 * GNU.sparse.numblocks=numblocks 704 * repeat numblocks times 705 * GNU.sparse.offset=offset 706 * GNU.sparse.numbytes=numbytes 707 * end repeat 708 * </pre> 709 * <p> 710 * For PAX Format 0.1, the sparse headers are stored in a single variable: GNU.sparse.map 711 * </p> 712 * <p> 713 * <em>GNU.sparse.map</em>: 714 * Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 715 * </p> 716 * 717 * @param inputStream input stream to read keys and values 718 * @param sparseHeaders used in PAX Format 0.0 & 0.1, as it may appear multiple times, 719 * the sparse headers need to be stored in an array, not a map 720 * @param globalPaxHeaders global PAX headers of the tar archive 721 * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry. 722 * @throws IOException if an I/O error occurs. 723 * @deprecated use the four-arg version instead 724 */ 725 @Deprecated 726 protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders, final Map<String, String> globalPaxHeaders) 727 throws IOException { 728 return parsePaxHeaders(inputStream, sparseHeaders, globalPaxHeaders, -1); 729 } 730 731 /** 732 * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) 733 * may appear multi times, and they look like: 734 * <pre> 735 * GNU.sparse.size=size 736 * GNU.sparse.numblocks=numblocks 737 * repeat numblocks times 738 * GNU.sparse.offset=offset 739 * GNU.sparse.numbytes=numbytes 740 * end repeat 741 * </pre> 742 * <p> 743 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map 744 * </p> 745 * <p> 746 * <em>GNU.sparse.map</em>: 747 * Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 748 * </p> 749 * 750 * @param inputStream input stream to read keys and values 751 * @param sparseHeaders used in PAX Format 0.0 & 0.1, as it may appear multiple times, 752 * the sparse headers need to be stored in an array, not a map 753 * @param globalPaxHeaders global PAX headers of the tar archive 754 * @param headerSize total size of the PAX header, will be ignored if negative 755 * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry. 756 * @throws IOException if an I/O error occurs. 757 * @since 1.21 758 */ 759 protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, 760 final List<TarArchiveStructSparse> sparseHeaders, final Map<String, String> globalPaxHeaders, 761 final long headerSize) throws IOException { 762 final Map<String, String> headers = new HashMap<>(globalPaxHeaders); 763 Long offset = null; 764 // Format is "length keyword=value\n"; 765 int totalRead = 0; 766 while(true) { // get length 767 int ch; 768 int len = 0; 769 int read = 0; 770 while((ch = inputStream.read()) != -1) { 771 read++; 772 totalRead++; 773 if (ch == '\n') { // blank line in header 774 break; 775 } 776 if (ch == ' '){ // End of length string 777 // Get keyword 778 final ByteArrayOutputStream coll = new ByteArrayOutputStream(); 779 while((ch = inputStream.read()) != -1) { 780 read++; 781 totalRead++; 782 if (totalRead < 0 || headerSize >= 0 && totalRead >= headerSize) { 783 break; 784 } 785 if (ch == '='){ // end of keyword 786 final String keyword = coll.toString(CharsetNames.UTF_8); 787 // Get rest of entry 788 final int restLen = len - read; 789 if (restLen <= 1) { // only NL 790 headers.remove(keyword); 791 } else if (headerSize >= 0 && restLen > headerSize - totalRead) { 792 throw new IOException("Paxheader value size " + restLen 793 + " exceeds size of header record"); 794 } else { 795 final byte[] rest = IOUtils.readRange(inputStream, restLen); 796 final int got = rest.length; 797 if (got != restLen) { 798 throw new IOException("Failed to read " 799 + "Paxheader. Expected " 800 + restLen 801 + " bytes, read " 802 + got); 803 } 804 totalRead += restLen; 805 // Drop trailing NL 806 if (rest[restLen - 1] != '\n') { 807 throw new IOException("Failed to read Paxheader." 808 + "Value should end with a newline"); 809 } 810 final String value = new String(rest, 0, restLen - 1, StandardCharsets.UTF_8); 811 headers.put(keyword, value); 812 813 // for 0.0 PAX Headers 814 if (keyword.equals(TarGnuSparseKeys.OFFSET)) { 815 if (offset != null) { 816 // previous GNU.sparse.offset header but no numBytes 817 sparseHeaders.add(new TarArchiveStructSparse(offset, 0)); 818 } 819 try { 820 offset = Long.valueOf(value); 821 } catch (final NumberFormatException ex) { 822 throw new IOException("Failed to read Paxheader." 823 + TarGnuSparseKeys.OFFSET + " contains a non-numeric value"); 824 } 825 if (offset < 0) { 826 throw new IOException("Failed to read Paxheader." 827 + TarGnuSparseKeys.OFFSET + " contains negative value"); 828 } 829 } 830 831 // for 0.0 PAX Headers 832 if (keyword.equals(TarGnuSparseKeys.NUMBYTES)) { 833 if (offset == null) { 834 throw new IOException("Failed to read Paxheader." 835 + TarGnuSparseKeys.OFFSET + " is expected before GNU.sparse.numbytes shows up."); 836 } 837 long numbytes; 838 try { 839 numbytes = Long.parseLong(value); 840 } catch (final NumberFormatException ex) { 841 throw new IOException("Failed to read Paxheader." 842 + TarGnuSparseKeys.NUMBYTES + " contains a non-numeric value."); 843 } 844 if (numbytes < 0) { 845 throw new IOException("Failed to read Paxheader." 846 + TarGnuSparseKeys.NUMBYTES + " contains negative value"); 847 } 848 sparseHeaders.add(new TarArchiveStructSparse(offset, numbytes)); 849 offset = null; 850 } 851 } 852 break; 853 } 854 coll.write((byte) ch); 855 } 856 break; // Processed single header 857 } 858 859 // COMPRESS-530 : throw if we encounter a non-number while reading length 860 if (ch < '0' || ch > '9') { 861 throw new IOException("Failed to read Paxheader. Encountered a non-number while reading length"); 862 } 863 864 len *= 10; 865 len += ch - '0'; 866 } 867 if (ch == -1){ // EOF 868 break; 869 } 870 } 871 if (offset != null) { 872 // offset but no numBytes 873 sparseHeaders.add(new TarArchiveStructSparse(offset, 0)); 874 } 875 return headers; 876 } 877 878 /** 879 * Parses the content of a PAX 1.0 sparse block. 880 * @since 1.20 881 * @param buffer The buffer from which to parse. 882 * @param offset The offset into the buffer from which to parse. 883 * @return a parsed sparse struct 884 */ 885 public static TarArchiveStructSparse parseSparse(final byte[] buffer, final int offset) { 886 final long sparseOffset = parseOctalOrBinary(buffer, offset, TarConstants.SPARSE_OFFSET_LEN); 887 final long sparseNumbytes = parseOctalOrBinary(buffer, offset + TarConstants.SPARSE_OFFSET_LEN, TarConstants.SPARSE_NUMBYTES_LEN); 888 889 return new TarArchiveStructSparse(sparseOffset, sparseNumbytes); 890 } 891 892 /** 893 * For 1.X PAX Format, the sparse headers are stored in the file data block, preceding the actual file data. 894 * It consists of a series of decimal numbers delimited by newlines. 895 * 896 * @param inputStream the input stream of the tar file 897 * @return the decimal number delimited by '\n', and the bytes read from input stream 898 * @throws IOException 899 */ 900 private static long[] readLineOfNumberForPax1X(final InputStream inputStream) throws IOException { 901 int number; 902 long result = 0; 903 long bytesRead = 0; 904 905 while ((number = inputStream.read()) != '\n') { 906 bytesRead += 1; 907 if (number == -1) { 908 throw new IOException("Unexpected EOF when reading parse information of 1.X PAX format"); 909 } 910 if (number < '0' || number > '9') { 911 throw new IOException("Corrupted TAR archive. Non-numeric value in sparse headers block"); 912 } 913 result = result * 10 + (number - '0'); 914 } 915 bytesRead += 1; 916 917 return new long[]{result, bytesRead}; 918 } 919 920 /** 921 * @since 1.21 922 */ 923 static List<TarArchiveStructSparse> readSparseStructs(final byte[] buffer, final int offset, final int entries) 924 throws IOException { 925 final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>(); 926 for (int i = 0; i < entries; i++) { 927 try { 928 final TarArchiveStructSparse sparseHeader = 929 parseSparse(buffer, offset + i * (TarConstants.SPARSE_OFFSET_LEN + TarConstants.SPARSE_NUMBYTES_LEN)); 930 931 if (sparseHeader.getOffset() < 0) { 932 throw new IOException("Corrupted TAR archive, sparse entry with negative offset"); 933 } 934 if (sparseHeader.getNumbytes() < 0) { 935 throw new IOException("Corrupted TAR archive, sparse entry with negative numbytes"); 936 } 937 sparseHeaders.add(sparseHeader); 938 } catch (final IllegalArgumentException ex) { 939 // thrown internally by parseOctalOrBinary 940 throw new IOException("Corrupted TAR archive, sparse entry is invalid", ex); 941 } 942 } 943 return Collections.unmodifiableList(sparseHeaders); 944 } 945 946 /** 947 * Wikipedia <a href="https://en.wikipedia.org/wiki/Tar_(computing)#File_header">says</a>: 948 * <blockquote> 949 * The checksum is calculated by taking the sum of the unsigned byte values 950 * of the header block with the eight checksum bytes taken to be ASCII 951 * spaces (decimal value 32). It is stored as a six digit octal number with 952 * leading zeroes followed by a NUL and then a space. Various 953 * implementations do not adhere to this format. For better compatibility, 954 * ignore leading and trailing whitespace, and get the first six digits. In 955 * addition, some historic tar implementations treated bytes as signed. 956 * Implementations typically calculate the checksum both ways, and treat it 957 * as good if either the signed or unsigned sum matches the included 958 * checksum. 959 * </blockquote> 960 * <p> 961 * The return value of this method should be treated as a best-effort 962 * heuristic rather than an absolute and final truth. The checksum 963 * verification logic may well evolve over time as more special cases 964 * are encountered. 965 * </p> 966 * 967 * @param header tar header 968 * @return whether the checksum is reasonably good 969 * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a> 970 * @since 1.5 971 */ 972 public static boolean verifyCheckSum(final byte[] header) { 973 final long storedSum = parseOctal(header, TarConstants.CHKSUM_OFFSET, TarConstants.CHKSUMLEN); 974 long unsignedSum = 0; 975 long signedSum = 0; 976 977 for (int i = 0; i < header.length; i++) { 978 byte b = header[i]; 979 if (TarConstants.CHKSUM_OFFSET <= i && i < TarConstants.CHKSUM_OFFSET + TarConstants.CHKSUMLEN) { 980 b = ' '; 981 } 982 unsignedSum += 0xff & b; 983 signedSum += b; 984 } 985 return storedSum == unsignedSum || storedSum == signedSum; 986 } 987 988 /** Prevents instantiation. */ 989 private TarUtils(){ 990 } 991 992}