001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.compressors.bzip2; 020 021import java.io.IOException; 022import java.io.OutputStream; 023import java.util.Arrays; 024 025import org.apache.commons.compress.compressors.CompressorOutputStream; 026 027/** 028 * An output stream that compresses into the BZip2 format into another stream. 029 * 030 * <p> 031 * The compression requires large amounts of memory. Thus you should call the 032 * {@link #close() close()} method as soon as possible, to force 033 * {@code BZip2CompressorOutputStream} to release the allocated memory. 034 * </p> 035 * 036 * <p> You can shrink the amount of allocated memory and maybe raise 037 * the compression speed by choosing a lower blocksize, which in turn 038 * may cause a lower compression ratio. You can avoid unnecessary 039 * memory allocation by avoiding using a blocksize which is bigger 040 * than the size of the input. </p> 041 * 042 * <p> You can compute the memory usage for compressing by the 043 * following formula: </p> 044 * 045 * <pre> 046 * <code>400k + (9 * blocksize)</code>. 047 * </pre> 048 * 049 * <p> To get the memory required for decompression by {@link 050 * BZip2CompressorInputStream} use </p> 051 * 052 * <pre> 053 * <code>65k + (5 * blocksize)</code>. 054 * </pre> 055 * 056 * <table style="width:100%" border="1"> 057 * <caption>Memory usage by blocksize</caption> 058 * <tr> 059 * <th colspan="3">Memory usage by blocksize</th> 060 * </tr> 061 * <tr> 062 * <th style="text-align: right">Blocksize</th> <th style="text-align: right">Compression<br> 063 * memory usage</th> <th style="text-align: right">Decompression<br> 064 * memory usage</th> 065 * </tr> 066 * <tr> 067 * <td style="text-align: right">100k</td> 068 * <td style="text-align: right">1300k</td> 069 * <td style="text-align: right">565k</td> 070 * </tr> 071 * <tr> 072 * <td style="text-align: right">200k</td> 073 * <td style="text-align: right">2200k</td> 074 * <td style="text-align: right">1065k</td> 075 * </tr> 076 * <tr> 077 * <td style="text-align: right">300k</td> 078 * <td style="text-align: right">3100k</td> 079 * <td style="text-align: right">1565k</td> 080 * </tr> 081 * <tr> 082 * <td style="text-align: right">400k</td> 083 * <td style="text-align: right">4000k</td> 084 * <td style="text-align: right">2065k</td> 085 * </tr> 086 * <tr> 087 * <td style="text-align: right">500k</td> 088 * <td style="text-align: right">4900k</td> 089 * <td style="text-align: right">2565k</td> 090 * </tr> 091 * <tr> 092 * <td style="text-align: right">600k</td> 093 * <td style="text-align: right">5800k</td> 094 * <td style="text-align: right">3065k</td> 095 * </tr> 096 * <tr> 097 * <td style="text-align: right">700k</td> 098 * <td style="text-align: right">6700k</td> 099 * <td style="text-align: right">3565k</td> 100 * </tr> 101 * <tr> 102 * <td style="text-align: right">800k</td> 103 * <td style="text-align: right">7600k</td> 104 * <td style="text-align: right">4065k</td> 105 * </tr> 106 * <tr> 107 * <td style="text-align: right">900k</td> 108 * <td style="text-align: right">8500k</td> 109 * <td style="text-align: right">4565k</td> 110 * </tr> 111 * </table> 112 * 113 * <p> 114 * For decompression {@code BZip2CompressorInputStream} allocates less memory if the 115 * bzipped input is smaller than one block. 116 * </p> 117 * 118 * <p> 119 * Instances of this class are not threadsafe. 120 * </p> 121 * 122 * <p> 123 * TODO: Update to BZip2 1.0.1 124 * </p> 125 * @NotThreadSafe 126 */ 127public class BZip2CompressorOutputStream extends CompressorOutputStream 128 implements BZip2Constants { 129 130 static final class Data { 131 132 // with blockSize 900k 133 /* maps unsigned byte => "does it occur in block" */ 134 final boolean[] inUse = new boolean[256]; // 256 byte 135 final byte[] unseqToSeq = new byte[256]; // 256 byte 136 final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte 137 final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte 138 final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte 139 140 final byte[] generateMTFValues_yy = new byte[256]; // 256 byte 141 final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548 142 // byte 143 final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 144 // byte 145 final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte 146 final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte 147 final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 148 // byte 149 final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte 150 final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte 151 152 final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte 153 final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte 154 final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte 155 156 // ------------ 157 // 333408 byte 158 159 /* holds the RLEd block of original data starting at index 1. 160 * After sorting the last byte added to the buffer is at index 161 * 0. */ 162 final byte[] block; // 900021 byte 163 /* maps index in Burrows-Wheeler transformed block => index of 164 * byte in original block */ 165 final int[] fmap; // 3600000 byte 166 final char[] sfmap; // 3600000 byte 167 // ------------ 168 // 8433529 byte 169 // ============ 170 171 /** 172 * Index of original line in Burrows-Wheeler table. 173 * 174 * <p>This is the index in fmap that points to the last byte 175 * of the original data.</p> 176 */ 177 int origPtr; 178 179 Data(final int blockSize100k) { 180 final int n = blockSize100k * BZip2Constants.BASEBLOCKSIZE; 181 this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)]; 182 this.fmap = new int[n]; 183 this.sfmap = new char[2 * n]; 184 } 185 186 } 187 188 /** 189 * The minimum supported blocksize {@code == 1}. 190 */ 191 public static final int MIN_BLOCKSIZE = 1; 192 193 /** 194 * The maximum supported blocksize {@code == 9}. 195 */ 196 public static final int MAX_BLOCKSIZE = 9; 197 private static final int GREATER_ICOST = 15; 198 199 private static final int LESSER_ICOST = 0; 200 201 /** 202 * Chooses a blocksize based on the given length of the data to compress. 203 * 204 * @return The blocksize, between {@link #MIN_BLOCKSIZE} and 205 * {@link #MAX_BLOCKSIZE} both inclusive. For a negative 206 * {@code inputLength} this method returns {@code MAX_BLOCKSIZE} 207 * always. 208 * 209 * @param inputLength 210 * The length of the data which will be compressed by 211 * {@code BZip2CompressorOutputStream}. 212 */ 213 public static int chooseBlockSize(final long inputLength) { 214 return (inputLength > 0) ? (int) Math 215 .min((inputLength / 132000) + 1, 9) : MAX_BLOCKSIZE; 216 } 217 218 private static void hbAssignCodes(final int[] code, final byte[] length, 219 final int minLen, final int maxLen, 220 final int alphaSize) { 221 int vec = 0; 222 for (int n = minLen; n <= maxLen; n++) { 223 for (int i = 0; i < alphaSize; i++) { 224 if ((length[i] & 0xff) == n) { 225 code[i] = vec; 226 vec++; 227 } 228 } 229 vec <<= 1; 230 } 231 } 232 233 private static void hbMakeCodeLengths(final byte[] len, final int[] freq, 234 final Data dat, final int alphaSize, 235 final int maxLen) { 236 /* 237 * Nodes and heap entries run from 1. Entry 0 for both the heap and 238 * nodes is a sentinel. 239 */ 240 final int[] heap = dat.heap; 241 final int[] weight = dat.weight; 242 final int[] parent = dat.parent; 243 244 for (int i = alphaSize; --i >= 0;) { 245 weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8; 246 } 247 248 for (boolean tooLong = true; tooLong;) { 249 tooLong = false; 250 251 int nNodes = alphaSize; 252 int nHeap = 0; 253 heap[0] = 0; 254 weight[0] = 0; 255 parent[0] = -2; 256 257 for (int i = 1; i <= alphaSize; i++) { 258 parent[i] = -1; 259 nHeap++; 260 heap[nHeap] = i; 261 262 int zz = nHeap; 263 final int tmp = heap[zz]; 264 while (weight[tmp] < weight[heap[zz >> 1]]) { 265 heap[zz] = heap[zz >> 1]; 266 zz >>= 1; 267 } 268 heap[zz] = tmp; 269 } 270 271 while (nHeap > 1) { 272 final int n1 = heap[1]; 273 heap[1] = heap[nHeap]; 274 nHeap--; 275 276 int yy = 0; 277 int zz = 1; 278 int tmp = heap[1]; 279 280 while (true) { 281 yy = zz << 1; 282 283 if (yy > nHeap) { 284 break; 285 } 286 287 if ((yy < nHeap) 288 && (weight[heap[yy + 1]] < weight[heap[yy]])) { 289 yy++; 290 } 291 292 if (weight[tmp] < weight[heap[yy]]) { 293 break; 294 } 295 296 heap[zz] = heap[yy]; 297 zz = yy; 298 } 299 300 heap[zz] = tmp; 301 302 final int n2 = heap[1]; 303 heap[1] = heap[nHeap]; 304 nHeap--; 305 306 yy = 0; 307 zz = 1; 308 tmp = heap[1]; 309 310 while (true) { 311 yy = zz << 1; 312 313 if (yy > nHeap) { 314 break; 315 } 316 317 if ((yy < nHeap) 318 && (weight[heap[yy + 1]] < weight[heap[yy]])) { 319 yy++; 320 } 321 322 if (weight[tmp] < weight[heap[yy]]) { 323 break; 324 } 325 326 heap[zz] = heap[yy]; 327 zz = yy; 328 } 329 330 heap[zz] = tmp; 331 nNodes++; 332 parent[n1] = parent[n2] = nNodes; 333 334 final int weight_n1 = weight[n1]; 335 final int weight_n2 = weight[n2]; 336 weight[nNodes] = ((weight_n1 & 0xffffff00) 337 + (weight_n2 & 0xffffff00)) 338 | (1 + Math.max(weight_n1 & 0x000000ff, weight_n2 & 0x000000ff)); 339 340 parent[nNodes] = -1; 341 nHeap++; 342 heap[nHeap] = nNodes; 343 344 tmp = 0; 345 zz = nHeap; 346 tmp = heap[zz]; 347 final int weight_tmp = weight[tmp]; 348 while (weight_tmp < weight[heap[zz >> 1]]) { 349 heap[zz] = heap[zz >> 1]; 350 zz >>= 1; 351 } 352 heap[zz] = tmp; 353 354 } 355 356 for (int i = 1; i <= alphaSize; i++) { 357 int j = 0; 358 int k = i; 359 360 for (int parent_k; (parent_k = parent[k]) >= 0;) { 361 k = parent_k; 362 j++; 363 } 364 365 len[i - 1] = (byte) j; 366 if (j > maxLen) { 367 tooLong = true; 368 } 369 } 370 371 if (tooLong) { 372 for (int i = 1; i < alphaSize; i++) { 373 int j = weight[i] >> 8; 374 j = 1 + (j >> 1); 375 weight[i] = j << 8; 376 } 377 } 378 } 379 } 380 /** 381 * Index of the last char in the block, so the block size == last + 1. 382 */ 383 private int last; 384 /** 385 * Always: in the range 0 .. 9. The current block size is 100000 * this 386 * number. 387 */ 388 private final int blockSize100k; 389 390 private int bsBuff; 391 392 private int bsLive; 393 394 private final CRC crc = new CRC(); 395 private int nInUse; 396 397 private int nMTF; 398 private int currentChar = -1; 399 private int runLength; 400 401 private int blockCRC; 402 private int combinedCRC; 403 404 private final int allowableBlockSize; 405 /** 406 * All memory intensive stuff. 407 */ 408 private Data data; 409 410 private BlockSort blockSorter; 411 412 private OutputStream out; 413 414 private volatile boolean closed; 415 416 /** 417 * Constructs a new {@code BZip2CompressorOutputStream} with a blocksize of 900k. 418 * 419 * @param out 420 * the destination stream. 421 * 422 * @throws IOException 423 * if an I/O error occurs in the specified stream. 424 * @throws NullPointerException 425 * if {@code out == null}. 426 */ 427 public BZip2CompressorOutputStream(final OutputStream out) 428 throws IOException { 429 this(out, MAX_BLOCKSIZE); 430 } 431 432 /** 433 * Constructs a new {@code BZip2CompressorOutputStream} with specified blocksize. 434 * 435 * @param out 436 * the destination stream. 437 * @param blockSize 438 * the blockSize as 100k units. 439 * 440 * @throws IOException 441 * if an I/O error occurs in the specified stream. 442 * @throws IllegalArgumentException 443 * if {@code (blockSize < 1) || (blockSize > 9)}. 444 * @throws NullPointerException 445 * if {@code out == null}. 446 * 447 * @see #MIN_BLOCKSIZE 448 * @see #MAX_BLOCKSIZE 449 */ 450 public BZip2CompressorOutputStream(final OutputStream out, final int blockSize) throws IOException { 451 if (blockSize < 1) { 452 throw new IllegalArgumentException("blockSize(" + blockSize + ") < 1"); 453 } 454 if (blockSize > 9) { 455 throw new IllegalArgumentException("blockSize(" + blockSize + ") > 9"); 456 } 457 458 this.blockSize100k = blockSize; 459 this.out = out; 460 461 /* 20 is just a paranoia constant */ 462 this.allowableBlockSize = (this.blockSize100k * BZip2Constants.BASEBLOCKSIZE) - 20; 463 init(); 464 } 465 466 private void blockSort() { 467 blockSorter.blockSort(data, last); 468 } 469 470 471 private void bsFinishedWithStream() throws IOException { 472 while (this.bsLive > 0) { 473 final int ch = this.bsBuff >> 24; 474 this.out.write(ch); // write 8-bit 475 this.bsBuff <<= 8; 476 this.bsLive -= 8; 477 } 478 } 479 480 private void bsPutInt(final int u) throws IOException { 481 bsW(8, (u >> 24) & 0xff); 482 bsW(8, (u >> 16) & 0xff); 483 bsW(8, (u >> 8) & 0xff); 484 bsW(8, u & 0xff); 485 } 486 487 private void bsPutUByte(final int c) throws IOException { 488 bsW(8, c); 489 } 490 491 private void bsW(final int n, final int v) throws IOException { 492 final OutputStream outShadow = this.out; 493 int bsLiveShadow = this.bsLive; 494 int bsBuffShadow = this.bsBuff; 495 496 while (bsLiveShadow >= 8) { 497 outShadow.write(bsBuffShadow >> 24); // write 8-bit 498 bsBuffShadow <<= 8; 499 bsLiveShadow -= 8; 500 } 501 502 this.bsBuff = bsBuffShadow | (v << (32 - bsLiveShadow - n)); 503 this.bsLive = bsLiveShadow + n; 504 } 505 506 @Override 507 public void close() throws IOException { 508 if (!closed) { 509 try (OutputStream outShadow = this.out) { 510 finish(); 511 } 512 } 513 } 514 515 private void endBlock() throws IOException { 516 this.blockCRC = this.crc.getFinalCRC(); 517 this.combinedCRC = (this.combinedCRC << 1) | (this.combinedCRC >>> 31); 518 this.combinedCRC ^= this.blockCRC; 519 520 // empty block at end of file 521 if (this.last == -1) { 522 return; 523 } 524 525 /* sort the block and establish posn of original string */ 526 blockSort(); 527 528 /* 529 * A 6-byte block header, the value chosen arbitrarily as 0x314159265359 530 * :-). A 32 bit value does not really give a strong enough guarantee 531 * that the value will not appear by chance in the compressed 532 * datastream. Worst-case probability of this event, for a 900k block, 533 * is about 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 534 * bits. For a compressed file of size 100Gb -- about 100000 blocks -- 535 * only a 48-bit marker will do. NB: normal compression/ decompression 536 * donot rely on these statistical properties. They are only important 537 * when trying to recover blocks from damaged files. 538 */ 539 bsPutUByte(0x31); 540 bsPutUByte(0x41); 541 bsPutUByte(0x59); 542 bsPutUByte(0x26); 543 bsPutUByte(0x53); 544 bsPutUByte(0x59); 545 546 /* Now the block's CRC, so it is in a known place. */ 547 bsPutInt(this.blockCRC); 548 549 /* Now a single bit indicating no randomisation. */ 550 bsW(1, 0); 551 552 /* Finally, block's contents proper. */ 553 moveToFrontCodeAndSend(); 554 } 555 556 private void endCompression() throws IOException { 557 /* 558 * Now another magic 48-bit number, 0x177245385090, to indicate the end 559 * of the last block. (sqrt(pi), if you want to know. I did want to use 560 * e, but it contains too much repetition -- 27 18 28 18 28 46 -- for me 561 * to feel statistically comfortable. Call me paranoid.) 562 */ 563 bsPutUByte(0x17); 564 bsPutUByte(0x72); 565 bsPutUByte(0x45); 566 bsPutUByte(0x38); 567 bsPutUByte(0x50); 568 bsPutUByte(0x90); 569 570 bsPutInt(this.combinedCRC); 571 bsFinishedWithStream(); 572 } 573 574 public void finish() throws IOException { 575 if (!closed) { 576 closed = true; 577 try { 578 if (this.runLength > 0) { 579 writeRun(); 580 } 581 this.currentChar = -1; 582 endBlock(); 583 endCompression(); 584 } finally { 585 this.out = null; 586 this.blockSorter = null; 587 this.data = null; 588 } 589 } 590 } 591 592 @Override 593 public void flush() throws IOException { 594 final OutputStream outShadow = this.out; 595 if (outShadow != null) { 596 outShadow.flush(); 597 } 598 } 599 600 /* 601 * Performs Move-To-Front on the Burrows-Wheeler transformed 602 * buffer, storing the MTFed data in data.sfmap in RUNA/RUNB 603 * run-length-encoded form. 604 * 605 * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p> 606 */ 607 private void generateMTFValues() { 608 final int lastShadow = this.last; 609 final Data dataShadow = this.data; 610 final boolean[] inUse = dataShadow.inUse; 611 final byte[] block = dataShadow.block; 612 final int[] fmap = dataShadow.fmap; 613 final char[] sfmap = dataShadow.sfmap; 614 final int[] mtfFreq = dataShadow.mtfFreq; 615 final byte[] unseqToSeq = dataShadow.unseqToSeq; 616 final byte[] yy = dataShadow.generateMTFValues_yy; 617 618 // make maps 619 int nInUseShadow = 0; 620 for (int i = 0; i < 256; i++) { 621 if (inUse[i]) { 622 unseqToSeq[i] = (byte) nInUseShadow; 623 nInUseShadow++; 624 } 625 } 626 this.nInUse = nInUseShadow; 627 628 final int eob = nInUseShadow + 1; 629 630 Arrays.fill(mtfFreq, 0, eob + 1, 0); 631 632 for (int i = nInUseShadow; --i >= 0;) { 633 yy[i] = (byte) i; 634 } 635 636 int wr = 0; 637 int zPend = 0; 638 639 for (int i = 0; i <= lastShadow; i++) { 640 final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff]; 641 byte tmp = yy[0]; 642 int j = 0; 643 644 while (ll_i != tmp) { 645 j++; 646 final byte tmp2 = tmp; 647 tmp = yy[j]; 648 yy[j] = tmp2; 649 } 650 yy[0] = tmp; 651 652 if (j == 0) { 653 zPend++; 654 } else { 655 if (zPend > 0) { 656 zPend--; 657 while (true) { 658 if ((zPend & 1) == 0) { 659 sfmap[wr] = RUNA; 660 wr++; 661 mtfFreq[RUNA]++; 662 } else { 663 sfmap[wr] = RUNB; 664 wr++; 665 mtfFreq[RUNB]++; 666 } 667 668 if (zPend < 2) { 669 break; 670 } 671 zPend = (zPend - 2) >> 1; 672 } 673 zPend = 0; 674 } 675 sfmap[wr] = (char) (j + 1); 676 wr++; 677 mtfFreq[j + 1]++; 678 } 679 } 680 681 if (zPend > 0) { 682 zPend--; 683 while (true) { 684 if ((zPend & 1) == 0) { 685 sfmap[wr] = RUNA; 686 wr++; 687 mtfFreq[RUNA]++; 688 } else { 689 sfmap[wr] = RUNB; 690 wr++; 691 mtfFreq[RUNB]++; 692 } 693 694 if (zPend < 2) { 695 break; 696 } 697 zPend = (zPend - 2) >> 1; 698 } 699 } 700 701 sfmap[wr] = (char) eob; 702 mtfFreq[eob]++; 703 this.nMTF = wr + 1; 704 } 705 706 /** 707 * Returns the blocksize parameter specified at construction time. 708 * @return the blocksize parameter specified at construction time 709 */ 710 public final int getBlockSize() { 711 return this.blockSize100k; 712 } 713 714 /** 715 * Writes magic bytes like BZ on the first position of the stream 716 * and bytes indicating the file-format, which is 717 * huffmanised, followed by a digit indicating blockSize100k. 718 * @throws IOException if the magic bytes could not been written 719 */ 720 private void init() throws IOException { 721 bsPutUByte('B'); 722 bsPutUByte('Z'); 723 724 this.data = new Data(this.blockSize100k); 725 this.blockSorter = new BlockSort(this.data); 726 727 // huffmanised magic bytes 728 bsPutUByte('h'); 729 bsPutUByte('0' + this.blockSize100k); 730 731 this.combinedCRC = 0; 732 initBlock(); 733 } 734 735 private void initBlock() { 736 // blockNo++; 737 this.crc.initializeCRC(); 738 this.last = -1; 739 // ch = 0; 740 741 final boolean[] inUse = this.data.inUse; 742 for (int i = 256; --i >= 0;) { 743 inUse[i] = false; 744 } 745 746 } 747 748 private void moveToFrontCodeAndSend() throws IOException { 749 bsW(24, this.data.origPtr); 750 generateMTFValues(); 751 sendMTFValues(); 752 } 753 754 private void sendMTFValues() throws IOException { 755 final byte[][] len = this.data.sendMTFValues_len; 756 final int alphaSize = this.nInUse + 2; 757 758 for (int t = N_GROUPS; --t >= 0;) { 759 final byte[] len_t = len[t]; 760 for (int v = alphaSize; --v >= 0;) { 761 len_t[v] = GREATER_ICOST; 762 } 763 } 764 765 /* Decide how many coding tables to use */ 766 // assert (this.nMTF > 0) : this.nMTF; 767 final int nGroups = (this.nMTF < 200) ? 2 : (this.nMTF < 600) ? 3 768 : (this.nMTF < 1200) ? 4 : (this.nMTF < 2400) ? 5 : 6; 769 770 /* Generate an initial set of coding tables */ 771 sendMTFValues0(nGroups, alphaSize); 772 773 /* 774 * Iterate up to N_ITERS times to improve the tables. 775 */ 776 final int nSelectors = sendMTFValues1(nGroups, alphaSize); 777 778 /* Compute MTF values for the selectors. */ 779 sendMTFValues2(nGroups, nSelectors); 780 781 /* Assign actual codes for the tables. */ 782 sendMTFValues3(nGroups, alphaSize); 783 784 /* Transmit the mapping table. */ 785 sendMTFValues4(); 786 787 /* Now the selectors. */ 788 sendMTFValues5(nGroups, nSelectors); 789 790 /* Now the coding tables. */ 791 sendMTFValues6(nGroups, alphaSize); 792 793 /* And finally, the block data proper */ 794 sendMTFValues7(); 795 } 796 797 private void sendMTFValues0(final int nGroups, final int alphaSize) { 798 final byte[][] len = this.data.sendMTFValues_len; 799 final int[] mtfFreq = this.data.mtfFreq; 800 801 int remF = this.nMTF; 802 int gs = 0; 803 804 for (int nPart = nGroups; nPart > 0; nPart--) { 805 final int tFreq = remF / nPart; 806 int ge = gs - 1; 807 int aFreq = 0; 808 809 for (final int a = alphaSize - 1; (aFreq < tFreq) && (ge < a);) { 810 aFreq += mtfFreq[++ge]; 811 } 812 813 if ((ge > gs) && (nPart != nGroups) && (nPart != 1) 814 && (((nGroups - nPart) & 1) != 0)) { 815 aFreq -= mtfFreq[ge--]; 816 } 817 818 final byte[] len_np = len[nPart - 1]; 819 for (int v = alphaSize; --v >= 0;) { 820 if ((v >= gs) && (v <= ge)) { 821 len_np[v] = LESSER_ICOST; 822 } else { 823 len_np[v] = GREATER_ICOST; 824 } 825 } 826 827 gs = ge + 1; 828 remF -= aFreq; 829 } 830 } 831 832 private int sendMTFValues1(final int nGroups, final int alphaSize) { 833 final Data dataShadow = this.data; 834 final int[][] rfreq = dataShadow.sendMTFValues_rfreq; 835 final int[] fave = dataShadow.sendMTFValues_fave; 836 final short[] cost = dataShadow.sendMTFValues_cost; 837 final char[] sfmap = dataShadow.sfmap; 838 final byte[] selector = dataShadow.selector; 839 final byte[][] len = dataShadow.sendMTFValues_len; 840 final byte[] len_0 = len[0]; 841 final byte[] len_1 = len[1]; 842 final byte[] len_2 = len[2]; 843 final byte[] len_3 = len[3]; 844 final byte[] len_4 = len[4]; 845 final byte[] len_5 = len[5]; 846 final int nMTFShadow = this.nMTF; 847 848 int nSelectors = 0; 849 850 for (int iter = 0; iter < N_ITERS; iter++) { 851 for (int t = nGroups; --t >= 0;) { 852 fave[t] = 0; 853 final int[] rfreqt = rfreq[t]; 854 for (int i = alphaSize; --i >= 0;) { 855 rfreqt[i] = 0; 856 } 857 } 858 859 nSelectors = 0; 860 861 for (int gs = 0; gs < this.nMTF;) { 862 /* Set group start & end marks. */ 863 864 /* 865 * Calculate the cost of this group as coded by each of the 866 * coding tables. 867 */ 868 869 final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); 870 871 final byte mask = (byte) 0xff; 872 if (nGroups == N_GROUPS) { 873 // unrolled version of the else-block 874 875 short cost0 = 0; 876 short cost1 = 0; 877 short cost2 = 0; 878 short cost3 = 0; 879 short cost4 = 0; 880 short cost5 = 0; 881 882 for (int i = gs; i <= ge; i++) { 883 final int icv = sfmap[i]; 884 cost0 += (short) (len_0[icv] & mask); 885 cost1 += (short) (len_1[icv] & mask); 886 cost2 += (short) (len_2[icv] & mask); 887 cost3 += (short) (len_3[icv] & mask); 888 cost4 += (short) (len_4[icv] & mask); 889 cost5 += (short) (len_5[icv] & mask); 890 } 891 892 cost[0] = cost0; 893 cost[1] = cost1; 894 cost[2] = cost2; 895 cost[3] = cost3; 896 cost[4] = cost4; 897 cost[5] = cost5; 898 899 } else { 900 for (int t = nGroups; --t >= 0;) { 901 cost[t] = 0; 902 } 903 904 for (int i = gs; i <= ge; i++) { 905 final int icv = sfmap[i]; 906 for (int t = nGroups; --t >= 0;) { 907 cost[t] += (short) (len[t][icv] & mask); 908 } 909 } 910 } 911 912 /* 913 * Find the coding table which is best for this group, and 914 * record its identity in the selector table. 915 */ 916 int bt = -1; 917 for (int t = nGroups, bc = 999999999; --t >= 0;) { 918 final int cost_t = cost[t]; 919 if (cost_t < bc) { 920 bc = cost_t; 921 bt = t; 922 } 923 } 924 925 fave[bt]++; 926 selector[nSelectors] = (byte) bt; 927 nSelectors++; 928 929 /* 930 * Increment the symbol frequencies for the selected table. 931 */ 932 final int[] rfreq_bt = rfreq[bt]; 933 for (int i = gs; i <= ge; i++) { 934 rfreq_bt[sfmap[i]]++; 935 } 936 937 gs = ge + 1; 938 } 939 940 /* 941 * Recompute the tables based on the accumulated frequencies. 942 */ 943 for (int t = 0; t < nGroups; t++) { 944 hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20); 945 } 946 } 947 948 return nSelectors; 949 } 950 951 private void sendMTFValues2(final int nGroups, final int nSelectors) { 952 // assert (nGroups < 8) : nGroups; 953 954 final Data dataShadow = this.data; 955 final byte[] pos = dataShadow.sendMTFValues2_pos; 956 957 for (int i = nGroups; --i >= 0;) { 958 pos[i] = (byte) i; 959 } 960 961 for (int i = 0; i < nSelectors; i++) { 962 final byte ll_i = dataShadow.selector[i]; 963 byte tmp = pos[0]; 964 int j = 0; 965 966 while (ll_i != tmp) { 967 j++; 968 final byte tmp2 = tmp; 969 tmp = pos[j]; 970 pos[j] = tmp2; 971 } 972 973 pos[0] = tmp; 974 dataShadow.selectorMtf[i] = (byte) j; 975 } 976 } 977 978 private void sendMTFValues3(final int nGroups, final int alphaSize) { 979 final int[][] code = this.data.sendMTFValues_code; 980 final byte[][] len = this.data.sendMTFValues_len; 981 982 for (int t = 0; t < nGroups; t++) { 983 int minLen = 32; 984 int maxLen = 0; 985 final byte[] len_t = len[t]; 986 for (int i = alphaSize; --i >= 0;) { 987 final int l = len_t[i] & 0xff; 988 if (l > maxLen) { 989 maxLen = l; 990 } 991 if (l < minLen) { 992 minLen = l; 993 } 994 } 995 996 // assert (maxLen <= 20) : maxLen; 997 // assert (minLen >= 1) : minLen; 998 999 hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize); 1000 } 1001 } 1002 1003 private void sendMTFValues4() throws IOException { 1004 final boolean[] inUse = this.data.inUse; 1005 final boolean[] inUse16 = this.data.sentMTFValues4_inUse16; 1006 1007 for (int i = 16; --i >= 0;) { 1008 inUse16[i] = false; 1009 final int i16 = i * 16; 1010 for (int j = 16; --j >= 0;) { 1011 if (inUse[i16 + j]) { 1012 inUse16[i] = true; 1013 break; 1014 } 1015 } 1016 } 1017 1018 for (int i = 0; i < 16; i++) { 1019 bsW(1, inUse16[i] ? 1 : 0); 1020 } 1021 1022 final OutputStream outShadow = this.out; 1023 int bsLiveShadow = this.bsLive; 1024 int bsBuffShadow = this.bsBuff; 1025 1026 for (int i = 0; i < 16; i++) { 1027 if (inUse16[i]) { 1028 final int i16 = i * 16; 1029 for (int j = 0; j < 16; j++) { 1030 // inlined: bsW(1, inUse[i16 + j] ? 1 : 0); 1031 while (bsLiveShadow >= 8) { 1032 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1033 bsBuffShadow <<= 8; 1034 bsLiveShadow -= 8; 1035 } 1036 if (inUse[i16 + j]) { 1037 bsBuffShadow |= 1 << (32 - bsLiveShadow - 1); 1038 } 1039 bsLiveShadow++; 1040 } 1041 } 1042 } 1043 1044 this.bsBuff = bsBuffShadow; 1045 this.bsLive = bsLiveShadow; 1046 } 1047 1048 private void sendMTFValues5(final int nGroups, final int nSelectors) 1049 throws IOException { 1050 bsW(3, nGroups); 1051 bsW(15, nSelectors); 1052 1053 final OutputStream outShadow = this.out; 1054 final byte[] selectorMtf = this.data.selectorMtf; 1055 1056 int bsLiveShadow = this.bsLive; 1057 int bsBuffShadow = this.bsBuff; 1058 1059 for (int i = 0; i < nSelectors; i++) { 1060 for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) { 1061 // inlined: bsW(1, 1); 1062 while (bsLiveShadow >= 8) { 1063 outShadow.write(bsBuffShadow >> 24); 1064 bsBuffShadow <<= 8; 1065 bsLiveShadow -= 8; 1066 } 1067 bsBuffShadow |= 1 << (32 - bsLiveShadow - 1); 1068 bsLiveShadow++; 1069 } 1070 1071 // inlined: bsW(1, 0); 1072 while (bsLiveShadow >= 8) { 1073 outShadow.write(bsBuffShadow >> 24); 1074 bsBuffShadow <<= 8; 1075 bsLiveShadow -= 8; 1076 } 1077 // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); 1078 bsLiveShadow++; 1079 } 1080 1081 this.bsBuff = bsBuffShadow; 1082 this.bsLive = bsLiveShadow; 1083 } 1084 1085 private void sendMTFValues6(final int nGroups, final int alphaSize) 1086 throws IOException { 1087 final byte[][] len = this.data.sendMTFValues_len; 1088 final OutputStream outShadow = this.out; 1089 1090 int bsLiveShadow = this.bsLive; 1091 int bsBuffShadow = this.bsBuff; 1092 1093 for (int t = 0; t < nGroups; t++) { 1094 final byte[] len_t = len[t]; 1095 int curr = len_t[0] & 0xff; 1096 1097 // inlined: bsW(5, curr); 1098 while (bsLiveShadow >= 8) { 1099 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1100 bsBuffShadow <<= 8; 1101 bsLiveShadow -= 8; 1102 } 1103 bsBuffShadow |= curr << (32 - bsLiveShadow - 5); 1104 bsLiveShadow += 5; 1105 1106 for (int i = 0; i < alphaSize; i++) { 1107 final int lti = len_t[i] & 0xff; 1108 while (curr < lti) { 1109 // inlined: bsW(2, 2); 1110 while (bsLiveShadow >= 8) { 1111 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1112 bsBuffShadow <<= 8; 1113 bsLiveShadow -= 8; 1114 } 1115 bsBuffShadow |= 2 << (32 - bsLiveShadow - 2); 1116 bsLiveShadow += 2; 1117 1118 curr++; /* 10 */ 1119 } 1120 1121 while (curr > lti) { 1122 // inlined: bsW(2, 3); 1123 while (bsLiveShadow >= 8) { 1124 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1125 bsBuffShadow <<= 8; 1126 bsLiveShadow -= 8; 1127 } 1128 bsBuffShadow |= 3 << (32 - bsLiveShadow - 2); 1129 bsLiveShadow += 2; 1130 1131 curr--; /* 11 */ 1132 } 1133 1134 // inlined: bsW(1, 0); 1135 while (bsLiveShadow >= 8) { 1136 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1137 bsBuffShadow <<= 8; 1138 bsLiveShadow -= 8; 1139 } 1140 // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); 1141 bsLiveShadow++; 1142 } 1143 } 1144 1145 this.bsBuff = bsBuffShadow; 1146 this.bsLive = bsLiveShadow; 1147 } 1148 1149 private void sendMTFValues7() throws IOException { 1150 final Data dataShadow = this.data; 1151 final byte[][] len = dataShadow.sendMTFValues_len; 1152 final int[][] code = dataShadow.sendMTFValues_code; 1153 final OutputStream outShadow = this.out; 1154 final byte[] selector = dataShadow.selector; 1155 final char[] sfmap = dataShadow.sfmap; 1156 final int nMTFShadow = this.nMTF; 1157 1158 int selCtr = 0; 1159 1160 int bsLiveShadow = this.bsLive; 1161 int bsBuffShadow = this.bsBuff; 1162 1163 for (int gs = 0; gs < nMTFShadow;) { 1164 final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); 1165 final int selector_selCtr = selector[selCtr] & 0xff; 1166 final int[] code_selCtr = code[selector_selCtr]; 1167 final byte[] len_selCtr = len[selector_selCtr]; 1168 1169 while (gs <= ge) { 1170 final int sfmap_i = sfmap[gs]; 1171 1172 // 1173 // inlined: bsW(len_selCtr[sfmap_i] & 0xff, 1174 // code_selCtr[sfmap_i]); 1175 // 1176 while (bsLiveShadow >= 8) { 1177 outShadow.write(bsBuffShadow >> 24); 1178 bsBuffShadow <<= 8; 1179 bsLiveShadow -= 8; 1180 } 1181 final int n = len_selCtr[sfmap_i] & 0xFF; 1182 bsBuffShadow |= code_selCtr[sfmap_i] << (32 - bsLiveShadow - n); 1183 bsLiveShadow += n; 1184 1185 gs++; 1186 } 1187 1188 gs = ge + 1; 1189 selCtr++; 1190 } 1191 1192 this.bsBuff = bsBuffShadow; 1193 this.bsLive = bsLiveShadow; 1194 } 1195 1196 @Override 1197 public void write(final byte[] buf, int offs, final int len) 1198 throws IOException { 1199 if (offs < 0) { 1200 throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); 1201 } 1202 if (len < 0) { 1203 throw new IndexOutOfBoundsException("len(" + len + ") < 0."); 1204 } 1205 if (offs + len > buf.length) { 1206 throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" 1207 + len + ") > buf.length(" 1208 + buf.length + ")."); 1209 } 1210 if (closed) { 1211 throw new IOException("Stream closed"); 1212 } 1213 1214 for (final int hi = offs + len; offs < hi;) { 1215 write0(buf[offs++]); 1216 } 1217 } 1218 1219 @Override 1220 public void write(final int b) throws IOException { 1221 if (closed) { 1222 throw new IOException("Closed"); 1223 } 1224 write0(b); 1225 } 1226 1227 /** 1228 * Keeps track of the last bytes written and implicitly performs 1229 * run-length encoding as the first step of the bzip2 algorithm. 1230 */ 1231 private void write0(int b) throws IOException { 1232 if (this.currentChar != -1) { 1233 b &= 0xff; 1234 if (this.currentChar == b) { 1235 if (++this.runLength > 254) { 1236 writeRun(); 1237 this.currentChar = -1; 1238 this.runLength = 0; 1239 } 1240 // else nothing to do 1241 } else { 1242 writeRun(); 1243 this.runLength = 1; 1244 this.currentChar = b; 1245 } 1246 } else { 1247 this.currentChar = b & 0xff; 1248 this.runLength++; 1249 } 1250 } 1251 1252 /** 1253 * Writes the current byte to the buffer, run-length encoding it 1254 * if it has been repeated at least four times (the first step 1255 * RLEs sequences of four identical bytes). 1256 * 1257 * <p>Flushes the current block before writing data if it is 1258 * full.</p> 1259 * 1260 * <p>"write to the buffer" means adding to data.buffer starting 1261 * two steps "after" this.last - initially starting at index 1 1262 * (not 0) - and updating this.last to point to the last index 1263 * written minus 1.</p> 1264 */ 1265 private void writeRun() throws IOException { 1266 final int lastShadow = this.last; 1267 1268 if (lastShadow < this.allowableBlockSize) { 1269 final int currentCharShadow = this.currentChar; 1270 final Data dataShadow = this.data; 1271 dataShadow.inUse[currentCharShadow] = true; 1272 final byte ch = (byte) currentCharShadow; 1273 1274 int runLengthShadow = this.runLength; 1275 this.crc.updateCRC(currentCharShadow, runLengthShadow); 1276 1277 switch (runLengthShadow) { 1278 case 1: 1279 dataShadow.block[lastShadow + 2] = ch; 1280 this.last = lastShadow + 1; 1281 break; 1282 1283 case 2: 1284 dataShadow.block[lastShadow + 2] = ch; 1285 dataShadow.block[lastShadow + 3] = ch; 1286 this.last = lastShadow + 2; 1287 break; 1288 1289 case 3: { 1290 final byte[] block = dataShadow.block; 1291 block[lastShadow + 2] = ch; 1292 block[lastShadow + 3] = ch; 1293 block[lastShadow + 4] = ch; 1294 this.last = lastShadow + 3; 1295 } 1296 break; 1297 1298 default: { 1299 runLengthShadow -= 4; 1300 dataShadow.inUse[runLengthShadow] = true; 1301 final byte[] block = dataShadow.block; 1302 block[lastShadow + 2] = ch; 1303 block[lastShadow + 3] = ch; 1304 block[lastShadow + 4] = ch; 1305 block[lastShadow + 5] = ch; 1306 block[lastShadow + 6] = (byte) runLengthShadow; 1307 this.last = lastShadow + 5; 1308 } 1309 break; 1310 1311 } 1312 } else { 1313 endBlock(); 1314 initBlock(); 1315 writeRun(); 1316 } 1317 } 1318 1319}