001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.bzip2;
020
021import java.io.IOException;
022import java.io.OutputStream;
023import java.util.Arrays;
024
025import org.apache.commons.compress.compressors.CompressorOutputStream;
026
027/**
028 * An output stream that compresses into the BZip2 format into another stream.
029 *
030 * <p>
031 * The compression requires large amounts of memory. Thus you should call the
032 * {@link #close() close()} method as soon as possible, to force
033 * {@code BZip2CompressorOutputStream} to release the allocated memory.
034 * </p>
035 *
036 * <p> You can shrink the amount of allocated memory and maybe raise
037 * the compression speed by choosing a lower blocksize, which in turn
038 * may cause a lower compression ratio. You can avoid unnecessary
039 * memory allocation by avoiding using a blocksize which is bigger
040 * than the size of the input.  </p>
041 *
042 * <p> You can compute the memory usage for compressing by the
043 * following formula: </p>
044 *
045 * <pre>
046 * &lt;code&gt;400k + (9 * blocksize)&lt;/code&gt;.
047 * </pre>
048 *
049 * <p> To get the memory required for decompression by {@link
050 * BZip2CompressorInputStream} use </p>
051 *
052 * <pre>
053 * &lt;code&gt;65k + (5 * blocksize)&lt;/code&gt;.
054 * </pre>
055 *
056 * <table style="width:100%" border="1">
057 * <caption>Memory usage by blocksize</caption>
058 * <tr>
059 * <th colspan="3">Memory usage by blocksize</th>
060 * </tr>
061 * <tr>
062 * <th style="text-align: right">Blocksize</th> <th style="text-align: right">Compression<br>
063 * memory usage</th> <th style="text-align: right">Decompression<br>
064 * memory usage</th>
065 * </tr>
066 * <tr>
067 * <td style="text-align: right">100k</td>
068 * <td style="text-align: right">1300k</td>
069 * <td style="text-align: right">565k</td>
070 * </tr>
071 * <tr>
072 * <td style="text-align: right">200k</td>
073 * <td style="text-align: right">2200k</td>
074 * <td style="text-align: right">1065k</td>
075 * </tr>
076 * <tr>
077 * <td style="text-align: right">300k</td>
078 * <td style="text-align: right">3100k</td>
079 * <td style="text-align: right">1565k</td>
080 * </tr>
081 * <tr>
082 * <td style="text-align: right">400k</td>
083 * <td style="text-align: right">4000k</td>
084 * <td style="text-align: right">2065k</td>
085 * </tr>
086 * <tr>
087 * <td style="text-align: right">500k</td>
088 * <td style="text-align: right">4900k</td>
089 * <td style="text-align: right">2565k</td>
090 * </tr>
091 * <tr>
092 * <td style="text-align: right">600k</td>
093 * <td style="text-align: right">5800k</td>
094 * <td style="text-align: right">3065k</td>
095 * </tr>
096 * <tr>
097 * <td style="text-align: right">700k</td>
098 * <td style="text-align: right">6700k</td>
099 * <td style="text-align: right">3565k</td>
100 * </tr>
101 * <tr>
102 * <td style="text-align: right">800k</td>
103 * <td style="text-align: right">7600k</td>
104 * <td style="text-align: right">4065k</td>
105 * </tr>
106 * <tr>
107 * <td style="text-align: right">900k</td>
108 * <td style="text-align: right">8500k</td>
109 * <td style="text-align: right">4565k</td>
110 * </tr>
111 * </table>
112 *
113 * <p>
114 * For decompression {@code BZip2CompressorInputStream} allocates less memory if the
115 * bzipped input is smaller than one block.
116 * </p>
117 *
118 * <p>
119 * Instances of this class are not threadsafe.
120 * </p>
121 *
122 * <p>
123 * TODO: Update to BZip2 1.0.1
124 * </p>
125 * @NotThreadSafe
126 */
127public class BZip2CompressorOutputStream extends CompressorOutputStream
128    implements BZip2Constants {
129
130    static final class Data {
131
132        // with blockSize 900k
133        /* maps unsigned byte => "does it occur in block" */
134        final boolean[] inUse = new boolean[256]; // 256 byte
135        final byte[] unseqToSeq = new byte[256]; // 256 byte
136        final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte
137        final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
138        final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
139
140        final byte[] generateMTFValues_yy = new byte[256]; // 256 byte
141        final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548
142        // byte
143        final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
144        // byte
145        final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte
146        final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte
147        final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
148        // byte
149        final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte
150        final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte
151
152        final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte
153        final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
154        final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
155
156        // ------------
157        // 333408 byte
158
159        /* holds the RLEd block of original data starting at index 1.
160         * After sorting the last byte added to the buffer is at index
161         * 0. */
162        final byte[] block; // 900021 byte
163        /* maps index in Burrows-Wheeler transformed block => index of
164         * byte in original block */
165        final int[] fmap; // 3600000 byte
166        final char[] sfmap; // 3600000 byte
167        // ------------
168        // 8433529 byte
169        // ============
170
171        /**
172         * Index of original line in Burrows-Wheeler table.
173         *
174         * <p>This is the index in fmap that points to the last byte
175         * of the original data.</p>
176         */
177        int origPtr;
178
179        Data(final int blockSize100k) {
180            final int n = blockSize100k * BZip2Constants.BASEBLOCKSIZE;
181            this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)];
182            this.fmap = new int[n];
183            this.sfmap = new char[2 * n];
184        }
185
186    }
187
188    /**
189     * The minimum supported blocksize {@code  == 1}.
190     */
191    public static final int MIN_BLOCKSIZE = 1;
192
193    /**
194     * The maximum supported blocksize {@code  == 9}.
195     */
196    public static final int MAX_BLOCKSIZE = 9;
197    private static final int GREATER_ICOST = 15;
198
199    private static final int LESSER_ICOST = 0;
200
201    /**
202     * Chooses a blocksize based on the given length of the data to compress.
203     *
204     * @return The blocksize, between {@link #MIN_BLOCKSIZE} and
205     *         {@link #MAX_BLOCKSIZE} both inclusive. For a negative
206     *         {@code inputLength} this method returns {@code MAX_BLOCKSIZE}
207     *         always.
208     *
209     * @param inputLength
210     *            The length of the data which will be compressed by
211     *            {@code BZip2CompressorOutputStream}.
212     */
213    public static int chooseBlockSize(final long inputLength) {
214        return (inputLength > 0) ? (int) Math
215            .min((inputLength / 132000) + 1, 9) : MAX_BLOCKSIZE;
216    }
217
218    private static void hbAssignCodes(final int[] code, final byte[] length,
219                                      final int minLen, final int maxLen,
220                                      final int alphaSize) {
221        int vec = 0;
222        for (int n = minLen; n <= maxLen; n++) {
223            for (int i = 0; i < alphaSize; i++) {
224                if ((length[i] & 0xff) == n) {
225                    code[i] = vec;
226                    vec++;
227                }
228            }
229            vec <<= 1;
230        }
231    }
232
233    private static void hbMakeCodeLengths(final byte[] len, final int[] freq,
234                                          final Data dat, final int alphaSize,
235                                          final int maxLen) {
236        /*
237         * Nodes and heap entries run from 1. Entry 0 for both the heap and
238         * nodes is a sentinel.
239         */
240        final int[] heap = dat.heap;
241        final int[] weight = dat.weight;
242        final int[] parent = dat.parent;
243
244        for (int i = alphaSize; --i >= 0;) {
245            weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
246        }
247
248        for (boolean tooLong = true; tooLong;) {
249            tooLong = false;
250
251            int nNodes = alphaSize;
252            int nHeap = 0;
253            heap[0] = 0;
254            weight[0] = 0;
255            parent[0] = -2;
256
257            for (int i = 1; i <= alphaSize; i++) {
258                parent[i] = -1;
259                nHeap++;
260                heap[nHeap] = i;
261
262                int zz = nHeap;
263                final int tmp = heap[zz];
264                while (weight[tmp] < weight[heap[zz >> 1]]) {
265                    heap[zz] = heap[zz >> 1];
266                    zz >>= 1;
267                }
268                heap[zz] = tmp;
269            }
270
271            while (nHeap > 1) {
272                final int n1 = heap[1];
273                heap[1] = heap[nHeap];
274                nHeap--;
275
276                int yy = 0;
277                int zz = 1;
278                int tmp = heap[1];
279
280                while (true) {
281                    yy = zz << 1;
282
283                    if (yy > nHeap) {
284                        break;
285                    }
286
287                    if ((yy < nHeap)
288                        && (weight[heap[yy + 1]] < weight[heap[yy]])) {
289                        yy++;
290                    }
291
292                    if (weight[tmp] < weight[heap[yy]]) {
293                        break;
294                    }
295
296                    heap[zz] = heap[yy];
297                    zz = yy;
298                }
299
300                heap[zz] = tmp;
301
302                final int n2 = heap[1];
303                heap[1] = heap[nHeap];
304                nHeap--;
305
306                yy = 0;
307                zz = 1;
308                tmp = heap[1];
309
310                while (true) {
311                    yy = zz << 1;
312
313                    if (yy > nHeap) {
314                        break;
315                    }
316
317                    if ((yy < nHeap)
318                        && (weight[heap[yy + 1]] < weight[heap[yy]])) {
319                        yy++;
320                    }
321
322                    if (weight[tmp] < weight[heap[yy]]) {
323                        break;
324                    }
325
326                    heap[zz] = heap[yy];
327                    zz = yy;
328                }
329
330                heap[zz] = tmp;
331                nNodes++;
332                parent[n1] = parent[n2] = nNodes;
333
334                final int weight_n1 = weight[n1];
335                final int weight_n2 = weight[n2];
336                weight[nNodes] = ((weight_n1 & 0xffffff00)
337                                  + (weight_n2 & 0xffffff00))
338                    | (1 + Math.max(weight_n1 & 0x000000ff, weight_n2 & 0x000000ff));
339
340                parent[nNodes] = -1;
341                nHeap++;
342                heap[nHeap] = nNodes;
343
344                tmp = 0;
345                zz = nHeap;
346                tmp = heap[zz];
347                final int weight_tmp = weight[tmp];
348                while (weight_tmp < weight[heap[zz >> 1]]) {
349                    heap[zz] = heap[zz >> 1];
350                    zz >>= 1;
351                }
352                heap[zz] = tmp;
353
354            }
355
356            for (int i = 1; i <= alphaSize; i++) {
357                int j = 0;
358                int k = i;
359
360                for (int parent_k; (parent_k = parent[k]) >= 0;) {
361                    k = parent_k;
362                    j++;
363                }
364
365                len[i - 1] = (byte) j;
366                if (j > maxLen) {
367                    tooLong = true;
368                }
369            }
370
371            if (tooLong) {
372                for (int i = 1; i < alphaSize; i++) {
373                    int j = weight[i] >> 8;
374                    j = 1 + (j >> 1);
375                    weight[i] = j << 8;
376                }
377            }
378        }
379    }
380    /**
381     * Index of the last char in the block, so the block size == last + 1.
382     */
383    private int last;
384    /**
385     * Always: in the range 0 .. 9. The current block size is 100000 * this
386     * number.
387     */
388    private final int blockSize100k;
389
390    private int bsBuff;
391
392    private int bsLive;
393
394    private final CRC crc = new CRC();
395    private int nInUse;
396
397    private int nMTF;
398    private int currentChar = -1;
399    private int runLength;
400
401    private int blockCRC;
402    private int combinedCRC;
403
404    private final int allowableBlockSize;
405    /**
406     * All memory intensive stuff.
407     */
408    private Data data;
409
410    private BlockSort blockSorter;
411
412    private OutputStream out;
413
414    private volatile boolean closed;
415
416    /**
417     * Constructs a new {@code BZip2CompressorOutputStream} with a blocksize of 900k.
418     *
419     * @param out
420     *            the destination stream.
421     *
422     * @throws IOException
423     *             if an I/O error occurs in the specified stream.
424     * @throws NullPointerException
425     *             if {@code out == null}.
426     */
427    public BZip2CompressorOutputStream(final OutputStream out)
428        throws IOException {
429        this(out, MAX_BLOCKSIZE);
430    }
431
432    /**
433     * Constructs a new {@code BZip2CompressorOutputStream} with specified blocksize.
434     *
435     * @param out
436     *            the destination stream.
437     * @param blockSize
438     *            the blockSize as 100k units.
439     *
440     * @throws IOException
441     *             if an I/O error occurs in the specified stream.
442     * @throws IllegalArgumentException
443     *             if {@code (blockSize &lt; 1) || (blockSize &gt; 9)}.
444     * @throws NullPointerException
445     *             if {@code out == null}.
446     *
447     * @see #MIN_BLOCKSIZE
448     * @see #MAX_BLOCKSIZE
449     */
450    public BZip2CompressorOutputStream(final OutputStream out, final int blockSize) throws IOException {
451        if (blockSize < 1) {
452            throw new IllegalArgumentException("blockSize(" + blockSize + ") < 1");
453        }
454        if (blockSize > 9) {
455            throw new IllegalArgumentException("blockSize(" + blockSize + ") > 9");
456        }
457
458        this.blockSize100k = blockSize;
459        this.out = out;
460
461        /* 20 is just a paranoia constant */
462        this.allowableBlockSize = (this.blockSize100k * BZip2Constants.BASEBLOCKSIZE) - 20;
463        init();
464    }
465
466    private void blockSort() {
467        blockSorter.blockSort(data, last);
468    }
469
470
471    private void bsFinishedWithStream() throws IOException {
472        while (this.bsLive > 0) {
473            final int ch = this.bsBuff >> 24;
474            this.out.write(ch); // write 8-bit
475            this.bsBuff <<= 8;
476            this.bsLive -= 8;
477        }
478    }
479
480    private void bsPutInt(final int u) throws IOException {
481        bsW(8, (u >> 24) & 0xff);
482        bsW(8, (u >> 16) & 0xff);
483        bsW(8, (u >> 8) & 0xff);
484        bsW(8, u & 0xff);
485    }
486
487    private void bsPutUByte(final int c) throws IOException {
488        bsW(8, c);
489    }
490
491    private void bsW(final int n, final int v) throws IOException {
492        final OutputStream outShadow = this.out;
493        int bsLiveShadow = this.bsLive;
494        int bsBuffShadow = this.bsBuff;
495
496        while (bsLiveShadow >= 8) {
497            outShadow.write(bsBuffShadow >> 24); // write 8-bit
498            bsBuffShadow <<= 8;
499            bsLiveShadow -= 8;
500        }
501
502        this.bsBuff = bsBuffShadow | (v << (32 - bsLiveShadow - n));
503        this.bsLive = bsLiveShadow + n;
504    }
505
506    @Override
507    public void close() throws IOException {
508        if (!closed) {
509            try (OutputStream outShadow = this.out) {
510                finish();
511            }
512        }
513    }
514
515    private void endBlock() throws IOException {
516        this.blockCRC = this.crc.getFinalCRC();
517        this.combinedCRC = (this.combinedCRC << 1) | (this.combinedCRC >>> 31);
518        this.combinedCRC ^= this.blockCRC;
519
520        // empty block at end of file
521        if (this.last == -1) {
522            return;
523        }
524
525        /* sort the block and establish posn of original string */
526        blockSort();
527
528        /*
529         * A 6-byte block header, the value chosen arbitrarily as 0x314159265359
530         * :-). A 32 bit value does not really give a strong enough guarantee
531         * that the value will not appear by chance in the compressed
532         * datastream. Worst-case probability of this event, for a 900k block,
533         * is about 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48
534         * bits. For a compressed file of size 100Gb -- about 100000 blocks --
535         * only a 48-bit marker will do. NB: normal compression/ decompression
536         * donot rely on these statistical properties. They are only important
537         * when trying to recover blocks from damaged files.
538         */
539        bsPutUByte(0x31);
540        bsPutUByte(0x41);
541        bsPutUByte(0x59);
542        bsPutUByte(0x26);
543        bsPutUByte(0x53);
544        bsPutUByte(0x59);
545
546        /* Now the block's CRC, so it is in a known place. */
547        bsPutInt(this.blockCRC);
548
549        /* Now a single bit indicating no randomisation. */
550        bsW(1, 0);
551
552        /* Finally, block's contents proper. */
553        moveToFrontCodeAndSend();
554    }
555
556    private void endCompression() throws IOException {
557        /*
558         * Now another magic 48-bit number, 0x177245385090, to indicate the end
559         * of the last block. (sqrt(pi), if you want to know. I did want to use
560         * e, but it contains too much repetition -- 27 18 28 18 28 46 -- for me
561         * to feel statistically comfortable. Call me paranoid.)
562         */
563        bsPutUByte(0x17);
564        bsPutUByte(0x72);
565        bsPutUByte(0x45);
566        bsPutUByte(0x38);
567        bsPutUByte(0x50);
568        bsPutUByte(0x90);
569
570        bsPutInt(this.combinedCRC);
571        bsFinishedWithStream();
572    }
573
574    public void finish() throws IOException {
575        if (!closed) {
576            closed = true;
577            try {
578                if (this.runLength > 0) {
579                    writeRun();
580                }
581                this.currentChar = -1;
582                endBlock();
583                endCompression();
584            } finally {
585                this.out = null;
586                this.blockSorter = null;
587                this.data = null;
588            }
589        }
590    }
591
592    @Override
593    public void flush() throws IOException {
594        final OutputStream outShadow = this.out;
595        if (outShadow != null) {
596            outShadow.flush();
597        }
598    }
599
600    /*
601     * Performs Move-To-Front on the Burrows-Wheeler transformed
602     * buffer, storing the MTFed data in data.sfmap in RUNA/RUNB
603     * run-length-encoded form.
604     *
605     * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p>
606     */
607    private void generateMTFValues() {
608        final int lastShadow = this.last;
609        final Data dataShadow = this.data;
610        final boolean[] inUse = dataShadow.inUse;
611        final byte[] block = dataShadow.block;
612        final int[] fmap = dataShadow.fmap;
613        final char[] sfmap = dataShadow.sfmap;
614        final int[] mtfFreq = dataShadow.mtfFreq;
615        final byte[] unseqToSeq = dataShadow.unseqToSeq;
616        final byte[] yy = dataShadow.generateMTFValues_yy;
617
618        // make maps
619        int nInUseShadow = 0;
620        for (int i = 0; i < 256; i++) {
621            if (inUse[i]) {
622                unseqToSeq[i] = (byte) nInUseShadow;
623                nInUseShadow++;
624            }
625        }
626        this.nInUse = nInUseShadow;
627
628        final int eob = nInUseShadow + 1;
629
630        Arrays.fill(mtfFreq, 0, eob + 1, 0);
631
632        for (int i = nInUseShadow; --i >= 0;) {
633            yy[i] = (byte) i;
634        }
635
636        int wr = 0;
637        int zPend = 0;
638
639        for (int i = 0; i <= lastShadow; i++) {
640            final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff];
641            byte tmp = yy[0];
642            int j = 0;
643
644            while (ll_i != tmp) {
645                j++;
646                final byte tmp2 = tmp;
647                tmp = yy[j];
648                yy[j] = tmp2;
649            }
650            yy[0] = tmp;
651
652            if (j == 0) {
653                zPend++;
654            } else {
655                if (zPend > 0) {
656                    zPend--;
657                    while (true) {
658                        if ((zPend & 1) == 0) {
659                            sfmap[wr] = RUNA;
660                            wr++;
661                            mtfFreq[RUNA]++;
662                        } else {
663                            sfmap[wr] = RUNB;
664                            wr++;
665                            mtfFreq[RUNB]++;
666                        }
667
668                        if (zPend < 2) {
669                            break;
670                        }
671                        zPend = (zPend - 2) >> 1;
672                    }
673                    zPend = 0;
674                }
675                sfmap[wr] = (char) (j + 1);
676                wr++;
677                mtfFreq[j + 1]++;
678            }
679        }
680
681        if (zPend > 0) {
682            zPend--;
683            while (true) {
684                if ((zPend & 1) == 0) {
685                    sfmap[wr] = RUNA;
686                    wr++;
687                    mtfFreq[RUNA]++;
688                } else {
689                    sfmap[wr] = RUNB;
690                    wr++;
691                    mtfFreq[RUNB]++;
692                }
693
694                if (zPend < 2) {
695                    break;
696                }
697                zPend = (zPend - 2) >> 1;
698            }
699        }
700
701        sfmap[wr] = (char) eob;
702        mtfFreq[eob]++;
703        this.nMTF = wr + 1;
704    }
705
706    /**
707     * Returns the blocksize parameter specified at construction time.
708     * @return the blocksize parameter specified at construction time
709     */
710    public final int getBlockSize() {
711        return this.blockSize100k;
712    }
713
714    /**
715     * Writes magic bytes like BZ on the first position of the stream
716     * and bytes indicating the file-format, which is
717     * huffmanised, followed by a digit indicating blockSize100k.
718     * @throws IOException if the magic bytes could not been written
719     */
720    private void init() throws IOException {
721        bsPutUByte('B');
722        bsPutUByte('Z');
723
724        this.data = new Data(this.blockSize100k);
725        this.blockSorter = new BlockSort(this.data);
726
727        // huffmanised magic bytes
728        bsPutUByte('h');
729        bsPutUByte('0' + this.blockSize100k);
730
731        this.combinedCRC = 0;
732        initBlock();
733    }
734
735    private void initBlock() {
736        // blockNo++;
737        this.crc.initializeCRC();
738        this.last = -1;
739        // ch = 0;
740
741        final boolean[] inUse = this.data.inUse;
742        for (int i = 256; --i >= 0;) {
743            inUse[i] = false;
744        }
745
746    }
747
748    private void moveToFrontCodeAndSend() throws IOException {
749        bsW(24, this.data.origPtr);
750        generateMTFValues();
751        sendMTFValues();
752    }
753
754    private void sendMTFValues() throws IOException {
755        final byte[][] len = this.data.sendMTFValues_len;
756        final int alphaSize = this.nInUse + 2;
757
758        for (int t = N_GROUPS; --t >= 0;) {
759            final byte[] len_t = len[t];
760            for (int v = alphaSize; --v >= 0;) {
761                len_t[v] = GREATER_ICOST;
762            }
763        }
764
765        /* Decide how many coding tables to use */
766        // assert (this.nMTF > 0) : this.nMTF;
767        final int nGroups = (this.nMTF < 200) ? 2 : (this.nMTF < 600) ? 3
768            : (this.nMTF < 1200) ? 4 : (this.nMTF < 2400) ? 5 : 6;
769
770        /* Generate an initial set of coding tables */
771        sendMTFValues0(nGroups, alphaSize);
772
773        /*
774         * Iterate up to N_ITERS times to improve the tables.
775         */
776        final int nSelectors = sendMTFValues1(nGroups, alphaSize);
777
778        /* Compute MTF values for the selectors. */
779        sendMTFValues2(nGroups, nSelectors);
780
781        /* Assign actual codes for the tables. */
782        sendMTFValues3(nGroups, alphaSize);
783
784        /* Transmit the mapping table. */
785        sendMTFValues4();
786
787        /* Now the selectors. */
788        sendMTFValues5(nGroups, nSelectors);
789
790        /* Now the coding tables. */
791        sendMTFValues6(nGroups, alphaSize);
792
793        /* And finally, the block data proper */
794        sendMTFValues7();
795    }
796
797    private void sendMTFValues0(final int nGroups, final int alphaSize) {
798        final byte[][] len = this.data.sendMTFValues_len;
799        final int[] mtfFreq = this.data.mtfFreq;
800
801        int remF = this.nMTF;
802        int gs = 0;
803
804        for (int nPart = nGroups; nPart > 0; nPart--) {
805            final int tFreq = remF / nPart;
806            int ge = gs - 1;
807            int aFreq = 0;
808
809            for (final int a = alphaSize - 1; (aFreq < tFreq) && (ge < a);) {
810                aFreq += mtfFreq[++ge];
811            }
812
813            if ((ge > gs) && (nPart != nGroups) && (nPart != 1)
814                && (((nGroups - nPart) & 1) != 0)) {
815                aFreq -= mtfFreq[ge--];
816            }
817
818            final byte[] len_np = len[nPart - 1];
819            for (int v = alphaSize; --v >= 0;) {
820                if ((v >= gs) && (v <= ge)) {
821                    len_np[v] = LESSER_ICOST;
822                } else {
823                    len_np[v] = GREATER_ICOST;
824                }
825            }
826
827            gs = ge + 1;
828            remF -= aFreq;
829        }
830    }
831
832    private int sendMTFValues1(final int nGroups, final int alphaSize) {
833        final Data dataShadow = this.data;
834        final int[][] rfreq = dataShadow.sendMTFValues_rfreq;
835        final int[] fave = dataShadow.sendMTFValues_fave;
836        final short[] cost = dataShadow.sendMTFValues_cost;
837        final char[] sfmap = dataShadow.sfmap;
838        final byte[] selector = dataShadow.selector;
839        final byte[][] len = dataShadow.sendMTFValues_len;
840        final byte[] len_0 = len[0];
841        final byte[] len_1 = len[1];
842        final byte[] len_2 = len[2];
843        final byte[] len_3 = len[3];
844        final byte[] len_4 = len[4];
845        final byte[] len_5 = len[5];
846        final int nMTFShadow = this.nMTF;
847
848        int nSelectors = 0;
849
850        for (int iter = 0; iter < N_ITERS; iter++) {
851            for (int t = nGroups; --t >= 0;) {
852                fave[t] = 0;
853                final int[] rfreqt = rfreq[t];
854                for (int i = alphaSize; --i >= 0;) {
855                    rfreqt[i] = 0;
856                }
857            }
858
859            nSelectors = 0;
860
861            for (int gs = 0; gs < this.nMTF;) {
862                /* Set group start & end marks. */
863
864                /*
865                 * Calculate the cost of this group as coded by each of the
866                 * coding tables.
867                 */
868
869                final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
870
871                final byte mask = (byte) 0xff;
872                if (nGroups == N_GROUPS) {
873                    // unrolled version of the else-block
874
875                    short cost0 = 0;
876                    short cost1 = 0;
877                    short cost2 = 0;
878                    short cost3 = 0;
879                    short cost4 = 0;
880                    short cost5 = 0;
881
882                    for (int i = gs; i <= ge; i++) {
883                        final int icv = sfmap[i];
884                        cost0 += (short) (len_0[icv] & mask);
885                        cost1 += (short) (len_1[icv] & mask);
886                        cost2 += (short) (len_2[icv] & mask);
887                        cost3 += (short) (len_3[icv] & mask);
888                        cost4 += (short) (len_4[icv] & mask);
889                        cost5 += (short) (len_5[icv] & mask);
890                    }
891
892                    cost[0] = cost0;
893                    cost[1] = cost1;
894                    cost[2] = cost2;
895                    cost[3] = cost3;
896                    cost[4] = cost4;
897                    cost[5] = cost5;
898
899                } else {
900                    for (int t = nGroups; --t >= 0;) {
901                        cost[t] = 0;
902                    }
903
904                    for (int i = gs; i <= ge; i++) {
905                        final int icv = sfmap[i];
906                        for (int t = nGroups; --t >= 0;) {
907                            cost[t] += (short) (len[t][icv] & mask);
908                        }
909                    }
910                }
911
912                /*
913                 * Find the coding table which is best for this group, and
914                 * record its identity in the selector table.
915                 */
916                int bt = -1;
917                for (int t = nGroups, bc = 999999999; --t >= 0;) {
918                    final int cost_t = cost[t];
919                    if (cost_t < bc) {
920                        bc = cost_t;
921                        bt = t;
922                    }
923                }
924
925                fave[bt]++;
926                selector[nSelectors] = (byte) bt;
927                nSelectors++;
928
929                /*
930                 * Increment the symbol frequencies for the selected table.
931                 */
932                final int[] rfreq_bt = rfreq[bt];
933                for (int i = gs; i <= ge; i++) {
934                    rfreq_bt[sfmap[i]]++;
935                }
936
937                gs = ge + 1;
938            }
939
940            /*
941             * Recompute the tables based on the accumulated frequencies.
942             */
943            for (int t = 0; t < nGroups; t++) {
944                hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20);
945            }
946        }
947
948        return nSelectors;
949    }
950
951    private void sendMTFValues2(final int nGroups, final int nSelectors) {
952        // assert (nGroups < 8) : nGroups;
953
954        final Data dataShadow = this.data;
955        final byte[] pos = dataShadow.sendMTFValues2_pos;
956
957        for (int i = nGroups; --i >= 0;) {
958            pos[i] = (byte) i;
959        }
960
961        for (int i = 0; i < nSelectors; i++) {
962            final byte ll_i = dataShadow.selector[i];
963            byte tmp = pos[0];
964            int j = 0;
965
966            while (ll_i != tmp) {
967                j++;
968                final byte tmp2 = tmp;
969                tmp = pos[j];
970                pos[j] = tmp2;
971            }
972
973            pos[0] = tmp;
974            dataShadow.selectorMtf[i] = (byte) j;
975        }
976    }
977
978    private void sendMTFValues3(final int nGroups, final int alphaSize) {
979        final int[][] code = this.data.sendMTFValues_code;
980        final byte[][] len = this.data.sendMTFValues_len;
981
982        for (int t = 0; t < nGroups; t++) {
983            int minLen = 32;
984            int maxLen = 0;
985            final byte[] len_t = len[t];
986            for (int i = alphaSize; --i >= 0;) {
987                final int l = len_t[i] & 0xff;
988                if (l > maxLen) {
989                    maxLen = l;
990                }
991                if (l < minLen) {
992                    minLen = l;
993                }
994            }
995
996            // assert (maxLen <= 20) : maxLen;
997            // assert (minLen >= 1) : minLen;
998
999            hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize);
1000        }
1001    }
1002
1003    private void sendMTFValues4() throws IOException {
1004        final boolean[] inUse = this.data.inUse;
1005        final boolean[] inUse16 = this.data.sentMTFValues4_inUse16;
1006
1007        for (int i = 16; --i >= 0;) {
1008            inUse16[i] = false;
1009            final int i16 = i * 16;
1010            for (int j = 16; --j >= 0;) {
1011                if (inUse[i16 + j]) {
1012                    inUse16[i] = true;
1013                    break;
1014                }
1015            }
1016        }
1017
1018        for (int i = 0; i < 16; i++) {
1019            bsW(1, inUse16[i] ? 1 : 0);
1020        }
1021
1022        final OutputStream outShadow = this.out;
1023        int bsLiveShadow = this.bsLive;
1024        int bsBuffShadow = this.bsBuff;
1025
1026        for (int i = 0; i < 16; i++) {
1027            if (inUse16[i]) {
1028                final int i16 = i * 16;
1029                for (int j = 0; j < 16; j++) {
1030                    // inlined: bsW(1, inUse[i16 + j] ? 1 : 0);
1031                    while (bsLiveShadow >= 8) {
1032                        outShadow.write(bsBuffShadow >> 24); // write 8-bit
1033                        bsBuffShadow <<= 8;
1034                        bsLiveShadow -= 8;
1035                    }
1036                    if (inUse[i16 + j]) {
1037                        bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
1038                    }
1039                    bsLiveShadow++;
1040                }
1041            }
1042        }
1043
1044        this.bsBuff = bsBuffShadow;
1045        this.bsLive = bsLiveShadow;
1046    }
1047
1048    private void sendMTFValues5(final int nGroups, final int nSelectors)
1049        throws IOException {
1050        bsW(3, nGroups);
1051        bsW(15, nSelectors);
1052
1053        final OutputStream outShadow = this.out;
1054        final byte[] selectorMtf = this.data.selectorMtf;
1055
1056        int bsLiveShadow = this.bsLive;
1057        int bsBuffShadow = this.bsBuff;
1058
1059        for (int i = 0; i < nSelectors; i++) {
1060            for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) {
1061                // inlined: bsW(1, 1);
1062                while (bsLiveShadow >= 8) {
1063                    outShadow.write(bsBuffShadow >> 24);
1064                    bsBuffShadow <<= 8;
1065                    bsLiveShadow -= 8;
1066                }
1067                bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
1068                bsLiveShadow++;
1069            }
1070
1071            // inlined: bsW(1, 0);
1072            while (bsLiveShadow >= 8) {
1073                outShadow.write(bsBuffShadow >> 24);
1074                bsBuffShadow <<= 8;
1075                bsLiveShadow -= 8;
1076            }
1077            // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1078            bsLiveShadow++;
1079        }
1080
1081        this.bsBuff = bsBuffShadow;
1082        this.bsLive = bsLiveShadow;
1083    }
1084
1085    private void sendMTFValues6(final int nGroups, final int alphaSize)
1086        throws IOException {
1087        final byte[][] len = this.data.sendMTFValues_len;
1088        final OutputStream outShadow = this.out;
1089
1090        int bsLiveShadow = this.bsLive;
1091        int bsBuffShadow = this.bsBuff;
1092
1093        for (int t = 0; t < nGroups; t++) {
1094            final byte[] len_t = len[t];
1095            int curr = len_t[0] & 0xff;
1096
1097            // inlined: bsW(5, curr);
1098            while (bsLiveShadow >= 8) {
1099                outShadow.write(bsBuffShadow >> 24); // write 8-bit
1100                bsBuffShadow <<= 8;
1101                bsLiveShadow -= 8;
1102            }
1103            bsBuffShadow |= curr << (32 - bsLiveShadow - 5);
1104            bsLiveShadow += 5;
1105
1106            for (int i = 0; i < alphaSize; i++) {
1107                final int lti = len_t[i] & 0xff;
1108                while (curr < lti) {
1109                    // inlined: bsW(2, 2);
1110                    while (bsLiveShadow >= 8) {
1111                        outShadow.write(bsBuffShadow >> 24); // write 8-bit
1112                        bsBuffShadow <<= 8;
1113                        bsLiveShadow -= 8;
1114                    }
1115                    bsBuffShadow |= 2 << (32 - bsLiveShadow - 2);
1116                    bsLiveShadow += 2;
1117
1118                    curr++; /* 10 */
1119                }
1120
1121                while (curr > lti) {
1122                    // inlined: bsW(2, 3);
1123                    while (bsLiveShadow >= 8) {
1124                        outShadow.write(bsBuffShadow >> 24); // write 8-bit
1125                        bsBuffShadow <<= 8;
1126                        bsLiveShadow -= 8;
1127                    }
1128                    bsBuffShadow |= 3 << (32 - bsLiveShadow - 2);
1129                    bsLiveShadow += 2;
1130
1131                    curr--; /* 11 */
1132                }
1133
1134                // inlined: bsW(1, 0);
1135                while (bsLiveShadow >= 8) {
1136                    outShadow.write(bsBuffShadow >> 24); // write 8-bit
1137                    bsBuffShadow <<= 8;
1138                    bsLiveShadow -= 8;
1139                }
1140                // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1141                bsLiveShadow++;
1142            }
1143        }
1144
1145        this.bsBuff = bsBuffShadow;
1146        this.bsLive = bsLiveShadow;
1147    }
1148
1149    private void sendMTFValues7() throws IOException {
1150        final Data dataShadow = this.data;
1151        final byte[][] len = dataShadow.sendMTFValues_len;
1152        final int[][] code = dataShadow.sendMTFValues_code;
1153        final OutputStream outShadow = this.out;
1154        final byte[] selector = dataShadow.selector;
1155        final char[] sfmap = dataShadow.sfmap;
1156        final int nMTFShadow = this.nMTF;
1157
1158        int selCtr = 0;
1159
1160        int bsLiveShadow = this.bsLive;
1161        int bsBuffShadow = this.bsBuff;
1162
1163        for (int gs = 0; gs < nMTFShadow;) {
1164            final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
1165            final int selector_selCtr = selector[selCtr] & 0xff;
1166            final int[] code_selCtr = code[selector_selCtr];
1167            final byte[] len_selCtr = len[selector_selCtr];
1168
1169            while (gs <= ge) {
1170                final int sfmap_i = sfmap[gs];
1171
1172                //
1173                // inlined: bsW(len_selCtr[sfmap_i] & 0xff,
1174                // code_selCtr[sfmap_i]);
1175                //
1176                while (bsLiveShadow >= 8) {
1177                    outShadow.write(bsBuffShadow >> 24);
1178                    bsBuffShadow <<= 8;
1179                    bsLiveShadow -= 8;
1180                }
1181                final int n = len_selCtr[sfmap_i] & 0xFF;
1182                bsBuffShadow |= code_selCtr[sfmap_i] << (32 - bsLiveShadow - n);
1183                bsLiveShadow += n;
1184
1185                gs++;
1186            }
1187
1188            gs = ge + 1;
1189            selCtr++;
1190        }
1191
1192        this.bsBuff = bsBuffShadow;
1193        this.bsLive = bsLiveShadow;
1194    }
1195
1196    @Override
1197    public void write(final byte[] buf, int offs, final int len)
1198        throws IOException {
1199        if (offs < 0) {
1200            throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
1201        }
1202        if (len < 0) {
1203            throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
1204        }
1205        if (offs + len > buf.length) {
1206            throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
1207                                                + len + ") > buf.length("
1208                                                + buf.length + ").");
1209        }
1210        if (closed) {
1211            throw new IOException("Stream closed");
1212        }
1213
1214        for (final int hi = offs + len; offs < hi;) {
1215            write0(buf[offs++]);
1216        }
1217    }
1218
1219    @Override
1220    public void write(final int b) throws IOException {
1221        if (closed) {
1222            throw new IOException("Closed");
1223        }
1224        write0(b);
1225    }
1226
1227    /**
1228     * Keeps track of the last bytes written and implicitly performs
1229     * run-length encoding as the first step of the bzip2 algorithm.
1230     */
1231    private void write0(int b) throws IOException {
1232        if (this.currentChar != -1) {
1233            b &= 0xff;
1234            if (this.currentChar == b) {
1235                if (++this.runLength > 254) {
1236                    writeRun();
1237                    this.currentChar = -1;
1238                    this.runLength = 0;
1239                }
1240                // else nothing to do
1241            } else {
1242                writeRun();
1243                this.runLength = 1;
1244                this.currentChar = b;
1245            }
1246        } else {
1247            this.currentChar = b & 0xff;
1248            this.runLength++;
1249        }
1250    }
1251
1252    /**
1253     * Writes the current byte to the buffer, run-length encoding it
1254     * if it has been repeated at least four times (the first step
1255     * RLEs sequences of four identical bytes).
1256     *
1257     * <p>Flushes the current block before writing data if it is
1258     * full.</p>
1259     *
1260     * <p>"write to the buffer" means adding to data.buffer starting
1261     * two steps "after" this.last - initially starting at index 1
1262     * (not 0) - and updating this.last to point to the last index
1263     * written minus 1.</p>
1264     */
1265    private void writeRun() throws IOException {
1266        final int lastShadow = this.last;
1267
1268        if (lastShadow < this.allowableBlockSize) {
1269            final int currentCharShadow = this.currentChar;
1270            final Data dataShadow = this.data;
1271            dataShadow.inUse[currentCharShadow] = true;
1272            final byte ch = (byte) currentCharShadow;
1273
1274            int runLengthShadow = this.runLength;
1275            this.crc.updateCRC(currentCharShadow, runLengthShadow);
1276
1277            switch (runLengthShadow) {
1278            case 1:
1279                dataShadow.block[lastShadow + 2] = ch;
1280                this.last = lastShadow + 1;
1281                break;
1282
1283            case 2:
1284                dataShadow.block[lastShadow + 2] = ch;
1285                dataShadow.block[lastShadow + 3] = ch;
1286                this.last = lastShadow + 2;
1287                break;
1288
1289            case 3: {
1290                final byte[] block = dataShadow.block;
1291                block[lastShadow + 2] = ch;
1292                block[lastShadow + 3] = ch;
1293                block[lastShadow + 4] = ch;
1294                this.last = lastShadow + 3;
1295            }
1296                break;
1297
1298            default: {
1299                runLengthShadow -= 4;
1300                dataShadow.inUse[runLengthShadow] = true;
1301                final byte[] block = dataShadow.block;
1302                block[lastShadow + 2] = ch;
1303                block[lastShadow + 3] = ch;
1304                block[lastShadow + 4] = ch;
1305                block[lastShadow + 5] = ch;
1306                block[lastShadow + 6] = (byte) runLengthShadow;
1307                this.last = lastShadow + 5;
1308            }
1309                break;
1310
1311            }
1312        } else {
1313            endBlock();
1314            initBlock();
1315            writeRun();
1316        }
1317    }
1318
1319}