001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.lzw;
020
021import java.io.IOException;
022import java.io.InputStream;
023import java.nio.ByteOrder;
024
025import org.apache.commons.compress.MemoryLimitException;
026import org.apache.commons.compress.compressors.CompressorInputStream;
027import org.apache.commons.compress.utils.BitInputStream;
028import org.apache.commons.compress.utils.InputStreamStatistics;
029
030/**
031 * <p>Generic LZW implementation. It is used internally for
032 * the Z decompressor and the Unshrinking Zip file compression method,
033 * but may be useful for third-party projects in implementing their own LZW variations.</p>
034 *
035 * @NotThreadSafe
036 * @since 1.10
037 */
038public abstract class LZWInputStream extends CompressorInputStream implements InputStreamStatistics {
039    protected static final int DEFAULT_CODE_SIZE = 9;
040    protected static final int UNUSED_PREFIX = -1;
041
042    private final byte[] oneByte = new byte[1];
043
044    protected final BitInputStream in;
045    private int clearCode = -1;
046    private int codeSize = DEFAULT_CODE_SIZE;
047    private byte previousCodeFirstChar;
048    private int previousCode = UNUSED_PREFIX;
049    private int tableSize;
050    private int[] prefixes;
051    private byte[] characters;
052    private byte[] outputStack;
053    private int outputStackLocation;
054
055    protected LZWInputStream(final InputStream inputStream, final ByteOrder byteOrder) {
056        this.in = new BitInputStream(inputStream, byteOrder);
057    }
058
059    /**
060     * Add a new entry to the dictionary.
061     * @param previousCode the previous code
062     * @param character the next character to append
063     * @return the new code
064     * @throws IOException on error
065     */
066    protected abstract int addEntry(int previousCode, byte character)
067        throws IOException;
068
069    /**
070     * Adds a new entry if the maximum table size hasn't been exceeded
071     * and returns the new index.
072     * @param previousCode the previous code
073     * @param character the character to append
074     * @param maxTableSize the maximum table size
075     * @return the new code or -1 if maxTableSize has been reached already
076     */
077    protected int addEntry(final int previousCode, final byte character, final int maxTableSize) {
078        if (tableSize < maxTableSize) {
079            prefixes[tableSize] = previousCode;
080            characters[tableSize] = character;
081            return tableSize++;
082        }
083        return -1;
084    }
085
086    /**
087     * Add entry for repeat of previousCode we haven't added, yet.
088     * @return new code for a repeat of the previous code or -1 if
089     * maxTableSize has been reached already
090     * @throws IOException on error
091     */
092    protected int addRepeatOfPreviousCode() throws IOException {
093        if (previousCode == -1) {
094            // can't have a repeat for the very first code
095            throw new IOException("The first code can't be a reference to its preceding code");
096        }
097        return addEntry(previousCode, previousCodeFirstChar);
098    }
099
100    @Override
101    public void close() throws IOException {
102        in.close();
103    }
104
105    /**
106     * Read the next code and expand it.
107     * @return the expanded next code, negative on EOF
108     * @throws IOException on error
109     */
110    protected abstract int decompressNextSymbol() throws IOException;
111
112    /**
113     * Expands the entry with index code to the output stack and may
114     * create a new entry
115     * @param code the code
116     * @param addedUnfinishedEntry whether unfinished entries have been added
117     * @return the new location of the output stack
118     * @throws IOException on error
119     */
120    protected int expandCodeToOutputStack(final int code, final boolean addedUnfinishedEntry)
121        throws IOException {
122        for (int entry = code; entry >= 0; entry = prefixes[entry]) {
123            outputStack[--outputStackLocation] = characters[entry];
124        }
125        if (previousCode != -1 && !addedUnfinishedEntry) {
126            addEntry(previousCode, outputStack[outputStackLocation]);
127        }
128        previousCode = code;
129        previousCodeFirstChar = outputStack[outputStackLocation];
130        return outputStackLocation;
131    }
132
133    protected int getClearCode() {
134        return clearCode;
135    }
136
137    protected int getCodeSize() {
138        return codeSize;
139    }
140
141    /**
142     * @since 1.17
143     */
144    @Override
145    public long getCompressedCount() {
146        return in.getBytesRead();
147    }
148
149    protected int getPrefix(final int offset) {
150        return prefixes[offset];
151    }
152
153    protected int getPrefixesLength() {
154        return prefixes.length;
155    }
156
157    protected int getTableSize() {
158        return tableSize;
159    }
160
161    protected void incrementCodeSize() {
162        codeSize++;
163    }
164
165    /**
166     * Initializes the arrays based on the maximum code size.
167     *
168     * @param maxCodeSize maximum code size
169     * @throws IllegalArgumentException if {@code maxCodeSize} is out of bounds for {@code prefixes} and {@code characters}.
170     */
171    protected void initializeTables(final int maxCodeSize) {
172        // maxCodeSize shifted cannot be less than 256, otherwise the loop in initializeTables() will throw an ArrayIndexOutOfBoundsException
173        // maxCodeSize cannot be smaller than getCodeSize(), otherwise addEntry() will throw an ArrayIndexOutOfBoundsException
174        if (1 << maxCodeSize < 256 || getCodeSize() > maxCodeSize) {
175            // TODO test against prefixes.length and characters.length?
176            throw new IllegalArgumentException("maxCodeSize " + maxCodeSize + " is out of bounds.");
177        }
178        final int maxTableSize = 1 << maxCodeSize;
179        prefixes = new int[maxTableSize];
180        characters = new byte[maxTableSize];
181        outputStack = new byte[maxTableSize];
182        outputStackLocation = maxTableSize;
183        final int max = 1 << 8;
184        for (int i = 0; i < max; i++) {
185            prefixes[i] = -1;
186            characters[i] = (byte) i;
187        }
188    }
189
190    /**
191     * Initializes the arrays based on the maximum code size.
192     * First checks that the estimated memory usage is below memoryLimitInKb
193     *
194     * @param maxCodeSize maximum code size
195     * @param memoryLimitInKb maximum allowed estimated memory usage in Kb
196     * @throws MemoryLimitException if estimated memory usage is greater than memoryLimitInKb
197     * @throws IllegalArgumentException if {@code maxCodeSize} is not bigger than 0
198     */
199    protected void initializeTables(final int maxCodeSize, final int memoryLimitInKb)
200            throws MemoryLimitException {
201        if (maxCodeSize <= 0) {
202            throw new IllegalArgumentException("maxCodeSize is " + maxCodeSize
203                + ", must be bigger than 0");
204        }
205
206        if (memoryLimitInKb > -1) {
207            final int maxTableSize = 1 << maxCodeSize;
208            //account for potential overflow
209            final long memoryUsageInBytes = (long) maxTableSize * 6; //(4 (prefixes) + 1 (characters) +1 (outputStack))
210            final long memoryUsageInKb = memoryUsageInBytes >> 10;
211
212            if (memoryUsageInKb > memoryLimitInKb) {
213                throw new MemoryLimitException(memoryUsageInKb, memoryLimitInKb);
214            }
215        }
216        initializeTables(maxCodeSize);
217    }
218
219    @Override
220    public int read() throws IOException {
221        final int ret = read(oneByte);
222        if (ret < 0) {
223            return ret;
224        }
225        return 0xff & oneByte[0];
226    }
227
228    @Override
229    public int read(final byte[] b, final int off, final int len) throws IOException {
230        if (len == 0) {
231            return 0;
232        }
233        int bytesRead = readFromStack(b, off, len);
234        while (len - bytesRead > 0) {
235            final int result = decompressNextSymbol();
236            if (result < 0) {
237                if (bytesRead > 0) {
238                    count(bytesRead);
239                    return bytesRead;
240                }
241                return result;
242            }
243            bytesRead += readFromStack(b, off + bytesRead, len - bytesRead);
244        }
245        count(bytesRead);
246        return bytesRead;
247    }
248
249    private int readFromStack(final byte[] b, final int off, final int len) {
250        final int remainingInStack = outputStack.length - outputStackLocation;
251        if (remainingInStack > 0) {
252            final int maxLength = Math.min(remainingInStack, len);
253            System.arraycopy(outputStack, outputStackLocation, b, off, maxLength);
254            outputStackLocation += maxLength;
255            return maxLength;
256        }
257        return 0;
258    }
259
260    /**
261     * Reads the next code from the stream.
262     * @return the next code
263     * @throws IOException on error
264     */
265    protected int readNextCode() throws IOException {
266        if (codeSize > 31) {
267            throw new IllegalArgumentException("Code size must not be bigger than 31");
268        }
269        return (int) in.readBits(codeSize);
270    }
271
272    protected void resetCodeSize() {
273        setCodeSize(DEFAULT_CODE_SIZE);
274    }
275
276    protected void resetPreviousCode() {
277        this.previousCode = -1;
278    }
279
280    /**
281     * Sets the clear code based on the code size.
282     * @param codeSize code size
283     */
284    protected void setClearCode(final int codeSize) {
285        clearCode = 1 << codeSize - 1;
286    }
287
288    protected void setCodeSize(final int cs) {
289        this.codeSize = cs;
290    }
291
292    protected void setPrefix(final int offset, final int value) {
293        prefixes[offset] = value;
294    }
295
296    protected void setTableSize(final int newSize) {
297        tableSize = newSize;
298    }
299
300}