001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors;
020
021import java.util.Collections;
022import java.util.HashMap;
023import java.util.Locale;
024import java.util.Map;
025
/**
 * File name mapping code for the compression formats.
 * <p>
 * Instances are immutable once constructed: the suffix mappings supplied to
 * the constructor are copied, so later changes to the caller's map do not
 * affect this object.
 * </p>
 * @ThreadSafe
 * @since 1.4
 */
public class FileNameUtil {

    /**
     * Map from common file name suffixes to the suffixes that identify compressed
     * versions of those file types. For example: from ".tar" to ".tgz".
     * <p>
     * Populated only by the constructor and never modified afterwards.
     */
    private final Map<String, String> compressSuffix =
        new HashMap<>();

    /**
     * Map from common file name suffixes of compressed files to the
     * corresponding suffixes of uncompressed files. For example: from
     * ".tgz" to ".tar".
     * <p>
     * This map also contains format-specific suffixes like ".gz" and "-z".
     * These suffixes are mapped to the empty string, as they should simply
     * be removed from the file name when the file is uncompressed.
     * <p>
     * Unmodifiable private copy of the map passed to the constructor.
     */
    private final Map<String, String> uncompressSuffix;

    /**
     * Length of the longest compressed suffix.
     */
    private final int longestCompressedSuffix;

    /**
     * Length of the shortest compressed suffix.
     */
    private final int shortestCompressedSuffix;

    /**
     * Length of the longest uncompressed suffix.
     */
    private final int longestUncompressedSuffix;

    /**
     * Length of the shortest uncompressed suffix longer than the
     * empty string.
     */
    private final int shortestUncompressedSuffix;

    /**
     * The format's default extension.
     */
    private final String defaultExtension;

    /**
     * Sets up the utility with a map of known compressed to
     * uncompressed suffix mappings and the default extension of the
     * format.
     * <p>
     * The entries of the given map are copied; modifying the map after
     * this constructor returns has no effect on the created instance.
     * When several compressed suffixes map to the same non-empty
     * uncompressed suffix, the one encountered first while iterating the
     * given map is used for the reverse (uncompressed to compressed)
     * mapping.
     * </p>
     *
     * @param uncompressSuffix Map from common file name suffixes of
     * compressed files to the corresponding suffixes of uncompressed
     * files. For example: from ".tgz" to ".tar".  This map also
     * contains format-specific suffixes like ".gz" and "-z".  These
     * suffixes are mapped to the empty string, as they should simply
     * be removed from the file name when the file is uncompressed.
     *
     * @param defaultExtension the format's default extension like ".gz"
     */
    public FileNameUtil(final Map<String, String> uncompressSuffix,
                        final String defaultExtension) {
        // Private snapshot of the caller's mappings. It is filled in the same
        // pass that derives the suffix lengths, so the lookups and the cached
        // lengths always describe the same set of entries.
        final Map<String, String> snapshot = new HashMap<>();
        int lc = Integer.MIN_VALUE, sc = Integer.MAX_VALUE;
        int lu = Integer.MIN_VALUE, su = Integer.MAX_VALUE;
        // Iterate the caller's map (not the snapshot) so that its iteration
        // order decides which compressed suffix wins the reverse mapping.
        for (final Map.Entry<String, String> ent : uncompressSuffix.entrySet()) {
            final String compressed = ent.getKey();
            final String uncompressed = ent.getValue();
            snapshot.put(compressed, uncompressed);

            final int cl = compressed.length();
            if (cl > lc) {
                lc = cl;
            }
            if (cl < sc) {
                sc = cl;
            }

            final int ul = uncompressed.length();
            // Empty uncompressed suffixes mean "just strip the suffix" and
            // therefore have no reverse mapping.
            if (ul > 0) {
                compressSuffix.putIfAbsent(uncompressed, compressed);
                if (ul > lu) {
                    lu = ul;
                }
                if (ul < su) {
                    su = ul;
                }
            }
        }
        this.uncompressSuffix = Collections.unmodifiableMap(snapshot);
        longestCompressedSuffix = lc;
        longestUncompressedSuffix = lu;
        shortestCompressedSuffix = sc;
        shortestUncompressedSuffix = su;
        this.defaultExtension = defaultExtension;
    }

    /**
     * Maps the given file name to the name that the file should have after
     * compression. Common file types with custom suffixes for
     * compressed versions are automatically detected and correctly mapped.
     * For example the name "package.tar" is mapped to "package.tgz". If no
     * custom mapping is applicable, then the format's default extension is
     * appended to the file name.
     * <p>
     * Suffix matching is case-insensitive. A name that consists of nothing
     * but a known suffix is not treated as a match.
     * </p>
     *
     * @param fileName name of a file
     * @return name of the corresponding compressed file
     */
    public String getCompressedFilename(final String fileName) {
        final String lower = fileName.toLowerCase(Locale.ENGLISH);
        final int n = lower.length();
        // Only try suffix lengths that can occur in the mapping; "i < n"
        // keeps at least one character of the base name.
        for (int i = shortestUncompressedSuffix;
             i <= longestUncompressedSuffix && i < n; i++) {
            final String suffix = compressSuffix.get(lower.substring(n - i));
            if (suffix != null) {
                return fileName.substring(0, n - i) + suffix;
            }
        }
        // No custom suffix found, just append the default
        return fileName + defaultExtension;
    }

    /**
     * Maps the given name of a compressed file to the name that the
     * file should have after uncompression. Commonly used file type specific
     * suffixes like ".tgz" or ".svgz" are automatically detected and
     * correctly mapped. For example the name "package.tgz" is mapped to
     * "package.tar". And any file name with a generic format suffix such as
     * ".gz" is mapped to a name without that suffix. If no format suffix is
     * detected, then the file name is returned unmapped.
     * <p>
     * Suffix matching is case-insensitive. A name that consists of nothing
     * but a known suffix is returned unchanged.
     * </p>
     *
     * @param fileName name of a file
     * @return name of the corresponding uncompressed file
     */
    public String getUncompressedFilename(final String fileName) {
        final String lower = fileName.toLowerCase(Locale.ENGLISH);
        final int n = lower.length();
        for (int i = shortestCompressedSuffix;
             i <= longestCompressedSuffix && i < n; i++) {
            final String suffix = uncompressSuffix.get(lower.substring(n - i));
            if (suffix != null) {
                return fileName.substring(0, n - i) + suffix;
            }
        }
        return fileName;
    }

    /**
     * Detects common format suffixes in the given file name.
     * <p>
     * Suffix matching is case-insensitive. A name that consists of nothing
     * but a known suffix is not considered compressed.
     * </p>
     *
     * @param fileName name of a file
     * @return {@code true} if the file name has a common format suffix,
     *         {@code false} otherwise
     */
    public boolean isCompressedFilename(final String fileName) {
        final String lower = fileName.toLowerCase(Locale.ENGLISH);
        final int n = lower.length();
        for (int i = shortestCompressedSuffix;
             i <= longestCompressedSuffix && i < n; i++) {
            if (uncompressSuffix.containsKey(lower.substring(n - i))) {
                return true;
            }
        }
        return false;
    }
}