001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.output;
018
019import java.io.IOException;
020import java.io.OutputStream;
021import java.io.Writer;
022import java.nio.ByteBuffer;
023import java.nio.CharBuffer;
024import java.nio.charset.Charset;
025import java.nio.charset.CharsetDecoder;
026import java.nio.charset.CoderResult;
027import java.nio.charset.CodingErrorAction;
028import java.nio.charset.StandardCharsets;
029
030import org.apache.commons.io.Charsets;
031import org.apache.commons.io.IOUtils;
032import org.apache.commons.io.build.AbstractStreamBuilder;
033import org.apache.commons.io.charset.CharsetDecoders;
034
035/**
036 * {@link OutputStream} implementation that transforms a byte stream to a character stream using a specified charset encoding and writes the resulting stream to
037 * a {@link Writer}. The stream is transformed using a {@link CharsetDecoder} object, guaranteeing that all charset encodings supported by the JRE are handled
038 * correctly.
039 * <p>
040 * The output of the {@link CharsetDecoder} is buffered using a fixed size buffer. This implies that the data is written to the underlying {@link Writer} in
041 * chunks that are no larger than the size of this buffer. By default, the buffer is flushed only when it overflows or when {@link #flush()} or {@link #close()}
042 * is called. In general there is therefore no need to wrap the underlying {@link Writer} in a {@link java.io.BufferedWriter}. {@link WriterOutputStream} can
043 * also be instructed to flush the buffer after each write operation. In this case, all available data is written immediately to the underlying {@link Writer},
044 * implying that the current position of the {@link Writer} is correlated to the current position of the {@link WriterOutputStream}.
045 * </p>
046 * <p>
047 * {@link WriterOutputStream} implements the inverse transformation of {@link java.io.OutputStreamWriter}; in the following example, writing to {@code out2}
048 * would have the same result as writing to {@code out} directly (provided that the byte sequence is legal with respect to the charset encoding):
049 * </p>
050 * <p>
051 * To build an instance, see {@link Builder}.
052 * </p>
053 * <pre>
054 * OutputStream out = ...
055 * Charset cs = ...
056 * OutputStreamWriter writer = new OutputStreamWriter(out, cs);
057 * WriterOutputStream out2 = WriterOutputStream.builder()
058 *   .setWriter(writer)
059 *   .setCharset(cs)
060 *   .get();
061 * </pre>
062 * <p>
063 * {@link WriterOutputStream} implements the same transformation as {@link java.io.InputStreamReader}, except that the control flow is reversed: both classes
064 * transform a byte stream into a character stream, but {@link java.io.InputStreamReader} pulls data from the underlying stream, while
065 * {@link WriterOutputStream} pushes it to the underlying stream.
066 * </p>
067 * <p>
068 * Note that while there are use cases where there is no alternative to using this class, very often the need to use this class is an indication of a flaw in
069 * the design of the code. This class is typically used in situations where an existing API only accepts an {@link OutputStream} object, but where the stream is
070 * known to represent character data that must be decoded for further use.
071 * </p>
072 * <p>
073 * Instances of {@link WriterOutputStream} are not thread safe.
074 * </p>
075 *
076 * @see org.apache.commons.io.input.ReaderInputStream
077 * @since 2.0
078 */
079public class WriterOutputStream extends OutputStream {
080
081    /**
082     * Builds a new {@link WriterOutputStream} instance.
083     * <p>
084     * For example:
085     * </p>
086     * <pre>{@code
087     * WriterOutputStream s = WriterOutputStream.builder()
088     *   .setPath(path)
089     *   .setBufferSize(8192)
090     *   .setCharset(StandardCharsets.UTF_8)
091     *   .setWriteImmediately(false)
092     *   .get();}
093     * </pre>
094     *
095     * @since 2.12.0
096     */
097    public static class Builder extends AbstractStreamBuilder<WriterOutputStream, Builder> {
098
099        private CharsetDecoder charsetDecoder;
100        private boolean writeImmediately;
101
102        /**
103         * Constructs a new Builder.
104         */
105        public Builder() {
106            this.charsetDecoder = getCharset().newDecoder();
107        }
108
109        /**
110         * Constructs a new instance.
111         * <p>
112         * This builder use the aspect Writer, OpenOption[], Charset, CharsetDecoder, buffer size and writeImmediately.
113         * </p>
114         * <p>
115         * You must provide an origin that can be converted to a Writer by this builder, otherwise, this call will throw an
116         * {@link UnsupportedOperationException}.
117         * </p>
118         *
119         * @return a new instance.
120         * @throws UnsupportedOperationException if the origin cannot provide a Writer.
121         * @see #getWriter()
122         */
123        @SuppressWarnings("resource")
124        @Override
125        public WriterOutputStream get() throws IOException {
126            return new WriterOutputStream(getWriter(), charsetDecoder, getBufferSize(), writeImmediately);
127        }
128
129        @Override
130        public Builder setCharset(final Charset charset) {
131            super.setCharset(charset);
132            this.charsetDecoder = getCharset().newDecoder();
133            return this;
134        }
135
136        @Override
137        public Builder setCharset(final String charset) {
138            super.setCharset(charset);
139            this.charsetDecoder = getCharset().newDecoder();
140            return this;
141        }
142
143        /**
144         * Sets the charset decoder.
145         *
146         * @param charsetDecoder the charset decoder.
147         * @return this
148         */
149        public Builder setCharsetDecoder(final CharsetDecoder charsetDecoder) {
150            this.charsetDecoder = charsetDecoder != null ? charsetDecoder : getCharsetDefault().newDecoder();
151            super.setCharset(this.charsetDecoder.charset());
152            return this;
153        }
154
155        /**
156         * Sets whether the output buffer will be flushed after each write operation ({@code true}), i.e. all available data will be written to the underlying
157         * {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()}
158         * is called.
159         *
160         * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, i.e. all available data will be written to the
161         *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
162         *                         {@link #flush()} or {@link #close()} is called.
163         * @return this
164         */
165        public Builder setWriteImmediately(final boolean writeImmediately) {
166            this.writeImmediately = writeImmediately;
167            return this;
168        }
169
170    }
171
172    private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;
173
174    /**
175     * Constructs a new {@link Builder}.
176     *
177     * @return a new {@link Builder}.
178     * @since 2.12.0
179     */
180    public static Builder builder() {
181        return new Builder();
182    }
183
184    /**
185     * Checks if the JDK in use properly supports the given charset.
186     *
187     * @param charset the charset to check the support for
188     */
189    private static void checkIbmJdkWithBrokenUTF16(final Charset charset) {
190        if (!StandardCharsets.UTF_16.name().equals(charset.name())) {
191            return;
192        }
193        final String TEST_STRING_2 = "v\u00e9s";
194        final byte[] bytes = TEST_STRING_2.getBytes(charset);
195
196        final CharsetDecoder charsetDecoder2 = charset.newDecoder();
197        final ByteBuffer bb2 = ByteBuffer.allocate(16);
198        final CharBuffer cb2 = CharBuffer.allocate(TEST_STRING_2.length());
199        final int len = bytes.length;
200        for (int i = 0; i < len; i++) {
201            bb2.put(bytes[i]);
202            bb2.flip();
203            try {
204                charsetDecoder2.decode(bb2, cb2, i == len - 1);
205            } catch (final IllegalArgumentException e) {
206                throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
207                        + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream");
208            }
209            bb2.compact();
210        }
211        cb2.rewind();
212        if (!TEST_STRING_2.equals(cb2.toString())) {
213            throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
214                    + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream");
215        }
216
217    }
218
219    private final Writer writer;
220    private final CharsetDecoder decoder;
221
222    private final boolean writeImmediately;
223
224    /**
225     * ByteBuffer used as input for the decoder. This buffer can be small as it is used only to transfer the received data to the decoder.
226     */
227    private final ByteBuffer decoderIn = ByteBuffer.allocate(128);
228
229    /**
230     * CharBuffer used as output for the decoder. It should be somewhat larger as we write from this buffer to the underlying Writer.
231     */
232    private final CharBuffer decoderOut;
233
234    /**
235     * Constructs a new {@link WriterOutputStream} that uses the default character encoding and with a default output buffer size of {@value #BUFFER_SIZE}
236     * characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()} is called.
237     *
238     * @param writer the target {@link Writer}
239     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
240     */
241    @Deprecated
242    public WriterOutputStream(final Writer writer) {
243        this(writer, Charset.defaultCharset(), BUFFER_SIZE, false);
244    }
245
246    /**
247     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
248     * when it overflows or when {@link #flush()} or {@link #close()} is called.
249     *
250     * @param writer  the target {@link Writer}
251     * @param charset the charset encoding
252     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
253     */
254    @Deprecated
255    public WriterOutputStream(final Writer writer, final Charset charset) {
256        this(writer, charset, BUFFER_SIZE, false);
257    }
258
259    /**
260     * Constructs a new {@link WriterOutputStream}.
261     *
262     * @param writer           the target {@link Writer}
263     * @param charset          the charset encoding
264     * @param bufferSize       the size of the output buffer in number of characters
265     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, i.e. all available data will be written to the
266     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
267     *                         {@link #flush()} or {@link #close()} is called.
268     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
269     */
270    @Deprecated
271    public WriterOutputStream(final Writer writer, final Charset charset, final int bufferSize, final boolean writeImmediately) {
272        // @formatter:off
273        this(writer,
274            Charsets.toCharset(charset).newDecoder()
275                    .onMalformedInput(CodingErrorAction.REPLACE)
276                    .onUnmappableCharacter(CodingErrorAction.REPLACE)
277                    .replaceWith("?"),
278             bufferSize,
279             writeImmediately);
280        // @formatter:on
281    }
282
283    /**
284     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
285     * when it overflows or when {@link #flush()} or {@link #close()} is called.
286     *
287     * @param writer  the target {@link Writer}
288     * @param decoder the charset decoder
289     * @since 2.1
290     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
291     */
292    @Deprecated
293    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder) {
294        this(writer, decoder, BUFFER_SIZE, false);
295    }
296
297    /**
298     * Constructs a new {@link WriterOutputStream}.
299     *
300     * @param writer           the target {@link Writer}
301     * @param decoder          the charset decoder
302     * @param bufferSize       the size of the output buffer in number of characters
303     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, i.e. all available data will be written to the
304     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
305     *                         {@link #flush()} or {@link #close()} is called.
306     * @since 2.1
307     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
308     */
309    @Deprecated
310    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder, final int bufferSize, final boolean writeImmediately) {
311        checkIbmJdkWithBrokenUTF16(CharsetDecoders.toCharsetDecoder(decoder).charset());
312        this.writer = writer;
313        this.decoder = CharsetDecoders.toCharsetDecoder(decoder);
314        this.writeImmediately = writeImmediately;
315        this.decoderOut = CharBuffer.allocate(bufferSize);
316    }
317
318    /**
319     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
320     * when it overflows or when {@link #flush()} or {@link #close()} is called.
321     *
322     * @param writer      the target {@link Writer}
323     * @param charsetName the name of the charset encoding
324     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
325     */
326    @Deprecated
327    public WriterOutputStream(final Writer writer, final String charsetName) {
328        this(writer, charsetName, BUFFER_SIZE, false);
329    }
330
331    /**
332     * Constructs a new {@link WriterOutputStream}.
333     *
334     * @param writer           the target {@link Writer}
335     * @param charsetName      the name of the charset encoding
336     * @param bufferSize       the size of the output buffer in number of characters
337     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, i.e. all available data will be written to the
338     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
339     *                         {@link #flush()} or {@link #close()} is called.
340     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
341     */
342    @Deprecated
343    public WriterOutputStream(final Writer writer, final String charsetName, final int bufferSize, final boolean writeImmediately) {
344        this(writer, Charsets.toCharset(charsetName), bufferSize, writeImmediately);
345    }
346
347    /**
348     * Close the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
349     * {@link Writer#close()} will be called.
350     *
351     * @throws IOException if an I/O error occurs.
352     */
353    @Override
354    public void close() throws IOException {
355        processInput(true);
356        flushOutput();
357        writer.close();
358    }
359
360    /**
361     * Flush the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
362     * {@link Writer#flush()} will be called.
363     *
364     * @throws IOException if an I/O error occurs.
365     */
366    @Override
367    public void flush() throws IOException {
368        flushOutput();
369        writer.flush();
370    }
371
372    /**
373     * Flush the output.
374     *
375     * @throws IOException if an I/O error occurs.
376     */
377    private void flushOutput() throws IOException {
378        if (decoderOut.position() > 0) {
379            writer.write(decoderOut.array(), 0, decoderOut.position());
380            decoderOut.rewind();
381        }
382    }
383
384    /**
385     * Decode the contents of the input ByteBuffer into a CharBuffer.
386     *
387     * @param endOfInput indicates end of input
388     * @throws IOException if an I/O error occurs.
389     */
390    private void processInput(final boolean endOfInput) throws IOException {
391        // Prepare decoderIn for reading
392        decoderIn.flip();
393        CoderResult coderResult;
394        while (true) {
395            coderResult = decoder.decode(decoderIn, decoderOut, endOfInput);
396            if (coderResult.isOverflow()) {
397                flushOutput();
398            } else if (coderResult.isUnderflow()) {
399                break;
400            } else {
401                // The decoder is configured to replace malformed input and unmappable characters,
402                // so we should not get here.
403                throw new IOException("Unexpected coder result");
404            }
405        }
406        // Discard the bytes that have been read
407        decoderIn.compact();
408    }
409
410    /**
411     * Write bytes from the specified byte array to the stream.
412     *
413     * @param b the byte array containing the bytes to write
414     * @throws IOException if an I/O error occurs.
415     */
416    @Override
417    public void write(final byte[] b) throws IOException {
418        write(b, 0, b.length);
419    }
420
421    /**
422     * Write bytes from the specified byte array to the stream.
423     *
424     * @param b   the byte array containing the bytes to write
425     * @param off the start offset in the byte array
426     * @param len the number of bytes to write
427     * @throws IOException if an I/O error occurs.
428     */
429    @Override
430    public void write(final byte[] b, int off, int len) throws IOException {
431        while (len > 0) {
432            final int c = Math.min(len, decoderIn.remaining());
433            decoderIn.put(b, off, c);
434            processInput(false);
435            len -= c;
436            off += c;
437        }
438        if (writeImmediately) {
439            flushOutput();
440        }
441    }
442
443    /**
444     * Write a single byte to the stream.
445     *
446     * @param b the byte to write
447     * @throws IOException if an I/O error occurs.
448     */
449    @Override
450    public void write(final int b) throws IOException {
451        write(new byte[] { (byte) b }, 0, 1);
452    }
453}