Javolution 6.0.0 java
UTF8StreamReader.java
Go to the documentation of this file.
1 /*
2  * Javolution - Java(TM) Solution for Real-Time and Embedded Systems
3  * Copyright (C) 2012 - Javolution (http://javolution.org/)
4  * All rights reserved.
5  *
6  * Permission to use, copy, modify, and distribute this software is
7  * freely granted, provided that this notice is preserved.
8  */
9 package javolution.io;
10 
11 import java.io.CharConversionException;
12 import java.io.IOException;
13 import java.io.InputStream;
14 import java.io.Reader;
15 
/**
 * A UTF-8 stream reader that decodes bytes from an {@link InputStream}
 * through an internal byte buffer.
 *
 * <p>An instance can be reused: after {@link #close()} or {@link #reset()}
 * a new stream may be attached with {@link #setInput(InputStream)}.</p>
 *
 * <p>Decoding accepts the historical 1- to 6-byte UTF-8 forms. The bulk
 * read methods convert code points above U+FFFF to surrogate pairs and
 * reject code points above U+10FFFF with a
 * {@link CharConversionException}.</p>
 *
 * <p>Instances hold unsynchronized mutable decoding state.</p>
 */
public final class UTF8StreamReader extends Reader {

    /** The stream being decoded, or {@code null} when closed/reset. */
    private InputStream _inputStream;

    /** Index of the next unread byte in {@link #_bytes}. */
    private int _start;

    /** End (exclusive) of valid data in {@link #_bytes}; &lt;= 0 after EOF. */
    private int _end;

    /** The byte buffer filled from the input stream. */
    private final byte[] _bytes;

    /** Code point being assembled by {@link #read2()}. */
    private int _code;

    /** Number of continuation bytes still expected for {@link #_code}. */
    private int _moreBytes;

    /**
     * Low surrogate that did not fit in the caller's array during the last
     * {@link #read(char[], int, int)}; {@code 0} when there is none (a low
     * surrogate is never zero).
     */
    private int _pendingLowSurrogate;

    /**
     * Creates a UTF-8 reader with a byte buffer of 2048 bytes.
     */
    public UTF8StreamReader() {
        _bytes = new byte[2048];
    }

    /**
     * Creates a UTF-8 reader with a byte buffer of the specified size.
     *
     * @param capacity the size in bytes of the internal buffer; a value of
     *        zero yields a reader that cannot buffer any input.
     */
    public UTF8StreamReader(int capacity) {
        _bytes = new byte[capacity];
    }

    /**
     * Attaches the input stream to decode.
     *
     * @param inStream the stream to read bytes from.
     * @return {@code this}
     * @throws IllegalStateException if a stream is already attached (the
     *         reader has not been closed or reset).
     */
    public UTF8StreamReader setInput(InputStream inStream) {
        if (_inputStream != null)
            throw new IllegalStateException("Reader not closed or reset");
        _inputStream = inStream;
        return this;
    }

    /**
     * Indicates whether a read may proceed without waiting on the stream:
     * either decoded/buffered data is pending or the underlying stream
     * reports available bytes.
     *
     * @return {@code true} if data is buffered or available.
     * @throws IOException if no stream is attached or the stream fails.
     */
    @Override
    public boolean ready() throws IOException {
        if (_inputStream == null)
            throw new IOException("Stream closed");
        return (_pendingLowSurrogate != 0) || ((_end - _start) > 0)
                || (_inputStream.available() != 0);
    }

    /**
     * Closes the attached stream (if any) and resets this reader.
     * The reader is reset even when closing the stream fails, so that it
     * remains reusable.
     *
     * @throws IOException if closing the underlying stream fails.
     */
    @Override
    public void close() throws IOException {
        if (_inputStream != null) {
            try {
                _inputStream.close();
            } finally {
                reset();
            }
        }
    }

    /**
     * Reads one character.
     *
     * <p>Unlike the bulk methods, this method returns the decoded code
     * point as is, so the result may exceed {@code 0xFFFF}. If a previous
     * {@link #read(char[], int, int)} left a low surrogate pending, that
     * surrogate is returned first.</p>
     *
     * @return the character read, or {@code -1} at end of stream.
     * @throws IOException if no stream is attached, the stream fails, or
     *         the input is not valid UTF-8.
     */
    @Override
    public int read() throws IOException {
        if (_pendingLowSurrogate != 0) {
            int low = _pendingLowSurrogate;
            _pendingLowSurrogate = 0;
            return low;
        }
        // Bounds are checked BEFORE indexing: when the buffer has been
        // filled completely and fully consumed, _start == _bytes.length.
        if (_start < _end) {
            byte b = _bytes[_start];
            if (b >= 0) { // ASCII fast path.
                _start++;
                return b;
            }
        }
        return read2();
    }

    /**
     * Refills the byte buffer from the attached stream. The buffer indices
     * are only updated once the stream read has returned, so a failing read
     * never exposes stale bytes.
     *
     * @return the value returned by the stream read (also stored in _end).
     */
    private int fill() throws IOException {
        int count = _inputStream.read(_bytes, 0, _bytes.length);
        _start = 0;
        _end = count;
        return count;
    }

    // Reads one full character, blocks if necessary.
    private int read2() throws IOException {
        while (true) {
            if (_start >= _end) { // No more bytes in buffer.
                if (_inputStream == null)
                    throw new IOException("No input stream or stream closed");
                if (fill() > 0)
                    continue;
                if (_moreBytes == 0)
                    return -1; // Done.
                // Incomplete sequence.
                throw new CharConversionException("Unexpected end of stream");
            }
            byte b = _bytes[_start++];

            // Decodes UTF-8.
            if ((b >= 0) && (_moreBytes == 0)) {
                // 0xxxxxxx
                return b;
            } else if (((b & 0xc0) == 0x80) && (_moreBytes != 0)) {
                // 10xxxxxx (continuation byte)
                _code = (_code << 6) | (b & 0x3f); // Adds 6 bits to code.
                if (--_moreBytes == 0)
                    return _code;
            } else if (((b & 0xe0) == 0xc0) && (_moreBytes == 0)) {
                // 110xxxxx
                _code = b & 0x1f;
                _moreBytes = 1;
            } else if (((b & 0xf0) == 0xe0) && (_moreBytes == 0)) {
                // 1110xxxx
                _code = b & 0x0f;
                _moreBytes = 2;
            } else if (((b & 0xf8) == 0xf0) && (_moreBytes == 0)) {
                // 11110xxx
                _code = b & 0x07;
                _moreBytes = 3;
            } else if (((b & 0xfc) == 0xf8) && (_moreBytes == 0)) {
                // 111110xx
                _code = b & 0x03;
                _moreBytes = 4;
            } else if (((b & 0xfe) == 0xfc) && (_moreBytes == 0)) {
                // 1111110x
                _code = b & 0x01;
                _moreBytes = 5;
            } else {
                throw new CharConversionException("Invalid UTF-8 Encoding");
            }
        }
    }

    /**
     * Reads characters into a portion of an array.
     *
     * <p>The stream is read at most once when the byte buffer is empty on
     * entry; the method then returns what could be decoded from the buffer
     * (completing a multi-byte character may require further stream reads).
     * When a code point above U+FFFF has only one slot left, its high
     * surrogate is stored and the low surrogate is delivered by the next
     * read, so a non-empty request never returns zero merely because the
     * next character is multi-byte.</p>
     *
     * @param cbuf the destination array.
     * @param off the offset at which to start storing characters.
     * @param len the maximum number of characters to read.
     * @return the number of characters stored, {@code 0} if {@code len} is
     *         zero, or the non-positive value returned by the stream read
     *         ({@code -1} at end of stream) when nothing could be read.
     * @throws IndexOutOfBoundsException if {@code off} or {@code len} is
     *         negative or {@code off + len} exceeds {@code cbuf.length}.
     * @throws IOException if no stream is attached, the stream fails, or
     *         the input cannot be converted.
     */
    @Override
    public int read(char cbuf[], int off, int len) throws IOException {
        if (_inputStream == null)
            throw new IOException("No input stream or stream closed");
        if ((off < 0) || (len < 0) || (len > cbuf.length - off))
            throw new IndexOutOfBoundsException("off: " + off + ", len: "
                    + len + ", length: " + cbuf.length);
        if (len == 0)
            return 0;
        int i = off;
        if (_pendingLowSurrogate != 0) { // Left over from previous call.
            cbuf[i++] = (char) _pendingLowSurrogate;
            _pendingLowSurrogate = 0;
        }
        if (_start >= _end) { // Fills buffer.
            if (i > off)
                return i - off; // Already has data, does not block.
            if (fill() <= 0) // Done.
                return _end;
        }
        final int limit = off + len;
        while (i < limit) {
            // Invariant: _start < _end
            byte b = _bytes[_start];
            if (b >= 0) { // Most common case.
                cbuf[i++] = (char) b;
                if (++_start >= _end)
                    return i - off; // End of buffer.
            } else {
                int code = read2();
                if (code < 0x10000) {
                    cbuf[i++] = (char) code;
                } else if (code <= 0x10ffff) { // Surrogates.
                    cbuf[i++] = (char) (((code - 0x10000) >> 10) + 0xd800);
                    int low = ((code - 0x10000) & 0x3ff) + 0xdc00;
                    if (i < limit) {
                        cbuf[i++] = (char) low;
                    } else { // No room left, delivered by the next read.
                        _pendingLowSurrogate = low;
                    }
                } else {
                    throw new CharConversionException("Cannot convert U+"
                            + Integer.toHexString(code)
                            + " to char (code greater than U+10FFFF)");
                }
                if (_start >= _end)
                    return i - off; // End of buffer.
            }
        }
        return len;
    }

    /**
     * Reads and decodes the remainder of the stream into the specified
     * destination. Code points above U+FFFF are appended as surrogate
     * pairs.
     *
     * @param dest the destination receiving the decoded characters.
     * @throws IOException if no stream is attached, the stream or the
     *         destination fails, or the input cannot be converted.
     */
    public void read(Appendable dest) throws IOException {
        if (_inputStream == null)
            throw new IOException("No input stream or stream closed");
        if (_pendingLowSurrogate != 0) { // Left over from an array read.
            dest.append((char) _pendingLowSurrogate);
            _pendingLowSurrogate = 0;
        }
        while (true) {
            if (_start >= _end) { // Fills buffer.
                if (fill() <= 0) // Done.
                    break;
            }
            byte b = _bytes[_start];
            if (b >= 0) {
                dest.append((char) b); // Most common case.
                _start++;
            } else {
                int code = read2();
                if (code < 0x10000) {
                    dest.append((char) code);
                } else if (code <= 0x10ffff) { // Surrogates.
                    dest.append((char) (((code - 0x10000) >> 10) + 0xd800));
                    dest.append((char) (((code - 0x10000) & 0x3ff) + 0xdc00));
                } else {
                    throw new CharConversionException("Cannot convert U+"
                            + Integer.toHexString(code)
                            + " to char (code greater than U+10FFFF)");
                }
            }
        }
    }

    /**
     * Detaches the input stream (without closing it) and clears all
     * buffering and decoding state so that this reader can be reused.
     */
    @Override
    public void reset() {
        _code = 0;
        _end = 0;
        _inputStream = null;
        _moreBytes = 0;
        _pendingLowSurrogate = 0;
        _start = 0;
    }

    /**
     * Equivalent to {@link #setInput(InputStream)}.
     *
     * @param inStream the stream to read bytes from.
     * @return {@code this}
     */
    public UTF8StreamReader setInputStream(InputStream inStream) {
        return this.setInput(inStream);
    }
}
javolution.io.UTF8StreamReader.close
void close()
Definition: UTF8StreamReader.java:120
javolution.io.UTF8StreamReader.reset
void reset()
Definition: UTF8StreamReader.java:302
javolution.io.UTF8StreamReader.UTF8StreamReader
UTF8StreamReader(int capacity)
Definition: UTF8StreamReader.java:78
javolution.io.UTF8StreamReader.setInputStream
UTF8StreamReader setInputStream(InputStream inStream)
Definition: UTF8StreamReader.java:313
javolution.io.UTF8StreamReader
Definition: UTF8StreamReader.java:44
javolution.io.UTF8StreamReader.read
void read(Appendable dest)
Definition: UTF8StreamReader.java:271
javolution.io.UTF8StreamReader.read
int read()
Definition: UTF8StreamReader.java:135
javolution.io.UTF8StreamReader.UTF8StreamReader
UTF8StreamReader()
Definition: UTF8StreamReader.java:69
javolution.io.UTF8StreamReader._moreBytes
int _moreBytes
Definition: UTF8StreamReader.java:205
javolution.io.UTF8StreamReader.read2
int read2()
Definition: UTF8StreamReader.java:141
javolution.io.UTF8StreamReader._code
int _code
Definition: UTF8StreamReader.java:203
javolution.io.UTF8StreamReader.setInput
UTF8StreamReader setInput(InputStream inStream)
Definition: UTF8StreamReader.java:95
javolution.io.UTF8StreamReader._end
int _end
Definition: UTF8StreamReader.java:59
javolution.io.UTF8StreamReader._bytes
final byte[] _bytes
Definition: UTF8StreamReader.java:64
javolution.io.UTF8StreamReader._start
int _start
Definition: UTF8StreamReader.java:54
javolution.io.UTF8StreamReader.ready
boolean ready()
Definition: UTF8StreamReader.java:109
javolution.io.UTF8StreamReader.read
int read(char cbuf[], int off, int len)
Definition: UTF8StreamReader.java:222
javolution.io.UTF8StreamReader._inputStream
InputStream _inputStream
Definition: UTF8StreamReader.java:49