Javolution 6.0.0 java
UTF8ByteBufferReader.java
Go to the documentation of this file.
1 /*
2  * Javolution - Java(TM) Solution for Real-Time and Embedded Systems
3  * Copyright (C) 2012 - Javolution (http://javolution.org/)
4  * All rights reserved.
5  *
6  * Permission to use, copy, modify, and distribute this software is
7  * freely granted, provided that this notice is preserved.
8  */
9 package javolution.io;
10 
11 import java.io.CharConversionException;
12 import java.io.IOException;
13 import java.io.Reader;
14 import java.nio.BufferUnderflowException;
15 import java.nio.ByteBuffer;
16 
/**
 * A {@link Reader} that decodes UTF-8 encoded characters directly from a
 * {@link ByteBuffer}.
 *
 * <p>The reader is bound to a buffer with {@link #setInput(ByteBuffer)} and
 * released with {@link #close()} or {@link #reset()}, after which the same
 * instance can be bound to another buffer. Reading advances the position of
 * the bound buffer.</p>
 *
 * <p>Decoding accepts lead bytes announcing sequences of up to six bytes.
 * Code points above U+10FFFF cannot be represented as {@code char} and are
 * rejected by the {@code char}-producing methods. Each decoding call keeps
 * its state in local variables, so a failed call cannot influence the
 * following one.</p>
 */
public final class UTF8ByteBufferReader extends Reader {

    /** The buffer being decoded, or {@code null} when closed / not yet set. */
    private ByteBuffer _byteBuffer;

    /**
     * Creates a reader with no input; {@link #setInput(ByteBuffer)} must be
     * called before reading.
     */
    public UTF8ByteBufferReader() {
    }

    /**
     * Binds this reader to the specified buffer.
     *
     * @param byteBuffer the buffer holding the UTF-8 encoded bytes to decode.
     * @return {@code this}
     * @throws IllegalStateException if this reader is still bound to a buffer
     *         (it has not been closed or reset).
     */
    public UTF8ByteBufferReader setInput(ByteBuffer byteBuffer) {
        if (_byteBuffer != null)
            throw new IllegalStateException("Reader not closed or reset");
        _byteBuffer = byteBuffer;
        return this;
    }

    /**
     * Indicates whether the bound buffer still has bytes to decode.
     *
     * @return {@code true} if at least one byte remains in the buffer.
     * @throws IOException if this reader is closed (no buffer bound).
     */
    @Override
    public boolean ready() throws IOException {
        if (_byteBuffer == null)
            throw new IOException("Reader closed");
        return _byteBuffer.hasRemaining();
    }

    /**
     * Closes this reader by releasing the bound buffer (see {@link #reset()}).
     * Closing an already closed reader has no effect.
     *
     * @throws IOException never thrown by this implementation.
     */
    @Override
    public void close() throws IOException {
        reset();
    }

    /**
     * Reads a single code point.
     *
     * <p>The decoded value is returned as is; it is not split into
     * surrogates, so values above {@code 0xFFFF} may be returned.</p>
     *
     * @return the decoded code point, or {@code -1} if the buffer has no
     *         remaining byte.
     * @throws CharConversionException if the bytes are not valid UTF-8 or the
     *         buffer ends in the middle of a sequence.
     * @throws IOException if this reader is closed.
     */
    @Override
    public int read() throws IOException {
        if (_byteBuffer == null)
            throw new IOException("Reader closed");
        if (!_byteBuffer.hasRemaining())
            return -1;
        byte b = _byteBuffer.get();
        return (b >= 0) ? b : read2(b);
    }

    /**
     * Decodes one full code point whose first byte {@code b} has already been
     * taken from the buffer; the continuation bytes are read from the buffer.
     *
     * <p>The decoding state lives in local variables only, so an exception
     * thrown here leaves nothing behind that could corrupt a later call.</p>
     *
     * @param b the first byte of the sequence.
     * @return the decoded code point.
     * @throws CharConversionException if {@code b} is not a valid lead byte,
     *         if a following byte is not a continuation byte, or if the
     *         buffer limit is reached before the sequence is complete.
     */
    private int read2(byte b) throws IOException {
        if (b >= 0)
            return b; // 0xxxxxxx

        int code;
        int moreBytes;
        if ((b & 0xe0) == 0xc0) { // 110xxxxx
            code = b & 0x1f;
            moreBytes = 1;
        } else if ((b & 0xf0) == 0xe0) { // 1110xxxx
            code = b & 0x0f;
            moreBytes = 2;
        } else if ((b & 0xf8) == 0xf0) { // 11110xxx
            code = b & 0x07;
            moreBytes = 3;
        } else if ((b & 0xfc) == 0xf8) { // 111110xx
            code = b & 0x03;
            moreBytes = 4;
        } else if ((b & 0xfe) == 0xfc) { // 1111110x
            code = b & 0x01;
            moreBytes = 5;
        } else {
            // Continuation byte in lead position, or 0xFE / 0xFF.
            throw new CharConversionException("Invalid UTF-8 Encoding");
        }

        for (; moreBytes > 0; moreBytes--) {
            if (!_byteBuffer.hasRemaining())
                throw new CharConversionException("Incomplete Sequence");
            byte next = _byteBuffer.get();
            if ((next & 0xc0) != 0x80) // Must be 10xxxxxx.
                throw new CharConversionException("Invalid UTF-8 Encoding");
            code = (code << 6) | (next & 0x3f); // Adds 6 bits to code.
        }
        return code;
    }

    /**
     * Reads characters into a portion of an array.
     *
     * <p>Code points above {@code 0xFFFF} are stored as a surrogate pair.
     * When such a pair does not fit in the space left, the sequence is left
     * unread in the buffer and the number of characters stored so far is
     * returned. Consequently a window of a single {@code char} cannot receive
     * a supplementary character and {@code 0} is returned in that case;
     * callers should provide room for at least two characters.</p>
     *
     * @param cbuf the destination array.
     * @param off the index at which to start storing characters.
     * @param len the maximum number of characters to store.
     * @return the number of characters stored, or {@code -1} if the buffer
     *         had no remaining byte when the method was called.
     * @throws CharConversionException if the bytes are not valid UTF-8, the
     *         buffer ends in the middle of a sequence, or a decoded code
     *         point is greater than U+10FFFF.
     * @throws IOException if this reader is closed.
     */
    @Override
    public int read(char[] cbuf, int off, int len) throws IOException {
        if (_byteBuffer == null)
            throw new IOException("Reader closed");
        if (!_byteBuffer.hasRemaining())
            return -1;
        final int end = off + len;
        int i = off;
        while ((i < end) && _byteBuffer.hasRemaining()) {
            final int start = _byteBuffer.position(); // Where this sequence begins.
            byte b = _byteBuffer.get();
            if (b >= 0) {
                cbuf[i++] = (char) b; // Most common case.
                continue;
            }
            int code = read2(b);
            if (code < 0x10000) {
                cbuf[i++] = (char) code; // One char is always enough here.
            } else if (code <= 0x10ffff) { // Surrogates.
                if (i + 1 >= end) {
                    // No room for the pair: un-read the whole sequence.
                    _byteBuffer.position(start);
                    break;
                }
                cbuf[i++] = (char) (((code - 0x10000) >> 10) + 0xd800);
                cbuf[i++] = (char) (((code - 0x10000) & 0x3ff) + 0xdc00);
            } else {
                throw new CharConversionException("Cannot convert U+"
                        + Integer.toHexString(code)
                        + " to char (code greater than U+10FFFF)");
            }
        }
        return i - off;
    }

    /**
     * Decodes all the remaining bytes of the buffer and appends the resulting
     * characters to the specified destination.
     *
     * @param dest the destination receiving the decoded characters.
     * @throws CharConversionException if the bytes are not valid UTF-8, the
     *         buffer ends in the middle of a sequence, or a decoded code
     *         point is greater than U+10FFFF.
     * @throws IOException if this reader is closed or if {@code dest} fails.
     */
    public void read(Appendable dest) throws IOException {
        if (_byteBuffer == null)
            throw new IOException("Reader closed");
        while (_byteBuffer.hasRemaining()) {
            byte b = _byteBuffer.get();
            if (b >= 0) {
                dest.append((char) b); // Most common case.
                continue;
            }
            int code = read2(b);
            if (code < 0x10000) {
                dest.append((char) code);
            } else if (code <= 0x10ffff) { // Surrogates.
                dest.append((char) (((code - 0x10000) >> 10) + 0xd800));
                dest.append((char) (((code - 0x10000) & 0x3ff) + 0xdc00));
            } else {
                throw new CharConversionException("Cannot convert U+"
                        + Integer.toHexString(code)
                        + " to char (code greater than U+10FFFF)");
            }
        }
    }

    /**
     * Releases the bound buffer so that this reader can be bound again with
     * {@link #setInput(ByteBuffer)}. The buffer itself is not modified.
     */
    @Override
    public void reset() {
        _byteBuffer = null;
    }

    /**
     * Same as {@link #setInput(ByteBuffer)}; this method simply delegates.
     *
     * @param byteBuffer the buffer holding the UTF-8 encoded bytes to decode.
     * @return {@code this}
     * @throws IllegalStateException if this reader is still bound to a buffer.
     */
    public UTF8ByteBufferReader setByteBuffer(ByteBuffer byteBuffer) {
        return this.setInput(byteBuffer);
    }

}
javolution.io.UTF8ByteBufferReader._code
int _code
Definition: UTF8ByteBufferReader.java:163
javolution.io.UTF8ByteBufferReader.read2
int read2(byte b)
Definition: UTF8ByteBufferReader.java:116
javolution.io.UTF8ByteBufferReader.setByteBuffer
UTF8ByteBufferReader setByteBuffer(ByteBuffer byteBuffer)
Definition: UTF8ByteBufferReader.java:263
javolution.io.UTF8ByteBufferReader.reset
void reset()
Definition: UTF8ByteBufferReader.java:254
javolution.io.UTF8ByteBufferReader.ready
boolean ready()
Definition: UTF8ByteBufferReader.java:74
javolution.io.UTF8ByteBufferReader._moreBytes
int _moreBytes
Definition: UTF8ByteBufferReader.java:165
javolution.io.UTF8ByteBufferReader.read
int read()
Definition: UTF8ByteBufferReader.java:102
javolution.io.UTF8ByteBufferReader.setInput
UTF8ByteBufferReader setInput(ByteBuffer byteBuffer)
Definition: UTF8ByteBufferReader.java:60
javolution.io.UTF8ByteBufferReader._byteBuffer
ByteBuffer _byteBuffer
Definition: UTF8ByteBufferReader.java:44
javolution.io.UTF8ByteBufferReader
Definition: UTF8ByteBufferReader.java:39
javolution.io.UTF8ByteBufferReader.read
void read(Appendable dest)
Definition: UTF8ByteBufferReader.java:231
javolution.io.UTF8ByteBufferReader.close
void close()
Definition: UTF8ByteBufferReader.java:87
javolution.io.UTF8ByteBufferReader.UTF8ByteBufferReader
UTF8ByteBufferReader()
Definition: UTF8ByteBufferReader.java:49
javolution.io.UTF8ByteBufferReader.read
int read(char cbuf[], int off, int len)
Definition: UTF8ByteBufferReader.java:181