package com.mindprod.example;
import com.mindprod.common18.EIO;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import static java.lang.System.*;
/**
 * Discover how Java's use of UTF-8 conforms with Unicode standards.
 * <p>
 * One awkward test string is encoded four different ways (CharsetEncoder, String.getBytes,
 * DataOutputStream.writeUTF and OutputStreamWriter), decoded again, compared with the original,
 * and the encoded bytes are dumped in hex on System.out.
 *
 * @author Roedy Green, Canadian Mind Products
 * @version 1.0 2006-02-25
 * @since 2006-02-25
 */
public final class TestUTF8
{
    /**
     * byte order mark as a character.
     */
    private static final char BOM = ( char ) 0xfeff;

    /**
     * TEST strange use to check out how Sun encodes UTF-8.
     * Contains a BOM, plain ASCII, NUL, several 16-bit chars, and five supplementary
     * code points that each occupy a surrogate pair in the String.
     */
    private static final String TEST = new StringBuilder().append( "bom:" )
            .append( BOM )
            .append( " text:echidna" )
            .append( " x0:" )
            .append( ( char ) 0x00 )
            .append( " xa1:" )
            .append( ( char ) 0xa1 )
            .append( " x100:" )
            .append( ( char ) 0x100 )
            .append( " x0911:" )
            .append( ( char ) 0x0911 )
            .append( " xffff:" )
            .append( ( char ) 0xffff )
            .append( " || 32 bit || x10000:" )
            .appendCodePoint( 0x10000 )
            .append( " x10302:" )
            .appendCodePoint( 0x10302 )
            .append( " x1ffff:" )
            .appendCodePoint( 0x1ffff )
            .append( " x100000:" )
            .appendCodePoint( 0x100000 )
            .append( " x10ffff:" )
            .appendCodePoint( 0x10ffff )
            .toString();

    /**
     * Delete a temporary file, falling back to deletion at JVM exit if it cannot be removed now.
     *
     * @param tempFile file to get rid of.
     */
    private static void discard( File tempFile )
    {
        // delete() reports failure only through its return value; don't silently leave litter.
        // NOTE(review): deletion may fail on some platforms while a mapping of the file is
        // still reachable -- deleteOnExit is a best-effort second attempt.
        if ( !tempFile.delete() )
        {
            tempFile.deleteOnExit();
        }
    }

    /**
     * dump contents of a buffer in hex, decimal and as a char, one byte per line,
     * preceded by the buffer's position, limit and capacity.
     * Every byte from index 0 up to the limit is shown. The buffer's position is neither
     * relied upon nor changed.
     *
     * @param bb ByteBuffer as raw bytes, e.g. ByteBuffer or MappedByteBuffer
     */
    private static void examine( ByteBuffer bb )
    {
        out.println( "position: " + bb.position() );
        out.println( "limit: " + bb.limit() );
        out.println( "capacity: " + bb.capacity() );
        final int limit = bb.limit();
        for ( int offset = 0; offset < limit; offset++ )
        {
            // Absolute get: a relative get() would start at the current position and
            // underflow before reaching limit whenever that position is not 0.
            final int c = bb.get( offset ) & 0xff;// want to view unsigned
            out.printf( "%6d > %2x : %3d : %1c\n", offset, c, c, ( char ) c );
        }
    }

    /**
     * Map an entire file read-only and dump its bytes with {@link #examine(ByteBuffer)}.
     *
     * @param file file whose raw bytes are to be displayed.
     *
     * @throws java.io.IOException on I/O failure.
     */
    private static void examineFile( File file ) throws IOException
    {
        try ( FileInputStream fis = new FileInputStream( file );
              FileChannel fc = fis.getChannel() )
        {
            final ByteBuffer encodedBuffer =
                    fc.map( FileChannel.MapMode.READ_ONLY, 0, file.length() );
            examine( encodedBuffer );
        }
    }

    /**
     * Test CharsetEncoder.encode / CharsetDecoder.decode round trip via ByteBuffer and CharBuffer.
     *
     * @throws java.io.IOException on I/O failure, including a CharacterCodingException
     *                             from the encoder or decoder.
     */
    private static void testCharBuffer() throws IOException
    {
        Charset utf8 = Charset.forName( "UTF-8" );
        CharsetDecoder decoder = utf8.newDecoder();
        CharsetEncoder encoder = utf8.newEncoder();
        ByteBuffer encoded = encoder.encode( CharBuffer.wrap( TEST ) );
        CharBuffer charBuffer = decoder.decode( encoded );
        String reconstitutedTest = charBuffer.toString();
        if ( !reconstitutedTest.equals( TEST ) )
        {
            out.println( "oops: charBuffer differs from original" );
        }
        out.println( "<><> charBuffer <><>" );
        out.println( "String length: "
                     + TEST.length()
                     + " UTF-8 length: "
                     + encoded.limit()
                     + " reconstituted length: "
                     + reconstitutedTest.length() );
        // Decoding consumed the buffer. rewind (not flip: nothing was written to it)
        // puts the position back to 0 and leaves the limit alone.
        encoded.rewind();
        examine( encoded );
    }

    /**
     * Test String.getBytes and the matching String( byte[], encoding ) constructor.
     *
     * @throws java.io.IOException on I/O failure.
     */
    private static void testGetBytes() throws IOException
    {
        byte[] encoded = TEST.getBytes( EIO.UTF8 );
        String reconstitutedTest = new String( encoded, EIO.UTF8 );
        if ( !reconstitutedTest.equals( TEST ) )
        {
            out.println( "oops: getBytes differs from original" );
        }
        out.println( "<><> getBytes <><>" );
        out.println( "String length: "
                     + TEST.length()
                     + " UTF-8 length: "
                     + encoded.length
                     + " reconstituted length: "
                     + reconstitutedTest.length() );
        examine( ByteBuffer.wrap( encoded ) );
    }

    /**
     * Test OutputStreamWriter: write TEST to a temp file, read it back with an
     * InputStreamReader, compare, then dump the file's bytes.
     *
     * @throws java.io.IOException on I/O failure.
     */
    private static void testOutputStreamWriter() throws IOException
    {
        File tempFile = File.createTempFile( "temp_", "tmp" );
        try
        {
            try ( FileOutputStream fos = new FileOutputStream( tempFile, false );
                  OutputStreamWriter osw = new OutputStreamWriter( fos, EIO.UTF8 ) )
            {
                osw.write( TEST );
            }

            final char[] cbuf = new char[ TEST.length() ];
            int charsRead = 0;
            try ( FileInputStream fis = new FileInputStream( tempFile );
                  InputStreamReader isr = new InputStreamReader( fis, EIO.UTF8 ) )
            {
                // A single read() may legally return fewer chars than asked for,
                // so keep reading until the buffer is full or end of stream.
                while ( charsRead < cbuf.length )
                {
                    final int n = isr.read( cbuf, charsRead, cbuf.length - charsRead );
                    if ( n < 0 )
                    {
                        break;
                    }
                    charsRead += n;
                }
            }

            String reconstitutedTest = new String( cbuf, 0, charsRead );
            if ( !reconstitutedTest.equals( TEST ) )
            {
                out.println( "oops: InputStreamReader differs from original" );
            }
            out.println( "<><> OutputStreamWriter <><>" );
            out.println( "String length: "
                         + TEST.length()
                         + " UTF-8 length: "
                         + tempFile.length()
                         + " reconstituted length: "
                         + reconstitutedTest.length() );
            examineFile( tempFile );
        }
        finally
        {
            discard( tempFile );
        }
    }

    /**
     * Test DataOutputStream.writeUTF / DataInputStream.readUTF round trip through a temp file.
     * The file, and so the reported length and the dump, holds what writeUTF produces:
     * a two-byte length prefix followed by the JDK's modified UTF-8 encoding.
     *
     * @throws java.io.IOException on I/O failure
     */
    private static void testWriteUTF() throws IOException
    {
        File tempFile = File.createTempFile( "temp_", "tmp" );
        try
        {
            try ( FileOutputStream fos = new FileOutputStream( tempFile, false );
                  DataOutputStream dos = new DataOutputStream( fos ) )
            {
                dos.writeUTF( TEST );
            }

            final String reconstitutedTest;
            try ( FileInputStream fis = new FileInputStream( tempFile );
                  DataInputStream dis = new DataInputStream( fis ) )
            {
                reconstitutedTest = dis.readUTF();
            }

            if ( !reconstitutedTest.equals( TEST ) )
            {
                out.println( "oops: readUTF differs from original" );
            }
            out.println( "<><> DataOutputStream.writeUTF <><>" );
            out.println( "String length: "
                         + TEST.length()
                         + " UTF-8 length: "
                         + tempFile.length()
                         + " reconstituted length: "
                         + reconstitutedTest.length() );
            examineFile( tempFile );
        }
        finally
        {
            discard( tempFile );
        }
    }

    /**
     * Examines Java's various UTF implementations for conformance with Unicode Standards.
     *
     * @param args not used
     *
     * @throws java.io.IOException on I/O failure
     */
    public static void main( String[] args ) throws IOException
    {
        testCharBuffer();
        testGetBytes();
        testWriteUTF();
        testOutputStreamWriter();
    }
}