 * [TestBOM.java]
 * Summary: Discover how Java handles BOMs, Byte Order Marks.
 * Copyright: (c) 2011-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
 * Licence: This software may be copied and used freely for any purpose but military.
 *          http://mindprod.com/contact/nonmil.html
 * Requires: JDK 1.8+
 * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
 * Version History:
 *  1.0 2011-11-11 initial version
package com.mindprod.example;

import com.mindprod.common18.EIO;
import com.mindprod.common18.ST;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;

import static java.lang.System.*;

 * Discover how Java handles BOMs, Byte Order Marks.
 * @author Roedy Green, Canadian Mind Products
 * @version 1.0 2010-02-20 initial version
 * @since 2010-02-20
public final class TestBOM
     * a BOM as it appears internally
    private static final char BOM = 0xfeff;

     * Benchmark three ways of buffered writing and three ways of buffered reading.
     * @param args not used
     * @throws java.io.IOException if problem writing or reading the test file.
    public static void main( String[] args ) throws IOException
        // find out if Java filters out BOM marks in input.
        // Need to prepare a test UTF-8 encoded test file with BOM.
        // first three bytes are efbbbf.  i.e. char feff encoded in UTF-8
        File hasABOM = new File( "hasabom.txt" );
        // test reader
        FileInputStream fis = new FileInputStream( hasABOM );
        final InputStreamReader eisr = new InputStreamReader( fis, EIO.UTF8 );
        int firstChar = eisr.read();
        out.println( "With Reader: first char seen: " + Integer.toHexString( firstChar ) );
        out.println( "Expect feff, BOM as unitcode. BOM is passed through to you." );
        // test inputStream
        fis = new FileInputStream( hasABOM );
        // read entire file into a byte buffer.
        final byte[] bb = new byte[ ( int ) hasABOM.length() ];
        final int bytesRead = fis.read( bb, 0 /* offset in ba */, bb.length /* bytes to read */ );
        out.println( bytesRead + " bytes read" );
        out.println( "With InputStream: first bytes seen: "
                     + ST.toLZHexString( bb[ 0 ], 2 )
                     + ST.toLZHexString( bb[ 1 ], 2 )
                     + ST.toLZHexString( bb[ 2 ], 2 ) );
        out.println( "Expect efbbbf, BOM as utf-8. BOM is passed through to you." );
        // test String decode
        out.println( "With String constructor decode: first char seen: " + Integer.toHexString( new String( bb,
                EIO.UTF8 ).charAt( 0 ) ) );
        out.println( "Expect feff, BOM as Unicode. BOM is passed through to you." );
        // on writing, does Java pass through and/or insert BOMs?
        // test writer
        final File hasABOMviaWriter = new File( "hasabomviaWriter.txt" );
        final FileOutputStream fos = new FileOutputStream( hasABOMviaWriter, false /* append */ );
        final OutputStreamWriter eosw = new OutputStreamWriter( fos, EIO.UTF8 );
        final PrintWriter prw = new PrintWriter( eosw, false /* auto flush on println */ );
        prw.print( BOM );
        prw.println( "abc" );
        out.println( "With Writer, look at the the file hasabomviawriter.txt to see what happened" );
        out.println( "Expect efbbbf, BOM as utf-8. your BOM is passed through. No other BOM added." );
        // test string encode as UTF-8 bytes
        byte[] ee = ( BOM + "abc" ).getBytes( EIO.UTF8 );
        out.println( "With String.getBytes encode: first bytes seen: "
                     + ST.toLZHexString( ee[ 0 ], 2 )
                     + ST.toLZHexString( ee[ 1 ], 2 )
                     + ST.toLZHexString( ee[ 2 ], 2 )
                     + " "
                     + ST.toLZHexString( ee[ 3 ], 2 )
                     + ST.toLZHexString( ee[ 4 ], 2 )
                     + ST.toLZHexString( ee[ 5 ], 2 ) );
        out.println( "Expect efbbbf 616263,  BOM as utf-8. your BOM is passed through. No other BOM added." );