/*
 * [Asciitab.java]
 *
 * Summary: prepares ASCII table in HTML.
 *
 * Copyright: (c) 2001-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
 *
 * Licence: This software may be copied and used freely for any purpose but military.
 *          http://mindprod.com/contact/nonmil.html
 *
 * Requires: JDK 1.8+
 *
 * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
 *
 * Version History:
 *  1.1 2009-03-22 add style markup, character entities
 *  1.2 2011-11-23 integrate into Palette
 */
package com.mindprod.palette;

import com.mindprod.common18.BigDate;
import com.mindprod.entities.EntifyStrings;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;

import static java.lang.System.*;

/**
 * Prepares the ASCII and Latin-1 character table as an HTML fragment.
 * <p>
 * Does not run standalone; the table is generated when Palette is run.
 *
 * @author Roedy Green, Canadian Mind Products
 * @version 1.2 2011-11-23 integrate into Palette
 * @since 2001
 */
final class Asciitab
    {
    /**
     * HTML comment placed ahead of the generated table, warning that the table is machine generated.
     * The date is captured once, when this class is initialised.
     */
    private static final String DO_NOT_EDIT = "<!-- The following table was generated on " + BigDate.localToday().toString() + " by AsciiTab. D o   n o t   e d i t . -->\n";

    /**
     * First code point of the Unicode "Control Pictures" block. Adding a control character's code (0..31)
     * gives the visible symbol that stands for that control character.
     */
    private static final int CONTROL_PICTURES_BASE = 0x2400;

    /**
     * How you display this character in HTML.
     * <ul>
     * <li>0..31: a hex numeric entity for the matching control-picture glyph.</li>
     * <li>32..126: the character itself, except space, ampersand, quote, less-than and greater-than, which
     * become named entities.</li>
     * <li>127 and above: whatever {@code EntifyStrings.entifyHTML} produces for the character.</li>
     * </ul>
     *
     * @param c the character you want to render
     *
     * @return html for rendering the character
     */
    private static String asHTML( char c )
        {
        if ( c < 32 )
            {
            /* control char, shown via its control-picture glyph */
            return "&#x" + toLZ( c + CONTROL_PICTURES_BASE, 4, 16 ) + ";";
            }
        if ( c > 126 )
            {
            /* DEL and high ascii */
            // decimal form would be: "&#" + Integer.toString( c ) + ";";
            return EntifyStrings.entifyHTML( String.valueOf( c ) );
            }
        switch ( c )
            {
            case ' ':
                return "&nbsp;";
            case '&':
                return "&amp;";
            case '"':
                return "&quot;";
            case '<':
                return "&lt;";
            case '>':
                return "&gt;";
            default:
                /* ordinary char */
                return String.valueOf( c );
            }
        }

    /**
     * How you display this character in HTML, quoted so you can see the code. A rendering that starts with
     * an ampersand (i.e. an entity) has that ampersand replaced by {@code &amp;amp;} so the browser shows
     * the entity text rather than the character. Any other rendering is returned unchanged.
     *
     * @param c the character you want to render
     *
     * @return html that displays the markup used to render the character
     */
    private static String asQuotedHTML( char c )
        {
        final String html = asHTML( c );
        // startsWith, unlike charAt( 0 ), is safe should the rendering ever be empty.
        return html.startsWith( "&" ) ? "&amp;" + html.substring( 1 ) : html;
        }

    /**
     * Get description of a character.
     *
     * @param c character to describe
     *
     * @return string describing character, or &amp;nbsp; if none available. The string is HTML and may
     *         contain markup, entities and macro comments.
     */
    @SuppressWarnings( { "OctalInteger" } )
    private static String descs( char c )
        {
        if ( 'A' <= c && c <= 'Z' )
            {
            return "upper case " + c;
            }
        if ( 'a' <= c && c <= 'z' )
            {
            return "lower case " + c;
            }
        if ( '0' <= c && c <= '9' )
            {
            return "digit " + c;
            }
        switch ( c )
            {
            case 0:
                return "^@ <!-- macro Acronym NUL_ctrl -->";
            case 1:
                return "^A <!-- macro Acronym SOH_ctrl -->";
            case 2:
                return "^B <!-- macro Acronym STX_ctrl -->";
            case 3:
                return "^C <!-- macro Acronym ETX_ctrl -->";
            case 4:
                return "^D <!-- macro Acronym EOT_ctrl -->";
            case 5:
                return "^E <!-- macro Acronym ENQ_ctrl -->";
            case 6:
                return "^F <!-- macro Acronym ACK_ctrl -->";
            case 7:
                return "^G <!-- macro Acronym BEL_ctrl -->";
            case 8:
                return "^H <!-- macro Acronym BS_ctrl --> [\\b]";
            case 9:
                return "^I <!-- macro Acronym HT_ctrl --> [\\t]";
            case 10:
                return "^J <!-- macro Acronym LF_ctrl --> [\\n]";
            case 11:
                return "^K <!-- macro Acronym VT_ctrl -->";
            case 12:
                return "^L <!-- macro Acronym FF_ctrl --> [\\f]";
            case 13:
                return "^M <!-- macro Acronym CR_ctrl --> [\\r]";
            case 14:
                return "^N <!-- macro Acronym SO_ctrl -->";
            case 15:
                return "^O <!-- macro Acronym SI_ctrl -->";
            case 16:
                return "^P <!-- macro Acronym DLE_ctrl -->";
            case 17:
                return "^Q <!-- macro Acronym DC1 -->, XON resume transmission";
            case 18:
                return "^R <!-- macro Acronym DC2 -->";
            case 19:
                return "^S <!-- macro Acronym DC3 -->, XOFF pause transmission";
            case 20:
                return "^T <!-- macro Acronym DC4 -->";
            case 21:
                return "^U <!-- macro Acronym NAK_ctrl -->";
            case 22:
                return "^V <!-- macro Acronym SYN_ctrl -->";
            case 23:
                return "^W <!-- macro Acronym ETB_ctrl -->";
            case 24:
                return "^X <!-- macro Acronym CAN_ctrl -->";
            case 25:
                return "^Y <!-- macro Acronym EM_ctrl -->";
            case 26:
                return "^Z <!-- macro Acronym SUB_ctrl -->";
            case 27:
                return "^[ <!-- macro Acronym ESC_ctrl -->";
            case 28:
                return "^\\ <!-- macro Acronym FS_ctrl -->, originally used to separate files in a stream.";
            case 29:
                return "^] <!-- macro Acronym GS_ctrl -->, originally used to separate groups of similar records " +
                       "(tables) in a stream";
            case 30:
                return "^^ <!-- macro Acronym RS_ctrl -->, originally used to separate records.";
            case 31:
                return "^_ <!-- macro Acronym US_ctrl -->, originally used to separate fields (or units as they were" +
                       " once called).";
            case 32:
                return "space";
            case 33:
                return "bang, exclamation";
            case 34:
                return "quote";
            case 35:
                return "sharp, number sign";
            case 36:
                return "dollar sign";
            case 37:
                return "percent";
            case 38:
                return "ampersand";
            case 39:
                return "apostrophe";
            case 40:
                return "left parenthesis";
            case 41:
                return "right parenthesis";
            case 42:
                return "star, asterisk";
            case 43:
                return "plus";
            case 44:
                return "comma";
            case 45:
                return "minus";
            case 46:
                return "period";
            case 47:
                return "slash, <strong>not backslash!</strong>";
            case 58:
                return "colon";
            case 59:
                return "semicolon";
            case 60:
                return "less than";
            case 61:
                return "equals";
            case 62:
                return "greater than";
            case 63:
                return "question mark";
            case 64:
                return "at sign";
            case 91:
                return "left square bracket";
            case 92:
                return "backslash, <strong>not slash!</strong>";
            case 93:
                return "right square bracket";
            case 94:
                return "hat, circumflex";
            case 95:
                return "underscore";
            case 96:
                return "grave, rhymes with have";
            case 123:
                return "left curly brace";
            case 124:
                return "vertical bar";
            case 125:
                return "right curly brace";
            case 126:
                return "tilde";
            case 127:
                return "<!-- macro Acronym DEL_ctrl -->";
            // the following are OCTAL because that's how PostScript specifies them
            // &xxx; codes are for Latin-1
            case 0241:
                return "PostScript (&iexcl;) exclamdown";
            case 0242:
                return "PostScript (&cent;) cent";
            case 0243:
                return "PostScript (&pound;) sterling";
            case 0244:
                return "PostScript (/) fraction";
            case 0245:
                return "PostScript (&yen;) yen";
            case 0246:
                return "PostScript (&#131;) florin";
            case 0247:
                return "PostScript (&sect;) section";
            case 0250:
                return "PostScript (&curren;) currency";
            case 0251:
                return "PostScript (') quotesingle";
            case 0252:
                return "PostScript (&#147;) quotedblleft";
            case 0253:
                return "PostScript (&laquo;) guillemotleft";
            case 0254:
                return "PostScript (&lt;) guilsinglleft";
            case 0255:
                return "PostScript (&gt;) guilsinglright";
            case 0256:
                return "PostScript fi ligature";
            case 0257:
                return "PostScript fl ligature";
            case 0261:
                return "PostScript (&#150;) endash";
            case 0262:
                return "PostScript (&#134;) dagger";
            case 0263:
                return "PostScript (&middot;) periodcentered";
            case 0266:
                return "PostScript (&para;) paragraph";
            case 0267:
                return "PostScript (&#149;) bullet";
            case 0270:
                return "PostScript (,) quotesinglbase";
            case 0271:
                return "PostScript (&#132;) quotedblbase";
            case 0272:
                return "PostScript (&#148;) quotedblright";
            case 0273:
                return "PostScript (&raquo;) guillemotright";
            case 0274:
                return "PostScript (&#133;) ellipsis";
            case 0275:
                return "PostScript (&#137;) perthousand";
            case 0277:
                return "PostScript (&iquest;) questiondown";
            case 0301:
                return "PostScript (`) grave";
            case 0302:
                return "PostScript (&acute;) acute";
            case 0303:
                return "PostScript (^) circumflex";
            case 0304:
                return "PostScript (~) tilde";
            case 0305:
                return "PostScript (&macr;) macron, overbar accent";
            case 0306:
                return "PostScript (<sup>u</sup>) breve, flattened u-shaped accent";
            case 0307:
                return "PostScript (&#183;) dotaccent";
            case 0310:
                return "PostScript (&uml;) dieresis";
            case 0312:
                return "PostScript (&#176;) ring";
            case 0313:
                return "PostScript (&cedil;) cedilla";
            case 0315:
                return "PostScript (&#148;) hungarumlaut";
            case 0316:
                return "PostScript (,) ogonek, reverse comma";
            case 0317:
                return "PostScript (<sup>v</sup>) caron, flattened v-shaped accent";
            case 0320:
                return "PostScript (&#151;) emdash";
            case 0341:
                return "PostScript (&AElig;) AE";
            case 0343:
                return "PostScript (&ordf;) ordfeminine";
            case 0350:
                return "PostScript (L/) Lslash, L with / overstrike";
            case 0351:
                return "PostScript (&Oslash;) Oslash";
            case 0352:
                return "PostScript (&#140;) OE";
            case 0353:
                return "PostScript (&ordm;) ordmasculine";
            case 0361:
                return "PostScript (&aelig;) ae";
            case 0365:
                return "PostScript (1) dotlessi, i without dot";
            case 0370:
                return "PostScript (l/) l with / overstrike";
            case 0371:
                return "PostScript (&oslash;) oslash";
            case 0372:
                return "PostScript (&#156;) oe";
            case 0373:
                return "PostScript (&szlig;) germandbls";
            default:
                return "&nbsp;";
            }
        }

    /**
     * Generate ascii table: one HTML table row for each character 0..255, written to the given file.
     * <p>
     * An I/O failure is reported on System.err rather than thrown. The file is written with FileWriter,
     * i.e. in the platform default encoding.
     *
     * @param dir      target dir where generated file goes.
     * @param filename filename where to put the generated table
     */
    @SuppressWarnings( { "SameParameterValue" } )
    static void generateAsciitab( final File dir, final String filename )
        {
        final File target = new File( dir, filename );
        // try-with-resources closes the writer even when a write fails part way through.
        try ( FileWriter p = new FileWriter( target ) )
            {
            p.write( "\n" );
            p.write( DO_NOT_EDIT +
                     "<table class=\"standard\"><caption class=\"hidden\">ASCII and Latin-1 Character Table</caption>\n" );
            p.write( "<colgroup><col style=\"text-align:center\" class=\"behold\"><col span=\"3\"" +
                     " style=\"text-align:right\" ><col span=\"2\"" +
                     " style=\"text-align:left\"></colgroup>\n" );
            p.write( "<thead><tr><th colspan=\"6\">ASCII and Latin-1 Character Table</th></tr>" );
            p.write( "<tr><th>Char</th><th>Dec</th><th>Hex</th><th>Octal</th><th>HTML</th><th>Notes</th></tr></thead>\n" +
                     "<tbody>\n" );
            for ( char c = 0; c <= 0xff; c++ )
                {
                p.write( tableRow( c ) );
                }
            p.write( "</tbody></table>" );
            p.write( "\n" );
            }
        catch ( IOException e )
            {
            // best effort: report which file failed and why, then carry on.
            err.println( "Asciitab: unable to write " + target + ": " + e );
            }
        }

    /**
     * Build the HTML table row for one character: glyph, decimal, hex, octal, HTML markup and notes.
     *
     * @param c the character the row describes
     *
     * @return the complete row, from the opening tr tag through the trailing newline
     */
    private static String tableRow( char c )
        {
        final StringBuilder row = new StringBuilder( 256 );
        row.append( "<tr>" );
        /* displayable char */
        row.append( "<td class=\"behold\">" );
        if ( !isControlChar( c ) )
            {
            // cell is left empty for 127..159 to avoid generating a raw unprintable char
            row.append( asHTML( c ) );
            }
        row.append( "</td>" );
        /* decimal */
        row.append( "<td class=\"decimal\">" ).append( (int) c ).append( "</td>" );
        /* hex */
        row.append( "<td class=\"hexentity\">" ).append( "0x" ).append( toLZ( c, 2, 16 ) ).append( "</td>" );
        /* octal */
        row.append( "<td class=\"octal\">" ).append( toLZ( c, 4, 8 ) ).append( "</td>" );
        /* HTML entity */
        row.append( "<td class=\"entity\">" ).append( asQuotedHTML( c ) ).append( "</td>" );
        /* notes */
        row.append( "<td>" ).append( descs( c ) ).append( "</td>" );
        row.append( "</tr>\n" );
        return row.toString();
        }

    /**
     * Is this char an unprintable control char whose glyph cell should be left empty?
     *
     * @param theCharNumber numeric code of the character to test
     *
     * @return true for codes 127 through 159 inclusive
     */
    private static boolean isControlChar( int theCharNumber )
        {
        // low control chars (0..31) are deliberately not included. They have special glyphs.
        return 127 <= theCharNumber && theCharNumber <= 159;
        }

    /**
     * Convert an integer to a String, with left zeroes.
     * <p>
     * A value that needs more than {@code len} characters is returned in full rather than truncated,
     * since dropping digits would display a different number. For a negative value the minus sign
     * precedes the padding zeros.
     *
     * @param i    the integer to be converted
     * @param len  the minimum length of the resulting string
     * @param base base 10=decimal 16=hex 8=octal
     *
     * @return String representation of the int e.g. 007
     */
    private static String toLZ( int i, int len, int base )
        {
        final String digits = Integer.toString( i, base );
        if ( digits.length() >= len )
            {
            // already wide enough
            return digits;
            }
        final boolean negative = digits.charAt( 0 ) == '-';
        final StringBuilder sb = new StringBuilder( len );
        if ( negative )
            {
            sb.append( '-' );
            }
        // pad on left with zeros, however many are needed
        for ( int pad = len - digits.length(); pad > 0; pad-- )
            {
            sb.append( '0' );
            }
        sb.append( digits, negative ? 1 : 0, digits.length() );
        return sb.toString();
        } // end toLZ
    }