/*
 * @(#)TagCategory.java
 *
 * Summary: categorises HTML tag for the finite state automaton parser.
 *
 * Copyright: (c) 2009 Roedy Green, Canadian Mind Products, http://mindprod.com
 *
 * Licence: This software may be copied and used freely for any purpose but military.
 *          http://mindprod.com/contact/nonmil.html
 *
 * Requires: JDK 1.6+
 *
 * Created with: IntelliJ IDEA IDE.
 *
 * Version History:
 *  2.8 2009-04-04 - no longer correct missing entities. Just issue warning messages.
 */
package com.mindprod.compactor;

/**
 * Classifies an HTML tag name so the finite state automaton parser knows how to treat the white space around it.
 *
 * @author Roedy Green, Canadian Mind Products
 * @version 2.8 2009-04-04 - no longer correct missing entities. Just issue warning messages.
 * @since 2009
 */
enum TagCategory
    {
        /**
         * {@code <!--} the marker that begins a comment.
         */
        COMMENT,

        /**
         * not a tag at all, usually the result of a missing {@code &lt;} entity.
         */
        INVALID,

        /**
         * closing tag such as {@code </dt>} {@code </h?>} {@code </li>} {@code </td>} that trims spaces to the
         * left of the tag.
         */
        LEFT_TRIM,

        /**
         * ordinary tag that needs no special processing.
         */
        PLAIN,

        /**
         * {@code <pre>}
         */
        PRE,

        /**
         * opening tag such as {@code <dt>} {@code <h?>} {@code <li>} {@code <td>} that trims spaces to the
         * right of the tag.
         */
        RIGHT_TRIM,

        /**
         * {@code </pre>}
         */
        SLASH_PRE;

    // -------------------------- STATIC FIELDS --------------------------

    /**
     * Tag names, without any leading slash, whose adjacent spaces are trimmed. Headings are matched separately by
     * {@link #isHeading(String)}.
     */
    private static final String[] SPACE_TRIMMING_TAGS = {
            "blockquote",
            "div",
            "dt",
            "li",
            "ol",
            "table",
            "tbody",
            "td",
            "th",
            "thead",
            "title",
            "tr",
            "ul"
    };

    // -------------------------- STATIC METHODS --------------------------

    /**
     * Work out which category a tag belongs to.
     *
     * @param tag tag name, e.g. "dt" or "/dt", without the enclosing {@code <>} or trailing whitespace, in lower
     *            case, trimmed to 11 chars. A trailing {@code >} is also expected to be gone, e.g.
     *            {@code blockquote>} is passed as {@code blockquote} and {@code !>} as {@code !}.
     *
     * @return category of the tag; INVALID when nothing remains once an optional leading slash is removed.
     */
    static TagCategory categorise( final String tag )
        {
        final boolean closing = tag.startsWith( "/" );
        final String name = closing ? tag.substring( 1 ) : tag;

        if ( name.length() == 0 )
            {
            // empty, or a lone slash.
            return INVALID;
            }
        if ( name.equals( "pre" ) )
            {
            return closing ? SLASH_PRE : PRE;
            }
        if ( !closing && name.equals( "!--" ) )
            {
            // the comment marker is recognised only without a leading slash.
            return COMMENT;
            }
        if ( isHeading( name ) || isSpaceTrimming( name ) )
            {
            // a closing tag trims to its left, an opening tag trims to its right.
            return closing ? LEFT_TRIM : RIGHT_TRIM;
            }
        return PLAIN;
        }

    /**
     * Is this a heading tag name: the letter h followed by a single digit 1 through 9?
     *
     * @param name tag name without any leading slash.
     *
     * @return true for h1 .. h9. The range deliberately runs to 9, wider than the h1 .. h6 that HTML defines.
     */
    private static boolean isHeading( final String name )
        {
        if ( name.length() != 2 || name.charAt( 0 ) != 'h' )
            {
            return false;
            }
        final char level = name.charAt( 1 );
        return level >= '1' && level <= '9';
        }

    /**
     * Is this one of the fixed tag names whose adjacent spaces are trimmed?
     *
     * @param name tag name without any leading slash.
     *
     * @return true if name exactly matches an entry in SPACE_TRIMMING_TAGS.
     */
    private static boolean isSpaceTrimming( final String name )
        {
        for ( String candidate : SPACE_TRIMMING_TAGS )
            {
            if ( candidate.equals( name ) )
                {
                return true;
                }
            }
        return false;
        }
    }