package com.mindprod.compactor;
/**
* Categorises an HTML tag for the finite state automaton parser.
*
* @author Roedy Green, Canadian Mind Products
* @version 3.0 2010-02-12 trim space inside {@code <p>..</p>}.
* @since 2009
*/
enum TagCategory
{
    /**
     * {@code <!--} begin comment marker
     */
    COMMENT,
    /**
     * not a tag, usually a stray {@code <} that is missing its {@code &lt;} entity
     */
    INVALID,
    /**
     * closing tag like {@code </dt> </h?> </li> </td>} that trims spaces to the left of the tag
     */
    LEFT_TRIM,
    /**
     * ordinary tag, no special processing.
     */
    PLAIN,
    /**
     * {@code <pre>}
     */
    PRE,
    /**
     * opening tag like {@code <dt> <h?> <li> <td>} that trims spaces to the right of the tag
     */
    RIGHT_TRIM,
    /**
     * {@code <script ...} leave contents as is
     */
    SCRIPT,
    /**
     * {@code </pre>}
     */
    SLASH_PRE,
    /**
     * {@code </script>}
     */
    SLASH_SCRIPT;

    /**
     * Decide which category a tag belongs to.
     * <p>
     * A leading {@code /} marks a closing tag. Closing space-trimming tags map to {@link #LEFT_TRIM}, opening
     * ones to {@link #RIGHT_TRIM}. {@code pre} and {@code script} get their own opening and closing categories.
     * {@code !--} maps to {@link #COMMENT} (opening form only). An empty name, i.e. {@code ""} or {@code "/"},
     * maps to {@link #INVALID}. Everything else is {@link #PLAIN}.
     *
     * @param tag e.g. "dt" "/dt" without enclosing {@code <>} or trailing whitespace, in lower case, trimmed to
     *            11 chars. also {@code blockquote>} -&gt; {@code blockquote}, {@code !>} -&gt; {@code !}.
     *            Must not be null.
     *
     * @return category of the tag, never null
     */
    static TagCategory categorise( final String tag )
    {
        if ( tag.startsWith( "/" ) )
        {
            // closing tag: classify on the name that follows the slash
            final String base = tag.substring( 1 );
            switch ( base )
            {
                case "pre":
                    return SLASH_PRE;
                case "script":
                    return SLASH_SCRIPT;
                case "":
                    return INVALID;
                default:
                    return isSpaceTrimming( base ) ? LEFT_TRIM : PLAIN;
            }
        }
        // opening tag
        switch ( tag )
        {
            case "pre":
                return PRE;
            case "script":
                return SCRIPT;
            case "!--":
                return COMMENT;
            case "":
                return INVALID;
            default:
                return isSpaceTrimming( tag ) ? RIGHT_TRIM : PLAIN;
        }
    }

    /**
     * Tests whether a tag name is one of the tags around which spaces are trimmed. This is the single list
     * shared by the opening ({@link #RIGHT_TRIM}) and closing ({@link #LEFT_TRIM}) paths.
     *
     * @param name lower-case tag name without any leading slash.
     *
     * @return true if spaces adjacent to this tag are trimmed.
     */
    private static boolean isSpaceTrimming( final String name )
    {
        switch ( name )
        {
            case "article":
            case "aside":
            case "audio":
            case "blockquote":
            case "canvas":
            case "caption":
            case "colgroup":
            case "datalist":
            case "details":
            case "dialog":
            case "div":
            case "dt":
            case "embed":
            case "figcaption": // the real HTML5 element name; "/figcaption" still fits the 11-char limit
            case "figure":
            case "figurecaption": // legacy misspelling of figcaption, retained so existing callers see no change
            case "footer":
            case "h1":
            case "h2":
            case "h3":
            case "h4":
            case "h5":
            case "h6":
            case "h7":
            case "h8":
            case "h9":
            case "header":
            case "li":
            case "main":
            case "menuitem":
            case "meter":
            case "nav":
            case "noscript":
            case "ol":
            case "output":
            case "p":
            case "progress":
            case "section":
            case "summary":
            case "table":
            case "tbody":
            case "td":
            case "th":
            case "thead":
            case "title":
            case "tr":
            case "ul":
                return true;
            default:
                return false;
        }
    }
}