package com.mindprod.compactor;
/**
 * Classification of an HTML tag, used to decide how whitespace next to the tag is handled.
 */
enum TagCategory
{
    /**
     * Start of an HTML comment, i.e. the tag text {@code !--}.
     */
    COMMENT,

    /**
     * Not a tag at all, usually a bare {@code <} that should have been written as an entity.
     * Returned when nothing remains of the tag after an optional leading slash.
     */
    INVALID,

    /**
     * Closing tag such as {@code </dt>}, {@code </h1>}, {@code </li>} or {@code </td>}
     * that trims spaces to the left of the tag.
     */
    LEFT_TRIM,

    /**
     * Ordinary tag, no special processing.
     */
    PLAIN,

    /**
     * The opening {@code <pre>} tag.
     */
    PRE,

    /**
     * Opening tag such as {@code <dt>}, {@code <h1>}, {@code <li>} or {@code <td>}
     * that trims spaces to the right of the tag.
     */
    RIGHT_TRIM,

    /**
     * The closing {@code </pre>} tag.
     */
    SLASH_PRE;

    /**
     * Tag names (other than the h1..h9 headings) whose opening form is RIGHT_TRIM
     * and whose closing form is LEFT_TRIM.
     */
    private static final String[] TRIMMABLE_NAMES = {
            "dt", "li", "ol", "td", "th", "tr", "ul",
            "div",
            "table", "tbody", "thead", "title",
            "blockquote"
    };

    /**
     * Decide which category a tag belongs to.
     *
     * @param tag tag text such as "dt" or "/dt". Expected without the enclosing angle brackets
     *            or trailing whitespace, in lower case (comparisons are case-sensitive), and
     *            trimmed to 11 chars. A trailing {@code >} is expected to be stripped already,
     *            e.g. {@code blockquote>} becomes {@code blockquote} and {@code !>} becomes
     *            {@code !}.
     *
     * @return category of the tag; anything unrecognised is PLAIN.
     */
    static TagCategory categorise( final String tag )
    {
        // A leading slash marks a closing tag; the rest is the bare element name.
        final boolean closing = tag.startsWith( "/" );
        final String name = closing ? tag.substring( 1 ) : tag;

        if ( name.length() == 0 )
        {
            return INVALID;
        }
        if ( name.equals( "pre" ) )
        {
            return closing ? SLASH_PRE : PRE;
        }
        // Only the opening form "!--" is a comment marker; "/!--" falls through to PLAIN.
        if ( !closing && name.equals( "!--" ) )
        {
            return COMMENT;
        }
        if ( isTrimmable( name ) )
        {
            // Closing tags trim the whitespace before them, opening tags the whitespace after.
            return closing ? LEFT_TRIM : RIGHT_TRIM;
        }
        return PLAIN;
    }

    /**
     * Test whether a bare element name is one whose adjacent whitespace gets trimmed.
     *
     * @param name element name without any leading slash.
     *
     * @return true for h1 through h9 and for every entry in TRIMMABLE_NAMES.
     */
    private static boolean isTrimmable( final String name )
    {
        if ( name.length() == 2 && name.charAt( 0 ) == 'h' )
        {
            final char level = name.charAt( 1 );
            if ( '1' <= level && level <= '9' )
            {
                return true;
            }
        }
        for ( int i = 0; i < TRIMMABLE_NAMES.length; i++ )
        {
            if ( TRIMMABLE_NAMES[ i ].equals( name ) )
            {
                return true;
            }
        }
        return false;
    }
}