package com.mindprod.compactor;
import com.mindprod.commandline.CommandLine;
import com.mindprod.common18.EIO;
import com.mindprod.filter.AllButSVNDirectoriesFilter;
import com.mindprod.filter.ExtensionListFilter;
import com.mindprod.hunkio.HunkIO;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.util.regex.Pattern;
import static java.lang.System.*;
/**
* Compacts HTML by removing unnecessary white space.
* <p/>
* We always compact whitespace inside and outside comments.
* <p/>
* We don't consolidate tags. e.g.
* <span class="x">this </span><span class="x">and that</span> can be
* collapsed
* to <span class="x">this and that</span>.
* <p/>
* We don't convert tags to lower case e.g. <BR> to <br>
* <p/>
* We leave all comments in place. If ever such a feature is implemented, it must
* not strip SSI comments. It may or may not leave macro comments.
* <p/>
* We do not remove macro generations. You can do that with StripGenerated.
* <p/>
* We do not remove the macro comments.
* <p/>
* We remove space and NLs on the right of <div><dt><li><h?><ol><table><
* tbody><td><th><thead><tr><ul> tags.
* <p/>
* We remove space and NLs on the left of </div></dt></li></h?></ol></table><
* /tbody></td></th></thead></tr></ul> tags.
* <p/>
* We always remove lead and trailing spaces from lines.
* <p/>
* We compact spaces inside HTML text, tags and comments.
* <p/>
* We leave spaces as is inside <pre>...</pre> and inside quoted tag parameters.
* <p/>
* We convert " to &quot; > to &gt; when used in raw text.
* <p/>
* We don't tokenize to convert to CBF, compact binary format. The catch here is web
* browsers can't read the result without a plug-in. This would result in a major
* compaction. Perhaps the XML folk will eventually get disgusted with their obese
* format and XHTML can inherit a now compact form.
* <p/>
* We don't do any LZW compression. The catch is, browsers can't read this without a
* special plug-in.
*
* @author Roedy Green, Canadian Mind Products
* @version 3.9 2014-07-26 internal simplification. Now preserves space both before and after comment which may be significant.
* @see com.mindprod.compactor.HTMLState
* @since 2006
*/
public class Compactor
{
    /**
     * keep macros and SSI and PAD comments.
     * Handed to HTMLState.compactString by {@link #compactStringKeepingMacrosStrippingComments}.
     */
    static final Pattern MACRO_PATTERN = Pattern.compile( "(?:\\s*(?:macro|generated|/generated|alias|cross|PAD|/PAD))|#" );

    /**
     * <!--# SSI comment pattern, also keep <!-- PAD Program_Version for Canadian Sales Tax Calculator -->4.4<!-- /PAD -->
     * Handed to HTMLState.compactString by {@link #compactStringStrippingMacrosAndComments}.
     */
    static final Pattern JUST_SSI_PATTERN = Pattern.compile( "(?:\\s*(?:PAD|/PAD))|#" );

    /**
     * first year of the copyright range.
     */
    private static final int FIRST_COPYRIGHT_YEAR = 2006;

    /**
     * undisplayed copyright notice
     */
    @SuppressWarnings( { "UnusedDeclaration" } )
    private static final String EMBEDDED_COPYRIGHT =
            "Copyright: (c) 2006-2017 Roedy Green, Canadian Mind Products, http://mindprod.com";

    /**
     * date this version was released.
     */
    @SuppressWarnings( { "UnusedDeclaration" } )
    private static final String RELEASE_DATE = "2014-07-26";

    /**
     * how to use the command line
     */
    private static final String USAGE = "\nCompactor needs a filename.html or a space-separated list of filenames, " +
                                        "with optional -s -q -v switches.";

    /**
     * embedded version string.
     */
    @SuppressWarnings( { "UnusedDeclaration" } )
    private static final String VERSION_STRING = "3.9";

    /**
     * constructor. All the work is done by static methods; kept public for existing callers.
     */
    public Compactor()
    {
    }

    /**
     * compact and tidy one file, in place.
     * <p/>
     * Only files with extension html, htm or htmlfrag are processed; anything else is reported
     * on System.err and left untouched. When the compacted text is the very same String that was
     * read, the file is not rewritten. Otherwise the result is written to a temp file which then
     * replaces the original via HunkIO.deleteAndRename.
     * <p/>
     * Unless quiet, progress is shown on System.out: the file name followed by "-" (unchanged)
     * or "*" (rewritten).
     *
     * @param quiet              true if want progress messages suppressed
     * @param fileBeingProcessed the file currently being processed.
     *
     * @throws IOException if the file cannot be read, or the compacted replacement cannot be written.
     *                     Suppress IntelliJ Code Analyse that wants to make this private.
     * @noinspection WeakerAccess, SameParameterValue, StringEquality
     */
    public static void compactFile( boolean quiet, File fileBeingProcessed ) throws IOException
    {
        if ( !quiet )
        {
            out.print( " compacting " + fileBeingProcessed.getName() + " " );
        }
        switch ( EIO.getExtension( fileBeingProcessed ) )
        {
            case "html":
            case "htm":
            case "htmlfrag":
                break;
            default:
                if ( !quiet )
                {
                    // terminate the progress line started above so it is not left dangling.
                    out.println();
                }
                err.println( "Cannot compact: "
                             + fileBeingProcessed.getName()
                             + " not .html .htm .htmlfrag file" );
                return;
        }
        final String big = HunkIO.readEntireFile( fileBeingProcessed );
        final String result = compactStringKeepingMacrosAndComments( big, fileBeingProcessed.getPath() );
        // Deliberate identity comparison: the compactString methods are documented to return
        // big itself when nothing changed, so == is the cheap "unchanged" test.
        if ( result == big )
        {
            if ( !quiet )
            {
                out.println( "-" );
            }
            return;
        }
        if ( !quiet )
        {
            out.println( "*" );
        }
        final File tempFile = HunkIO.createTempFile( "temp_", ".tmp", fileBeingProcessed );
        // try-with-resources so the writer is closed even when the write fails.
        try ( FileWriter emit = new FileWriter( tempFile ) )
        {
            emit.write( result );
        }
        catch ( IOException e )
        {
            // don't leave a partially written temp file lying around; the original is untouched.
            if ( !tempFile.delete() )
            {
                tempFile.deleteOnExit();
            }
            throw e;
        }
        HunkIO.deleteAndRename( tempFile, fileBeingProcessed );
    }

    /**
     * compact a String as needed
     *
     * @param uncompacted uncompacted string
     * @param where       where this string came from, used in error messages to help you track down source
     * @param how         *=compactStringStrippingMacrosAndComments, including <!--cseignore--><!--/cseignore-->
     *                    +=compactStringKeepingMacrosAndComments
     *                    -=does nothing
     *                    Q or q=Quick If first 400 chars contain a double space, compactStringKeepingMacrosAndComments,
     *                    otherwise do nothing.
     *                    Any other value trips an assertion; with assertions disabled the string
     *                    is returned unchanged.
     *
     * @return compacted String
     */
    public static String compactStringAsNeeded( final String uncompacted, final String where, final char how )
    {
        switch ( how )
        {
            case '*':
                return Compactor.compactStringStrippingMacrosAndComments( uncompacted, where );
            case '+':
                return Compactor.compactStringKeepingMacrosAndComments( uncompacted, where );
            case '-':
                return uncompacted;
            case 'Q':
            case 'q':
                // Only the head of the string is sampled.
                final String test = ( uncompacted.length() < 400 ) ? uncompacted : uncompacted.substring( 0, 400 );
                if ( test.contains( " " + " " ) )
                {
                    // a double space in the sample: do the full compaction.
                    return Compactor.compactStringKeepingMacrosAndComments( uncompacted, where );
                }
                else
                {
                    // no double space in the sample: do nothing.
                    return uncompacted;
                }
            default:
                assert false : "invalid Compactor.compactStringAsNeeded.how " + how + " It must be one of * + - Q";
                return uncompacted;
        }
    }

    /**
     * Remove excess whitespace from HTML represented by string, keeping all macros and comments.
     * Delegates to HTMLState.compactString with no pattern.
     *
     * @param big   the String to compact.
     * @param where used in error messages to indicate where the error occurred, usually the name of the file being
     *              compacted.
     *
     * @return the compacted String, big itself if nothing changed.
     */
    public static String compactStringKeepingMacrosAndComments( final String big, final String where )
    {
        return HTMLState.compactString( big, where, null );
    }

    /**
     * Remove excess whitespace from HTML represented by string, keeping macros but stripping comments.
     * Delegates to HTMLState.compactString with {@link #MACRO_PATTERN}.
     *
     * @param big   the String to compact.
     * @param where used in error messages to indicate where the error occurred, usually the name of the file being
     *              compacted.
     *
     * @return the compacted String, big itself if nothing changed.
     */
    public static String compactStringKeepingMacrosStrippingComments( final String big, final String where )
    {
        return HTMLState.compactString( big, where, MACRO_PATTERN );
    }

    /**
     * Remove excess whitespace from HTML represented by string, strip all macros and comments.
     * Delegates to HTMLState.compactString with {@link #JUST_SSI_PATTERN}.
     *
     * @param big   the String to compact.
     * @param where used in error messages to indicate where the error occurred, usually the name of the file being
     *              compacted.
     *
     * @return the compacted String, big itself if nothing changed.
     */
    public static String compactStringStrippingMacrosAndComments( final String big, final String where )
    {
        return HTMLState.compactString( big, where, JUST_SSI_PATTERN );
    }

    /**
     * compacts HTML files.
     * <p/>
     * Each file is processed independently: a failure on one file is reported on System.err and
     * the run continues with the next file.
     *
     * @param args names of files to process, dirs, files, -s, *.*, no wildcards.
     *
     * @throws IllegalArgumentException if the command line selects no files.
     */
    public static void main( String[] args )
    {
        out.println( "Gathering html files to compact..." );
        CommandLine commandLine = new CommandLine( args,
                new AllButSVNDirectoriesFilter(),
                new ExtensionListFilter( ExtensionListFilter.COMMON_HTML_EXTENSIONS ) );
        final boolean quiet = commandLine.isQuiet();
        if ( commandLine.size() == 0 )
        {
            throw new IllegalArgumentException( "No files found to process\n" + USAGE );
        }
        for ( File file : commandLine )
        {
            try
            {
                compactFile( quiet, file );
            }
            catch ( FileNotFoundException e )
            {
                err.println( "Error: "
                             + EIO.getCanOrAbsPath( file )
                             + " not found." );
            }
            catch ( Exception e )
            {
                // boundary catch: report the problem with the offending file and carry on.
                err.println();
                e.printStackTrace( err );
                err.println( " in file "
                             + EIO.getCanOrAbsPath( file ) );
                err.println();
            }
        }
    }
}