package com.mindprod.compactor;
import com.mindprod.commandline.CommandLine;
import com.mindprod.filter.AllDirectoriesFilter;
import com.mindprod.filter.ExtensionListFilter;
import com.mindprod.hunkio.HunkIO;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.util.regex.Pattern;
import static java.lang.System.err;
import static java.lang.System.out;
/**
* Compacts HTML by removing unnecessary white space.
* <p/>
* We always compact whitespace inside and outside comments.
* <p/>
* We don't consolidate tags. e.g.
* <span class="x">this </span><span class="x">and that</span> can be collapsed
* to <span class="x">this and that</span>.
* <p/>
* We don't convert tags to lower case e.g. <BR> to <br>
* <p/>
* We leave all comments in place. If ever such a feature is implemented, it must
* not strip SSI comments. It may or may not leave macro comments.
* <p/>
* We do not remove macro generations. You can do that with StripGenerated.
* <p/>
* We do not remove the macro comments.
* <p/>
* We remove space and NLs on the right of <div><dt><li><h?><ol><table><tbody><td><th><thead><tr><ul> tags.
* <p/>
* We remove space and NLs on the left of </div></dt></li></h?></ol></table></tbody></td></th></thead></tr></ul> tags.
* <p/>
* We always remove lead and trailing spaces from lines.
* <p/>
* We compact spaces inside HTML text, tags and comments.
* <p/>
* We leave spaces as is inside <pre>...</pre> and inside quoted tag parameters.
* <p/>
* We convert " to &quot; > to &gt; when used in raw text.
* <p/>
* We don't tokenise to convert to CBF, compact binary format. The catch here is web
* browsers can't read the result without a plugin. This would result in a major
* compaction. Perhaps the XML folk will eventually get disgusted with their obese
* format and XHTML can inherit a now compact form.
* <p/>
* We don't do any LZW compression. The catch is, browsers can't read this without a
* special plugin.
*
* @author Roedy Green, Canadian Mind Products
* @version 2.8 2009-04-04 - no longer corrects missing entities. Just issues warning messages.
* @since 2006
*/
public class Compactor
{
    /**
     * undisplayed copyright notice
     */
    @SuppressWarnings( { "UnusedDeclaration" } )
    public static final String EMBEDDED_COPYRIGHT =
            "copyright (c) 1999-2009 Roedy Green, Canadian Mind Products, http://mindprod.com";

    /**
     * date this version was released.
     */
    @SuppressWarnings( { "UnusedDeclaration" } )
    private static final String RELEASE_DATE = "2009-04-04";

    /**
     * embedded version string.
     */
    @SuppressWarnings( { "UnusedDeclaration" } )
    public static final String VERSION_STRING = "2.8";

    /*
     * The four patterns below are handed, in this order, to HTMLState.compactString as its
     * last four arguments. They are compiled once here rather than on every call; Pattern
     * objects are immutable, so sharing them is safe. Their precise role is defined by
     * HTMLState, which is not part of this file.
     */

    /**
     * optional whitespace, the word "macro", then one whitespace character.
     */
    private static final Pattern MACRO_PATTERN = Pattern.compile( "\\s*macro\\s" );

    /**
     * a single '#' character.
     */
    private static final Pattern HASH_PATTERN = Pattern.compile( "#" );

    /**
     * optional whitespace, the word "generated", then one whitespace character.
     */
    private static final Pattern GENERATED_PATTERN = Pattern.compile( "\\s*generated\\s" );

    /**
     * optional whitespace, "/generated", then one whitespace character.
     */
    private static final Pattern END_GENERATED_PATTERN = Pattern.compile( "\\s*/generated\\s" );

    /**
     * compact and tidy one file, replacing it in place.
     * <p/>
     * Files whose names do not end in ".html" or ".htm" (case-sensitive) are rejected with a message
     * on stderr and left untouched. If compaction changes nothing, the file is not rewritten.
     * Otherwise the compacted text is written to a temporary file which then replaces the original.
     * <p/>
     * Progress is reported on stdout unless quiet: the file name, followed by "-" when the file was
     * already compact or "*" when it was rewritten.
     *
     * @param quiet              true if want progress messages suppressed
     * @param fileBeingProcessed File to compact and tidy.
     *
     * @throws IOException if the file cannot be read, the temporary file cannot be written, or the
     *                     original cannot be replaced by the temporary file.
     * @noinspection WeakerAccess,SameParameterValue,StringEquality
     */
    public static void compactFile( boolean quiet, File fileBeingProcessed
    ) throws IOException
    {
        final String name = fileBeingProcessed.getName();
        if ( !quiet )
        {
            out.print( " compacting " + name + " " );
        }
        if ( !( name.endsWith( ".html" ) || name.endsWith( ".htm" ) ) )
        {
            // leading space added: message used to run the file name and "not" together.
            err.println( "Cannot compact: "
                         + name
                         + " not .html file" );
            return;
        }
        final String big = HunkIO.readEntireFile( fileBeingProcessed );
        final String result = compactString( big, fileBeingProcessed.getPath() );
        // Deliberate identity comparison: compactString returns big itself when nothing changed.
        if ( result == big )
        {
            if ( !quiet )
            {
                out.println( "-" );
            }
            return;
        }
        if ( !quiet )
        {
            out.println( "*" );
        }
        final File tempfile =
                HunkIO.createTempFile( "temp", ".tmp", fileBeingProcessed );
        try
        {
            writeString( tempfile, result );
        }
        catch ( IOException e )
        {
            // Original is still intact; discard the partial temp file (best effort) and report.
            tempfile.delete();
            throw e;
        }
        if ( !fileBeingProcessed.delete() )
        {
            // Original is still intact; discard the temp file (best effort) and report.
            tempfile.delete();
            throw new IOException( "Unable to delete "
                                   + fileBeingProcessed.getPath()
                                   + " to replace it with its compacted version" );
        }
        if ( !tempfile.renameTo( fileBeingProcessed ) )
        {
            // The original is already gone, so the temp file is the only copy: keep it.
            throw new IOException( "Unable to rename "
                                   + tempfile.getPath()
                                   + " to "
                                   + fileBeingProcessed.getPath()
                                   + "; compacted text remains in the temporary file" );
        }
    }

    /**
     * Remove excess whitespace from HTML represented by string.
     * <p/>
     * Delegates to HTMLState.compactString with a fixed flag of true and the four precompiled
     * patterns declared in this class.
     *
     * @param big   the String to compact.
     * @param where used in error messages to indicate where the error occurred, usually the name of the file being
     *              compacted.
     *
     * @return the compacted String, big itself if nothing changed.
     */
    public static String compactString( final String big, final String where )
    {
        return HTMLState.compactString( big, where, true,
                MACRO_PATTERN,
                HASH_PATTERN,
                GENERATED_PATTERN,
                END_GENERATED_PATTERN );
    }

    /**
     * compacts HTML files.
     * <p/>
     * Each file selected by the command line is compacted in turn; a failure on one file is
     * reported on stderr and does not stop processing of the remaining files.
     *
     * @param args names of files to process, dirs, files, -s, *.*, no wildcards.
     */
    public static void main( String[] args )
    {
        out.println( "Gathering files to process..." );
        CommandLine wantedFiles = new CommandLine( args,
                new AllDirectoriesFilter(),
                new ExtensionListFilter( "html" ) );
        for ( File file : wantedFiles )
        {
            try
            {
                compactFile( false, file );
            }
            catch ( FileNotFoundException e )
            {
                err.println( "Error: "
                             + file.getAbsolutePath()
                             + " not found." );
            }
            catch ( Exception e )
            {
                // Top-level boundary: report and carry on with the next file.
                err.println( e.getMessage()
                             + " in file "
                             + file.getAbsolutePath() );
                err.println();
                e.printStackTrace();
            }
        }
    }

    /**
     * write text to a file, always closing the writer even if the write fails.
     *
     * @param target file to (over)write.
     * @param text   characters to write.
     *
     * @throws IOException if the file cannot be opened, written or closed.
     */
    private static void writeString( File target, String text ) throws IOException
    {
        final FileWriter emit = new FileWriter( target );
        try
        {
            emit.write( text );
        }
        finally
        {
            emit.close();
        }
    }
}