/*
Compacts HTML

copyright (c) 1999-2008 Roedy Green, Canadian Mind Products
may be copied and used freely for any purpose but military.
Roedy Green
Canadian Mind Products
#101 - 2536 Wark Street
Victoria, BC Canada
V8T 4G8
tel: (250) 361-9093
roedy g at mindprod dotcom
http://mindprod.com

We always compact whitespace inside and outside comments.

We don't consolidate tags. e.g.
 <span class="x">this </span><span class="x">and that</span> can be collapsed
 to <span class="x">this and that</span>.

We don't convert tags to lower case e.g. <BR> to <br>

We leave all comments in place. If ever such a feature is implemented, it must
not strip SSI comments. It may or may not leave macro comments.

We do not remove macro generations. You can do that with StripGenerated.

We do not remove the macro comments.

We remove space and NLs on the right of <div><dt><li><h?><ol><table><tbody><td><th><thead><tr><ul>  tags.

We remove space and NLs on the left of </div></dt></li></h?></ol></table></tbody></td></th></thead></tr></ul> tags.

We always remove lead and trailing spaces from lines.

We compact spaces inside HTML text, tags and comments.

We leave spaces as is inside <pre>...</pre> and inside quoted tag parameters.

We convert " to &quot; > to &gt; when used in raw text.

We don't tokenise to convert to CBF, compact binary format. The catch here is web
browsers can't read the result without a plugin. This would result in a major
compaction. Perhaps the XML folk will eventually get disgusted with their obese
format and XHTML can inherit a now compact form.

We don't do any LZW compression. The catch is, browsers can't read this without a
special plugin.

Version history
2.2 2006-03-15 Suppress IntelliJ Code Analyse that wants to make this default scope.
2.3 2008-02-15 complete rewrite, mainly to handle removing space around <dt> <li> <h?>  and <td> tags.
2.4 2008-02-15 add more tags that get trimmed. Charge $10
2.5 2008-02-28 tighter removal of whitespace surrounding comments.
2.6 2008-02-28 optionally allow comments to be stripped out
    entirely. Preserve some space around configurable magic
    macro comments that expand into text such as <!# SSI or
    <!-- macro.
2.7 2008-07-27 remove all space just before > in a tag.  < space will convert to &lt; space.

*/
package com.mindprod.compactor;

import com.mindprod.filter.AllDirectoriesFilter;
import com.mindprod.filter.CommandLine;
import com.mindprod.filter.ExtensionListFilter;
import com.mindprod.hunkio.HunkIO;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import static java.lang.System.*;
import java.util.regex.Pattern;

/**
 * Compacts HTML by removing excess white space.
 * <p/>
 * The actual compaction is delegated to {@code HTMLState.compactString}; this class supplies the
 * command-line driver and the safe replace-the-file-in-place logic.
 *
 * @author Roedy Green
 * @version 2.7 2008-07-27 remove all space just before > in a tag.  < space will convert to &lt; space.
 */
public class Compactor
    {
// ------------------------------ FIELDS ------------------------------

    /**
     * undisplayed copyright notice
     */
    @SuppressWarnings( { "UnusedDeclaration" } )
    public static final String EMBEDDED_COPYRIGHT =
            "copyright (c) 1999-2008 Roedy Green, Canadian Mind Products, http://mindprod.com";

    /**
     * date this version was released.
     */
    @SuppressWarnings( { "UnusedDeclaration" } )
    private static final String RELEASE_DATE = "2008-07-27";

    /**
     * embedded version string.
     */
    @SuppressWarnings( { "UnusedDeclaration" } )
    public static final String VERSION_STRING = "2.7";

    // The four patterns below are handed to HTMLState.compactString in this order.
    // They are compiled once here rather than on every call; a compiled Pattern is
    // immutable and safe to share.
    // NOTE(review): the exact role of each pattern is defined by HTMLState, which is
    // not visible here; the descriptions are taken from the original inline comments.

    /**
     * comment lead-in for html static macros, i.e. &lt;!-- macro
     */
    private static final Pattern MACRO_COMMENT = Pattern.compile( "\\s*macro\\s" );

    /**
     * comment lead-in for SSI, i.e. &lt;!--#
     */
    private static final Pattern SSI_COMMENT = Pattern.compile( "#" );

    /**
     * comment lead-in of the form &lt;!-- generated
     */
    private static final Pattern GENERATED_COMMENT = Pattern.compile( "\\s*generated\\s" );

    /**
     * comment lead-in of the form &lt;!-- /generated
     */
    private static final Pattern END_GENERATED_COMMENT = Pattern.compile( "\\s*/generated\\s" );
// -------------------------- PUBLIC STATIC METHODS --------------------------

    /**
     * compact and tidy one file, replacing it in place if compaction changed anything.
     * <p/>
     * Files whose names do not end in .html or .htm are rejected with a message on stderr and left untouched.
     * The compacted text is first written to a temporary file, which then replaces the original.
     *
     * @param fileBeingProcessed File to compact and tidy.
     * @param quiet              true if want progress messages suppressed
     * @throws IOException if the file cannot be read, the temporary file cannot be written, or the
     *                     original cannot be replaced by the compacted version.
     * @noinspection WeakerAccess,SameParameterValue,StringEquality
     */
    public static void compactFile( File fileBeingProcessed,
                                    boolean quiet ) throws IOException
        {
        final String name = fileBeingProcessed.getName();
        if ( !quiet )
            {
            out.print( "  compacting " + name + " " );
            }
        if ( !( name.endsWith( ".html" ) || name.endsWith( ".htm" ) ) )
            {
            err.println( "Cannot compact: "
                         + name
                         + " not .html file" );
            return;
            }
        final String big = HunkIO.readEntireFile( fileBeingProcessed );

        final String result = compactString( big, fileBeingProcessed.getPath() );
        // use == not equals() because compare already done in compactString,
        // which hands back big itself when nothing changed.
        if ( result == big )
            {
            // nothing changed. No need to write results.
            if ( !quiet )
                {
                out.println( "-" );
                }
            return;
            }
        if ( !quiet )
            {
            out.println( "*" );
            }
        // generate output into a temporary file until we are sure all is ok.
        // create a temp file in the same directory as filename
        final File tempfile =
                HunkIO.createTempFile( "temp", ".tmp", fileBeingProcessed );
        writeFile( tempfile, result );
        // successfully created output in same directory as input,
        // Now make it replace the input file.
        replaceFile( fileBeingProcessed, tempfile );
        // don't delete tempfile, it has been renamed to a real file
        }// end compactFile

    /**
     * Remove excess whitespace from HTML represented by string.
     *
     * @param big   the String to compact.
     * @param where used in error messages to indicate where the error occurred, usually the name of the file being
     *              compacted.
     * @return the compacted String, big itself if nothing changed.
     */
    public static String compactString( final String big, final String where )
        {
        return HTMLState.compactString( big, where, true/* keep comments */,
                MACRO_COMMENT,
                SSI_COMMENT,
                GENERATED_COMMENT,
                END_GENERATED_COMMENT
        );
        }

// -------------------------- STATIC METHODS --------------------------

    /**
     * Replace original with replacement, reporting failure instead of silently ignoring it.
     *
     * @param original    file to be replaced.
     * @param replacement file holding the new contents; renamed to original on success.
     * @throws IOException if the original cannot be deleted or the replacement cannot be renamed.
     */
    private static void replaceFile( File original, File replacement ) throws IOException
        {
        if ( !original.delete() )
            {
            // original is still intact, so the replacement is just litter.
            //noinspection ResultOfMethodCallIgnored
            replacement.delete();
            throw new IOException( "Unable to delete "
                                   + original.getPath()
                                   + " to replace it with its compacted version." );
            }
        if ( !replacement.renameTo( original ) )
            {
            // Deliberately keep the replacement: it is now the only copy of the content.
            throw new IOException( "Unable to rename "
                                   + replacement.getPath()
                                   + " to "
                                   + original.getPath()
                                   + ". The compacted text remains in the former." );
            }
        }

    /**
     * Write contents to target, always releasing the writer. If anything goes wrong, the partially written
     * target is deleted.
     * <p/>
     * NOTE(review): FileWriter encodes with the platform default charset; confirm that matches how
     * HunkIO.readEntireFile decoded the input.
     *
     * @param target   file to (over)write.
     * @param contents text to write.
     * @throws IOException if the file cannot be opened, written or closed.
     */
    private static void writeFile( File target, String contents ) throws IOException
        {
        FileWriter emit = null;
        boolean written = false;
        try
            {
            emit = new FileWriter( target );
            emit.write( contents );
            // close inside the try so that a failure to flush is reported, not swallowed.
            emit.close();
            written = true;
            }
        finally
            {
            if ( !written )
                {
                if ( emit != null )
                    {
                    try
                        {
                        emit.close();
                        }
                    catch ( IOException ignored )
                        {
                        // the original failure, already propagating, is the one worth reporting.
                        }
                    }
                //noinspection ResultOfMethodCallIgnored
                target.delete();
                }
            }
        }

// --------------------------- main() method ---------------------------

    /**
     * compacts HTML files. A failure on one file is reported on stderr and processing carries on with the next.
     *
     * @param args names of files to process, dirs, files, -s, *.*, no wildcards.
     */
    public static void main( String[] args )
        {
        // gather all the files mentioned on the command line.
        // either directories, files, *.*, with -s and subdirs option.

        out.println( "Gathering files to process..." );
        CommandLine wantedFiles = new CommandLine( args,
                1000,
                /* estimate of expected files */
                new AllDirectoriesFilter(),
                new ExtensionListFilter(
                        "html" ) );

        for ( File file : wantedFiles )
            {
            try
                {
                compactFile( file, false/* not quiet */ );
                }
            catch ( FileNotFoundException e )
                {
                err.println( "Error: "
                             + file.getAbsolutePath()
                             + " not found." );
                }
            catch ( Exception e )
                {
                // boundary catch: report and carry on with the remaining files.
                err.println( e.getMessage()
                             + " in file "
                             + file.getAbsolutePath() );
                err.println();
                e.printStackTrace();
                }
            }// end for
        }// end main
    }// end Compactor