package com.mindprod.http;
import com.mindprod.common18.Build;
import com.mindprod.common18.ST;
import com.mindprod.fastcat.FastCat;
import org.jetbrains.annotations.Nullable;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import static java.lang.System.*;
/**
* Base class to Post, Get, Head, Probe and Chase to send/receive HTTP messages.
* <p/>
* Originally based on work by Jonathan Revusky
*
* @author Roedy Green, Canadian Mind Products
* @version 3.2 2014-07-15 add isGood method to categorise the responseCode
* @since 1998-01-01
*/
abstract class Http
{
/**
* Charset for IBM437.
* Looked up with Charset.forName during class initialisation; forName throws if the running JVM
* does not provide the charset.
*/
public static final Charset IBM437 = Charset.forName( "IBM437" );
/**
* Charset for IBM850. Looked up with Charset.forName during class initialisation.
*/
public static final Charset IBM850 = Charset.forName( "IBM850" );
/**
* Charset for ISO-8859-1.
*/
public static final Charset ISO88591 = Charset.forName( "ISO-8859-1" );
/**
* Charset for UTF-16.
*/
public static final Charset UTF16 = Charset.forName( "UTF-16" );
/**
* Charset for UTF-8, looked up under the name "UTF8".
*/
public static final Charset UTF8 = Charset.forName( "UTF8" );
/**
* Charset for code page windows-1252.
*/
public static final Charset WINDOWS1252 = Charset.forName( "windows-1252" );
/**
* true if want extra debugging output. If you change this to true, make sure you set it back to false before
* distributing http or any package that uses it.
* When true, gzip is also left out of the default Accept-Encoding.
*/
static final boolean DEBUGGING = false;
/**
* Message length, in bytes, to presume when the server gives no usable Content-Length.
*/
static final int DEFAULT_LENGTH = 32 * 1024;
/**
* First year of the copyright range. Not referenced elsewhere in this class.
*/
private static final int FIRST_COPYRIGHT_YEAR = 1998;
/**
* Highest response code that has a slot in responseCodeLookup.
*/
private static final int HIGHEST_LEGIT_RESPONSE_CODE = 522;
/**
* Lowest response code that has a slot in responseCodeLookup; also the offset subtracted
* from a response code to get its array index.
*/
private static final int LOWEST_LEGIT_RESPONSE_CODE = 100;
/**
* Undisplayed copyright notice.
*/
private static final String EMBEDDED_COPYRIGHT =
"Copyright: (c) 1998-2017 Roedy Green, Canadian Mind Products, http://mindprod.com";
/**
* Date the package was released.
*
* @noinspection UnusedDeclaration
*/
private static final String RELEASE_DATE = "2014-07-15";
/**
* Embedded version string.
*/
@SuppressWarnings( { "UnusedDeclaration" } )
private static final String VERSION_STRING = "3.2";
/**
* Table used to convert a responseCode to a responseMessage.
* Indexed by responseCode - LOWEST_LEGIT_RESPONSE_CODE; slots for codes without a known meaning stay null.
*/
private static final String[] responseCodeLookup;
static
{
System.setProperty( "java.net.preferIPv4Stack", "true" );
System.setProperty( "jsse.enableSNIExtension", "false" );
System.setProperty( "jdk.tls.ephemeralDHKeySize", "2048" );
}
static
{
/**
* There is a more elaborate csv list in Brokenlinks
* get wording by indexing into array by responseCode offset.
* We don't use a resource. That would complicate things for building
* jars that use the http package.
* Last updated 2014-04-18
*/
responseCodeLookup = new String[ HIGHEST_LEGIT_RESPONSE_CODE - LOWEST_LEGIT_RESPONSE_CODE + 1 ];
means( 100, "OK to continue with request." );
means( 101, "Server has switched protocols in upgrade header." );
means( 200, "ok" );
means( 201, "Object created, reason = new URI." );
means( 202, "accepted: Async completion (TBS)" );
means( 203, "non-authoritative information" );
means( 204, "no content" );
means( 205, "reset content" );
means( 206, "partial content" );
means( 207, "multistatus" );
means( 208, "already reported" );
means( 226, "IM (Instance maniplation) used." );
means( 300, "Server could not determine what to return." );
means( 301, "permanent redirect" );
means( 302, "temporary redirect" );
means( 303, "temporary redirect to new access method" );
means( 304, "not recently modified" );
means( 305, "Redirection to proxy, location header specifies proxy to use." );
means( 307, "temporary redirect" );
means( 308, "permanent redirect" );
means( 400, "Invalid syntax" );
means( 401, "Access denied, authorisation required." );
means( 402, "payment required" );
means( 403, "request forbidden" );
means( 404, "page not found" );
means( 405, "method not allowed" );
means( 406, "No response acceptable to client found." );
means( 407, "proxy authentication required" );
means( 408, "Server timed out waiting for request" );
means( 409, "User should resubmit with more information" );
means( 410, "Resource withdrawn without redirect" );
means( 411, "length required" );
means( 412, "precondition failed" );
means( 413, "The server is refusing to process a request because the request entity is larger than the server is willing or able to process." );
means( 414, "request-URI too long" );
means( 415, "unsupported media type" );
means( 416, "requested range not satisfiable" );
means( 421, "Protocol Extension Unknown" );
means( 422, "Protocol Extension Refused" );
means( 429, "too many requests from a client in a short time" );
means( 449, "Retry after doing the appropriate action." );
means( 500, "Internal server error" );
means( 501, "not implemented" );
means( 502, "Error response received from gateway" );
means( 503, "temporarily overloaded" );
means( 504, "gateway timeout" );
means( 505, "HTTP version not supported" );
means( 506, "server configuration error" );
means( 507, "not enough RAM" );
means( 508, "loop detected" );
means( 509, "bandwidth exceeded" );
means( 510, "mandatory extension policy rejected" );
means( 520, "Protocol Extension Error" );
means( 521, "Protocol Extension Not Implemented" );
means( 522, "Protocol Extension Parameters Not Acceptable" );
}
/**
* Message describing an exception hit while talking to the server.
* Cleared by init(); not assigned anywhere else in this class, so presumably filled in by subclasses.
*/
@Nullable
protected String interruptResponseMessage;
/**
* Response message exactly as it came back from the website; null until a response has been received.
*/
@Nullable
protected String rawResponseMessage;
/**
* responseCode from the most recent request; init() resets it to -1, meaning no connection was made.
*/
int responseCode;
/**
* URL, including encoded get Parameters.
*/
URL url;
/**
* true=auto follow redirects, false=treat redirect as error, just read the first-leg redirect message.
*/
private boolean followRedirects = true;
/**
* Connect timeout in milliseconds. The default comes from Build.CONNECT_TIMEOUT
* (documented elsewhere in this class as 50 seconds).
*/
private int connectTimeout = Build.CONNECT_TIMEOUT;
/**
* Longest a read may go without progress, in milliseconds. The default comes from Build.READ_TIMEOUT
* (documented elsewhere in this class as 40 seconds).
*/
private int readTimeout = Build.READ_TIMEOUT;
/**
* Accept-Charset for header, has getter/setter
*/
private String acceptCharset = Build.ACCEPT_CHARSET;
/**
* Accept-Encoding for header, when debugging avoid gzip to make Wireshark sniffing easier. We don't handle deflate.
* Deflate is one of the PKZip compressors.
* Can be overridden with setAcceptEncoding.
* Firefox uses gzip, deflate
* in = new InflaterInputStream(conn.getInputStream()), new Inflater(true)); may let us handle deflate
*/
private String acceptEncoding = DEBUGGING ? "identity" : "gzip,x-gzip,identity";
/**
* Accept property for header : application/xhtml+xml,application/xml
* Can be overridden with setAcceptProperty.
* <p>
* default "application/octet-stream," +
* "application/x-java-jnlp-file," +
* "application/x-java-serialized-object," +
* "application/xhtml+xml," +
* "application/xml," +
* "application/zip," +
* "image/gif," +
* "image/jpeg," +
* "image/png," +
* "text/css," +
* "text/html," +
* "text/plain," +
* "text/x-java-source," +
* "text/xml," +
* "* ; q = . 2 , * / * ; q = . 2 ";
* (the wildcard entries are written with spaces so they do not end this comment)
* firefox: text/html,application/xhtml+xml,application/xml;q=0.9, * / * ; q = 0 . 8
* see https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html for an explanation of the q
*/
private String acceptProperty = Build.ACCEPT_MIMES;
/**
* Parameters we send with the command, alternating keyword/value, tacked onto the URL get-style.
* c.f. PostParms sent in message body with a post. null until setParms is called.
*/
private String[] parms;
/**
* the page containing the URL we pretend to be.
* By default null, for none.
*/
private String referer = null;
/**
* additional request properties for the connection, alternating key, value. Never null; empty by default.
*/
private String[] requestProperties = new String[ 0 ];
/**
* the browser we pretend to be; the default comes from Build.USER_AGENT and can be overridden with the setter.
* e.g. Mozilla/5.0 (Windows NT 10.0; WOW64; rv:47.0) Gecko/20100101 Firefox/47.00
*
* @see <a href="http://mindprod.com/jgloss/http.html">details on User-Agent</a>
*/
private String userAgent = Build.USER_AGENT;
/**
* Package-private constructor: no public instantiation, Http is only a base class for the
* request classes in this package. All defaults are set by the field initialisers.
*/
Http()
{
}
/**
 * Display the response header fields on System.err, one line per field,
 * in the form <code>key: [value] [value] ...</code>.
 * <p>
 * Everything goes to System.err. Splitting the output between System.out and System.err
 * lets the two streams interleave unpredictably, and loses part of the dump when only one
 * of them is redirected.
 *
 * @param title Title to decorate the dump.
 * @param urlc  HTTP connection whose response headers are dumped.
 */
protected static void dumpHeaders( final String title, final HttpURLConnection urlc )
{
    err.println( title );
    final Map<String, List<String>> pairs = urlc.getHeaderFields();
    for ( Map.Entry<String, List<String>> entry : pairs.entrySet() )
    {
        final StringBuilder line = new StringBuilder();
        final String key = entry.getKey();
        // The status line is reported under a null key; show its value without a "null:" prefix.
        if ( key != null )
        {
            line.append( key ).append( ':' );
        }
        final List<String> values = entry.getValue();
        if ( values != null )
        {
            for ( String value : values )
            {
                line.append( " [" ).append( value ).append( ']' );
            }
        }
        err.println( line );
    }
}
/**
 * URL-encode keyword/value pairs into a query string of the form
 * <code>?key=value&amp;key=value</code>.
 * This method does not automatically include the result in the message sent to the host.
 *
 * @param encoding charset whose name is handed to URLEncoder
 * @param parms    0..n strings to be sent as parameters, alternating keyword/value
 *
 * @return all the parms in one encoded string with a lead ?, or "" when there are no parms.
 * @throws java.io.UnsupportedEncodingException if URLEncoder does not support the encoding
 */
private static String encodeParms( Charset encoding, String... parms ) throws UnsupportedEncodingException
{
    final int count = ( parms == null ) ? 0 : parms.length;
    if ( count == 0 )
    {
        return "";
    }
    assert ( count & 1 ) == 0 : "must have an even number of parms, keyword=value";
    // Rough capacity: raw length of every parm plus one separator each, plus a little slack.
    int capacity = 10;
    for ( int i = 0; i < count; i++ )
    {
        capacity += parms[ i ].length() + 1;
    }
    final StringBuilder encoded = new StringBuilder( capacity );
    char separator = '?';
    // Walk the pairs; a dangling keyword without a value is ignored.
    for ( int k = 0; k + 1 < count; k += 2 )
    {
        final String keyword = URLEncoder.encode( parms[ k ], encoding.name() );
        final String value = URLEncoder.encode( parms[ k + 1 ], encoding.name() );
        encoded.append( separator ).append( keyword ).append( '=' ).append( value );
        separator = '&';
    }
    return encoded.toString();
}
/**
 * Guess what charset the response will be in, from its Content-Type header.
 * <p>
 * The <code>charset=</code> parameter is located case-insensitively. Its value ends at the next
 * <code>;</code>, so later parameters and a trailing semicolon are ignored, and any surrounding
 * double quotes are removed. All of <code>charset=utf-8</code>, <code>Charset="UTF-8";</code> and
 * <code>charset=utf-8; boundary=x</code> therefore resolve to UTF-8.
 *
 * @param contentType     contents of content type field, may be null
 * @param defaultEncoding charset to use if there is no usable charset in the content type field
 * @param url             url of server we just probed, who is responsible for the contentType.
 *                        Only used in the warning message; may be null.
 *
 * @return charset to use; defaultEncoding when none is given or the one given is not recognised.
 */
static Charset guessCharset( final String contentType, final Charset defaultEncoding, final URL url )
{
    if ( contentType == null )
    {
        return defaultEncoding;
    }
    // Find the last "charset=", ignoring case. regionMatches compares char by char, so the
    // index found is valid for the original string and does not depend on the default locale.
    final String marker = "charset=";
    int place = -1;
    for ( int i = contentType.length() - marker.length(); i >= 0; i-- )
    {
        if ( contentType.regionMatches( true, i, marker, 0, marker.length() ) )
        {
            place = i;
            break;
        }
    }
    if ( place < 0 )
    {
        return defaultEncoding;
    }
    String charset = contentType.substring( place + marker.length() );
    // The value stops at the next parameter separator.
    final int semicolon = charset.indexOf( ';' );
    if ( semicolon >= 0 )
    {
        charset = charset.substring( 0, semicolon );
    }
    charset = charset.trim();
    // Strip surrounding double quotes.
    int start = 0;
    int end = charset.length();
    while ( start < end && charset.charAt( start ) == '"' )
    {
        start++;
    }
    while ( end > start && charset.charAt( end - 1 ) == '"' )
    {
        end--;
    }
    // Locale.ROOT: the default-locale toUpperCase turns 'i' into a dotted capital I under a Turkish locale.
    charset = charset.substring( start, end ).trim().toUpperCase( Locale.ROOT );
    if ( charset.equalsIgnoreCase( "CP-1251" ) )
    {
        // Map this spelling onto the name Java knows the charset by.
        charset = "windows-1251";
    }
    try
    {
        return Charset.forName( charset );
    }
    catch ( IllegalArgumentException e )
    {
        // Covers both illegal and unsupported charset names. Concatenating url directly keeps a null url harmless.
        err.println( "Warning: " + url + " unrecognised charset " + charset + " using " +
                     defaultEncoding + " instead." );
        return defaultEncoding;
    }
}
/**
 * Record one entry of the responseCode to responseMessage lookup table.
 *
 * @param responseCode response code; must lie within the range the table covers
 * @param meaning      wording to report for that response code
 */
private static void means( int responseCode, String meaning )
{
    // The table starts at the lowest legitimate code, so shift the code down to get its slot.
    final int slot = responseCode - LOWEST_LEGIT_RESPONSE_CODE;
    responseCodeLookup[ slot ] = meaning;
}
/**
 * Translate a responseCode into this package's standard wording.
 *
 * @param responseCode       code e.g. 200 for OK, or -1 when no connection was made
 * @param rawResponseMessage raw response message from server; not consulted by the translation
 *
 * @return wording from the lookup table when the code has an entry, otherwise "no connect" for -1
 *         and "unknown" for anything else.
 */
private static String responseCodeToResponseMessage( int responseCode, String rawResponseMessage )
{
    final boolean inTable = responseCode >= LOWEST_LEGIT_RESPONSE_CODE
                            && responseCode <= HIGHEST_LEGIT_RESPONSE_CODE;
    // Codes inside the range may still have an empty (null) slot.
    final String known = inTable ? responseCodeLookup[ responseCode - LOWEST_LEGIT_RESPONSE_CODE ] : null;
    if ( known != null )
    {
        return known;
    }
    return responseCode == -1 ? "no connect" : "unknown";
}
/**
 * Connect, then read and decode the response to the request we sent the server.
 * <p>
 * Records responseCode and rawResponseMessage as a side effect. The response stream is closed
 * and the connection disconnected even when reading fails part way.
 *
 * @param defaultCharSet Encoding to use to interpret the result when the Content-Type names none.
 * @param urlc           the HttpURLConnection, all ready to go but for the connect.
 *
 * @return content of the response, decompressed, decoded.
 * @throws java.io.IOException if trouble connecting or reading the stream.
 */
String connectAndProcessResponse( Charset defaultCharSet, HttpURLConnection urlc )
        throws IOException
{
    urlc.connect();
    try
    {
        responseCode = urlc.getResponseCode();
        rawResponseMessage = urlc.getResponseMessage();
        // Content-Length is only used as a size estimate; fall back when it is absent.
        int estimatedLength = urlc.getContentLength();
        if ( estimatedLength <= 0 )
        {
            estimatedLength = DEFAULT_LENGTH;
        }
        final InputStream is = urlc.getInputStream();
        try
        {
            final String contentType = urlc.getContentType();
            final Charset charset = guessCharset( contentType, defaultCharSet, url );
            // Content-coding names are case-insensitive, so "GZIP" must count as gzip too.
            final String contentEncoding = urlc.getContentEncoding();
            final boolean gzipped = "gzip".equalsIgnoreCase( contentEncoding )
                                    || "x-gzip".equalsIgnoreCase( contentEncoding );
            final String result = Read.readStringBlocking( is,
                    estimatedLength,
                    gzipped,
                    charset );
            if ( DEBUGGING )
            {
                err.println( "--------------------------------" );
                err.println( "ResponseCode : " + responseCode );
                err.println( "ResponseMessage : " + getResponseMessage() );
                err.println( "ContentType : " + contentType );
                err.println( "Charset : " + charset );
                err.println( "ContentEncoding : " + contentEncoding );
                err.println( "Result : " + ( result == null ? "null" : result.substring( 0,
                        Math.min( result.length(), 300 ) ) ) );
            }
            return result;
        }
        finally
        {
            // Runs on failure too, so a failed read does not leak the stream.
            is.close();
        }
    }
    finally
    {
        urlc.disconnect();
    }
}
/**
* Get the parms previously given to setParms, URL-encoded into a single query string
* of the form ?key=value&amp;key=value. Delegates to encodeParms.
*
* @param encoding charset for URLEncoder
*
* @return all the parms in one string encoded with lead ?, or "" when no parms have been set.
* @throws java.io.UnsupportedEncodingException if bad encoding
*/
String getEncodedParms( Charset encoding ) throws UnsupportedEncodingException
{
return encodeParms( encoding, this.parms );
}
/**
 * Reset the per-request response state. Call this first thing before accessing the web,
 * so that results left over from a previous request cannot be mistaken for new ones.
 */
protected void init()
{
    this.interruptResponseMessage = null;
    this.rawResponseMessage = null;
    // -1 is reported as "no connect" until a real response code arrives.
    this.responseCode = -1;
}
/**
 * Set up the standard properties on the connection: timeouts, User-Agent, redirect policy,
 * Referer, the caller's additional request properties, and the Accept* headers.
 * <p>
 * Accept-Language is derived from the default Locale as <code>language-COUNTRY;q=1,language;q=0.8</code>
 * (just <code>language;q=1</code> when the locale has no country). Only the language and country
 * are used, so script or variant details of the locale never reach the header. A locale with no
 * language sends no Accept-Language at all.
 *
 * @param urlc Connection we are setting up.
 */
protected void setStandardProperties( URLConnection urlc )
{
    urlc.setConnectTimeout( connectTimeout );
    urlc.setReadTimeout( readTimeout );
    if ( userAgent != null )
    {
        urlc.setRequestProperty( "User-Agent", userAgent );
    }
    if ( urlc instanceof HttpURLConnection )
    {
        ( ( HttpURLConnection ) urlc ).setInstanceFollowRedirects( followRedirects );
    }
    if ( referer != null )
    {
        urlc.setRequestProperty( "Referer", referer );
    }
    // requestProperties always holds complete key, value pairs; setRequestProperties enforces that.
    for ( int i = 0; i < requestProperties.length; i += 2 )
    {
        urlc.setRequestProperty( requestProperties[ i ], requestProperties[ i + 1 ] );
    }
    // NOTE(review): setRequestProperty overwrites, so the Accept* headers set below replace any
    // same-named entry supplied through requestProperties.
    urlc.setRequestProperty( "Accept", acceptProperty );
    urlc.setRequestProperty( "Accept-Charset", acceptCharset );
    urlc.setRequestProperty( "Accept-Encoding", acceptEncoding );
    final Locale locale = Locale.getDefault();
    final String language = locale.getLanguage();
    if ( language.length() > 0 )
    {
        final String country = locale.getCountry();
        final StringBuilder acceptLanguage = new StringBuilder( 24 );
        if ( country.length() > 0 )
        {
            // A quality value needs its leading zero: "0.8", not ".8".
            acceptLanguage.append( language ).append( '-' ).append( country ).append( ";q=1," );
            acceptLanguage.append( language ).append( ";q=0.8" );
        }
        else
        {
            acceptLanguage.append( language ).append( ";q=1" );
        }
        urlc.setRequestProperty( "Accept-Language", acceptLanguage.toString() );
    }
}
/**
 * Prepare to talk to sites that do not support Server Name Indication (SNI).
 * This is a static method that flips a JVM-wide system property, so it is not safe to mix with
 * enableSNI across threads: do all the work that needs SNI off in one batch, then all the work
 * that needs it on in another.
 */
public static void disableSNI()
{
    final String sniSwitch = "jsse.enableSNIExtension";
    System.setProperty( sniSwitch, String.valueOf( false ) );
}
/**
 * Prepare to talk to sites that support Server Name Indication (SNI).
 * This is a static method that flips a JVM-wide system property, so it is not safe to mix with
 * disableSNI across threads: do all the work that needs SNI off in one batch, then all the work
 * that needs it on in another.
 */
public static void enableSNI()
{
    final String sniSwitch = "jsse.enableSNIExtension";
    System.setProperty( sniSwitch, String.valueOf( true ) );
}
/**
* Get the Accept-Charset header value currently configured for requests.
*
* @return e.g. "utf-8,iso-8859-1,utf-16;q=0.7,*;q=0.3";
*/
public String getAcceptCharset()
{
return acceptCharset;
}
/**
* Override the default Accept-Charset header value.
*
* @param acceptCharset e.g. "iso-8859-1,utf-8,utf-16;q=0.7,*;q=0.3"
*/
public void setAcceptCharset( final String acceptCharset )
{
this.acceptCharset = acceptCharset;
}
/**
* Get the Accept-Encoding header value currently configured for requests.
*
* @return e.g. "gzip,x-gzip,identity"
*/
public String getAcceptEncoding()
{
return acceptEncoding;
}
/**
* Override the default Accept-Encoding header value.
*
* @param acceptEncoding e.g. "gzip,x-gzip,identity"
*/
public void setAcceptEncoding( final String acceptEncoding )
{
this.acceptEncoding = acceptEncoding;
}
/**
* Get the Accept header value, i.e. the list of MIME types, currently configured for requests.
*
* @return e.g. "text/html,application/xhtml+xml,application/xml;q=0.9"
*/
public String getAcceptProperty()
{
return acceptProperty;
}
/**
* Override the default Accept header value, i.e. the list of MIME types we say we accept.
*
* @param acceptProperty e.g. "text/html,application/xhtml+xml,application/xml;q=0.9".
*/
public void setAcceptProperty( final String acceptProperty )
{
this.acceptProperty = acceptProperty;
}
/**
* Get the current connect timeout.
*
* @return connect timeout in ms.
*/
public int getConnectTimeout()
{
return connectTimeout;
}
/**
* Override the default connect timeout of 50 seconds. It only applies to this request object.
* Sometimes the connection will ignore the timeout. Oracle thinks it is a feature.
* The value is not checked here; it is handed to the connection by setStandardProperties.
*
* @param connectTimeout timeout to connect in ms. Note int, not long.
*/
public void setConnectTimeout( int connectTimeout )
{
this.connectTimeout = connectTimeout;
}
/**
* Get the response message derived from a Java exception, as opposed to one sent by the server.
* The stored value is passed through ST.canonical before it is returned
* (presumably so that null comes back as an empty string; confirm against ST).
*
* @return responseMessage
* @see #getRawResponseMessage
*/
public String getInterruptResponseMessage()
{
return ST.canonical( interruptResponseMessage );
}
/**
 * Get the response message from the most recent post/get exactly as received from the server.
 *
 * @return raw response message, or "" when none has been received. Never null.
 * @see #getResponseMessage
 */
public String getRawResponseMessage()
{
    return rawResponseMessage == null ? "" : rawResponseMessage;
}
/**
* Get the current read timeout.
*
* @return read timeout in ms.
*/
public int getReadTimeout()
{
return readTimeout;
}
/**
* Override the default read timeout of 40 seconds. It only applies to this request object.
* Sometimes the connection will ignore the timeout. Oracle thinks it is a feature.
* The value is not checked here; it is handed to the connection by setStandardProperties.
*
* @param readTimeout longest a read may go without progress, in ms. Note int, not long.
*/
public void setReadTimeout( int readTimeout )
{
this.readTimeout = readTimeout;
}
/**
* Get the Referrer, i.e. the name of the web page this request ostensibly came from.
*
* @return referrer e.g. "http://mindprod.com/index.html", null for none.
* @see <a href="http://mindprod.com/jgloss/http.html">details on Referrer</a>
*/
public String getReferer()
{
return referer;
}
/**
* Set the Referrer, i.e. the name of the web page this request ostensibly came from.
* Note that the word Referrer is spelled incorrectly as Referer in the HTTP spec.
*
* @param referer e.g. "http://mindprod.com/index.html", null for none.
*
* @see <a href="http://mindprod.com/jgloss/http.html">details on Referrer</a>
*/
public void setReferer( String referer )
{
this.referer = referer;
}
/**
* Get the responseCode from the most recent post/get.
* Meaning of various codes are described at HttpURLConnection and at http://mindprod.com/jgloss/http.html
*
* @return responseCode; -1 after init() until a response has been received.
* @see java.net.HttpURLConnection
*/
public int getResponseCode()
{
return responseCode;
}
/**
 * Get the response message from the most recent post/get, tidied to standard form:
 * the server's own wording, followed by this package's wording for the responseCode
 * when that differs (ignoring case) from what the server said.
 *
 * @return the one or two parts combined by FastCat.toSeparatedList with " : "
 * @see #getRawResponseMessage
 */
public String getResponseMessage()
{
    final String raw = rawResponseMessage;
    final String translated = responseCodeToResponseMessage( responseCode, raw );
    final FastCat parts = new FastCat( 2 );
    if ( !ST.isEmpty( raw ) )
    {
        parts.append( raw );
    }
    // Skip our wording when it would merely repeat the server's.
    final boolean addsInformation = !ST.isEmpty( translated ) && !translated.equalsIgnoreCase( raw );
    if ( addsInformation )
    {
        parts.append( translated );
    }
    return parts.toSeparatedList( " : " );
}
/**
* Get the URL for this connection.
*
* @return URL, including encoded GET Parameters, but not POST parameters.
*/
public URL getURL()
{
return url;
}
/**
* Get the User-Agent header value currently configured for requests.
*
* @return e.g. "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:8.0) Gecko/20100101 Firefox/8.0", null for none.
*/
public String getUserAgent()
{
return userAgent;
}
/**
* Override the default User-Agent.
*
* @param userAgent User-Agent a browser uses in an HTTP header to identify itself.
* null for no User-Agent header at all. By default you get Firefox.
*
* @see <a href="http://mindprod.com/jgloss/http.html">details on User-Agent</a>
*/
public void setUserAgent( String userAgent )
{
this.userAgent = userAgent;
}
/**
* Do we follow redirects, or just show the first leg?
*
* @return true if will follow redirects to the end
*/
public boolean isFollowRedirects()
{
return followRedirects;
}
/**
 * Does the most recent responseCode represent a good status?
 * Good means 200, 201 or 202, or one of the redirect codes 301, 302, 303, 304, 307 and 308.
 * Every other code, including -1 for no connection, counts as bad.
 *
 * @return true if good status
 */
public boolean isGood()
{
    final int code = responseCode;
    if ( code >= 200 && code <= 202 )
    {
        return true;
    }
    if ( code >= 301 && code <= 304 )
    {
        return true;
    }
    return code == 307 || code == 308;
}
/**
* Control whether redirects are automatically followed or treated as errors.
* The choice is applied to the connection by setStandardProperties.
*
* @param followRedirects true=auto follow, false=treat as error. default is true.
*
* @see java.net.HttpURLConnection#setInstanceFollowRedirects(boolean)
*/
public void setInstanceFollowRedirects( boolean followRedirects )
{
this.followRedirects = followRedirects;
}
/**
 * Set the parms that will be sent tacked onto the end of the URL, get-style.
 * Replaces any previous set. A null array means no parms and is accepted,
 * matching what the encoding step already tolerates.
 *
 * @param parms 0..n strings to be sent as parameters, alternating keyword/value
 *
 * @see Post#setPostParms(String...)
 */
public void setParms( final String... parms )
{
    // Guard against null before looking at the length, so the sanity check itself cannot throw.
    assert parms == null || ( parms.length & 1 ) == 0 : "must have an even number of parms, keyword=value";
    this.parms = parms;
}
/**
 * Set additional request properties. Replaces the previous set.
 * A null array is treated as "no additional properties", in line with the other setters
 * that accept null for none.
 *
 * @param requestProperties pairs: key, value.
 *
 * @throws IllegalArgumentException if the number of strings is odd.
 */
public void setRequestProperties( String... requestProperties )
{
    if ( requestProperties == null )
    {
        // Keep the field non-null; the code that applies the properties reads its length.
        this.requestProperties = new String[ 0 ];
        return;
    }
    if ( ( requestProperties.length & 1 ) != 0 )
    {
        throw new IllegalArgumentException( "setRequestProperties needs an even number of parameters: key,value" );
    }
    this.requestProperties = requestProperties;
}
}