package com.mindprod.http;
import com.mindprod.common18.Build;
import com.mindprod.common18.ST;
import com.mindprod.fastcat.FastCat;
import org.jetbrains.annotations.Nullable;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import static java.lang.System.*;
/**
 * Base class for Post, Get, Head, Probe and Chase, which send and receive HTTP messages.
 * <p>
 * Originally based on work by Jonathan Revusky.
 *
 * @author Roedy Green, Canadian Mind Products
 * @version 3.2 2014-07-15 add isGood method to categorise the responseCode
 * @since 1998-01-01
 */
abstract class Http
* encoding for IBM437
public static final Charset IBM437 = Charset.forName( "IBM437" );
* encoding for IBM850
public static final Charset IBM850 = Charset.forName( "IBM850" );
* encoding for iso-8859-1
public static final Charset ISO88591 = Charset.forName( "ISO-8859-1" );
* encoding for UTF-16
public static final Charset UTF16 = Charset.forName( "UTF-16" );
* encoding for UTF-8
public static final Charset UTF8 = Charset.forName( "UTF8" );
* encoding for code page 1252
public static final Charset WINDOWS1252 = Charset.forName( "windows-1252" );
/**
 * true if want extra debugging output. If you change this to true, make sure you set it back to false before
 * distributing http or any package that uses it.
 */
static final boolean DEBUGGING = false;

/**
 * message length to presume when no length given
 */
static final int DEFAULT_LENGTH = 32 * 1024;

/**
 * highest response code that has a slot in responseCodeLookup
 */
private static final int HIGHEST_LEGIT_RESPONSE_CODE = 522;

/**
 * lowest response code that has a slot in responseCodeLookup; it maps to slot 0
 */
private static final int LOWEST_LEGIT_RESPONSE_CODE = 100;

/**
 * used to convert responseCode to responseMessage.
 * Indexed by responseCode - LOWEST_LEGIT_RESPONSE_CODE; slots for codes with no wording stay null.
 */
private static final String[] responseCodeLookup;

static
{
    // NOTE(review): the property key on this line was an empty string, which makes System.setProperty throw
    // IllegalArgumentException during class initialisation. "sun.net.http.allowRestrictedHeaders" is the
    // presumed intended key -- confirm against the project history.
    System.setProperty( "sun.net.http.allowRestrictedHeaders", "true" );
    System.setProperty( "jsse.enableSNIExtension", "false" );
    System.setProperty( "jdk.tls.ephemeralDHKeySize", "2048" );

    // There is a more elaborate csv list in Brokenlinks.
    // Get wording by indexing into the array by responseCode offset.
    // We don't use a resource. That would complicate things for building
    // jars that use the http package.
    // Last updated 2014-04-18

    // the table must exist before means() can fill it in
    responseCodeLookup = new String[ HIGHEST_LEGIT_RESPONSE_CODE - LOWEST_LEGIT_RESPONSE_CODE + 1 ];
    means( 100, "OK to continue with request." );
    means( 101, "Server has switched protocols in upgrade header." );
    means( 200, "ok" );
    means( 201, "Object created, reason = new URI." );
    means( 202, "accepted: Async completion (TBS)" );
    means( 203, "non-authoritative information" );
    means( 204, "no content" );
    means( 205, "reset content" );
    means( 206, "partial content" );
    means( 207, "multistatus" );
    means( 208, "already reported" );
    means( 226, "IM (Instance maniplation) used." );
    means( 300, "Server could not determine what to return." );
    means( 301, "permanent redirect" );
    means( 302, "temporary redirect" );
    means( 303, "temporary redirect to new access method" );
    means( 304, "not recently modified" );
    means( 305, "Redirection to proxy, location header specifies proxy to use." );
    means( 307, "temporary redirect" );
    means( 308, "permanent redirect" );
    means( 400, "Invalid syntax" );
    means( 401, "Access denied, authorisation required." );
    means( 402, "payment required" );
    means( 403, "request forbidden" );
    means( 404, "page not found" );
    means( 405, "method not allowed" );
    means( 406, "No response acceptable to client found." );
    means( 407, "proxy authentication required" );
    means( 408, "Server timed out waiting for request" );
    means( 409, "User should resubmit with more information" );
    means( 410, "Resource withdrawn without redirect" );
    means( 411, "length required" );
    means( 412, "precondition failed" );
    means( 413, "The server is refusing to process a request because the request entity is larger than the server is willing or able to process." );
    means( 414, "request-URI too long" );
    means( 415, "unsupported media type" );
    means( 416, "requested range not satisfiable" );
    means( 421, "Protocol Extension Unknown" );
    means( 422, "Protocol Extension Refused" );
    means( 429, "too many requests from a client in a short time" );
    means( 449, "Retry after doing the appropriate action." );
    means( 500, "Internal server error" );
    means( 501, "not implemented" );
    means( 502, "Error response received from gateway" );
    means( 503, "temporarily overloaded" );
    means( 504, "gateway timeout" );
    means( 505, "HTTP version not supported" );
    means( 506, "server configuration error" );
    means( 507, "not enough RAM" );
    means( 508, "loop detected" );
    means( 509, "bandwidth exceeded" );
    means( 510, "mandatory extension policy rejected" );
    means( 520, "Protocol Extension Error" );
    means( 521, "Protocol Extension Not Implemented" );
    means( 522, "Protocol Extension Parameters Not Acceptable" );
}
* string composed from interrupts
protected String interruptResponseMessage;
* string back from website
protected String rawResponseMessage;
* responseCode from most recent post
int responseCode;
* URL, including encoded get Parameters.
URL url;
* true=auto follow redirects, false=treat redirect as error, just read first leg redirect message..
private boolean followRedirects = true;
* Allow 50 seconds to connect, measured in millis as the default.
private int connectTimeout = Build.CONNECT_TIMEOUT;
* Allow 40 seconds for a read to go without progress, measured in millis as the default.
private int readTimeout = Build.READ_TIMEOUT;
* Accept-Charset for header, has getter/setter
private String acceptCharset = Build.ACCEPT_CHARSET;
* Accept-Encoding for header, when debugging avoid gzip to make Wireshark sniffing easier. We don't handle deflate.
* Deflate is one of the PKZip compressors.
* Currently no SetAcceptEncoding method to override this default.
* Firefox uses gzip, deflate
* in = new InflaterInputStream(conn.getInputStream()), new Inflater(true)); may let us handle deflate
private String acceptEncoding = DEBUGGING ? "identity" : "gzip,x-gzip,identity";
* Accept property for header : application/xhtml+xml,application/xml
* Can be be overrridden
* <p>
* default "application/octet-stream," +
* "application/x-java-jnlp-file," +
* "application/x-java-serialized-object," +
* "application/xhtml+xml," +
* "application/xml," +
* "application/zip," +
* "image/gif," +
* "image/jpeg," +
* "image/png," +
* "text/css," +
* "text/html," +
* "text/plain," +
* "text/x-java-source," +
* "text/xml," +
* "* ; q = . 2 , * / * ; q = . 2 ";
* firefox: text/html,application/xhtml+xml,application/xml;q=0.9, * / * ; q = 0 . 8
* see for an explanation of the q
private String acceptProperty = Build.ACCEPT_MIMES;
* parameters we send with the command. c.f. PostParms sent in message body with a post
private String[] parms;
* the page containing the URL we pretend to be.
* By default null, for none.
private String referer = null;
* additional request properties for the connection, pairs key, value
private String[] requestProperties = new String[ 0 ];
* the browser we pretend to be, by default Firefox can be overridden with setter.
* e.g. Mozilla/5.0 (Windows NT 10.0; WOW64; rv:47.0) Gecko/20100101 Firefox/47.00
* @see <a href="">details on User-Agent</a>
private String userAgent = Build.USER_AGENT;
// No public instantiation. This is just a base class.
* display the contents of the header fields key: value, value, value
* @param title Title to decorated the dump.
* @param urlc HTTP connection
protected static void dumpHeaders( final String title, final HttpURLConnection urlc )
err.println( title );
Map<String, List<String>> pairs = urlc.getHeaderFields();
for ( Map.Entry<String, List<String>> entry : pairs.entrySet() )
String key = entry.getKey();
List<String> values = entry.getValue();
out.print( key + ":" );
for ( String value : values )
out.print( " [" + value + "]" );
* encode a set of parms for the command, separated with ? = & = * This method does not automatically include
* the result in the message sent to the host.
* @param encoding for URLEncoder
* @param parms 0..n strings to be send as parameter, alternating keyword/value
* @return all the parms in one string encoded with lead ?
* @throws if bad encoding
private static String encodeParms( Charset encoding, String... parms ) throws UnsupportedEncodingException
if ( parms == null || parms.length == 0 )
return "";
assert ( parms.length & 1 ) == 0 : "must have an even number of parms, keyword=value";
int estLength = 10;
for ( String p : parms )
estLength += p.length() + 1;
final StringBuilder sb = new StringBuilder( estLength );
for ( int i = 0; i < parms.length - 1; i += 2 )
sb.append( i == 0 ? "?" : "&" );
sb.append( URLEncoder.encode( parms[ i ],
) );
sb.append( '=' );
sb.append( URLEncoder.encode( parms[ i + 1 ],
) );
return sb.toString();
* Guess what charSet encoding the response will be in.
* @param contentType contents of content type field
* @param defaultEncoding charSet to use if empty content type field, e.g. "UTF-8"
* @param url url of server we just probed, who is responsible for the contentType
* @return charsetEncoding to use e.g. "UTF-8"
static Charset guessCharset( final String contentType, final Charset defaultEncoding, final URL url )
if ( contentType == null )
return defaultEncoding;
int place = contentType.lastIndexOf( "charset=" );
if ( place >= 0 )
String charset = null;
charset = contentType.substring( place + "charset=".length() ).trim().toUpperCase();
charset = ST.trimLeading( ST.trimTrailing( charset, '\"' ), '\"' );
charset = ST.trimTrailing( charset, ';' );
if ( charset.equalsIgnoreCase( "CP-1251" ) )
charset = "windows-1251";
return Charset.forName( charset );
catch ( IllegalArgumentException e )
err.println( "Warning: " + url.toString() + " unrecognised charset " + charset + " using " +
defaultEncoding + " instead." );
return defaultEncoding;
return defaultEncoding;
* used to build lookup responseCode to responseMessage table
* @param responseCode response code
* @param meaning corresponding meaning of the response code, the responseMessage
private static void means( int responseCode, String meaning )
responseCodeLookup[ responseCode - LOWEST_LEGIT_RESPONSE_CODE ] = meaning;
* convert responseCode to a standard responseMessage
* @param responseCode code e.g. 200 for OK
* @param rawResponseMessage raw response message from server
* @return String describing the response message.
private static String responseCodeToResponseMessage( int responseCode, String rawResponseMessage )
final String responseMessage = responseCodeLookup[ responseCode - LOWEST_LEGIT_RESPONSE_CODE ];
if ( responseMessage != null )
return responseMessage;
if ( responseCode == -1 )
return "no connect";
return "unknown";
/**
 * Process the response from the request we sent the server.
 *
 * @param defaultCharSet encoding to use to interpret the result.
 * @param urlc           the HttpURLConnection, all ready to go but for the connect.
 * @return content of the response, decompressed, decoded.
 * @throws IOException if trouble reading the stream.
 */
// Records the response code and message in this object's fields, reads the response body via
// Read.readStringBlocking, dumps a summary to System.err and returns the body.
String connectAndProcessResponse( Charset defaultCharSet, HttpURLConnection urlc )
throws IOException
// remember the status; getResponseCode() and getResponseMessage() report these fields
responseCode = urlc.getResponseCode();
rawResponseMessage = urlc.getResponseMessage();
// no usable length reported: fall back to DEFAULT_LENGTH
int estimatedLength = urlc.getContentLength();
if ( estimatedLength <= 0 )
estimatedLength = DEFAULT_LENGTH;
final InputStream is = urlc.getInputStream();
final String contentType = urlc.getContentType();
// charset named in the content type, else defaultCharSet
final Charset charset = guessCharset( contentType, defaultCharSet, url );
final boolean gzipped = "gzip".equals( urlc.getContentEncoding() )
|| "x-gzip".equals( urlc.getContentEncoding() );
// NOTE(review): the argument list below looks truncated. estimatedLength and gzipped are computed above
// but never used in the visible code; presumably they belong between is and charset -- confirm against
// the signature of Read.readStringBlocking.
String result = Read.readStringBlocking( is,
charset );
// NOTE(review): as shown, this summary is printed on every call; presumably it was meant to be
// guarded by DEBUGGING -- confirm.
err.println( "--------------------------------" );
err.println( "ResponseCode : " + responseCode );
err.println( "ResponseMessage : " + getResponseMessage() );
err.println( "ContentType : " + contentType );
err.println( "Charset : " + charset );
err.println( "ContentEncoding : " + urlc.getContentEncoding() );
// show at most the first 300 chars of the body
err.println( "Result : " + ( result == null ? "null" : result.substring( 0,
Math.min( result.length(), 300 ) ) ) );
return result;
* get the parms for the command encoded, separated with ? = & = * @param encoding for URLEncoder
* @return all the parms in one string encoded with lead ?
* @throws if bad encoding
String getEncodedParms( Charset encoding ) throws UnsupportedEncodingException
return encodeParms( encoding, this.parms );
* first thing before accessing web
protected void init()
responseCode = -1;
rawResponseMessage = null;
interruptResponseMessage = null;
* set up the standard properties on the connection
* @param urlc Connection we are setting up.
protected void setStandardProperties( URLConnection urlc )
urlc.setConnectTimeout( connectTimeout );
urlc.setReadTimeout( readTimeout );
if ( userAgent != null )
urlc.setRequestProperty( "User-Agent", userAgent );
if ( urlc instanceof HttpURLConnection )
( ( HttpURLConnection ) urlc ).setInstanceFollowRedirects( followRedirects );
if ( referer != null )
urlc.setRequestProperty( "Referer", referer );
for ( int i = 0; i < requestProperties.length; i += 2 )
urlc.setRequestProperty( requestProperties[ i ], requestProperties[ i + 1 ] );
urlc.setRequestProperty( "Accept", acceptProperty );
urlc.setRequestProperty( "Accept-Charset", acceptCharset );
urlc.setRequestProperty( "Accept-Encoding", acceptEncoding );
final Locale locale = Locale.getDefault();
urlc.setRequestProperty( "Accept-Language", locale.toString().replace( '_', '-' ) + ";q=1," +
locale.getLanguage() + ";q=.8" );
* prepare to talk to sites that do not support Server Name Identification
* This is a static method. If you use it is a multithread situation, you will get conflicts.
* You must do all your false work in a batch, then all your true work in a batch.
public static void disableSNI()
System.setProperty( "jsse.enableSNIExtension", "false" );
* prepare to talk to sites that support Server Name Identification
* This is a static method. If you use it is a multithread situation, you will get conflicts.
* You must do all your false work in a batch, then all your true work in a batch.
public static void enableSNI()
System.setProperty( "jsse.enableSNIExtension", "true" );
* get current Accept-Charset
* @return e.g. "utf-8,iso-8859-1,utf-16;q=0.7,*;q=0.3";
public String getAcceptCharset()
return acceptCharset;
* change default Accept-Charset
* @param acceptCharset e.hg. "iso-8859-1,utf-8,utf-16;q=0.7,*;q=0.3
public void setAcceptCharset( final String acceptCharset )
this.acceptCharset = acceptCharset;
* get current Accept-Encoding
* @return e.g. "gzip,x-gzip,identity"
public String getAcceptEncoding()
return acceptEncoding;
* change the default encoding Accept-Encoding
* @param acceptEncoding e.g. ""gzip,x-gzip,identity""
public void setAcceptEncoding( final String acceptEncoding )
this.acceptEncoding = acceptEncoding;
* get current AcceptProperty
* @return e.g. "gzip,x-gzip,identity"
public String getAcceptProperty()
return acceptProperty;
* change the default Accept-Property MIME types
* @param acceptProperty e.g. "gzip,x-gzip,identity".
public void setAcceptProperty( final String acceptProperty )
this.acceptProperty = acceptProperty;
* get current connect time out in ms
* @return connect timeout is ms.
public int getConnectTimeout()
return connectTimeout;
* override the default connect timeout of 50 seconds. It only applies to this request.
* Sometimes the connection will ignore the timeout. Oracle thinks it is a feature.
* @param connectTimeout timeout to connect in ms. Note int, not long.
public void setConnectTimeout( int connectTimeout )
this.connectTimeout = connectTimeout;
* Response message from Java Exception
* @return responseMessage
* @see #getRawResponseMessage
public String getInterruptResponseMessage()
return ST.canonical( interruptResponseMessage );
* responseCode from most recent post/get exactly as received from the server
* @return responseCode
* @see #getResponseMessage
public String getRawResponseMessage()
if ( rawResponseMessage == null )
return "";
return rawResponseMessage;
* get current read time out in ms.
* @return read timeout is ms.
public int getReadTimeout()
return readTimeout;
* override the default read timeout of 40 seconds. It only applies to this request.
* Sometimes the connection will ignore the timeout. Oracle thinks it is a feature.
* @param readTimeout timeout to connect in ms. Note int, not long.
public void setReadTimeout( int readTimeout )
this.readTimeout = readTimeout;
* ges the Referrer ie. the name of a web page this request ostensibly came from.
* @return referrer e.g "", null for none.
* @see <a href="">details on Referrer</a>
public String getReferer()
return referer;
* set the Referrer ie. the name of a web page this request ostensibly came from.
* Note that the word Referrer is spelled incorrectly as Referer the HTTP spec.
* @param referer e.g "", null for none.
* @see <a href="">details on Referrer</a>
public void setReferer( String referer )
this.referer = referer;
* responseCode from most recent post/get
* Meaning of various codes are described at HttpURLConnection and at
* @return responseCode
* @see
public int getResponseCode()
return responseCode;
* responseCode from most recent post/get tidied to standard form
* @return responseCode
* @see #getRawResponseMessage
public String getResponseMessage()
final FastCat sb = new FastCat( 2 );
if ( !ST.isEmpty( rawResponseMessage ) )
sb.append( rawResponseMessage );
final String translatedResponseMessage = responseCodeToResponseMessage( responseCode, rawResponseMessage );
if ( !ST.isEmpty( translatedResponseMessage ) && !translatedResponseMessage.equalsIgnoreCase( rawResponseMessage ) )
sb.append( translatedResponseMessage );
return sb.toSeparatedList( " : " );
* Get URL for this connection.
* @return URL, including encoded GET Parameters, but not POST parameters.
public URL getURL()
return url;
* get current User-Agent
* @return e.g. "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:8.0) Gecko/20100101 Firefox/8.0"
public String getUserAgent()
return userAgent;
* override the default User-Agent
* @param userAgent User-Agent a browser uses in an HTTP header to identify itself.
* null for no User Agent. By default you get Firefox.
* @see <a href="">details on User-Agent</a>
public void setUserAgent( String userAgent )
this.userAgent = userAgent;
* do we follow redirects on just show first leg
* @return true if will follow redirects to the end
public boolean isFollowRedirects()
return followRedirects;
* does the recent responseCode represent a good status?
* @return true if good status
public boolean isGood()
switch ( responseCode )
case 200:
case 201:
case 202:
case 301:
case 302:
case 303:
case 304:
case 307:
case 308:
return true;
case 400:
case 401:
case 402:
case 403:
case 404:
return false;
* control whether redirects are automatically followed or treated as errors.
* @param followRedirects true=auto follow, false=treat as error. default is true.
* @see
public void setInstanceFollowRedirects( boolean followRedirects )
this.followRedirects = followRedirects;
* set the parms that will be send tacked onto the end of the URL, get-style
* @param parms 0..n strings to be send as parameter, alternating keyword/value
* @see Post#setPostParms(String...)
public void setParms( final String... parms )
assert ( parms.length & 1 ) == 0 : "must have an even number of parms, keyword=value";
this.parms = parms;
* set additional requestProperties. Replaced previous set.
* @param requestProperties pairs: key value.
public void setRequestProperties( String... requestProperties )
if ( ( requestProperties.length & 1 ) != 0 )
throw new IllegalArgumentException( "setRequestProperties needs an even number of parameters: key,value" );
this.requestProperties = requestProperties;