/**
* Largest code point we can represent with a surrogate pair.
* Each of the two surrogate chars carries 10 bits of payload, giving 2 ** (10+10) combinations.
* The pair encoding is offset by 0x10000, because the first 0x10000 codes fit in
* a single 16-bit char and don't need to be represented by a pair.
* 2 ** (10+10) + 0x10000 - 1 = 0x10ffff, a bit more than 20 bits.
*/
private static final int BIGGEST_REPRESENTABLE_CODEPOINT = 0x10ffff;
/**
* Convert a 32-bit codePoint to its UTF-16 string form.
* A code point of 0xffff or less yields a 1-character string; anything larger
* yields a 2-character surrogate pair string.
* e.g. 0x1d509 to "\ud835\udd09"
* Code points that are themselves surrogates (0xd800..0xdfff) are not rejected;
* they come back as a single char.
*
* @param codePoint 32-bit code point to convert.
* Must be in range 0 to {@value #BIGGEST_REPRESENTABLE_CODEPOINT} (0x10ffff).
*
* @return the code point as a 1-character string, or as a pair of surrogate
* characters in a 2-character string.
*
* @throws IllegalArgumentException if codePoint is outside the permitted range.
*/
private static String toSurrogatePair( final int codePoint )
{
    if ( codePoint < 0 || codePoint > BIGGEST_REPRESENTABLE_CODEPOINT )
    {
        // report both the limit and the offending value in hex, to match the 0x prefix
        throw new IllegalArgumentException( "toSurrogatePair codePoint must be in range 0x0000..0x"
                                            + Integer.toHexString( BIGGEST_REPRESENTABLE_CODEPOINT )
                                            + " but was 0x"
                                            + Integer.toHexString( codePoint ) );
    }
    // Character.toChars gives one char for 0..0xffff, else the high and low surrogate:
    // high = ( ( codePoint - 0x10000 ) >>> 10 ) + 0xd800, low = ( ( codePoint - 0x10000 ) & 0x3ff ) + 0xdc00
    return new String( Character.toChars( codePoint ) );
}
/**
* Convert a 32-bit codePoint to Java-style \ u x x x x literal text, one escape per UTF-16 char.
* A code point of 0xffff or less yields a single escape; anything larger
* yields a pair of surrogate escapes.
* e.g. 0x1d509 to "\ud835\udd09"
*
* @param codePoint 32-bit code point to convert to literal.
* Must be in range 0 to {@value #BIGGEST_REPRESENTABLE_CODEPOINT} (0x10ffff).
*
* @return Java literal without quotes: 6 characters for a code point of 0xffff or less,
* otherwise the pair of surrogate characters as a 12-character literal.
*
* @throws IllegalArgumentException if codePoint is outside the permitted range.
*/
private static String toSurrogatePairLiteral( final int codePoint )
{
    if ( codePoint < 0 || codePoint > BIGGEST_REPRESENTABLE_CODEPOINT )
    {
        // report both the limit and the offending value in hex, to match the 0x prefix
        throw new IllegalArgumentException( "toSurrogatePairLiteral codePoint must be in range 0x0000..0x"
                                            + Integer.toHexString( BIGGEST_REPRESENTABLE_CODEPOINT )
                                            + " but was 0x"
                                            + Integer.toHexString( codePoint ) );
    }
    // one char for 0..0xffff, else the high surrogate followed by the low surrogate
    final char[] units = Character.toChars( codePoint );
    // each escape is backslash + u + 4 hex digits = 6 chars
    final StringBuilder sb = new StringBuilder( 6 * units.length );
    for ( final char unit : units )
    {
        sb.append( "\\u" );
        // explicit int so the same StringTools overload is chosen as before
        sb.append( StringTools.toLzHexString( ( int ) unit, 4 ) );
    }
    return sb.toString();
}
/**
* Convert a 32-bit codePoint to a \ U x x x x x x x x C-style string literal.
* e.g. 0x1d509 to \U0001d509
*
* @param codePoint 32-bit code point to convert to literal.
* Must be in range 0 to {@value #BIGGEST_REPRESENTABLE_CODEPOINT} (0x10ffff).
*
* @return as a C-style 10-char literal without quotes
* (presumes StringTools.toLzHexString pads with lead zeroes to the requested 8 digits).
*
* @throws IllegalArgumentException if codePoint is outside the permitted range.
*/
private static String toCLiteral( final int codePoint )
{
    if ( codePoint < 0 || codePoint > BIGGEST_REPRESENTABLE_CODEPOINT )
    {
        // report both the limit and the offending value in hex, to match the 0x prefix
        throw new IllegalArgumentException( "toCLiteral codePoint must be in range 0x0000..0x"
                                            + Integer.toHexString( BIGGEST_REPRESENTABLE_CODEPOINT )
                                            + " but was 0x"
                                            + Integer.toHexString( codePoint ) );
    }
    // backslash + U + 8 hex digits = 10 chars
    final StringBuilder sb = new StringBuilder( 10 );
    sb.append( "\\U" );
    sb.append( StringTools.toLzHexString( codePoint, 8 ) );
    return sb.toString();
}