package com.mindprod.example;
import com.mindprod.fastcat.FastCat;
import java.util.BitSet;
import java.util.Random;
import static java.lang.System.*;
/**
* Calculate theoretical and simulated probability of generating a non-unique random names.
*
* @author Roedy Green, Canadian Mind Products
* @version 1.0 2009-01-01 initial version
* @since 2009-01-01
*/
public final class TestCollisionProbability
{
/**
* maximum number of acceptable trials
*/
private static final int MAX_TRIALS = 1000000;
/**
* minimum number of acceptable trials
*/
private static final int MIN_TRIALS = 100;
/**
* we can't compute accurately above this limit on n
*/
private static final long APPROXIMATING_LIMIT_FOR_N = 100000000L;
/**
* we can't simulate above this limit on m
*/
private static final long SIMULATING_LIMIT_FOR_M = 10000000L;
/**
* we can't simulate above this limit on n
*/
private static final long SIMULATING_LIMIT_FOR_N = 1000000L;
/**
* sun's random number generator to use in simulations
*/
private static final Random wheel = new Random();
/**
* Calculate the probability for a true random number generator avoiding collisions.
*
* @param n count of names generated
* @param m count of possible names
*
* @return probability of no collisions
*/
private static double calculateTheoreticalProbability( long n, long m )
{
double probabilityOfNoCollision = 1.0d;
for ( long i = 0; i < n; i++ )
{
probabilityOfNoCollision *= ( double ) ( m - i ) / m;
}
return probabilityOfNoCollision;
}
/**
* Display the probability for a random number generator avoiding collisions.
*
* @param probabilityOfNoCollision probability of no collisions 0..1
* @param description sort of generator used.
* @param n count of names generated
* @param m count of possible names
*/
private static void display( double probabilityOfNoCollision,
String description,
long n,
long m )
{
double probabilityOfCollision = 1.0d - probabilityOfNoCollision;
out.println( "Generating n="
+ n
+ " Strings of m="
+ m
+ " possible Strings\n"
+ "that you could generate with "
+ description
+ ",\n"
+ "the probability of collision is "
+ probabilityOfCollision
+ "\n"
+ "where 0 is never, and 1 is always," );
if ( probabilityOfCollision <= 0.0 )
{
out.println( "in other words, don't worry about collisions." );
}
else if ( probabilityOfCollision >= 1.0 )
{
out.println( "in other words, collisions will always happen." );
}
else
{
out.println( "or one in " + 1 / probabilityOfCollision );
}
}
/**
* Display some random strings where m = Long.MAX_VALUE+1
*/
private static void displaySamples()
{
out.println( "Sample random decimal string: " + Long.toString(
wheel.nextLong() & Long
.MAX_VALUE
) );
out.println( "Sample random hex string: " + Long.toHexString( wheel
.nextLong() & Long
.MAX_VALUE ) );
out.println( "Sample random alphabetic string: " + Long.toString(
wheel.nextLong() & Long
.MAX_VALUE,
36
) );
}
/**
* Estimate probability for a true random number generator avoiding collisions. for large n this would take
* impossibly long to compute, so we approximate to get a pessimistic number.
*
* @param n count of names generated
* @param m count of possible names
*
* @return probability of no collisions
*/
private static double estimateTheoreticalProbability( long n, long m )
{
out.println( "approximating..." );
return Math.pow( ( double ) ( m - n ) / m, n );
}
/**
* Estimate probability for a true random number generator avoiding collisions. for large n this would take
* impossibly long to compute, so we approximate to get a pessimistic number.
*
* @param n count of names generated
* @param m count of possible names
* @param trials number of trials in simulation
*
* @return probability of no collisions
*/
private static double simulateActualProbability( long n, long m, int trials )
{
assert trials >= MIN_TRIALS :
"not enough trials";
assert trials <= MAX_TRIALS :
"too many trials";
assert n <= SIMULATING_LIMIT_FOR_N :
"n too large";
assert m <= SIMULATING_LIMIT_FOR_M :
"m too large";
assert SIMULATING_LIMIT_FOR_N < Integer
.MAX_VALUE : "SIMULATING_LIMIT_FOR_N too large.";
assert SIMULATING_LIMIT_FOR_M < Integer
.MAX_VALUE : "SIMULATING_LIMIT_FOR_M too large.";
final FastCat sb = new FastCat( 10 );
sb.append( "\nSimulating ", trials, " trials of n=", n, " numbers " );
sb.append( "each in range 0 .. ", m - 1, " where m=", m, "." );
out.println( sb.toString() );
int m1 = ( int ) m;
BitSet b = new BitSet( m1 );
int collisions = 0;
for ( int t = 0; t < trials; t++ )
{
b.clear();
for ( int i = 0; i < n; i++ )
{
int poss = wheel.nextInt( m1 );
if ( b.get( poss ) )
{
collisions++;
break;
}
else
{
b.set( poss );
}
}
}
return ( double ) ( trials - collisions ) / trials;
}
/**
* main Calculate theoretical probability of generating a non-unique random hex name in n Strings of a set of m
* possibilities via <br> private static Random wheel = new Random(); // seed with elapsedTime of day<br> ...<br>
* String almostUniqueAndRandom =<br> Long.toHexString( wheel.nextLong() & * Long.MAX_VALUE);<br> you get
* Strings of form "0" .. "7fffffffffffffff", a maximum 16 characters long. <br> <br> try out with: <br> java
* com.mindprod.example.TestCollisionProbability 20000 9223372036854775807 1000<br> or to see a simulation, try
* something smaller like<br> java com.mindprod.example.TestCollisionProbability 200 10000000 1000<br>
*
* @param args n number of Strings you plan to generate <br> m string you will generate over the range 0..m
* <p/>
* If you are generating 32-bit hashcodes use m=4_294_967_296
* If you are using 60-bit hashcodes, use m=1_152_921_504_606_846_976
* 64-bit hash codes will overamp this program.
* If you are using it to solwe the birthday problem use n=number of friends m=365.
* <br>
* trials how many sets of n numbers to generate in the simulation
*/
public static void main( String[] args )
{
if ( args.length != 3 )
{
throw new IllegalArgumentException(
"You must have n, the number of generated Strings and m, the number of possible Strings, and t, " +
"the number of trials on the command line."
);
}
long n = Long.parseLong( args[ 0 ] );
long m = Long.parseLong( args[ 1 ] );
int trials = Integer.parseInt( args[ 2 ] );
if ( !( 0 < n && n < m ) )
{
throw new IllegalArgumentException( "You must have 0 < n < m" );
}
if ( !( MIN_TRIALS <= trials && trials <= MAX_TRIALS ) )
{
throw new IllegalArgumentException( "Must have "
+ MIN_TRIALS
+ " <= trials <= "
+ MAX_TRIALS );
}
displaySamples();
double probabilityOfNoCollision;
if ( n <= APPROXIMATING_LIMIT_FOR_N )
{
probabilityOfNoCollision = calculateTheoreticalProbability( n, m );
}
else
{
probabilityOfNoCollision = estimateTheoreticalProbability( n, m );
}
display( probabilityOfNoCollision, "a true number generator", n, m );
if ( n <= SIMULATING_LIMIT_FOR_N && m <= SIMULATING_LIMIT_FOR_M )
{
probabilityOfNoCollision =
simulateActualProbability( n, m, trials );
display( probabilityOfNoCollision,
"Sun's java.util.Random pseudo-random number generator",
n,
m );
}
else
{
out.println( "The problem too big to simulate." );
}
}
}