#! /usr/bin/env python3 # def walker_sampler ( n, y, a ): #****************************************************************************80 # # Purpose: # # WALKER_SAMPLER returns a random sample i=1..N with prob X[i]. # # Discussion: # # Implementation of algorithm for sampling from a discrete # probability N-vector X[1], X[2], ..., X[N]. (N>=1.) # Runs on O(1) worst case time per sample, # and uses one integer and one double N-element array for storage. # Preprocessing consumes O(N) time and temporarily uses one # additional integer array (B[0..N+1]) for bookkeeping. # X[0] and X[N+1] are also used as sentinels in the Build() algorithm. # # Modified: # # 20 February 2016 # # Author: # # Original C version by Warren Smith. # Python version by John Burkardt. # # Reference: # # Warren Smith, # How to sample from a probability distribution, # April 18, 2002. # # Parameters: # # Input, unsigned int N, indicates the size of the probability vector X. # # Input, double Y[N+2], the Walker threshold vector. # # Input, unsigned int A[N+2], the Walker index vector. # # Output, unsigned int WALKER_SAMPLER, a sample value between 1 and N, # selected according to the probability vector X. # import numpy as np import random as rn # # Let i = random uniform integer from {1,2,...N} # i = 1 + int ( np.fix ( float ( n ) * rn.random ( ) ) ) r = rn.random ( ) if ( y[i] < r ): i = a[i] return i def walker_sampler_test ( ): #****************************************************************************80 # # Purpose: # # WALKER_SAMPLER_TEST tests WALKER_SAMPLER. # # Discussion: # # A Zipf-type probability vector is used. # # Modified: # # 20 February 2016 # # Author: # # Original C version by Warren Smith. # Python version by John Burkardt. # # Parameters: # # Input, unsigned int SEED, a seed for the random number generator. # # Input, unsigned int N, indicates the size of the probability vector X. # # Input, double P, the value of the Zipf parameter # 1 <= P. # import numpy as np import platform import random as rn from random_permutation import random_permutation from walker_build import walker_build from zipf_probability import zipf_probability seed = 123456789 n = 10 p = 2.0 print ( '' ) print ( 'WALKER_SAMPLER_TEST:' ) print ( ' Python version: %s' % ( platform.python_version ( ) ) ) print ( ' WALKER_SAMPLER creates Walker sample vectors Y and A' ) print ( ' for efficient sampling of a discrete probability vector.' ) print ( ' Test the Walker sampler with a Zipf-type probability.' ) # # Initialize the random number generator. # print ( ' Use seed = %d as input to random.seed ( seed ):' % ( seed ) ) rn.seed ( seed ) # # Generate a standard Zipf probability vector for cases 1 - N, # with parameter P. # x = zipf_probability ( n, p ) print ( '' ) print ( ' Zipf probabilities' ) print ( ' for N = %d' % ( n ) ) print ( ' and parameter P = %g' % ( p ) ) print ( '' ) print ( ' I X[I]' ) print ( '' ) for i in range ( 1, n + 1 ): print ( ' %4d %16g' % ( i, x[i] ) ) # # For better testing, randomly scramble the probabilities. # x = random_permutation ( n, x, seed ) print ( '' ) print ( ' Randomly permuted X:' ) print ( '' ) print ( ' I X[I]' ) print ( '' ) for i in range ( 1, n + 1 ): print ( ' %4d %16g' % ( i, x[i] ) ) # # Build the Walker sampler. # y, a = walker_build ( n, x ) print ( '' ) print ( ' Built the sampler' ) print ( ' i Y[i] A[i]:' ) print ( '' ) for i in range ( 0, n + 2 ): print ( ' %3d %16g %4d' % ( i, y[i], a[i] ) ) # # Prepare to count the frequency of each outcome. # count = np.zeros ( n + 2, dtype = np.int32 ) # # Call the sampler many times. # for i in range ( 0, 100000 ): j = walker_sampler ( n, y, a ) count[j] = count[j] + 1 # # Compare normalized sample frequencies to the original probabilities in X. # v = 0.0 print ( '' ) print ( ' 100000 samples:' ) print ( ' prob #samples:' ) print ( '' ) for i in range ( 1, n + 1 ): print ( ' %16g %6d' % ( x[i], count[i] ) ) expval = x[i] * 100000 t = expval - count[i] v = v + t * t / expval v = v / ( float ) ( n ) print ( '' ) print ( ' sumvar = %g, (should be about 1)' % ( v ) ) # # Terminate. # print ( '' ) print ( 'WALKER_SAMPLER_TEST' ) print ( ' Normal end of execution.' ) return if ( __name__ == '__main__' ): from timestamp import timestamp timestamp ( ) walker_sampler_test ( ) timestamp ( )