#! /usr/bin/env python # def zipf_cdf_inv ( a, cdf ): #*****************************************************************************80 # ## ZIPF_CDF_INV inverts the Zipf CDF. # # Discussion: # # Simple summation is used. # # Licensing: # # This code is distributed under the GNU LGPL license. # # Modified: # # 09 March 2016 # # Author: # # John Burkardt # # Parameters: # # Input, real CDF, the value of the CDF. # # Input, real A, the parameter of the PDF. # 1.0 < A. # # Output, integer X, the argument such that # CDF(X-1) < CDF <= CDF(X) # 1 <= X <= 1000 # from r8_zeta import r8_zeta if ( cdf <= 0.0 ): x = 1 else: c = r8_zeta ( a ) cdf2 = 0.0 x = 1000 for y in range ( 1, 1001 ): pdf = ( 1.0 / y ** a ) / c cdf2 = cdf2 + pdf if ( cdf <= cdf2 ): x = y break return x def zipf_cdf ( x, a ): #*****************************************************************************80 # ## ZIPF_CDF evaluates the Zipf CDF. # # Discussion: # # Simple summation is used. # # Licensing: # # This code is distributed under the GNU LGPL license. # # Modified: # # 09 March 2016 # # Author: # # John Burkardt # # Parameters: # # Input, integer X, the argument of the PDF. # 1 <= X # # Input, real A, the parameter of the PDF. # 1.0 < A. # # Output, real CDF, the value of the CDF. # from r8_zeta import r8_zeta if ( x < 1 ): cdf = 0.0 else: c = r8_zeta ( a ) cdf = 0.0 for y in range ( 1, x + 1 ): pdf = ( 1.0 / y ** a ) / c cdf = cdf + pdf return cdf def zipf_cdf_test ( ): #*****************************************************************************80 # ## ZIPF_CDF_TEST tests ZIPF_CDF, ZIPF_CDF_INV, ZIPF_PDF. # # Licensing: # # This code is distributed under the GNU LGPL license. # # Modified: # # 09 March 2016 # # Author: # # John Burkardt # import platform print ( '' ) print ( 'ZIPF_CDF_TEST' ) print ( ' Python version: %s' % ( platform.python_version ( ) ) ) print ( ' ZIPF_PDF evaluates the Zipf PDF.' ) print ( ' ZIPF_CDF evaluates the Zipf CDF.' ) print ( ' ZIPF_CDF_INV inverts the Zipf CDF.' ) a = 2.0 check = zipf_check ( a ) if ( not check ): print ( '' ) print ( 'ZIPF_CDF_TEST - Fatal error!' ) print ( ' The parameters are not legal.' ) return print ( '' ) print ( ' PDF parameter A = %14g' % ( a ) ) print ( '' ) print ( ' X PDF(X) CDF(X) CDF_INV(CDF)' ) print ( '' ) for x in range ( 1, 21 ): pdf = zipf_pdf ( x, a ) cdf = zipf_cdf ( x, a ) x2 = zipf_cdf_inv ( a, cdf ) print ( ' %6d %14g %14g %6d' % ( x, pdf, cdf, x2 ) ) # # Terminate. # print ( '' ) print ( 'ZIPF_CDF_TEST' ) print ( ' Normal end of execution.' ) return def zipf_check ( a ): #*****************************************************************************80 # ## ZIPF_CHECK checks the parameter of the Zipf PDF. # # Licensing: # # This code is distributed under the GNU LGPL license. # # Modified: # # 09 March 2016 # # Author: # # John Burkardt # # Parameters: # # Input, real A, the parameter of the PDF. # 1.0 < A. # # Output, logical CHECK, is true if the parameters are legal. # if ( a <= 1.0 ): print ( '' ) print ( 'ZIPF_CHECK - Fatal error!' ) print ( ' A <= 1.' ) check = False return check check = True return check def zipf_mean ( a ): #*****************************************************************************80 # ## ZIPF_MEAN returns the mean of the Zipf PDF. # # Licensing: # # This code is distributed under the GNU LGPL license. # # Modified: # # 09 March 2016 # # Author: # # John Burkardt # # Parameters: # # Input, real A, the parameter of the PDF. # 1.0 < A. # # Output, real MEAN, the mean of the PDF. # The mean is only defined for 2 < A. # from sys import exit from r8_zeta import r8_zeta if ( a <= 2.0 ): print ( '' ) print ( 'ZIPF_MEAN - Fatal error!' ) print ( ' No mean defined for A <= 2.' ) exit ( 'ZIPF_MEAN - Fatal error!' ) mean = r8_zeta ( a - 1.0 ) / r8_zeta ( a ) return mean def zipf_pdf ( x, a ): #*****************************************************************************80 # ## ZIPF_PDF evaluates the Zipf PDF. # # Discussion: # # PDF(X)(A) = ( 1 / X^A ) / C # # where the normalizing constant is chosen so that # # C = Sum ( 1 <= I < oo ) 1 / I^A. # # From observation, the frequency of different words in long # sequences of text seems to follow the Zipf PDF, with # parameter A slightly greater than 1. The Zipf PDF is sometimes # known as the "discrete Pareto" PDF. # # Lotka's law is a version of the Zipf PDF in which A is 2 or approximately # 2. Lotka's law describes the frequency of publications by authors in a # given field, and estimates that the number of authors with X papers is # about 1/X^A of the number of authors with 1 paper. # # Licensing: # # This code is distributed under the GNU LGPL license. # # Modified: # # 09 March 2016 # # Author: # # John Burkardt # # Reference: # # Alfred Lotka, # The frequency distribution of scientific productivity, # Journal of the Washington Academy of Sciences, # Volume 16, Number 12, 1926, pages 317-324. # # Parameters: # # Input, integer X, the argument of the PDF. # 1 <= N # # Input, real A, the parameter of the PDF. # 1.0 < A. # # Output, real PDF, the value of the PDF. # from r8_zeta import r8_zeta if ( x < 1 ): pdf = 0.0 else: c = r8_zeta ( a ) pdf = ( 1.0 / x ** a ) / c return pdf def zipf_sample ( a, seed ): #*****************************************************************************80 # ## ZIPF_SAMPLE samples the Zipf PDF. # # Licensing: # # This code is distributed under the GNU LGPL license. # # Modified: # # 09 March 2016 # # Author: # # John Burkardt # # Reference: # # Luc Devroye, # Non-Uniform Random Variate Generation, # Springer Verlag, 1986, pages 550-551. # # Parameters: # # Input, real A, the parameter of the PDF. # 1.0 < A. # # Input, integer SEED, a seed for the random number generator. # # Output, integer X, a sample of the PDF. # # Output, integer SEED, an updated seed for the random number generator. # import numpy as np from r8_uniform_01 import r8_uniform_01 b = 2.0 ** ( a - 1.0 ) while ( True ): u, seed = r8_uniform_01 ( seed ) v, seed = r8_uniform_01 ( seed ) w = np.floor ( 1.0 / u ** ( 1.0 / ( a - 1.0 ) ) ) t = ( ( w + 1.0 ) / w ) ** ( a - 1.0 ) if ( v * w * ( t - 1.0 ) * b <= t * ( b - 1.0 ) ): break x = np.floor ( w ) return x, seed def zipf_sample_test ( ): #*****************************************************************************80 # ## ZIPF_SAMPLE_TEST tests ZIPF_MEAN, ZIPF_SAMPLE, ZIPF_VARIANCE. # # Licensing: # # This code is distributed under the GNU LGPL license. # # Modified: # # 09 March 2016 # # Author: # # John Burkardt # import numpy as np import platform from i4vec_max import i4vec_max from i4vec_mean import i4vec_mean from i4vec_min import i4vec_min from i4vec_variance import i4vec_variance nsample = 1000 seed = 123456789 print ( '' ) print ( 'ZIPF_SAMPLE_TEST' ) print ( ' Python version: %s' % ( platform.python_version ( ) ) ) print ( ' ZIPF_MEAN returns the mean of the Zipf distribution.' ) print ( ' ZIPF_SAMPLE samples the Zipf distribution.' ) print ( ' ZIPF_VARIANCE returns the variance of the Zipf distribution.' ) a = 4.0 check = zipf_check ( a ) if ( not check ): print ( '' ) print ( 'ZIPF_SAMPLE_TEST - Fatal error!' ) print ( ' The parameters are not legal.' ) return mean = zipf_mean ( a ) variance = zipf_variance ( a ) print ( '' ) print ( ' PDF parameter A = %14g' % ( a ) ) print ( ' PDF mean = %14g' % ( mean ) ) print ( ' PDF variance = %14g' % ( variance ) ) x = np.zeros ( nsample ) for i in range ( 0, nsample ): x[i], seed = zipf_sample ( a, seed ) mean = i4vec_mean ( nsample, x ) variance = i4vec_variance ( nsample, x ) xmax = i4vec_max ( nsample, x ) xmin = i4vec_min ( nsample, x ) print ( '' ) print ( ' Sample size = %6d' % ( nsample ) ) print ( ' Sample mean = %14g' % ( mean ) ) print ( ' Sample variance = %14g' % ( variance ) ) print ( ' Sample maximum = %6d' % ( xmax ) ) print ( ' Sample minimum = %6d' % ( xmin ) ) # # Terminate. # print ( '' ) print ( 'ZIPF_SAMPLE_TEST' ) print ( ' Normal end of execution.' ) return def zipf_variance ( a ): #*****************************************************************************80 # ## ZIPF_VARIANCE returns the variance of the Zipf PDF. # # Licensing: # # This code is distributed under the GNU LGPL license. # # Modified: # # 09 March 2016 # # Author: # # John Burkardt # # Parameters: # # Input, real A, the parameter of the PDF. # 1.0 < A. # # Output, real VARIANCE, the variance of the PDF. # The variance is only defined for 3 < A. # from sys import exit from r8_zeta import r8_zeta if ( a <= 3.0 ): print ( '' ) print ( 'ZIPF_VARIANCE - Fatal error!' ) print ( ' No variance defined for A <= 3.0.' ) exit ( 'ZIPF_VARIANCE - Fatal error!' ) mean = zipf_mean ( a ) variance = r8_zeta ( a - 2.0 ) / r8_zeta ( a ) - mean * mean return variance if ( __name__ == '__main__' ): from timestamp import timestamp timestamp ( ) zipf_cdf_test ( ) zipf_sample_test ( ) timestamp ( )