1classdef Zipf < DiscreteDistribution
2 % Zipf Power-law distribution
for modeling popularity and ranking
4 % Zipf represents the Zipf distribution commonly used
for modeling popularity
5 % distributions, where items are ranked by frequency. The probability of an
6 % item
is inversely proportional to its rank raised to the shape parameter.
7 % This distribution
is essential
for cache modeling and content popularity.
9 % @brief Zipf distribution
for popularity and ranking-based phenomena
11 % Key characteristics:
12 % - Two parameters: shape (s) and number of items (n)
13 % - Probability ∝ 1/rank^s
for each rank
14 % - Heavy-tailed distribution (popular items dominate)
15 % - Mean = H(s-1,n)/H(s,n) where H
is generalized harmonic number
16 % - Support: {1, 2, ..., n}
18 % Shape parameter behavior:
19 % - s > 1: Heavy tail, few items very popular
20 % - s = 1: Classic Zipf law
21 % - s < 1: Less skewed, more uniform popularity
23 % The Zipf distribution
is used
for:
24 % - Web cache popularity modeling
25 % - Content access patterns
26 % - Word frequency in natural language
27 % - City population distributions
28 % - Internet traffic modeling
32 % web_popularity = Zipf(1.2, 1000); % 1000 items, shape=1.2
33 % access_pattern = web_popularity.sample(10000);
36 % Copyright (c) 2018-2022, Imperial College London
37 % All rights reserved.
40 function self = Zipf(s, n)
41 % ZIPF Create a Zipf distribution instance
43 % @brief Creates a Zipf distribution with shape parameter and item count
44 % @param s Shape parameter (s > 0, controls skewness)
45 % @param n Number of items (positive integer)
46 % @return self Zipf distribution instance with specified parameters
48 self@DiscreteDistribution('Zipf',4,[1,n]);
49 p = 1./((1:n).^s)/Zipf.genHarmonic(s,n);
51 setParam(self, 1,
'p', p(:)
');
52 setParam(self, 2, 'x
', x(:)');
53 setParam(self, 3,
's', s);
54 setParam(self, 4,
'n', n);
57 function ex = getMean(self)
60 % Get distribution mean
61 s = self.getParam(3).paramValue;
62 n = self.getParam(4).paramValue;
63 ex = self.genHarmonic(s-1,n) / self.genHarmonic(s,n);
66 function SCV = getSCV(self)
69 % Get distribution squared coefficient of variation (SCV = variance / mean^2)
70 s = self.getParam(3).paramValue;
71 n = self.getParam(4).paramValue;
73 var = self.genHarmonic(s-2,n) / self.genHarmonic(s,n) - ex^2;
77 function X = sample(self, n)
81 alpha = self.getParam(3).paramValue;
82 ranks = self.getParam(2).paramValue;
84 pmf = (1./ranks.^alpha) / sum(1./ranks.^alpha);
86 uniformRandomNumbers = rand(n, 1);
87 X = arrayfun(@(x) find(cdf >= x, 1,
'first'), uniformRandomNumbers);
90 function Ft = evalCDF(self,k)
91 % FT = EVALCDF(SELF,K)
93 % Evaluate the cumulative distribution function at t
96 s = self.getParam(3).paramValue;
97 n = self.getParam(4).paramValue;
98 Ft = self.genHarmonic(s,k) / self.genHarmonic(s,n);
101 function L = evalLST(self, s)
103 % Evaluate the Laplace-Stieltjes transform of the distribution function at s
104 % For Zipf distribution, LST(s) = Σ(k=1 to n)
P(X=k) * e^(-s*k)
106 shape = self.getParam(3).paramValue;
107 n = self.getParam(4).paramValue;
108 Hns = self.genHarmonic(shape, n);
112 prob = 1.0 / k^shape / Hns;
113 L = L + prob * exp(-s * k);
117 function p = evalPMF(self, k)
120 % Evaluate the probability mass function at k
123 s = self.getParam(3).paramValue;
124 n = self.getParam(4).paramValue;
125 if nargin<2 %~exist(
'k',
'var')
128 Hns = Zipf.genHarmonic(s,n);
132 function proc = getProcess(self)
133 % PROC = GETPROCESS()
135 % Get process representation for non-Markovian distribution
136 % Returns [mean, SCV] pair for use in network analysis
137 proc = [self.getMean(), self.getSCV()];
142 function Hnm = genHarmonic(s,n)
143 % HNM = GENHARMONIC(S,N)
145 % Generate harmonic numbers to normalize a Zipf-like distribution
146 % on n items with shape parameter s