LINE Solver
MATLAB API documentation
Loading...
Searching...
No Matches
Zipf.m
1classdef Zipf < DiscreteDistribution
2 % Zipf Power-law distribution for modeling popularity and ranking
3 %
4 % Zipf represents the Zipf distribution commonly used for modeling popularity
5 % distributions, where items are ranked by frequency. The probability of an
6 % item is inversely proportional to its rank raised to the shape parameter.
7 % This distribution is essential for cache modeling and content popularity.
8 %
9 % @brief Zipf distribution for popularity and ranking-based phenomena
10 %
11 % Key characteristics:
12 % - Two parameters: shape (s) and number of items (n)
13 % - Probability ∝ 1/rank^s for each rank
14 % - Heavy-tailed distribution (popular items dominate)
15 % - Mean = H(s-1,n)/H(s,n) where H is generalized harmonic number
16 % - Support: {1, 2, ..., n}
17 %
18 % Shape parameter behavior:
19 % - s > 1: Heavy tail, few items very popular
20 % - s = 1: Classic Zipf law
21 % - s < 1: Less skewed, more uniform popularity
22 %
23 % The Zipf distribution is used for:
24 % - Web cache popularity modeling
25 % - Content access patterns
26 % - Word frequency in natural language
27 % - City population distributions
28 % - Internet traffic modeling
29 %
30 % Example:
31 % @code
32 % web_popularity = Zipf(1.2, 1000); % 1000 items, shape=1.2
33 % access_pattern = web_popularity.sample(10000);
34 % @endcode
35 %
36 % Copyright (c) 2018-2022, Imperial College London
37 % All rights reserved.
38
39 methods
40 function self = Zipf(s, n)
41 % ZIPF Create a Zipf distribution instance
42 %
43 % @brief Creates a Zipf distribution with shape parameter and item count
44 % @param s Shape parameter (s > 0, controls skewness)
45 % @param n Number of items (positive integer)
46 % @return self Zipf distribution instance with specified parameters
47
48 self@DiscreteDistribution('Zipf',4,[1,n]);
49 p = 1./((1:n).^s)/Zipf.genHarmonic(s,n);
50 x = 1:n;
51 setParam(self, 1, 'p', p(:)');
52 setParam(self, 2, 'x', x(:)');
53 setParam(self, 3, 's', s);
54 setParam(self, 4, 'n', n);
55 end
56
57 function ex = getMean(self)
58 % EX = GETMEAN()
59
60 % Get distribution mean
61 s = self.getParam(3).paramValue;
62 n = self.getParam(4).paramValue;
63 ex = self.genHarmonic(s-1,n) / self.genHarmonic(s,n);
64 end
65
66 function SCV = getSCV(self)
67 % SCV = GETSCV()
68
69 % Get distribution squared coefficient of variation (SCV = variance / mean^2)
70 s = self.getParam(3).paramValue;
71 n = self.getParam(4).paramValue;
72 ex = getMean(self);
73 var = self.genHarmonic(s-2,n) / self.genHarmonic(s,n) - ex^2;
74 SCV = var / ex^2;
75 end
76
77 function X = sample(self, n)
78 % X = SAMPLE(N)
79 % to be checked
80
81 alpha = self.getParam(3).paramValue;
82 ranks = self.getParam(2).paramValue;
83
84 pmf = (1./ranks.^alpha) / sum(1./ranks.^alpha);
85 cdf = cumsum(pmf);
86 uniformRandomNumbers = rand(n, 1);
87 X = arrayfun(@(x) find(cdf >= x, 1, 'first'), uniformRandomNumbers);
88 end
89
90 function Ft = evalCDF(self,k)
91 % FT = EVALCDF(SELF,K)
92
93 % Evaluate the cumulative distribution function at t
94 % AT T
95
96 s = self.getParam(3).paramValue;
97 n = self.getParam(4).paramValue;
98 Ft = self.genHarmonic(s,k) / self.genHarmonic(s,n);
99 end
100
101 function L = evalLST(self, s)
102 % L = EVALST(S)
103 % Evaluate the Laplace-Stieltjes transform of the distribution function at s
104 % For Zipf distribution, LST(s) = Σ(k=1 to n) P(X=k) * e^(-s*k)
105
106 shape = self.getParam(3).paramValue;
107 n = self.getParam(4).paramValue;
108 Hns = self.genHarmonic(shape, n);
109
110 L = 0.0;
111 for k = 1:n
112 prob = 1.0 / k^shape / Hns;
113 L = L + prob * exp(-s * k);
114 end
115 end
116
117 function p = evalPMF(self, k)
118 % P = EVALPMF(K)
119
120 % Evaluate the probability mass function at k
121 % AT K
122
123 s = self.getParam(3).paramValue;
124 n = self.getParam(4).paramValue;
125 if nargin<2 %~exist('k','var')
126 k = 1:n;
127 end
128 Hns = Zipf.genHarmonic(s,n);
129 p = 1./(k.^s)/Hns;
130 end
131
132 function proc = getProcess(self)
133 % PROC = GETPROCESS()
134
135 % Get process representation for non-Markovian distribution
136 % Returns [mean, SCV] pair for use in network analysis
137 proc = [self.getMean(), self.getSCV()];
138 end
139 end
140
141 methods (Static)
142 function Hnm = genHarmonic(s,n)
143 % HNM = GENHARMONIC(S,N)
144
145 % Generate harmonic numbers to normalize a Zipf-like distribution
146 % on n items with shape parameter s
147 Hnm = 0;
148 for k=1:n
149 Hnm = Hnm + 1/k^s;
150 end
151 end
152 end
153
154end
155