LINE Solver
MATLAB API documentation
Loading...
Searching...
No Matches
retrieval_fpi_latency.m
1%{ @file retrieval_fpi_latency.m
2 % @brief FPI-based approximation of delayed-hit count and expected latency
3 %
4 % @author LINE Development Team
5%}
6
7%{
8 % @brief Approximates the mean delayed-hit count d_i and expected latency Z
9 %
10 % @details
11 % Implements the latency-approximation algorithm of the paper (Appendix
12 % "sec:fpi-di", Algorithm "Approximation procedure of the latency Z"), based on
13 % Theorem "thm:di":
14 %
15 % d_i = phi_i * lambda_i * E0[F_i^2] / (2 E0[F_i])
16 %
17 % where F_i is the fetch-period duration of item i and E0[.] is the Palm
18 % expectation conditioned on a miss. The fetch moments are obtained from a
19 % reduced absorbing CTMC of item i's visits to the retrieval stations:
20 %
21 % E0[F_i^k] = k! * pi_e^{(i)} (-D0^{(i)})^{-k} e (eq. moments)
22 %
23 % Steps (per the algorithm):
24 % 1. FPI on the full system -> phi_i, pi_{i,0} (retrieval_fpi)
25 % 2. For each i: FPI without item i -> phi^{(i)}_{s,k};
26 % PS-station occupancy phi~_s^{(i)} = sum_{k!=i} phi^{(i)}_{s,k}
27 % 3. Reduced CTMC for item i: one block of PH phases per station; shared
28 % stations (PS/SIRO/FCFS/LCFSPR) have their rates slowed by 1/(1+phi~_s^{(i)})
29 % while IS stations stay independent; routing per R; absorption = return to
30 % cache. D0^{(i)} = transient subgenerator, pi_e^{(i)} = entry distribution
31 % (R(outside->s) * PH-initial).
32 % 4. Moments via eq. moments -> d_i via Theorem thm:di.
33 % 5. Z = sum_i(phi_i + d_i) / sum_i lambda_i(phi_i + pi_{i,0}) (eq. latency tot)
34 %
35 % Routing convention: R is (S+1) x (S+1) x n; index 1 = outside (entry on miss /
36 % return to cache on completion), indices 2..S+1 = retrieval stations 1..S;
37 % R(a,b,i) is the routing probability from a to b for item i (rows sum to 1).
38 %
39 % @par Syntax:
40 % @code
41 % [Z, d, phi, pi0] = retrieval_fpi_latency(m, lambda, gamma, alpha, T, R, station_type)
42 % @endcode
43 %
44 % @par Parameters:
45 % <table>
46 % <tr><th>Name<th>Description
47 % <tr><td>m<td>Cache list capacities (1 x h)
48 % <tr><td>lambda<td>Per-item arrival rates (1 x n)
49 % <tr><td>gamma<td>Access factors gamma(i,j)=gamma_{i,j}, (n x h)
50 % <tr><td>alpha<td>Cell(1,S); alpha{s}(1,:,i) PH entry vector of item i at station s
51 % <tr><td>T<td>Cell(1,S); T{s}(:,:,i) PH subgenerator of item i at station s
52 % <tr><td>R<td>(S+1) x (S+1) x n routing matrices (index 1 = outside, 2..S+1 = stations)
53 % <tr><td>station_type<td>(1 x S) string/cellstr per station, one of "IS", "PS", "SIRO", "FCFS", "LCFSPR". PS and LCFSPR (symmetric/insensitive BCMP disciplines) admit general phase-type service and class-dependent rates. SIRO and FCFS require exponential (single-phase) service with identical per-class rates; otherwise an error is raised.
54 % </table>
55 %
56 % @par Returns:
57 % <table>
58 % <tr><th>Name<th>Description
59 % <tr><td>Z<td>Expected latency of the delayed-hit system
60 % <tr><td>d<td>Mean number of delayed hits awaiting fetch, per item (1 x n)
61 % <tr><td>phi<td>Delayed-hit ratio phi_i per item (1 x n)
62 % <tr><td>pi0<td>Miss ratio pi_{i,0} per item (1 x n)
63 % </table>
64%}
65function [Z,d,phi,pi0]=retrieval_fpi_latency(m,lambda,gamma,alpha,T,R,station_type)
66n = numel(lambda);
67m = m(:).';
68S = numel(alpha);
69fsz = zeros(1,S);
70for s = 1:S
71 fsz(s) = size(T{s},1);
72end
73station_type = string(station_type);
74% The latency approximation models IS (infinite-server), PS (processor-sharing),
75% SIRO (service-in-random-order), FCFS (first-come-first-served) and LCFSPR
76% (last-come-first-served preemptive-resume) retrieval stations. IS fetches are
77% independent; PS, SIRO, FCFS and LCFSPR fetches use the mean-field sharing
78% slowdown 1/(1+phitilde). For exponential service:
79% - SIRO: the tagged-job sojourn is a geometric sum of exponentials, i.e.
80% Exp(mu/(1+phitilde)) -- identical to PS.
81% - FCFS (product form): requires class-independent rates, so the background
82% clears at the common rate mu and the station behaves as M/M/1 whose sojourn
83% is exactly Exp(mu-Lambda) = Exp(mu/(1+phitilde)) -- again identical to PS.
84% LCFSPR is a symmetric, insensitive BCMP discipline (like PS): it is treated as
85% PS with no extra restriction (general phase-type service and class-dependent
86% rates are admissible). SIRO and FCFS instead require exponential (single-phase)
87% service AND identical per-class rates: their reduction to the PS single-exp
88% sojourn (geometric-sum-of-exponentials for SIRO, M/M/1 sojourn for FCFS) holds
89% only when all classes share the same exponential rate. Any other scheduling
90% policy is unsupported and must be rejected explicitly.
91unsupported = ~((station_type == "IS") | (station_type == "PS") | (station_type == "SIRO") | (station_type == "FCFS") | (station_type == "LCFSPR"));
92if any(unsupported)
93 bad = strjoin(cellstr(unique(station_type(unsupported))), ', ');
94 line_error(mfilename, sprintf(['retrieval_fpi_latency supports only IS, PS, SIRO, FCFS and ' ...
95 'LCFSPR retrieval stations; unsupported station type(s): %s'], bad));
96end
97for s = find(station_type == "SIRO" | station_type == "FCFS")
98 if fsz(s) > 1
99 line_error(mfilename, sprintf(['retrieval_fpi_latency supports SIRO/FCFS retrieval stations ' ...
100 'only with exponential (single-phase) service; station %d has phase-type service.'], s));
101 end
102end
103% SIRO and FCFS reduce to the PS single-exponential sojorn only with
104% class-independent service rates: every item must share the same mean service
105% time at such a station. (LCFSPR is exempt -- it is insensitive.)
106for s = find(station_type == "FCFS" | station_type == "SIRO")
107 taus = zeros(1, n);
108 for i = 1:n
109 taus(i) = -alpha{s}(:,:,i) / T{s}(:,:,i) * ones(fsz(s), 1);
110 end
111 if max(taus) - min(taus) > 1e-9 * max(taus)
112 line_error(mfilename, sprintf(['retrieval_fpi_latency requires class-independent (identical) ' ...
113 'mean service rates at SIRO/FCFS station %d.'], s));
114 end
115end
116isIdx = find(station_type == "IS");
117psIdx = find(station_type == "PS" | station_type == "SIRO" | station_type == "FCFS" | station_type == "LCFSPR");
118r = numel(psIdx);
119
120% --- per-item fetching demands eta_{s,i} = visits_{s,i} * mean PH service time ---
121% eta_fpi(i, 1) = sum over IS stations; eta_fpi(i, 1+p) = PS station psIdx(p)
122eta_fpi = zeros(n, r+1);
123for i = 1:n
124 Ri = R(:,:,i);
125 a = Ri(1, 2:S+1); % outside -> station entry probs (1 x S)
126 P = Ri(2:S+1, 2:S+1); % station -> station
127 visits = a / (eye(S) - P); % expected visits per fetch (1 x S)
128 tau = zeros(1,S);
129 for s = 1:S
130 al = alpha{s}(:,:,i);
131 Tm = T{s}(:,:,i);
132 tau(s) = -al / Tm * ones(fsz(s),1); % mean PH service time
133 end
134 eta_s = visits .* tau; % eta_{s,i} per station
135 eta_fpi(i,1) = sum(eta_s(isIdx));
136 for p = 1:r
137 eta_fpi(i,1+p) = eta_s(psIdx(p));
138 end
139end
140
141% --- step 1: FPI on the full system ---
142[pi0, ~, pdh] = retrieval_fpi(m, lambda, eta_fpi, gamma);
143phi = sum(pdh, 1); % phi_i = sum_s phi_{s,i}
144
145d = zeros(1,n);
146for i = 1:n
147 % --- step 2: FPI without item i -> PS-station occupancy phi~_s^{(i)} ---
148 keep = [1:i-1, i+1:n];
149 [~, ~, pdh_i] = retrieval_fpi(m, lambda(keep), eta_fpi(keep,:), gamma(keep,:));
150 phitilde = zeros(1,S); % occupancy per actual station
151 for p = 1:r
152 phitilde(psIdx(p)) = sum(pdh_i(1+p, :));
153 end
154
155 % --- step 3: reduced absorbing CTMC for item i's fetch ---
156 Phi = sum(fsz);
157 off = [0, cumsum(fsz(1:end-1))];
158 D0 = zeros(Phi, Phi);
159 pe = zeros(1, Phi);
160 Ri = R(:,:,i);
161 for s = 1:S
162 rows = off(s)+(1:fsz(s));
163 if station_type(s) == "PS" || station_type(s) == "SIRO" || station_type(s) == "FCFS" || station_type(s) == "LCFSPR"
164 scale = 1/(1 + phitilde(s)); % PS/SIRO/FCFS/LCFSPR mean-field sharing slowdown
165 else
166 scale = 1; % IS: independent
167 end
168 blk = scale * T{s}(:,:,i);
169 D0(rows, rows) = D0(rows, rows) + blk;
170 compl = -blk * ones(fsz(s),1); % completion-rate vector at station s
171 for sp = 1:S
172 cols = off(sp)+(1:fsz(sp));
173 D0(rows, cols) = D0(rows, cols) + compl * Ri(s+1, sp+1) * alpha{sp}(:,:,i);
174 end
175 pe(rows) = Ri(1, s+1) * alpha{s}(:,:,i); % entry distribution
176 end
177
178 % --- step 4: moments E0[F_i], E0[F_i^2] (eq. moments) and d_i (thm:di) ---
179 e = ones(Phi,1);
180 A = -D0;
181 M1 = pe * (A \ e);
182 M2 = 2 * pe * (A \ (A \ e));
183 d(i) = phi(i) * lambda(i) * M2 / (2*M1);
184end
185
186% --- step 5: expected latency Z (eq. latency tot) ---
187Z = sum(phi + d) / sum(lambda(:).' .* (phi + pi0));
188end