1%{ @file retrieval_fpi_latency.m
2 % @brief FPI-based approximation of delayed-hit count and expected latency
4 % @author LINE Development Team
8 % @brief Approximates the mean delayed-hit count d_i and expected latency Z
11 % Implements the latency-approximation algorithm of the paper (Appendix
12 %
"sec:fpi-di", Algorithm
"Approximation procedure of the latency Z"), based on
15 % d_i = phi_i * lambda_i * E0[F_i^2] / (2 E0[F_i])
17 % where F_i
is the fetch-period duration of item i and E0[.]
is the Palm
18 % expectation conditioned on a miss. The fetch moments are obtained from a
19 % reduced absorbing CTMC of item i
's visits to the retrieval stations:
21 % E0[F_i^k] = k! * pi_e^{(i)} (-D0^{(i)})^{-k} e (eq. moments)
23 % Steps (per the algorithm):
24 % 1. FPI on the full system -> phi_i, pi_{i,0} (retrieval_fpi)
25 % 2. For each i: FPI without item i -> phi^{(i)}_{s,k};
26 % PS-station occupancy phi~_s^{(i)} = sum_{k!=i} phi^{(i)}_{s,k}
27 % 3. Reduced CTMC for item i: one block of PH phases per station; shared
28 % stations (PS/SIRO/FCFS/LCFSPR) have their rates slowed by 1/(1+phi~_s^{(i)})
29 % while IS stations stay independent; routing per R; absorption = return to
30 % cache. D0^{(i)} = transient subgenerator, pi_e^{(i)} = entry distribution
31 % (R(outside->s) * PH-initial).
32 % 4. Moments via eq. moments -> d_i via Theorem thm:di.
33 % 5. Z = sum_i(phi_i + d_i) / sum_i lambda_i(phi_i + pi_{i,0}) (eq. latency tot)
35 % Routing convention: R is (S+1) x (S+1) x n; index 1 = outside (entry on miss /
36 % return to cache on completion), indices 2..S+1 = retrieval stations 1..S;
37 % R(a,b,i) is the routing probability from a to b for item i (rows sum to 1).
41 % [Z, d, phi, pi0] = retrieval_fpi_latency(m, lambda, gamma, alpha, T, R, station_type)
46 % <tr><th>Name<th>Description
47 % <tr><td>m<td>Cache list capacities (1 x h)
48 % <tr><td>lambda<td>Per-item arrival rates (1 x n)
49 % <tr><td>gamma<td>Access factors gamma(i,j)=gamma_{i,j}, (n x h)
50 % <tr><td>alpha<td>Cell(1,S); alpha{s}(1,:,i) PH entry vector of item i at station s
51 % <tr><td>T<td>Cell(1,S); T{s}(:,:,i) PH subgenerator of item i at station s
52 % <tr><td>R<td>(S+1) x (S+1) x n routing matrices (index 1 = outside, 2..S+1 = stations)
53 % <tr><td>station_type<td>(1 x S) string/cellstr per station, one of "IS", "PS", "SIRO", "FCFS", "LCFSPR". PS and LCFSPR (symmetric/insensitive BCMP disciplines) admit general phase-type service and class-dependent rates. SIRO and FCFS require exponential (single-phase) service with identical per-class rates; otherwise an error is raised.
58 % <tr><th>Name<th>Description
59 % <tr><td>Z<td>Expected latency of the delayed-hit system
60 % <tr><td>d<td>Mean number of delayed hits awaiting fetch, per item (1 x n)
61 % <tr><td>phi<td>Delayed-hit ratio phi_i per item (1 x n)
62 % <tr><td>pi0<td>Miss ratio pi_{i,0} per item (1 x n)
65function [Z,d,phi,pi0]=retrieval_fpi_latency(m,lambda,gamma,alpha,T,R,station_type)
71 fsz(s) = size(T{s},1);
73station_type = string(station_type);
74% The latency approximation models IS (infinite-server), PS (processor-sharing),
75% SIRO (service-in-random-order), FCFS (first-come-first-served) and LCFSPR
76% (last-come-first-served preemptive-resume) retrieval stations. IS fetches are
77% independent; PS, SIRO, FCFS and LCFSPR fetches use the mean-field sharing
78% slowdown 1/(1+phitilde). For exponential service:
79% - SIRO: the tagged-job sojourn
is a geometric sum of exponentials, i.e.
80% Exp(mu/(1+phitilde)) -- identical to PS.
81% - FCFS (product form): requires class-independent rates, so the background
82% clears at the common rate mu and the station behaves as M/M/1 whose sojourn
83%
is exactly Exp(mu-Lambda) = Exp(mu/(1+phitilde)) -- again identical to PS.
84% LCFSPR
is a symmetric, insensitive BCMP discipline (like PS): it
is treated as
85% PS with no extra restriction (general phase-type service and class-dependent
86% rates are admissible). SIRO and FCFS instead require exponential (single-phase)
87% service AND identical per-class rates: their reduction to the PS single-exp
88% sojourn (geometric-sum-of-exponentials for SIRO, M/M/1 sojourn for FCFS) holds
89% only when all
classes share the same exponential rate. Any other scheduling
90% policy
is unsupported and must be rejected explicitly.
91unsupported = ~((station_type ==
"IS") | (station_type ==
"PS") | (station_type ==
"SIRO") | (station_type ==
"FCFS") | (station_type ==
"LCFSPR"));
93 bad = strjoin(cellstr(unique(station_type(unsupported))),
', ');
94 line_error(mfilename, sprintf([
'retrieval_fpi_latency supports only IS, PS, SIRO, FCFS and ' ...
95 'LCFSPR retrieval stations; unsupported station type(s): %s'], bad));
97for s = find(station_type ==
"SIRO" | station_type ==
"FCFS")
99 line_error(mfilename, sprintf(['retrieval_fpi_latency supports SIRO/FCFS retrieval stations ' ...
100 'only with exponential (single-phase) service; station %d has phase-type service.'], s));
103% SIRO and FCFS reduce to the PS single-exponential sojorn only with
104% class-independent service rates: every item must share the same mean service
105% time at such a station. (LCFSPR
is exempt -- it
is insensitive.)
106for s = find(station_type == "FCFS" | station_type == "SIRO")
109 taus(i) = -alpha{s}(:,:,i) / T{s}(:,:,i) * ones(fsz(s), 1);
111 if max(taus) - min(taus) > 1e-9 * max(taus)
112 line_error(mfilename, sprintf([
'retrieval_fpi_latency requires class-independent (identical) ' ...
113 'mean service rates at SIRO/FCFS station %d.'], s));
116isIdx = find(station_type ==
"IS");
117psIdx = find(station_type ==
"PS" | station_type ==
"SIRO" | station_type ==
"FCFS" | station_type ==
"LCFSPR");
120% --- per-item fetching demands eta_{s,i} = visits_{s,i} * mean PH service time ---
121% eta_fpi(i, 1) = sum over IS stations; eta_fpi(i, 1+p) = PS station psIdx(p)
122eta_fpi = zeros(n, r+1);
125 a = Ri(1, 2:S+1); % outside -> station entry probs (1 x S)
126 P = Ri(2:S+1, 2:S+1); % station -> station
127 visits = a / (eye(S) -
P); % expected
visits per fetch (1 x S)
130 al = alpha{s}(:,:,i);
132 tau(s) = -al / Tm * ones(fsz(s),1); % mean PH service time
134 eta_s =
visits .* tau; % eta_{s,i} per station
135 eta_fpi(i,1) = sum(eta_s(isIdx));
137 eta_fpi(i,1+p) = eta_s(psIdx(p));
141% --- step 1: FPI on the full system ---
142[pi0, ~, pdh] = retrieval_fpi(m, lambda, eta_fpi, gamma);
143phi = sum(pdh, 1); % phi_i = sum_s phi_{s,i}
147 % --- step 2: FPI without item i -> PS-station occupancy phi~_s^{(i)} ---
148 keep = [1:i-1, i+1:n];
149 [~, ~, pdh_i] = retrieval_fpi(m, lambda(keep), eta_fpi(keep,:), gamma(keep,:));
150 phitilde = zeros(1,S); % occupancy per actual station
152 phitilde(psIdx(p)) = sum(pdh_i(1+p, :));
155 % --- step 3: reduced absorbing CTMC
for item i
's fetch ---
157 off = [0, cumsum(fsz(1:end-1))];
158 D0 = zeros(Phi, Phi);
162 rows = off(s)+(1:fsz(s));
163 if station_type(s) == "PS" || station_type(s) == "SIRO" || station_type(s) == "FCFS" || station_type(s) == "LCFSPR"
164 scale = 1/(1 + phitilde(s)); % PS/SIRO/FCFS/LCFSPR mean-field sharing slowdown
166 scale = 1; % IS: independent
168 blk = scale * T{s}(:,:,i);
169 D0(rows, rows) = D0(rows, rows) + blk;
170 compl = -blk * ones(fsz(s),1); % completion-rate vector at station s
172 cols = off(sp)+(1:fsz(sp));
173 D0(rows, cols) = D0(rows, cols) + compl * Ri(s+1, sp+1) * alpha{sp}(:,:,i);
175 pe(rows) = Ri(1, s+1) * alpha{s}(:,:,i); % entry distribution
178 % --- step 4: moments E0[F_i], E0[F_i^2] (eq. moments) and d_i (thm:di) ---
182 M2 = 2 * pe * (A \ (A \ e));
183 d(i) = phi(i) * lambda(i) * M2 / (2*M1);
186% --- step 5: expected latency Z (eq. latency tot) ---
187Z = sum(phi + d) / sum(lambda(:).' .* (phi + pi0));