1function [XN,UN,QN,RN,TN,CN,tranSysState,tranSync,snc]=solver_ssa_analyzer_parallel(sn, init_state, laboptions)
2% [XN,UN,QN,RN,TN,CN]=SOLVER_SSA_ANALYZER_PARALLEL(SN, INIT_STATE, LABOPTIONS)
4% Worker-count-invariant parallel SSA.
6% The simulation budget
is split into a FIXED number of independent
7% replications R (laboptions.config.nreplicas). Replication r simulates
8% ceil(samples/R) events and
is seeded deterministically with (seed+r-1):
9% inside a parfor body spmdIndex resolves to 1 in solver_ssa, so the
10% effective seed
is exactly laboptions.seed+r-1, independent of which
11% worker executes the iteration. The R replications are distributed over
12% whatever workers the parallel pool provides (and run on the client if no
13% pool exists), and the per-replication estimates are averaged.
15% Because replication r always uses the same seed and sample budget no
16% matter how many workers are available, the returned averages depend only
17% on (seed, samples, R) and are invariant to the worker count. This
is the
18% key difference from the previous spmd implementation, whose result varied
19% with the pool size (it both divided the budget by, and seeded labs from,
20% the runtime number of labs).
22% Copyright (c) 2012-2026, Imperial College London
27% Fixed number of independent replications (worker-count invariant). The
28%
default lives in SolverOptions (
'SSA' case); guard here so the analyzer
29% remains usable
if called with a hand-built options
struct.
30if isfield(laboptions,
'config') && isfield(laboptions.config,
'nreplicas') ...
31 && ~isempty(laboptions.config.nreplicas)
32 R = max(1, round(laboptions.config.nreplicas));
37baseSeed = laboptions.seed;
38perRepSamples = ceil(laboptions.samples / R);
40% per-replication outputs (sliced so they can be populated inside parfor)
50 repoptions = laboptions;
51 repoptions.samples = perRepSamples;
52 repoptions.verbose = VerboseLevel.SILENT;
53 % Deterministic per-replication seed (see header). solver_ssa adds
54 % (lab_idx-1) which
is 0 inside parfor, so this seed
is used verbatim.
55 repoptions.seed = baseSeed + r - 1;
56 [XNr{r},UNr{r},QNr{r},RNr{r},TNr{r},CNr{r},sncr{r}] = ...
57 run_replica(snc, init_state, repoptions);
60% average the per-replication estimates
68% average cache actual hit/miss probabilities across replications
70 for isf=1:sn.nstateful
71 if sn.nodetype(isf) == NodeType.Cache
72 ind = sn.statefulToNode(isf);
73 sn.nodeparam{ind}.actualhitprob(k) = 0;
74 sn.nodeparam{ind}.actualmissprob(k) = 0;
77 if length(qntmp.nodeparam{ind}.hitclass)>=k
78 sn.nodeparam{ind}.actualhitprob(k) = sn.nodeparam{ind}.actualhitprob(k) + (1/R) * qntmp.nodeparam{ind}.actualhitprob(k);
79 sn.nodeparam{ind}.actualmissprob(k) = sn.nodeparam{ind}.actualmissprob(k) + (1/R) * qntmp.nodeparam{ind}.actualmissprob(k);
90function [XN,UN,QN,RN,TN,CN,sncl]=run_replica(sn, init_state, repoptions)
91% Run a single SSA replication and reduce it to per-station/
class averages.
92% Kept as a local function so the (parfor-unfriendly) nested indexing runs
93% in an ordinary function workspace.
102% eventCache is not shared across replications
103if isfield(repoptions,'config
') && isfield(repoptions.config,'eventcache
')
104 eventCache = EventCache.create(repoptions.config.eventcache, sn);
106 eventCache = EventCache.create(false, sn);
109[probSysState,SSq,arvRates,depRates,~,~,sncl] = solver_ssa(sn, init_state, repoptions, eventCache);
118 refsf = sncl.stationToStateful(sncl.refstat(k));
119 XN(k) = probSysState*depRates(:,refsf,k);
121 isf = sncl.stationToStateful(ist);
122 TN(ist,k) = probSysState*depRates(:,isf,k);
123 QN(ist,k) = probSysState*SSq(:,(ist-1)*K+k);
124 switch sncl.sched(ist)
125 case SchedStrategy.INF
126 UN(ist,k) = QN(ist,k);
128 % we use Little's law, otherwise there are issues in
129 % estimating the fraction of time assigned to
class k (to
131 if ~isempty(PH{ist}{k})
132 UN(ist,k) = probSysState*arvRates(:,ist,k)/rates(ist,k)/S(ist);
141 RN(ist,k) = QN(ist,k)./TN(ist,k);
146 CN(k) = NK(k)./XN(k);
156% update cache actual hit and miss data
for this replication
157TNcache = zeros(sncl.nstateful,K);
159 for isf=1:sncl.nstateful
160 if sncl.nodetype(isf) == NodeType.Cache
161 TNcache(isf,k) = probSysState*depRates(:,isf,k);
166 for isf=1:sncl.nstateful
167 if sncl.nodetype(isf) == NodeType.Cache
168 ind = sncl.statefulToNode(isf);
169 if length(sncl.nodeparam{ind}.hitclass)>=k
170 h = sncl.nodeparam{ind}.hitclass(k);
171 m = sncl.nodeparam{ind}.missclass(k);
172 sncl.nodeparam{ind}.actualhitprob(k) = TNcache(isf,h)/sum(TNcache(isf,[h,m]));
173 sncl.nodeparam{ind}.actualmissprob(k) = TNcache(isf,m)/sum(TNcache(isf,[h,m]));