function [steadyStateRewards, rewardNames, stateSpace, pi, V, t] = solver_ctmc_reward(sn, options)
% SOLVER_CTMC_REWARD Compute steady-state and transient rewards
%
% [STEADYSTATE, NAMES, STATESPACE, PI, V, T] = SOLVER_CTMC_REWARD(SN, OPTIONS)
%
% Computes both steady-state expected rewards using the stationary distribution:
%   E[r] = sum_s pi(s) * r(s)
% and transient value functions using uniformization with value iteration:
%   V^{k+1}(s) = r(s) + sum_{s'} P(s,s') * V^k(s')
%
% Input:
%   sn      - NetworkStruct with .reward field populated
%   options - Solver options with:
%             .rewardIterations - Number of iterations for transient (default 1000)
%
% Output:
%   steadyStateRewards - Vector of steady-state expected rewards [nRewards x 1]
%   rewardNames        - Cell array of reward names
%   stateSpace         - State space matrix [nStates x nDims]
%   pi                 - Stationary distribution [1 x nStates]
%   V                  - Cell array of value functions {nRewards x 1};
%                        each V{r} is a [Tmax+1 x nStates] matrix
%   t                  - Time vector (scaled by the uniformization rate)
%
% Copyright (c) 2012-2026, Imperial College London
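%
% Example (illustrative sketch; the model construction, the SolverCTMC.defaultOptions
% call, and the setReward usage are assumptions about the surrounding API, not
% prescribed by this file):
%   % model is a Network object with a reward registered via model.setReward
%   sn = model.getStruct();
%   options = SolverCTMC.defaultOptions;
%   [ssr, names, S, pi, V, t] = solver_ctmc_reward(sn, options);
%   ssr(1)               % steady-state expected value of the first reward
%   plot(t, V{1}(:, 1))  % transient value function of reward 1 from state 1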
% Validate rewards are defined
    error('No rewards defined. Use model.setReward() before calling this solver.');
if isinf(options.cutoff)
    line_warning(mfilename, 'Cutoff option missing, setting cutoff=100');
% Get default iterations for transient analysis
if isfield(options, 'rewardIterations') && ~isempty(options.rewardIterations)
    Tmax = options.rewardIterations;
% Get generator and state space
[Q, ~, stateSpaceAggr, ~, ~, ~, sn] = solver_ctmc(sn, options);
stateSpace = stateSpaceAggr; % Return aggregated state space to user
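% (sketch) Q is the CTMC infinitesimal generator over stateSpaceAggr: rows sum to
% zero and off-diagonal entries hold transition rates, so a quick consistency
% check is that all(abs(sum(Q, 2)) < tol) holds for a small tolerance tol.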
nRewards = length(sn.reward);
% Compute stationary distribution for steady-state rewards
pi = ctmc_solve(Q, options);
pi(pi < GlobalConstants.Zero) = 0;
pi = pi / sum(pi); % Normalize
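% (sketch) pi solves pi*Q = 0 with sum(pi) = 1; clipping near-zero entries and
% renormalizing, as above, preserves this up to numerical tolerance, so
% norm(pi*Q, inf) should remain close to zero.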
% Build index maps for RewardState
% nodeToStationMap: node.index -> station index
nodeToStationMap = containers.Map('KeyType', 'int32', 'ValueType', 'int32');
classToIndexMap = containers.Map('KeyType', 'int32', 'ValueType', 'int32');
    nodeToStationMap(int32(ind)) = sn.nodeToStation(ind);
    classToIndexMap(int32(r)) = r;
% Build reward vectors (one per reward definition)
R = zeros(nRewards, nstates);
rewardNames = cell(nRewards, 1);
    rewardNames{r} = sn.reward{r}.name;
    rewardFn = sn.reward{r}.fn;
        % Create RewardState for this state row
        stateRow = stateSpaceAggr(s, :);
        rewardState = RewardState(stateRow, sn, nodeToStationMap, classToIndexMap);
        % Try calling with RewardState (new API, single argument)
            R(r, s) = rewardFn(rewardState);
            % If that fails, try backward compatibility with @(state, sn) signature
            R(r, s) = rewardFn(stateSpaceAggr(s,:), sn);
            % If both fail, report the original error from the new API
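            % Illustrative handles matching the two accepted signatures (sketch;
            % the bodies below are placeholders, not library code):
            %   rNew = @(rs) 1;                  % new API: one RewardState argument
            %   rOld = @(state, sn) sum(state);  % legacy API: raw state row plus sn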
% Compute steady-state expected rewards: E[r] = pi * r'
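% Illustrative numbers (sketch): for a two-state chain with pi = [0.25 0.75] and
% reward vector r = [0 4], the steady-state expected reward is 0.25*0 + 0.75*4 = 3.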
steadyStateRewards = zeros(nRewards, 1);
    steadyStateRewards(r) = pi * R(r, :)';
% Compute transient value functions via uniformization
% Uniformization: find maximum exit rate
q = max(abs(diag(Q)));
    q = 1; % Handle absorbing states edge case
% Build transition probability matrix P = Q/q + I
P = Q/q + speye(nstates);
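% (sketch) P = I + Q/q is the uniformized DTMC: its rows sum to 1 and it shares
% the CTMC's stationary distribution, since pi*P = pi + (pi*Q)/q = pi; e.g.
% norm(pi*P - pi, inf) should be near zero.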
V = cell(nRewards, 1);
    V{r} = zeros(Tmax+1, nstates);
    v_prev = zeros(1, nstates);
        % V^{k+1}(s) = r(s) + sum_{s'} P(s,s') * V^k(s')
        v_new = R(r, :) + v_prev * P';
        V{r}(k+1, :) = v_new;
% Time scaling: convert iteration index to continuous time
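% (sketch) with v_prev initialized to zero, V{r}(k+1, :) is the k-step expected
% total reward of the uniformized DTMC; each step of that chain takes 1/q expected
% time in the CTMC, so iteration index k corresponds to continuous time roughly
% k/q, which is what the scaling below captures.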