LINE Solver
MATLAB API documentation
Loading...
Searching...
No Matches
solver_ctmc_reward.m
1function [steadyStateRewards, rewardNames, stateSpace, pi, V, t] = solver_ctmc_reward(sn, options)
2% SOLVER_CTMC_REWARD Compute steady-state and transient rewards
3%
4% [STEADYSTATE, NAMES, STATESPACE, PI, V, T] = SOLVER_CTMC_REWARD(SN, OPTIONS)
5%
6% Computes both steady-state expected rewards using the stationary distribution:
7% E[r] = sum_s pi(s) * r(s)
8%
9% and transient value functions using uniformization with value iteration:
10% V^{k+1}(s) = r(s) + sum_{s'} P(s,s') * V^k(s')
11%
12% INPUTS:
13% sn - NetworkStruct with .reward field populated
14% options - Solver options with:
15% .rewardIterations - Number of iterations for transient (default 1000)
16%
17% OUTPUTS:
18% steadyStateRewards - Vector of steady-state expected rewards [nRewards x 1]
19% rewardNames - Cell array of reward names
20% stateSpace - State space matrix [nStates x nDims]
21% pi - Stationary distribution [1 x nStates]
22% V - Cell array of value functions {nRewards x 1}
23% Each V{r} is [Tmax+1 x nStates] matrix
24% t - Time vector (scaled by uniformization rate)
25%
26% Copyright (c) 2012-2026, Imperial College London
27% All rights reserved.
28
29% Validate rewards are defined
30if isempty(sn.reward)
31 error('No rewards defined. Use model.setReward() before calling this solver.');
32end
33
34if isinf(options.cutoff)
35 options.cutoff = 100;
36 line_printf(mfilename,'Cutoff option missing, setting cutoff=100');
37end
38
39% Get default iterations for transient analysis
40if isfield(options, 'rewardIterations') && ~isempty(options.rewardIterations)
41 Tmax = options.rewardIterations;
42else
43 Tmax = 1000;
44end
45
46% Get generator and state space
47[Q, ~, stateSpaceAggr, ~, ~, ~, sn] = solver_ctmc(sn, options);
48stateSpace = stateSpaceAggr; % Return aggregated state space to user
49
50nstates = size(Q, 1);
51nRewards = length(sn.reward);
52
53% Compute stationary distribution for steady-state rewards
54pi = ctmc_solve(Q, options);
55pi(pi < GlobalConstants.Zero) = 0;
56pi = pi / sum(pi); % Normalize
57
58% Build index maps for RewardState
59% nodeToStationMap: node.index -> station index
60nodeToStationMap = containers.Map('KeyType', 'int32', 'ValueType', 'int32');
61classToIndexMap = containers.Map('KeyType', 'int32', 'ValueType', 'int32');
62
63for ind = 1:sn.nnodes
64 if sn.isstation(ind)
65 nodeToStationMap(int32(ind)) = sn.nodeToStation(ind);
66 end
67end
68
69for r = 1:sn.nclasses
70 classToIndexMap(int32(r)) = r;
71end
72
73% Build reward vectors (one per reward definition)
74R = zeros(nRewards, nstates);
75rewardNames = cell(nRewards, 1);
76for r = 1:nRewards
77 rewardNames{r} = sn.reward{r}.name;
78 rewardFn = sn.reward{r}.fn;
79
80 for s = 1:nstates
81 % Create RewardState for this state row
82 stateRow = stateSpaceAggr(s, :);
83 rewardState = RewardState(stateRow, sn, nodeToStationMap, classToIndexMap);
84
85 % Try calling with RewardState (new API, single argument)
86 try
87 R(r, s) = rewardFn(rewardState);
88 catch ME
89 % If that fails, try backward compatibility with @(state, sn) signature
90 try
91 R(r, s) = rewardFn(stateSpaceAggr(s,:), sn);
92 catch ME2
93 % If both fail, report the original error from the new API
94 rethrow(ME);
95 end
96 end
97 end
98end
99
100% Compute steady-state expected rewards: E[r] = pi * r'
101steadyStateRewards = zeros(nRewards, 1);
102for r = 1:nRewards
103 steadyStateRewards(r) = pi * R(r, :)';
104end
105
106% Compute transient value functions via uniformization
107% Uniformization: find maximum exit rate
108q = max(abs(diag(Q)));
109if q == 0
110 q = 1; % Handle absorbing states edge case
111end
112
113% Build transition probability matrix P = Q/q + I
114P = Q/q + speye(nstates);
115
116% Value iteration
117V = cell(nRewards, 1);
118for r = 1:nRewards
119 V{r} = zeros(Tmax+1, nstates);
120 v_prev = zeros(1, nstates);
121 for k = 1:Tmax
122 % V^{k+1}(s) = r(s) + sum_{s'} P(s,s') * V^k(s')
123 v_new = R(r, :) + v_prev * P';
124 V{r}(k+1, :) = v_new;
125 v_prev = v_new;
126 end
127end
128
129% Time scaling: convert iteration index to continuous time
130t = (0:Tmax) / q;
131
132end