MDP solution via linear programming for explicit engine. #149

Open · wants to merge 3 commits into base: master

Changes from 1 commit
WIP permissive
davexparker committed Jan 3, 2023

commit c836cba0570a3df3c39289fc105db4f2b070f400
255 changes: 255 additions & 0 deletions prism/src/explicit/MDPModelChecker.java
@@ -763,6 +763,96 @@ public BitSet prob1(MDPGeneric<?> mdp, BitSet remain, BitSet target, boolean min
return u;
}

/**
* Zero total rewards precomputation algorithm, i.e., determine the states of an MDP
* from which the min/max expected total cumulative reward is zero.
* @param mdp The MDP
* @param mdpRewards The rewards
* @param min Min or max rewards (true=min, false=max)
*/
public BitSet rewTot0(MDPGeneric<?> mdp, MDPRewards mdpRewards, boolean min)
{
int n, iters;
BitSet v, soln;
boolean v_done;
long timer;

// Start precomputation
timer = System.currentTimeMillis();
if (!silentPrecomputations)
mainLog.println("Starting RewTot0 (" + (min ? "min" : "max") + ")...");

// Initialise vectors
n = mdp.getNumStates();
v = new BitSet(n);
soln = new BitSet(n);

// Fixed point loop
iters = 0;
v_done = false;
// Least fixed point - start from 0
v.clear();
soln.clear();
while (!v_done) {
iters++;
rewTot0step(mdp, mdpRewards, v, min, soln);
// Check termination (inner)
v_done = soln.equals(v);
// v = soln
v.clear();
v.or(soln);
}
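// The least fixed point v now contains the states from which, under every strategy (min)
// / some strategy (max), a positive reward is accumulated with positive probability;
// its complement is the set of states whose expected total reward is zero.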

// Negate
v.flip(0, n);

// Finished precomputation
timer = System.currentTimeMillis() - timer;
if (!silentPrecomputations) {
mainLog.print("RewTot0 (" + (min ? "min" : "max") + ")");
mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds.");
}

return v;
}

/**
* Perform a single step of the zero total rewards precomputation algorithm,
* i.e., set bit i of {@code result} iff the state reward is positive or, for some/all
* choices, there is a positive transition reward or a transition to a state in {@code u}.
* @param mdp The MDP
* @param mdpRewards The rewards
* @param u Set of states {@code u}
* @param forall For-all or there-exists (true=for-all, false=there-exists)
* @param result Store results here
*/
private void rewTot0step(final MDPGeneric<?> mdp, final MDPRewards mdpRewards, final BitSet u, final boolean forall, BitSet result)
{
int n = mdp.getNumStates();
for (int s = 0; s < n; s++) {
if (mdpRewards.getStateReward(s) > 0) {
result.set(s);
continue;
}
boolean b1 = forall; // there exists or for all
for (int choice = 0, numChoices = mdp.getNumChoices(s); choice < numChoices; choice++) {
boolean b2 = mdpRewards.getTransitionReward(s, choice) > 0 || mdp.someSuccessorsInSet(s, choice, u);
if (forall) {
if (!b2) {
b1 = false;
break;
}
} else {
if (b2) {
b1 = true;
break;
}
}
}
result.set(s, b1);
}
}

/**
* Compute reachability probabilities using value iteration.
* Optionally, store optimal (memoryless) strategy info.
@@ -3015,6 +3105,171 @@ public List<Integer> expReachStrategy(MDP mdp, MDPRewards mdpRewards, int state,
return mdp.mvMultRewMinMaxSingleChoices(state, lastSoln, mdpRewards, min, val);
}

/**
* Compute a multi-strategy (a set of allowed choices in each state) which guarantees
* that the (minimum) expected total cumulative reward from the initial state is at least
* {@code bound}, encoded and solved as a mixed integer linear program (MILP) via Gurobi.
* @param mdp The MDP
* @param mdpRewards The rewards
* @param bound Lower bound to be guaranteed on the expected total cumulative reward
*/
public ModelCheckerResult computeMultiStrategy(MDP mdp, MDPRewards mdpRewards, double bound) throws PrismException
{
boolean min = true;
double soln[] = null;

// Start solution
long timer = System.currentTimeMillis();
mainLog.println("Starting linear programming (" + (min ? "min" : "max") + ")...");

// Store MDP info
int n = mdp.getNumStates();
int sInit = mdp.getFirstInitialState();
// Find states where max expected total reward is 0
BitSet maxRew0 = rewTot0(mdp, mdpRewards, false);
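// (their x variables are fixed to 0 below; otherwise zero-reward end components
// could leave the value variables unbounded)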

try {
// Initialise MILP solver
GRBEnv env = new GRBEnv("gurobi.log");
env.set(GRB.IntParam.OutputFlag, 0);
GRBModel model = new GRBModel(env);
// Set up MILP variables (real)
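// x_s represents the value (expected total reward) guaranteed from state s under
// the multi-strategy; it is fixed to 0 where even the max total reward is 0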
GRBVar xVars[] = new GRBVar[n];
for (int s = 0; s < n; s++) {
xVars[s] = model.addVar(0.0, maxRew0.get(s) ? 0.0 : GRB.INFINITY, 0.0, GRB.CONTINUOUS, "x" + s);
}
// Set up MILP variables (binary)
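// y_{s,j} = 1 iff choice j of state s is allowed by the multi-strategy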
GRBVar yaVars[][] = new GRBVar[n][];
for (int s = 0; s < n; s++) {
int nc = mdp.getNumChoices(s);
yaVars[s] = new GRBVar[nc];
for (int j = 0; j < nc; j++) {
yaVars[s][j] = model.addVar(0.0, 1.0, 0.0, GRB.BINARY, "y" + s + "_" + j + mdp.getAction(s, j));
}
}

double scale = 100.0;
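// 'scale' serves both as the big-M constant that relaxes the value constraints for
// disallowed choices and as the per-choice weight rewarding permissiveness in the objective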

// Set up objective function
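// minimise -x_sInit - scale * (number of allowed choices), i.e. maximise the value
// in the initial state plus a bonus for each choice kept in the multi-strategy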
GRBLinExpr exprObj = new GRBLinExpr();
exprObj.addTerm(-1.0, xVars[sInit]);
//double rhs = 0.0;
for (int s = 0; s < n; s++) {
int nc = mdp.getNumChoices(s);
for (int j = 0; j < nc; j++) {
double phi_s_j = 1.0 * scale;
exprObj.addTerm(-phi_s_j, yaVars[s][j]);
//rhs -= phi_s_j;
}
}
model.setObjective(exprObj, GRB.MINIMIZE);

// Set up arrays for building constraint rows
double row[] = new double[n + 1];
int colno[] = new int[n + 1];
// Add constraints
int counter = 0;
double b = bound;
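// (1) the value in the initial state must meet the bound from the property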
GRBLinExpr expr = new GRBLinExpr();
expr.addTerm(1.0, xVars[sInit]);
model.addConstr(expr, GRB.GREATER_EQUAL, b, "c" + counter++);
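// (2) at least one choice must remain allowed in every state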
for (int s = 0; s < n; s++) {
expr = new GRBLinExpr();
int nc = mdp.getNumChoices(s);
for (int j = 0; j < nc; j++) {
expr.addTerm(1.0, yaVars[s][j]);
}
model.addConstr(expr, GRB.GREATER_EQUAL, 1.0, "c" + counter++);
}
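// (3) big-M Bellman constraints: if choice i is allowed in state s (y_{s,i} = 1) then
// x_s <= r(s,i) + sum_t P(s,i,t)*x_t; if disallowed, the constraint is relaxed by 'scale'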
for (int s = 0; s < n; s++) {
int numChoices = mdp.getNumChoices(s);
for (int i = 0; i < numChoices; i++) {
int count = 0;
row[count] = 1.0;
colno[count] = s + 1;
count++;
Iterator<Map.Entry<Integer, Double>> iter = mdp.getTransitionsIterator(s, i);
while (iter.hasNext()) {
Map.Entry<Integer, Double> e = iter.next();
int t = e.getKey();
double p = e.getValue();
if (t == s) {
row[0] -= p;
} else {
row[count] = -p;
colno[count] = t + 1;
count++;
}
}
// TODO state rewards?
double r = mdpRewards.getTransitionReward(s, i);
expr = new GRBLinExpr();
for (int j = 0; j < count; j++) {
expr.addTerm(row[j], xVars[colno[j] - 1]);
}
expr.addTerm(scale, yaVars[s][i]);
model.addConstr(expr, GRB.LESS_EQUAL, r + scale, "c" + counter++);
}
}
// Solve MILP
//model.write("gurobi.lp");
model.optimize();
if (model.get(GRB.IntAttr.Status) == GRB.Status.INFEASIBLE) {
// model.computeIIS();
// System.out.println("\nThe following constraint(s) " + "cannot be satisfied:");
// for (GRBConstr c : model.getConstrs()) {
// if (c.get(GRB.IntAttr.IISConstr) == 1) {
// System.out.println(c.get(GRB.StringAttr.ConstrName));
// }
// }
throw new PrismException("Error solving LP: " + "infeasible");
}
if (model.get(GRB.IntAttr.Status) == GRB.Status.UNBOUNDED) {
throw new PrismException("Error solving LP: " + "unbounded");
}
if (model.get(GRB.IntAttr.Status) != GRB.Status.OPTIMAL) {
throw new PrismException("Error solving LP: " + "non-optimal " + model.get(GRB.IntAttr.Status));
}
soln = new double[n];
for (int s = 0; s < n; s++) {
System.out.println(xVars[s].get(GRB.StringAttr.VarName) + " = "+xVars[s].get(GRB.DoubleAttr.X));
soln[s] = xVars[s].get(GRB.DoubleAttr.X);
}
for (int s = 0; s < n; s++) {
int numChoices = mdp.getNumChoices(s);
for (int i = 0; i < numChoices; i++) {
System.out.println(yaVars[s][i].get(GRB.StringAttr.VarName) + " = " + yaVars[s][i].get(GRB.DoubleAttr.X));
}
}
// Print multi-strategy
for (int s = 0; s < n; s++) {
mainLog.print(s + ":");
int numChoices = mdp.getNumChoices(s);
for (int i = 0; i < numChoices; i++) {
if (yaVars[s][i].get(GRB.DoubleAttr.X) > 0) {
mainLog.print(" " + mdp.getAction(s, i));
}
}
mainLog.println();
}

// Clean up
model.dispose();
env.dispose();
} catch (GRBException e) {
throw new PrismException("Error solving LP: " +e.getMessage());
}


// Return results
ModelCheckerResult res = new ModelCheckerResult();
// res.accuracy = AccuracyFactory.boundedNumericalIterations();
res.soln = soln;
// res.timeTaken = timer / 1000.0;
return res;
}

/**
* Restrict a (memoryless) strategy for an MDP, stored as an integer array of choice indices,
* to the states of the MDP that are reachable under that strategy.
51 changes: 50 additions & 1 deletion prism/src/explicit/ProbModelChecker.java
@@ -49,6 +49,7 @@
import parser.type.TypePathDouble;
import prism.AccuracyFactory;
import prism.IntegerBound;
import prism.ModelType;
import prism.OpRelOpBound;
import prism.Prism;
import prism.PrismComponent;
@@ -565,8 +566,12 @@ protected StateValues checkExpressionStrategy(Model model, ExpressionStrategy ex
}
Expression exprSub = exprs.get(0);
// Pass onto relevant method:
// Multi-strategy
if ("multi".equals(expr.getModifier())) {
return checkExpressionMultiStrategy(model, expr, forAll, coalition, statesOfInterest);
}
// P operator
if (exprSub instanceof ExpressionProb) {
else if (exprSub instanceof ExpressionProb) {
return checkExpressionProb(model, (ExpressionProb) exprSub, forAll, coalition, statesOfInterest);
}
// R operator
@@ -579,6 +584,50 @@ else if (exprSub instanceof ExpressionReward) {
}
}

/**
* Model check a <<>> operator requesting a multi-strategy
* @param statesOfInterest the states of interest, see checkExpression()
*/
protected StateValues checkExpressionMultiStrategy(Model model, ExpressionStrategy expr, boolean forAll, Coalition coalition, BitSet statesOfInterest) throws PrismException
{
// Only support "exists" (<<>>) currently
if (forAll) {
throw new PrismException("Multi-strategies not supported for " + expr.getOperatorString());
}
// Only support R[C] currently
Expression exprSub = expr.getOperands().get(0);
if (!(exprSub instanceof ExpressionReward)) {
throw new PrismException("Multi-strategy synthesis only supports R[C] properties currently");
}
ExpressionReward exprRew = (ExpressionReward) exprSub;
if (!(exprRew.getExpression() instanceof ExpressionTemporal)) {
throw new PrismException("Multi-strategy synthesis only supports R[C] properties currently");
}
ExpressionTemporal exprTemp = (ExpressionTemporal) exprRew.getExpression();
if (exprTemp.getOperator() != ExpressionTemporal.R_C || exprTemp.hasBounds()) {
throw new PrismException("Multi-strategy synthesis only supports R[C] properties currently");
}

// Get info from R operator
OpRelOpBound opInfo = exprRew.getRelopBoundInfo(constantValues);
MinMax minMax = opInfo.getMinMax(model.getModelType(), false);

// Build rewards
int r = exprRew.getRewardStructIndexByIndexObject(rewardGen, constantValues);
mainLog.println("Building reward structure...");
Rewards modelRewards = constructRewards(model, r);

// Only support MDPs
if (model.getModelType() != ModelType.MDP) {
throw new PrismNotSupportedException("Multi-strategy synthesis not supported for " + model.getModelType() + "s");
}

ModelCheckerResult res = ((MDPModelChecker) this).computeMultiStrategy((MDP) model, (MDPRewards) modelRewards, opInfo.getBound());

result.setStrategy(res.strat);
return StateValues.createFromDoubleArrayResult(res, model);
}

/**
* Model check a P operator expression and return the values for the statesOfInterest.
* @param statesOfInterest the states of interest, see checkExpression()