A SAT characterization of boolean-program correctness K. Rustan M. Leino Microsoft Research, Redmond, WA 14 Nov 2002 IFIP WG 2.4 meeting, Schloß Dagstuhl, Germany
Motivation This program has performed an illegal operation. If the problem persists, please contact the vendor.
APIERR DevGetStatus(LPSTATUS status) { unsigned int t; struct _Info info; for (i = 0; i result = t | 0x2055; return APIERR_Success; } APIERR W_GetStatus(struct _Info * pinfo) { APIERR err; bool fChanged = TRUE; do { err = DevRegisterColumn(pinfo, TRUE); if (err != APIERR_Success) { return err; } if (pinfo->huwMagi < 10) { fChanged = FALSE; } else { err = DevReleaseColumn(pinfo); } } while (fChanged); return DevReleaseColumn(pinfo); } APIERR DevGetStatus(LPSTATUS status) { unsigned int t; struct _Info info; for (i = 0; i result = t | 0x2055; return APIERR_Success; } APIERR W_GetStatus(struct _Info * pinfo) { APIERR err; bool fChanged = TRUE; do { err = DevRegisterColumn(pinfo, TRUE); if (err != APIERR_Success) { return err; } if (pinfo->huwMagi < 10) { fChanged = FALSE; } else { err = DevReleaseColumn(pinfo); } } while (fChanged); return DevReleaseColumn(pinfo); } The SLAM toolkit Tom Ball, Sriram Rajamani, et al., Microsoft Research Device driver (C program) Abstraction (boolean program) y := false; x := true; x := x y; assert x; Properties of interest: x resource is locked y t > 0 Predicate abstraction
APIERR DevGetStatus(LPSTATUS status) { unsigned int t; struct _Info info; for (i = 0; i result = t | 0x2055; return APIERR_Success; } APIERR W_GetStatus(struct _Info * pinfo) { APIERR err; bool fChanged = TRUE; do { err = DevRegisterColumn(pinfo, TRUE); if (err != APIERR_Success) { return err; } if (pinfo->huwMagi < 10) { fChanged = FALSE; } else { err = DevReleaseColumn(pinfo); } } while (fChanged); return DevReleaseColumn(pinfo); } APIERR DevGetStatus(LPSTATUS status) { unsigned int t; struct _Info info; for (i = 0; i result = t | 0x2055; return APIERR_Success; } APIERR W_GetStatus(struct _Info * pinfo) { APIERR err; bool fChanged = TRUE; do { err = DevRegisterColumn(pinfo, TRUE); if (err != APIERR_Success) { return err; } if (pinfo->huwMagi < 10) { fChanged = FALSE; } else { err = DevReleaseColumn(pinfo); } } while (fChanged); return DevReleaseColumn(pinfo); } The SLAM toolkit Tom Ball, Sriram Rajamani, et al., Microsoft Research Device driver (C program) Abstraction (boolean program) y := false; x := true; x := x y; assert x; assert x
APIERR DevGetStatus(LPSTATUS status) { unsigned int t; struct _Info info; for (i = 0; i result = t | 0x2055; return APIERR_Success; } APIERR W_GetStatus(struct _Info * pinfo) { APIERR err; bool fChanged = TRUE; do { err = DevRegisterColumn(pinfo, TRUE); if (err != APIERR_Success) { return err; } if (pinfo->huwMagi < 10) { fChanged = FALSE; } else { err = DevReleaseColumn(pinfo); } } while (fChanged); return DevReleaseColumn(pinfo); } APIERR DevGetStatus(LPSTATUS status) { unsigned int t; struct _Info info; for (i = 0; i result = t | 0x2055; return APIERR_Success; } APIERR W_GetStatus(struct _Info * pinfo) { APIERR err; bool fChanged = TRUE; do { err = DevRegisterColumn(pinfo, TRUE); if (err != APIERR_Success) { return err; } if (pinfo->huwMagi < 10) { fChanged = FALSE; } else { err = DevReleaseColumn(pinfo); } } while (fChanged); return DevReleaseColumn(pinfo); } The SLAM toolkit Tom Ball, Sriram Rajamani, et al., Microsoft Research Device driver (C program) Abstraction (boolean program) y := false; x := true; x := x y; assert x; assert x real error
APIERR DevGetStatus(LPSTATUS status) { unsigned int t; struct _Info info; for (i = 0; i result = t | 0x2055; return APIERR_Success; } APIERR W_GetStatus(struct _Info * pinfo) { APIERR err; bool fChanged = TRUE; do { err = DevRegisterColumn(pinfo, TRUE); if (err != APIERR_Success) { return err; } if (pinfo->huwMagi < 10) { fChanged = FALSE; } else { err = DevReleaseColumn(pinfo); } } while (fChanged); return DevReleaseColumn(pinfo); } APIERR DevGetStatus(LPSTATUS status) { unsigned int t; struct _Info info; for (i = 0; i result = t | 0x2055; return APIERR_Success; } APIERR W_GetStatus(struct _Info * pinfo) { APIERR err; bool fChanged = TRUE; do { err = DevRegisterColumn(pinfo, TRUE); if (err != APIERR_Success) { return err; } if (pinfo->huwMagi < 10) { fChanged = FALSE; } else { err = DevReleaseColumn(pinfo); } } while (fChanged); return DevReleaseColumn(pinfo); } The SLAM toolkit Tom Ball, Sriram Rajamani, et al., Microsoft Research Device driver (C program) Abstraction (boolean program) y := false; x := true; x := x y; assert x; infeasible path
APIERR DevGetStatus(LPSTATUS status) { unsigned int t; struct _Info info; for (i = 0; i result = t | 0x2055; return APIERR_Success; } APIERR W_GetStatus(struct _Info * pinfo) { APIERR err; bool fChanged = TRUE; do { err = DevRegisterColumn(pinfo, TRUE); if (err != APIERR_Success) { return err; } if (pinfo->huwMagi < 10) { fChanged = FALSE; } else { err = DevReleaseColumn(pinfo); } } while (fChanged); return DevReleaseColumn(pinfo); } APIERR DevGetStatus(LPSTATUS status) { unsigned int t; struct _Info info; for (i = 0; i result = t | 0x2055; return APIERR_Success; } APIERR W_GetStatus(struct _Info * pinfo) { APIERR err; bool fChanged = TRUE; do { err = DevRegisterColumn(pinfo, TRUE); if (err != APIERR_Success) { return err; } if (pinfo->huwMagi < 10) { fChanged = FALSE; } else { err = DevReleaseColumn(pinfo); } } while (fChanged); return DevReleaseColumn(pinfo); } The SLAM toolkit Tom Ball, Sriram Rajamani, et al., Microsoft Research Device driver (C program) Abstraction (boolean program) y := false; x := true; x := x y; assert x; Predicate abstraction Properties of interest: x resource is locked y t > 0 z p NULL z := true; if (z) … Predicate abstraction
Boolean programs Prog ::= var Id* ; Block* Prog ::= var Id* ; Block* Block ::= LabelId : Stmt* goto LabelId* Block ::= LabelId : Stmt* goto LabelId* Stmt ::= Id := Expr | assert Expr | assume Expr Stmt ::= Id := Expr | assert Expr | assume Expr Expr ::= false | true | Id | $\lnot$Expr | Expr $\lor$ Expr | Expr $\land$ Expr Expr ::= false | true | Id | $\lnot$Expr | Expr $\lor$ Expr | Expr $\land$ Expr
Example var x, y; A: x := true; goto B B: assert x; x := $x \land y$; goto B or C C: var x, y; A: x := true; goto B B: assert x; x := $x \land y$; goto B or C C: A: x := true B: assert x; x := $x \land y$ C:
Semantics: Weakest preconditions For any statement S and postcondition Q, wp(S,Q) characterizes those pre-states from which execution is guaranteed: For any statement S and postcondition Q, wp(S,Q) characterizes those pre-states from which execution is guaranteed: not to go wrong, and not to go wrong, and either the execution doesn't terminate or it terminates in a state satisfying Q either the execution doesn't terminate or it terminates in a state satisfying Q S Q wp(S,Q)
Semantics: Weakest preconditions $wp(x := E,\ Q) = Q[x := E]$ $wp(\text{assert } E,\ Q) = E \land Q$ $wp(\text{assume } E,\ Q) = E \Rightarrow Q$ $wp(x := E,\ Q) = Q[x := E]$ $wp(\text{assert } E,\ Q) = E \land Q$ $wp(\text{assume } E,\ Q) = E \Rightarrow Q$ $wp(\text{skip},\ Q) = Q$ $wp(S;T,\ Q) = wp(S,\ wp(T,Q))$ $wp(\text{skip},\ Q) = Q$ $wp(S;T,\ Q) = wp(S,\ wp(T,Q))$ $wp(\text{goto } labels,\ Q) = (\bigwedge L \in labels :: wp(L,Q))$ $wp(\text{goto } labels,\ Q) = (\bigwedge L \in labels :: wp(L,Q))$
For any block: var w; … L: S; goto labels … introduce a boolean function L, such that: For any block: var w; … L: S; goto labels … introduce a boolean function L, such that: What I write: $L(w) = wp(S,\ (\bigwedge G \in labels :: G(w)))$ Semantics of blocks $(\forall w :: L(w) = wp(S,\ (\bigwedge G \in labels :: G(w))))$ $L = (\lambda w :: wp(S,\ (\bigwedge G \in labels :: G(w))))$ What I really mean: or equivalently:
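Example $A(x,y) = wp(x := \text{true},\ B(x,y))$ $B(x,y) = wp(\text{assert } x;\ x := x \land y,\ B(x,y) \land C(x,y))$ $C(x,y) = wp(\text{skip},\ \text{true})$ $A(x,y) = wp(x := \text{true},\ B(x,y))$ $B(x,y) = wp(\text{assert } x;\ x := x \land y,\ B(x,y) \land C(x,y))$ $C(x,y) = wp(\text{skip},\ \text{true})$ A: x := true B: assert x; x := $x \land y$ C: $A(x,y) = B(\text{true}, y)$ $B(x,y) = x \land B(x \land y, y) \land C(x \land y, y)$ $C(x,y) = \text{true}$ $A(x,y) = B(\text{true}, y)$ $B(x,y) = x \land B(x \land y, y) \land C(x \land y, y)$ $C(x,y) = \text{true}$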
Equations with multiple solutions The unknowns Solution 0: $A(x,y) = \text{false}$ $B(x,y) = \text{false}$ $C(x,y) = \text{true}$ Solution 1: $A(x,y) = y$ $B(x,y) = x \land y$ $C(x,y) = \text{true}$ We want the weakest solution A, B, C : $A(x,y) = B(\text{true}, y)$ $B(x,y) = x \land B(x \land y, y) \land C(x \land y, y)$ $C(x,y) = \text{true}$
Weakest solution A,B,C : $A(x,y) = B(\text{true}, y)$ $B(x,y) = x \land B(x \land y, y) \land C(x \land y, y)$ $C(x,y) = \text{true}$
Weakest solution F(A,B,C) G(A,B,C) H(A,B,C) A,B,C : $A = (\lambda x,y :: B(\text{true}, y))$ $B = (\lambda x,y :: x \land B(x \land y, y) \land C(x \land y, y))$ $C = (\lambda x,y :: \text{true})$
F(A,B,C) G(A,B,C) H(A,B,C) A,B,C : A = F(A,B,C) B = G(A,B,C) C = H(A,B,C) where $F = (\lambda x,y :: B(\text{true}, y))$ $G = (\lambda x,y :: x \land B(x \land y, y) \land C(x \land y, y))$ $H = (\lambda x,y :: \text{true})$ $(\lambda x,y :: x \land B(x \land y, y) \land C(x \land y, y))$ $(\lambda x,y :: \text{true})$ Weakest solution/fixpoint F G H Weakest solution of A,B,C $(\lambda x,y :: B(\text{true}, y))$ Weakest fixpoint of F,G,H
Program correctness A program with variables w and start block A is correct iff: $(\forall w :: A(w))$ A program with variables w and start block A is correct iff: $(\forall w :: A(w))$ That is, the program has an error iff: $\lnot A(w)$ is satisfiable. That is, the program has an error iff: $\lnot A(w)$ is satisfiable. boolean equations, satisfiability functions, weakest solutions SAT
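Equations over a closed set of terms Using the definitions: $A(x,y) = B(\text{true}, y)$ $B(x,y) = x \land B(x \land y, y) \land C(x \land y, y)$ $C(x,y) = \text{true}$ Using the definitions: $A(x,y) = B(\text{true}, y)$ $B(x,y) = x \land B(x \land y, y) \land C(x \land y, y)$ $C(x,y) = \text{true}$ We produce: We produce: $A(x,y) = B(\text{true}, y)$ $A(x,y) = B(\text{true}, y)$ $B(\text{true}, y) = \text{true} \land B(\text{true} \land y, y) \land C(\text{true} \land y, y)$ $B(\text{true}, y) = \text{true} \land B(\text{true} \land y, y) \land C(\text{true} \land y, y)$ $B(\text{true} \land y, y) = \text{true} \land y \land B(\text{true} \land y \land y, y) \land C(\text{true} \land y \land y, y)$ $B(\text{true} \land y, y) = \text{true} \land y \land B(\text{true} \land y \land y, y) \land C(\text{true} \land y \land y, y)$ $C(\text{true} \land y, y) = \text{true}$ $C(\text{true} \land y, y) = \text{true}$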
Point functions A function: f(w) = f(false) f(w) can be expressed as two point functions : f false = f false f false f true = f false f true A function: f(w) = f(false) f(w) can be expressed as two point functions : f false = f false f false f true = f false f true
Point-function equations A set of equations: f :f(w) = f(false) f(w) can be expressed as: f false, f true :f false = f false f false f true = f false f true A set of equations: f :f(w) = f(false) f(w) can be expressed as: f false, f true :f false = f false f false f true = f false f true
A fixpoint theorem Given a function F on a complete lattice, if F is continuous, then its weakest fixpoint exists and is given by: $F^k(\top)$ for some natural number k. Given a function F on a complete lattice, if F is continuous, then its weakest fixpoint exists and is given by: $F^k(\top)$ for some natural number k. k is the fixpoint depth of F k is the fixpoint depth of F fixpoint depth $\le$ lattice height fixpoint depth $\le$ lattice height $\top$ $F^k(\top)$ lattice height
Computing fixpoints: outward $\top$ { apply F } $F(\top)$ $F(\top)$ { apply F } $F(F(\top))$ $F(F(\top))$ { apply F } $F(F(F(\top)))$ $F(F(F(\top)))$ { apply F } $F(F(F(F(\top))))$ $F(F(F(F(\top))))$ equal to each other weakest fixpoint of F Suppose fixpoint depth of F is 3 produced in previous step
Computing fixpoints: inward { replace with F( ) } F( ) F( ) { replace with F( ) } F(F( )) F(F( )) { replace with F( ) } F(F(F( ))) F(F(F( ))) { replace with T } F(F(F(T))) F(F(F(T))) weakest fixpoint of F no need for further applications of F Suppose fixpoint depth of F is 3 produced in previous step
Multiple unknowns a,b : a = F(a,b) b = G(a,b) a,b : a = F(a,b) b = G(a,b) Suppose fixpoint depths of F,G are 2,1 Suppose fixpoint depths of F,G are 2,1 Weakest solution for a is: Weakest solution for a is: $a_{00}$ $a_{00}$ $F(a_{10}, b_{10})$ $F(a_{10}, b_{10})$ $F(F(a_{20}, b_{20}), G(a_{11}, b_{11}))$ $F(F(a_{20}, b_{20}), G(a_{11}, b_{11}))$ $F(F(\top, G(a_{21}, b_{21})), G(F(a_{21}, b_{21}), \top))$ $F(F(\top, G(a_{21}, b_{21})), G(F(a_{21}, b_{21}), \top))$ $F(F(\top, G(\top, \top)), G(F(\top, \top), \top))$ $F(F(\top, G(\top, \top)), G(F(\top, \top), \top))$ Number of enclosing applications of G Number of enclosing applications of F
Special instance Lattice of booleans has height 1 Lattice of booleans has height 1 If F returns a boolean, then F's fixpoint depth is at most 1 If F returns a boolean, then F's fixpoint depth is at most 1 and so F's weakest fixpoint is $F(\top)$ and so F's weakest fixpoint is $F(\top)$
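Back to our problem Using the definitions: $A(x,y) = B(\text{true}, y)$ $B(x,y) = x \land B(x \land y, y) \land C(x \land y, y)$ $C(x,y) = \text{true}$ Using the definitions: $A(x,y) = B(\text{true}, y)$ $B(x,y) = x \land B(x \land y, y) \land C(x \land y, y)$ $C(x,y) = \text{true}$ We produce: We produce: $A_{x,y} = B_{\text{true},y}$ $A_{x,y} = B_{\text{true},y}$ $B_{\text{true},y} = \text{true} \land B_{\text{true} \land y,\,y} \land C_{\text{true} \land y,\,y}$ $B_{\text{true},y} = \text{true} \land B_{\text{true} \land y,\,y} \land C_{\text{true} \land y,\,y}$ $B_{y,y} = y \land B_{y \land y,\,y} \land C_{y \land y,\,y}$ $B_{y,y} = y \land B_{y \land y,\,y} \land C_{y \land y,\,y}$ true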
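Using the definitions: $A(x,y) = B(\text{true}, y)$ $B(x,y) = x \land B(x \land y, y) \land C(x \land y, y)$ $C(x,y) = \text{true}$ Using the definitions: $A(x,y) = B(\text{true}, y)$ $B(x,y) = x \land B(x \land y, y) \land C(x \land y, y)$ $C(x,y) = \text{true}$ We produce: We produce: $A_{x,y} = B_{\text{true},y}$ $A_{x,y} = B_{\text{true},y}$ $B_{\text{true},y} = \text{true} \land B_{\text{true} \land y,\,y} \land C_{\text{true} \land y,\,y}$ $B_{\text{true},y} = \text{true} \land B_{\text{true} \land y,\,y} \land C_{\text{true} \land y,\,y}$ $B_{y,y} = y \land \text{true} \land C_{y \land y,\,y}$ $B_{y,y} = y \land \text{true} \land C_{y \land y,\,y}$ $C_{y,y} = \text{true}$ $C_{y,y} = \text{true}$ Back to our problem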
Leibniz constraints Being a function means, for any f: $(\forall w, w' :: (w = w') \Rightarrow (f(w) = f(w')))$ Leibniz's rule Being a function means, for any f: $(\forall w, w' :: (w = w') \Rightarrow (f(w) = f(w')))$ Leibniz's rule So when we have: $B_{\text{true},y} = \ldots$ $B_{y,y} = \ldots$ we also generate the following Leibniz constraint: $(\text{true} = y) \land (y = y) \Rightarrow (B_{\text{true},y} = B_{y,y})$ So when we have: $B_{\text{true},y} = \ldots$ $B_{y,y} = \ldots$ we also generate the following Leibniz constraint: $(\text{true} = y) \land (y = y) \Rightarrow (B_{\text{true},y} = B_{y,y})$
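SAT formula From the closed set of terms: $A_{x,y} = B_{\text{true},y}$ $B_{\text{true},y} = \text{true} \land B_{\text{true} \land y,\,y} \land C_{\text{true} \land y,\,y}$ $B_{y,y} = y \land \text{true} \land C_{y \land y,\,y}$ $C_{y,y} = \text{true}$ From the closed set of terms: $A_{x,y} = B_{\text{true},y}$ $B_{\text{true},y} = \text{true} \land B_{\text{true} \land y,\,y} \land C_{\text{true} \land y,\,y}$ $B_{y,y} = y \land \text{true} \land C_{y \land y,\,y}$ $C_{y,y} = \text{true}$ we produce the following SAT equations: $\lnot a$ $a = b$ $b = \text{true} \land b' \land c$ $b' = y \land \text{true} \land c$ $c = \text{true}$ $y \Rightarrow (b = b')$ Leibniz constraint negated start symbol: A(x,y) $a$ $b$ $b'$ $c$ $a$ $b$ $b'$ $c$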
Summary Boolean program, whose semantics is defined by weakest solution of weakest-precondition equations Boolean program, whose semantics is defined by weakest solution of weakest-precondition equations Translate to SAT problem: Translate to SAT problem: Instantiate equations to get a closed set of terms Instantiate equations to get a closed set of terms Replace nested recursive instantiations by true Replace nested recursive instantiations by true Conjoin negated start symbol and Leibniz constraints Conjoin negated start symbol and Leibniz constraints Write point functions as propositional variables Write point functions as propositional variables Check for satisfiability Check for satisfiability Performance? Heuristics? Performance? Heuristics? Are Leibniz constraints really needed? Are Leibniz constraints really needed? Better encoding of procedures? Better encoding of procedures?
Complexity With N blocks and K variables: With N blocks and K variables: each boolean function has K arguments, each a boolean expression each boolean function has K arguments, each a boolean expression there are $2^{2^K}$ different boolean expressions there are $2^{2^K}$ different boolean expressions So, there are $N \cdot 2^{K \cdot 2^K}$ different terms So, there are $N \cdot 2^{K \cdot 2^K}$ different terms Suppose each of the $2^K$ initial states were considered individually: Suppose each of the $2^K$ initial states were considered individually: each boolean-function argument can then be folded into one of the 2 boolean constants each boolean-function argument can then be folded into one of the 2 boolean constants Then, there are only $2^K \cdot N \cdot 2^K$ different terms Then, there are only $2^K \cdot N \cdot 2^K$ different terms Explicit-state checking: Symbolic checking:
Symbolic vs. explicit-state checking The following equality (and others?) can be exploited heuristically to try to get a good balance: The following equality (and others?) can be exploited heuristically to try to get a good balance: $f(P, Q, R) = (P \land f(\text{true}, Q, R)) \lor (\lnot P \land f(\text{false}, Q, R))$
Summary Boolean program, whose semantics is defined by weakest solution of weakest-precondition equations Boolean program, whose semantics is defined by weakest solution of weakest-precondition equations Translate to SAT problem: Translate to SAT problem: Instantiate equations to get a closed set of terms Instantiate equations to get a closed set of terms Replace nested recursive instantiations by true Replace nested recursive instantiations by true Conjoin negated start symbol and Leibniz constraints Conjoin negated start symbol and Leibniz constraints Write point functions as propositional variables Write point functions as propositional variables Check for satisfiability Check for satisfiability Performance heuristics: symbolic vs. explicit-state Performance heuristics: symbolic vs. explicit-state Other heuristics? Other heuristics? Are Leibniz constraints really needed? Are Leibniz constraints really needed? Better encoding of procedures? Better encoding of procedures?