Presentation is loading. Please wait.

Presentation is loading. Please wait.

Program Analysis and Verification

Similar presentations


Presentation on theme: "Program Analysis and Verification"— Presentation transcript:

1 Program Analysis and Verification 0368-4479
Noam Rinetzky Lecture 10: Shape Analysis Slides credit: Roman Manevich, Mooly Sagiv, Eran Yahav

2 Shape Analysis Automatically verify properties of programs manipulating dynamically allocated storage Identify all possible shapes (layout) of the heap

3 Analyzing Singly Linked Lists

4 Limitations of pointer analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } // Singly-linked list // data type. class SLL { int data; public SLL n; // next cell SLL(Object data) { this.data = data; this.n = null; } }

5 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; }

6 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null t

7 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null t n L1

8 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null t n L1 tmp

9 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null t n L1 n tmp L2

10 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null t n L1 n tmp L2

11 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null t n L1 n tmp L2

12 Flow&Field-sensitive Analysis
h // Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } null t n L1 n tmp != null tmp L2 h null t n L1 tmp == null tmp

13 Flow&Field-sensitive Analysis
h // Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } null t n L1 n tmp L2 h null t n L1 tmp

14 Flow&Field-sensitive Analysis
h // Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } null t n L1 n tmp L2 tmp == null ?

15 Flow&Field-sensitive Analysis
h // Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } null t n L1 n tmp L2

16 Flow&Field-sensitive Analysis
h // Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } null t n L1 n tmp L2 Possible null dereference!

17 Flow&Field-sensitive Analysis
h // Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } null t n L1 n tmp L2 Fixed-point for first loop

18 Flow&Field-sensitive Analysis
h // Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } null t n L1 n tmp L2

19 Flow&Field-sensitive Analysis
h // Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } null t n L1 n tmp L2

20 Flow&Field-sensitive Analysis
h // Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } null t n L1 n tmp L2 Possible null dereference!

21 What was the problem? Pointer analysis abstract all objects allocated at same program location into one summary object. However, objects allocated at same memory location may behave very differently E.g., object is first/last one in the list Assign extra predicates to distinguish between objects with different roles Number of objects represented by summary object 1 – does not allow strong updates Distinguish between concrete objects (#=1) and abstract objects (#1) Join operator very coarse – abstracts away important distinctions (tmp=null/tmp!=null) Apply disjunctive completion

22 Improved solution Pointer analysis abstract all objects allocated at same program location into one summary object. However, objects allocated at same memory location may behave very differently E.g., object is first/last one in the list Add extra instrumentation predicates to distinguish between objects with different roles Number of objects represented by summary object 1 – does not allow strong updates Distinguish between concrete objects (#=1) and abstract objects (#1) Join operator very coarse – abstracts away important distinctions (tmp=null/tmp!=null) Apply disjunctive completion

23 Adding properties to objects
Let’s first drop allocation site information and instead… Define a unary predicate x(v) for each pointer variable x meaning x points to x Predicate holds for at most one node Merge together nodes with same sets of predicates

24 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null t

25 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null t n {h, t}

26 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null t n {h, t} tmp

27 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null t n {h, t} tmp n {tmp} No null dereference

28 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null t n {h, t} tmp n {tmp}

29 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null t n {t} tmp n {h, tmp}

30 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null n t {t} n tmp {h, tmp} h null t n {h, t} tmp

31 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null n t {t} n tmp {h, tmp} h null t n {h, t} tmp Do we need to analyze this shape graph again?

32 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null n t {t} n tmp {h, tmp} h null t n {h, t} tmp

33 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null n t {t} n tmp { h} n { tmp} No null dereference

34 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null n t {t} n tmp { h} n { tmp}

35 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null n t {t} n tmp { } n { h, tmp}

36 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null n t {t} n tmp { } n { h, tmp}

37 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null n t {t} n tmp { } n { h} n { tmp} No null dereference

38 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null n t {t} n tmp { } n { h} n { tmp}

39 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null n t {t} n tmp { } n { } n {h, tmp}

40 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null n t {t} n tmp { } n n {h, tmp} How many objects does it represent? Summarization

41 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null n t {t} n tmp { } n n {h} n { tmp} No null dereference

42 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null n t {t} n tmp { } n n {h} n { tmp}

43 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null n t {t} n tmp { } n n {h, tmp} Fixed-point for first loop

44 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null n t {t} n tmp { } n n {h, tmp}

45 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null n t {t} n tmp { } n n {h, tmp}

46 Flow&Field-sensitive Analysis
// Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null n t {t} n tmp { tmp?} n n { tmp}

47 Best (induced) transformer
f#(a)= (f((a))) C A 4 f f# 3 1 2 Problem:  incomputable directly

48 Best transformer for tmp=tmp.n
null h t {t} tmp n { } {h, tmp} null h n {t} t n { } tmp n n {h, tmp} null h t {t} tmp n { } {h, tmp}

49 Best transformer for tmp=tmp.n: 
null h t {t} tmp n { } {h, tmp} null h n {t} t n { } tmp n n {h, tmp} null h t {t} tmp n { } {h, tmp} h null n t {t} n { } { } { } n tmp {h, tmp}

50 Best transformer for tmp=tmp.n
h null null n h t {t} n {t} n t {tmp } n n { } tmp {h} tmp n n {h, tmp} h h null null n n t {t} t {t} n { } n { } { } { } tmp {tmp } {tmp } tmp n n {h} {h}

51 Best transformer for tmp=tmp.n: 
h null null n h t {t} n {t} n t {tmp } n n { } tmp {h} tmp n n {h, tmp} h h null null n n t {t} t {t} n n { } { } tmp {tmp } {tmp } tmp n n {h} {h}

52 Handling updates on summary nodes
Transformers accessing only concrete nodes are easy Transformers accessing summary nodes are complicated Can’t concretize summary nodes – represents potentially unbounded number of concrete nodes We need to split into cases by “materializing” concrete nodes from summary node Introduce a new temporary predicate tmp.n Partial concretization

53 Transformer for tmp=tmp.n: ’
null h n {t} ’ t n { } tmp n n {h, tmp} Case 1: Exactly 1 object. Case 2: >1 objects.

54 Transformer for tmp=tmp.n: ’
null h t {t} tmp n {tmp.n } {h, tmp} null h n {t} ’ t n { } tmp n n {h, tmp} Summary node h null n t {t} spaghetti n { } tmp.n is a concrete node {tmp.n } n tmp {h, tmp}

55 Transformer tmp=tmp.n
null h t {t} tmp n {tmp, tmp.n } {h} null h n {t} t n { } tmp n n {h, tmp} h null n t {t} n { } tmp {tmp, tmp.n } n {h}

56 Transformer for tmp=tmp.n: 
null h t {t} tmp n {tmp} {h} // Build a list SLL h=null, t = null; L1: h=t= new SLL(-1); SLL tmp = null; while (…) { int data = getData(…); L2: tmp = new SLL(data); tmp.n = h; h = tmp; } // Process elements tmp = h; while (tmp != t) { assert tmp != null; tmp.data += 1; tmp = tmp.n; } h null n t {t} n { } tmp {tmp} n {h}

57 Transformer via partial-concretization
f#(a)= ’(f#’(’(a))) C A’ A ’ 6 4 f f#’ f# 3 5 1 2 ’

58 Recap Adding more properties to nodes refines abstraction
Can add temporary properties for partial concretization Materialize concrete nodes from summary nodes Allows turning weak updates into strong ones Focus operation in shape-analysis lingo Not trivial in general and requires more semantic reduction to clean up impossible edges General algorithms available via 3-valued logic and implemented in TVLA system

59 3-Value logic based shape analysis

60 Sequential Stack Want to Verify No Null Dereference
void push (int v) { Node x = malloc(sizeof(Node)); x->d = v; x->n = Top; Top = x; } int pop() { if (Top == NULL) return EMPTY; Node *s = Top->n; int r = Top->d; Top = s; return r; } Want to Verify No Null Dereference Underlying list remains acyclic after each operation

61 Shape Analysis via 3-valued Logic
1) Abstraction 3-valued logical structure canonical abstraction 2) Transformers via logical formulae soundness by construction embedding theorem, [SRW02]

62 Concrete State represent a concrete state as a two-valued logical structure Individuals = heap allocated objects Unary predicates = object properties Binary predicates = relations parametric vocabulary n n Top u1 u2 u3 (storeless, no heap addresses)

63 Concrete State S = <U,  > over a vocabulary P U – universe
 - interpretation, mapping each predicate from p to its truth value in S Top n u1 u2 u3 U = { u1, u2, u3} P = { Top, n } (n)(u1,u2) = 1, (n)(u1,u3)=0, (n)(u2,u1)=0,… (Top)(u1)=1, (Top)(u2)=0, (Top)(u3)=0

64 Formulae for Observing Properties
void push (int v) { Node x = malloc(sizeof(Node)); xd = v; xn = Top; Top = x; } Top u1 u2 u3 Top != null w: x(w) w:Top(w) 1 w: x(w) No node precedes Top v1,v2: n(v1, v2) Top(v2) 1 No Cycles 1 v1,v2: n(v1, v2)  n*(v2, v1) v1,v2: n(v1, v2)  n*(v2, v1) v1,v2: n(v1, v2) Top(v2)

65 Concrete Interpretation Rules
Statement Update formula x =NULL x’(v)= 0 x= malloc() x’(v) = IsNew(v) x=y x’(v)= y(v) x=y next x’(v)= w: y(w)  n(w, v) x next=y n’(v, w) = (x(v) n(v, w))  (x(v)  y(w))

66 Example: s = Topn Top Top s u1 u2 u3 u1 u2 u3
s’(v) = v1: Top(v1)  n(v1,v) u1 u2 u3 Top u1 1 u2 u3 n u1 u2 U3 1 u3 Top u1 1 u2 u3 n u1 u2 U3 1 u3 s u1 u2 u3 s u1 u2 1 u3

67 Collecting Semantics  { st(w)(S) | S  CSS[w] } 
if v = entry { <,> }  { st(w)(S) | S  CSS[w] }  (w,v)  E(G), w  Assignments(G)  { S | S  CSS[w] }  (w,v)  E(G), w  Skip(G) CSS [v] =  { S | S  CSS[w] and S  cond(w)}  othrewise (w,v)  True-Branches(G)  { S | S  CSS[w] and S  cond(w)} (w,v)  False-Branches(G)

68 Collecting Semantics At every program point – a potentially infinite set of two-valued logical structures Representing (at least) all possible heaps that can arise at the program point Next step: find a bounded abstract representation

69 3-Valued Logic 1 = true 0 = false 1/2 = unknown
A join semi-lattice, 0  1 = 1/2 1/2 information order 1 logical order

70 3-Valued Logical Structures
A set of individuals (nodes) U Relation meaning Interpretation of relation symbols in P p0()  {0,1, 1/2} p1(v)  {0,1, 1/2} p2(u,v)  {0,1, 1/2} A join semi-lattice: 0  1 = 1/2

71 Boolean Connectives [Kleene]

72 Property Space 3-struct[P] = the set of 3-valued logical structures over a vocabulary (set of predicates) P Abstract domain (3-Struct[P])  is 

73 Embedding Order Given two structures S = <U, >, S’ = <U’, ’> and an onto function f : U  U’ mapping individuals in U to individuals in U’ We say that f embeds S in S’ (denoted by S  S’) if for every predicate symbol p  P of arity k: u1, …, uk  U, (p)(u1, ..., uk)  ’(p)(f(u1), ..., f (uk)) and for all u’  U’ ( | { u | f (u) = u’ } | > 1)  ’(sm)(u’) We say that S can be embedded in S’ (denoted by S  S’) if there exists a function f such that S f S’

74 Tight Embedding S’ = <U’, ’> is a tight embedding of S=< U,  > with respect to a function f if: S’ does not lose unnecessary information ’(u’1,…, u’k) = {(u1 ..., uk) | f(u1)=u’1,..., f(uk)=u’k} One way to get tight embedding is canonical abstraction

75 Canonical Abstraction
Top u1 u1 u2 u2 u3 u3 [Sagiv, Reps, Wilhelm, TOPLAS02]

76 Canonical Abstraction
Top u1 u1 u2 u2 u3 u3 Top [Sagiv, Reps, Wilhelm, TOPLAS02]

77 Canonical Abstraction
Top u1 u1 u2 u2 u3 u3 Top

78 Canonical Abstraction
Top u1 u1 u2 u2 u3 u3 Top

79 Canonical Abstraction
Top u1 u1 u2 u2 u3 u3 Top

80 Canonical Abstraction
Top u1 u1 u2 u2 u3 u3 Top

81 Canonical Abstraction ()
Merge all nodes with the same unary predicate values into a single summary node Join predicate values  ’(u’1 ,..., u’k) =  { (u1 ,..., uk) | f(u1)=u’1 ,..., f(uk)=u’k } Converts a state of arbitrary size into a 3-valued abstract state of bounded size (C) =  { (c) | c  C }

82 Information Loss Top n Canonical abstraction Top Top n Top Top Top n

83 Instrumentation Predicates
Record additional derived information via predicates rx(v) = v1: x(v1)  n*(v1,v) c(v) = v1: n(v1, v)  n*(v, v1) Canonical abstraction Top n Top c Top n c Top

84 Embedding Theorem: Conservatively Observing Properties
Top rTop rTop No Cycles No cycles (derived) 1/2 1 v1,v2: n(v1, v2)  n*(v2, v1) v:c(v)

85 Operational Semantics
void push (int v) { Node x = malloc(sizeof(Node)); x->d = v; x->n = Top; Top = x; } Top n u1 u2 u3 int pop() { if (Top == NULL) return EMPTY; Node *s = Top->n; int r = Top->d; Top = s; return r; } s’(v) = v1: Top(v1)  n(v1,v) s = Top->n Top n u1 u2 u3 s

86 ? Abstract Semantics s = Topn s’(v) = v1: Top(v1)  n(v1,v) Top Top
rTop s Top rTop rTop s’(v) = v1: Top(v1)  n(v1,v) s = Top->n

87 Best Transformer (s = Topn)
rTop Top rTop Concrete Semantics Top s rTop Top rTop s’(v) = v1: Top(v1)  n(v1,v) . . Canonical Abstraction ? Top s rTop Abstract Semantics Top s rTop Top rTop rTop

88 Semantic Reduction Improve the precision of the analysis by recovering properties of the program semantics A Galois connection (C, , , A) An operation op:AA is a semantic reduction when lL2 op(l)l and (op(l)) = (l) l op C A

89 The Focus Operation Focus: Formula((3-Struct)  (3-Struct))
Generalizes materialization For every formula  Focus()(X) yields structure in which  evaluates to a definite values in all assignments Only maximal in terms of embedding Focus() is a semantic reduction But Focus()(X) may be undefined for some X

90 Partial Concretization Based on Transformer (s=Topn)
rTop Top rTop Abstract Semantics Top rTop Top s rTop s’(v) = v1: Top(v1)  n(v1,v) Canonical Abstraction Focus (Topn) Partial Concretization u: top(u) n(u, v) Top s rTop Abstract Semantics Top rTop Top rTop s

91 Partial Concretization
Locally refine the abstract domain per statement Soundness is immediate Employed in other shape analysis algorithms [Distefano et.al., TACAS’06, Evan et.al., SAS’07, POPL’08]

92 The Coercion Principle
Another Semantic Reduction Can be applied after Focus or after Update or both Increase precision by exploiting some structural properties possessed by all stores (Global invariants) Structural properties captured by constraints Apply a constraint solver

93 Apply Constraint Solver
Top rTop Top rTop x Top rTop x x rx rx,ry n y x rx rx,ry n y

94 Sources of Constraints
Properties of the operational semantics Domain specific knowledge Instrumentation predicates User supplied

95 Example Constraints x(v1) x(v2)eq(v1, v2)
n(v, v1) n(v,v2)eq(v1, v2) n(v1, v) n(v2,v)eq(v1, v2)is(v) n*(v1, v2)t[n](v1, v2)

96 Abstract Transformers: Summary
Kleene evaluation yields sound solution Focus is a statement-specific partial concretization Coerce applies global constraints

97 Abstract Semantics  { t_embed(coerce(st(w)3(focusF(w)(SS[w] )))) 
if v = entry { <,> }  { t_embed(coerce(st(w)3(focusF(w)(SS[w] ))))  (w,v)  E(G), w  Assignments(G)  { S | S  SS[w] }  othrewise (w,v)  E(G), w  Skip(G) SS [v] =  { t_embed(S) | S  coerce(st(w)3(focusF(w)(SS[w] ))) and S 3 cond(w)}  (w,v)  True-Branches(G)  { t_embed(S) | S  coerce(st(w)3(focusF(w)(SS[w] ))) and S 3 cond(w)}  (w,v)  False-Branches(G)

98 Recap Abstraction Transformers canonical abstraction
recording derived information Transformers partial concretization (focus) constraint solver (coerce) sound information extraction

99 Stack Push … v: x(v) v: x(v) v:c(v) v1,v2: n(v1, v2) Top(v2)
emp Top void push (int v) { Node x = alloc(sizeof(Node)); xd = v; xn = Top; Top = x; } x x Top v: x(v) v: x(v) x x Top x Top x Top v1,v2: n(v1, v2) Top(v2) Top v:c(v) Top

100 What about procedures?

101 A Semantics for Procedure Local Heaps and its Abstractions
Noam Rinetzky Tel Aviv University Jörg Bauer Universität des Saarlandes Thomas Reps University of Wisconsin Mooly Sagiv Tel Aviv University Reinhard Wilhelm Universität des Saarlandes

102 Motivation Interprocedural shape analysis Challenge
Conservative static pointer analysis Heap intensive programs Imperative programs with procedures Recursive data structures Challenge Destructive update Localized effect of procedures

103 Main idea Local heaps x x x y t g x call p(x); y g t

104 Main idea Local heaps Cutpoints x x x y t g x call p(x); y g t

105 Main Results Concrete operational semantics Abstractions Large step
Functional analysis Storeless Shape abstractions Local heap Observationally equivalent to “standard” semantics Java and “clean” C Abstractions Shape analysis [Sagiv, Reps, Wilhelm, TOPLAS ‘02] May-alias [Deutsch, PLDI ‘94]

106 Outline Motivating example Why semantics
Local heaps Cutpoints Why semantics Local heap storeless semantics Shape abstraction

107 Example … static void main() { } static List reverse(List t) { } n t n
q n q List x = reverse(p); n p x t r n t r n List y = reverse(q); List z = reverse(x); return r;

108 Example static void main() { } static List reverse(List t) { }
List x = reverse(p); n t n p x n p x n t n n q List y = reverse(q); n t r n t r n q y List z = reverse(x); return r;

109 Example n t n static void main() { } static List reverse(List t) { }
List x = reverse(p); n t List y = reverse(q); n p x n p x n t q y n q y n List z = reverse(x); p n x z t r n t r n return r;

110 Cutpoints Separating objects Not pointed-to by a parameter

111 Cutpoints proc(x) Separating objects Not pointed-to by a parameter
Stack sharing

112 Cutpoints proc(x) proc(x) Separating objects
Not pointed-to by a parameter proc(x) proc(x) n n n n n n n n n n x p x n n y Stack sharing Heap sharing

113 Cutpoints proc(x) proc(x) Separating objects
Not pointed-to by a parameter Capture external sharing patterns proc(x) proc(x) n n n n n n n n n n x p x n n y Stack sharing Heap sharing

114 Example static void main() { } static List reverse(List t) { }
List x = reverse(p); List y = reverse(q); n p x n t n n n p x q y n q y n List z = reverse(x); p n z x r t n r t n return r;

115 Outline Motivating example Why semantics
Local heap storeless semantics Shape abstraction

116 Abstract Interpretation [Cousot and Cousot, POPL ’77]
Operational semantics Abstract transformer

117 Introducing local heap semantics
Operational semantics ~ Local heap Operational semantics ’ ’ Abstract transformer

118 Outline Motivating example Why semantics
Local heap storeless semantics Shape abstraction

119 Programming model Single threaded Procedures Heap Value parameters
Recursion Heap Recursive data structures Destructive update No explicit addressing (&) No pointer arithmetic

120 Simplifying assumptions
No primitive values (only references) No globals Formals not modified

121 Storeless semantics No addresses Memory state: y=x Alias analysis
Object: 2Access paths Heap: 2Object Alias analysis x n x x.n x.n.n y=x x n y x.n y.n x.n.n y.n.n x=null y n y.n y.n.n

122 Example p? static void main() { } static List reverse(List t) {
return r; } List x = reverse(p); t n List y = reverse(q); t.n.n.n t.n.n t.n t t.n.n n t.n.n.n t t.n n n n p x.n.n.n p x.n.n x.n x x y.n.n q n y y.n y.n.n q n y y.n List z = reverse(x); z.n n z x z.n.n.n z.n.n z x r.n n r t r.n.n.n r.n.n r.n n r t r.n.n.n r.n.n p?

123 Example static void main() { } static List reverse(List t) { return r;
List x = reverse(p); L t n List y = reverse(q); t.n.n.n L t.n.n t.n t t.n.n n t.n.n.n L t t.n n n n p x.n.n.n p x.n.n x.n x x y.n.n q n y y.n y.n.n q n y y.n List z = reverse(x); p.n z.n n p z x p.n.n.n z.n.n.n p.n.n z.n.n p z x p.n p p.n.n p.n.n.n L.n r.n n L r t L.n.n.n r.n.n.n L.n.n r.n.n t L.n r.n n L r t L.n.n.n r.n.n.n L.n.n r.n.n t

124 Cutpoint labels Relate pre-state with post-state Additional roots
Mark cutpoints at and throughout an invocation

125 Cutpoint labels L  {t.n.n.n} t L
Cutpoint label: the set of access paths that point to a cutpoint when the invoked procedure starts t.n.n.n L t.n.n t.n t L t L  {t.n.n.n}

126 Sharing patterns L  {t.n.n.n} Cutpoint labels encode sharing patterns
w.n w w p Stack sharing Heap sharing L  {t.n.n.n}

127 Observational equivalence
L  L (Local-heap Storeless Semantics) G  G (Global-heap Store-based Semantics) L and G observationally equivalent when for every access paths AP1, AP2  AP1 = AP2 (L)   AP1 = AP2 (G)

128 Main theorem: semantic equivalence
L  L (Local-heap Storeless Semantics) G  G (Global-heap Store-based Semantics) L and G observationally equivalent st, L  ’L st, G  ’G LSL GSB ’L and ’G are observationally equivalent

129 Corollaries Preservation of invariants Detection of memory leaks
Assertions: AP1 = AP2 Detection of memory leaks

130 Applications Develop new static analyses
Shape analysis Justify soundness of existing analyses

131 Related work Storeless semantics Jonkers, Algorithmic Languages ‘81
Deutsch, ICCL ‘92

132 Shape abstraction Shape descriptors represent unbounded memory states
Conservatively In a bounded way Two dimensions Local heap (objects) Sharing pattern (cutpoint labels)

133 A Shape abstraction L={t.n.n.n} r n n n t L r L r.n L.n r.n.n L.n.n
t, r.n.n.n L.n.n.n t L

134 A Shape abstraction L=* r n n n t L r L r.n L.n r.n.n L.n.n t, r.n.n.n
L.n.n.n t L

135 A Shape abstraction L t L=* n L=* r n n n t L r t, r.n L.n r.n r L r.n

136 A Shape abstraction L r t, r.n L.n r.n t L=* n

137 A Shape abstraction L={t.n.n.n} r n n n t L L=* n t L r L r.n L.n
L.n.n t, r.n.n.n L.n.n.n t L L r t, r.n L.n r.n t L=* n

138 A Shape abstraction L1={t.n.n.n} L2={g.n.n.n} d n n n g L2 r n n n t
L2.n d.n.n L2.n.n g, d.n.n.n L2.n.n.n g L2 r n n n r L1 r.n L1.n r.n.n L1.n.n t, r.n.n.n L1.n.n.n t L1 L=* n d n n d L d.n L.n t, d.n L.n t n L n n r L r.n L.n t, r.n L.n t r

139 Cutpoint-Freedom

140 How to tabulate procedures?
Procedure  input/output relation Not reachable  Not effected proc: local (reachable) heap  local heap main() { append(y,z); } p q append(List p, List q) { } p q x n t x n t n y z y n z p q n n

141 How to handle sharing? External sharing may break the functional view
main() { append(y,z); } p q n append(List p, List q) { } p q n n n y t t x z y n z p q n x n

142 What’s the difference? n n n n x y append(y,z); append(y,z); t t y z x
1st Example 2nd Example append(y,z); append(y,z); x n t n n n y y t z x z

143 Cutpoints An object is a cutpoint for an invocation
Reachable from actual parameters Not pointed to by an actual parameter Reachable without going through a parameter append(y,z) append(y,z) n n n n y y n n t t x z z

144 Cutpoint freedom Cutpoint-free Invocation: has no cutpoints
Execution: every invocation is cutpoint-free Program: every execution is cutpoint-free append(y,z) append(y,z) n n n n y x t x t y z z

145 Interprocedural shape analysis for cutpoint-free programs using 3-Valued Shape Analysis

146 Memory states: 2-Valued Logical Structure
A memory state encodes a local heap Local variables of the current procedure invocation Relevant part of the heap Relevant  Reachable main append p q n n x t y z

147 Memory states Represented by first-order logical structures Predicate
Meaning x(v) Variable x points to v n(v1,v2) Field n of object v1 points to v2

148 Memory states Represented by first-order logical structures Predicate
Meaning x(v) Variable x points to v n(v1,v2) Field n of object v1 points to v2 p u1 1 u2 q u1 u2 1 n u1 u2 1 p q n u1 u2

149 Operational semantics
Statements modify values of predicates Specified by predicate-update formulae Formulae in FO-TC

150 Procedure calls append(p,q) 1. Verify cutpoint freedom Compute input
z x n p q 1. Verify cutpoint freedom Compute input … Execute callee … 3 Combine output append(y,z) append body p n q y z x n

151 Procedure call: 1. Verifying cutpoint-freedom
An object is a cutpoint for an invocation Reachable from actual parameters Not pointed to by an actual parameter Reachable without going through a parameter append(y,z) append(y,z) n n n n y y x n z x z n t t Cutpoint free Not Cutpoint free

152 Procedure call: 1. Verifying cutpoint-freedom
Invoking append(y,z) in main R{y,z}(v)=v1:y(v1)n*(v1,v)  v1:z(v1)n*(v1,v) isCPmain,{y,z}(v)= R{y,z}(v)  (y(v)z(v1))  ( x(v)  t(v)  v1: R{y,z}(v1)n(v1,v)) (main’s locals: x,y,z,t) n n n n y y x n z x z n t t Cutpoint free Not Cutpoint free

153 Procedure call: 2. Computing the input local heap
Retain only reachable objects Bind formal parameters Call state p n q Input state n n y x z n t

154 Procedure body: append(p,q)
Input state Output state p n q n n p q

155 Procedure call: 3. Combine output
Call state Output state n n p n q n y x z n t

156 Procedure call: 3. Combine output
Call state Output state n n n n n y p x z q n t Auxiliary predicates y n z x t inUc(v) inUx(v)

157 Observational equivalence
CPF  CPF (Cutpoint free semantics) GSB  GSB (Standard semantics) CPF and GSB observationally equivalent when for every access paths AP1, AP2  AP1 = AP2 (CPF)   AP1 = AP2 (GSB)

158 Observational equivalence
For cutpoint free programs: CPF  CPF (Cutpoint free semantics) GSB  GSB (Standard semantics) CPF and GSB observationally equivalent It holds that st, CPF  ’CPF  st, GSB  ’GSB ’CPF and ’GSB are observationally equivalent

159 Introducing local heap semantics
Operational semantics ~ Local heap Operational semantics ’ ’ Abstract transformer

160 Shape abstraction Abstract memory states represent unbounded concrete memory states Conservatively In a bounded way Using 3-valued logical structures

161 3-Valued logic 1 = true 0 = false 1/2 = unknown
A join semi-lattice, 0  1 = 1/2

162 Canonical abstraction
y z n n n n n x n n t y z x n t

163 Instrumentation predicates
Record derived properties Refine the abstraction Instrumentation principle [SRW, TOPLAS’02] Reachability is central! Predicate Meaning rx(v) v is reachable from variable x robj(v1,v2) v2 is reachable from v1 ils(v) v is heap-shared c(v) v resides on a cycle

164 Abstract memory states (with reachability)
z n n n n n x rx rx rx rx rx rx rx,ry rx,ry rz rz rz rz rz rz n n t rt rt rt rt rt rt y rx rx,ry n z rz x rt t

165 The importance of reachability: Call append(y,z)
x rx rx rx rx,ry rz rz rz n n t rt rt rt y z n y z x n t n n n n x rx rx rx,ry rz rz n n t rt rt

166 Abstract semantics Conservatively apply statements on abstract memory states Same formulae as in concrete semantics Soundness guaranteed [SRW, TOPLAS’02]

167 Procedure calls append(p,q) 1. Verify cutpoint freedom Compute input
z x n p q 1. Verify cutpoint freedom Compute input 2 … Execute callee … 3 Combine output append(y,z) append body p n q y z x n

168 Conservative verification of cutpoint-freedom
Invoking append(y,z) in main R{y,z}(v)=v1:y(v1)n*(v1,v)  v1:z(v1)n*(v1,v) isCPmain,{y,z}(v)= R{y,z}(v)  (y(v)z(v1))  ( x(v)  t(v)  v1: R{y,z}(v1)n(v1,v)) n n n n n y ry ry rz y ry ry ry rx rz n n n z n x z rt rt t rt rt t Cutpoint free Not Cutpoint free

169 Interprocedural shape analysis
Tabulation exits p p call f(x) y x x y

170 Interprocedural shape analysis
Analyze f p p Tabulation exits p p call f(x) y x x y

171 Interprocedural shape analysis
Procedure  input/output relation Input Output q q rq rq p q p q n rp rq rp rq rp p n n q p q n n n rp rp rq rp rp rp rq

172 Interprocedural shape analysis
Reusable procedure summaries Heap modularity q p rp rq n y n z x append(y,z) rx ry rz y x z n rx ry rz append(y,z) g h i k n rg rh ri rk append(h,i)

173 Plan Cutpoint freedom Non-standard concrete semantics
Interprocedural shape analysis Prototype implementation

174 Prototype implementation
TVLA based analyzer Soot-based Java front-end Parametric abstraction Data structure Verified properties Singly linked list Cleanness, acyclicity Sorting (of SLL) + Sortedness Unshared binary trees Cleaness, tree-ness

175 Iterative vs. Recursive (SLL)
585

176 Inline vs. Procedural abstraction
// Allocates a list of // length 3 List create3(){ } main() { List x1 = create3(); List x2 = create3(); List x3 = create3(); List x4 = create3();

177 Call string vs. Relational vs
Call string vs. Relational vs. CPF [Rinetzky and Sagiv, CC’01] [Jeannet et al., SAS’04]

178 Summary Cutpoint freedom Non-standard operational semantics
Interprocedural shape analysis Partial correctness of quicksort Prototype implementation

179 Application Properties proved Recursive  Iterative
Absence of null dereferences Listness preservation API conformance Recursive  Iterative Procedural abstraction

180 Related Work Interprocedural shape analysis Local Reasoning
Rinetzky and Sagiv, CC ’01 Chong and Rugina, SAS ’03 Jeannet et al., SAS ’04 Hackett and Rugina, POPL ’05 Rinetzky et al., POPL ‘05 Local Reasoning Ishtiaq and O’Hearn, POPL ‘01 Reynolds, LICS ’02 Encapsulation Noble et al. IWACO ’03 ...

181 Summary Operational semantics Applications Storeless Local heap
Cutpoints Equivalence theorem Applications Shape analysis May-alias analysis


Download ppt "Program Analysis and Verification"

Similar presentations


Ads by Google