Merge pull request #448 from ethereum/simpl-copySlice

`CopySlice` simplification, more generic Array/Map slot simplification, fixing `decompose`, `--nodecompose` flag
ethereum · Mar 13, 2024 · bb7a70f · bb7a70f
2 parents ca21f8b + 01ac760
commit bb7a70f
Show file tree

Hide file tree

Showing 7 changed files with 203 additions and 60 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,9 +6,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## Changed
+- More complete and precise array/mapping slot rewrite, along with a copySlice improvement
 
 ## Added
 - More POr and PAnd rules
+- Array/Map slot decomposition can be turned off via a flag
 - More PEq, PLEq, and PLT rules
 - New `label` cheatcode.
 - Updated Bitwuzla to newer version
@@ -19,6 +21,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - CVC5 needs `--incremental` flag to work properly in abstraction-refinement mode
 - cli.hs now uses with-utf8 so no release binary will have locale issues anymore
 - Took ideas for simplification rules from "Super-optimization of Smart Contracts" paper by Albert et al.
+- Decomposition does not take place when entire states are compared, as that would necessitate
+  a different approach.
 
 ## [0.53.0] - 2024-02-23
 

diff --git a/cli/cli.hs b/cli/cli.hs
@@ -97,6 +97,7 @@ data Command w
       , loopDetectionHeuristic :: w ::: LoopHeuristic <!> "StackBased" <?> "Which heuristic should be used to determine if we are in a loop: StackBased (default) or Naive"
       , abstractArithmetic    :: w ::: Bool             <?> "Use abstraction-refinement for complicated arithmetic functions such as MulMod. This runs the solver first with abstraction turned on, and if it returns a potential counterexample, the counterexample is refined to make sure it is a counterexample for the actual (not the abstracted) problem"
       , abstractMemory    :: w ::: Bool                      <?> "Use abstraction-refinement for Memory. This runs the solver first with abstraction turned on, and if it returns a potential counterexample, the counterexample is refined to make sure it is a counterexample for the actual (not the abstracted) problem"
+      , noDecompose   :: w ::: Bool               <?> "Don't decompose storage slots into separate arrays"
       }
   | Equivalence -- prove equivalence between two programs
       { codeA         :: w ::: ByteString       <?> "Bytecode of the first program"
@@ -116,6 +117,7 @@ data Command w
       , loopDetectionHeuristic :: w ::: LoopHeuristic <!> "StackBased" <?> "Which heuristic should be used to determine if we are in a loop: StackBased (default) or Naive"
       , abstractArithmetic    :: w ::: Bool             <?> "Use abstraction-refinement for complicated arithmetic functions such as MulMod. This runs the solver first with abstraction turned on, and if it returns a potential counterexample, the counterexample is refined to make sure it is a counterexample for the actual (not the abstracted) problem"
       , abstractMemory    :: w ::: Bool                      <?> "Use abstraction-refinement for Memory. This runs the solver first with abstraction turned on, and if it returns a potential counterexample, the counterexample is refined to make sure it is a counterexample for the actual (not the abstracted) problem"
+      , noDecompose   :: w ::: Bool             <?> "Don't decompose storage slots into separate arrays"
       }
   | Exec -- Execute a given program with specified env & calldata
       { code        :: w ::: Maybe ByteString  <?> "Program bytecode"
@@ -163,6 +165,7 @@ data Command w
       , loopDetectionHeuristic :: w ::: LoopHeuristic   <!> "StackBased" <?> "Which heuristic should be used to determine if we are in a loop: StackBased (default) or Naive"
       , abstractArithmetic    :: w ::: Bool             <?> "Use abstraction-refinement for complicated arithmetic functions such as MulMod. This runs the solver first with abstraction turned on, and if it returns a potential counterexample, the counterexample is refined to make sure it is a counterexample for the actual (not the abstracted) problem"
       , abstractMemory    :: w ::: Bool                      <?> "Use abstraction-refinement for Memory. This runs the solver first with abstraction turned on, and if it returns a potential counterexample, the counterexample is refined to make sure it is a counterexample for the actual (not the abstracted) problem"
+      , noDecompose   :: w ::: Bool                     <?> "Don't decompose storage slots into separate arrays"
       , numCexFuzz    :: w ::: Integer                  <!> "3" <?> "Number of fuzzing tries to do to generate a counterexample (default: 3)"
       , askSmtIterations :: w ::: Integer               <!> "1" <?> "Number of times we may revisit a particular branching point before we consult the smt solver to check reachability (default: 1)"
       }
@@ -207,6 +210,7 @@ main = withUtf8 $ do
     , abstRefineMem = cmd.abstractMemory
     , abstRefineArith = cmd.abstractArithmetic
     , dumpTrace = cmd.trace
+    , decomposeStorage = Prelude.not cmd.noDecompose
     } }
   case cmd of
     Version {} ->putStrLn getFullVersion

diff --git a/src/EVM/Effects.hs b/src/EVM/Effects.hs
@@ -47,18 +47,19 @@ class Monad m => ReadConfig m where
   readConfig ::  m Config
 
 data Config = Config
-  { dumpQueries     :: Bool
-  , dumpExprs       :: Bool
-  , dumpEndStates   :: Bool
-  , debug           :: Bool
-  , abstRefineArith :: Bool
-  , abstRefineMem   :: Bool
-  , dumpTrace       :: Bool
-  , numCexFuzz      :: Integer
+  { dumpQueries      :: Bool
+  , dumpExprs        :: Bool
+  , dumpEndStates    :: Bool
+  , debug            :: Bool
+  , abstRefineArith  :: Bool
+  , abstRefineMem    :: Bool
+  , dumpTrace        :: Bool
+  , numCexFuzz       :: Integer
    -- Used to debug fuzzer in test.hs. It disables the SMT solver
    -- and uses the fuzzer ONLY to try to find a counterexample.
    -- Returns Unknown if the Cex cannot be found via fuzzing
-  , onlyCexFuzz     :: Bool
+  , onlyCexFuzz      :: Bool
+  , decomposeStorage :: Bool
   }
   deriving (Show, Eq)
 
@@ -73,6 +74,7 @@ defaultConfig = Config
   , dumpTrace = False
   , numCexFuzz = 10
   , onlyCexFuzz  = False
+  , decomposeStorage = True
   }
 
 -- Write to the console

diff --git a/src/EVM/Expr.hs b/src/EVM/Expr.hs
@@ -607,6 +607,13 @@ readStorage' loc store = case readStorage loc store of
 -- no explicit writes to the requested slot. This makes implementing rpc
 -- storage lookups much easier. If the store is backed by an AbstractStore we
 -- always return a symbolic value.
+--
+-- This does not strip writes that cannot possibly match a read, in case there are
+-- some write(s) in between that it cannot statically determine to be removable, because
+-- it will early-abort. So (load idx1 (store idx1 (store idx1 (store idx0)))) will not strip
+-- the idx0 store, in case things in between cannot be stripped. See simplify-storage-map-todo
+-- test for an example where this happens. Note that decomposition solves this, though late in
+-- the simplification lifecycle (just before SMT generation, which can be too late)
 readStorage :: Expr EWord -> Expr Storage -> Maybe (Expr EWord)
 readStorage w st = go (simplify w) st
   where
@@ -623,16 +630,16 @@ readStorage w st = go (simplify w) st
       (Lit _, Lit _) -> go slot prev
 
       -- slot is for a map + map -> skip write
-      (MappingSlot idA _, MappingSlot idB _)     | idsDontMatch idA idB  -> go slot prev
-      (MappingSlot _ keyA, MappingSlot _ keyB)   | surelyNotEq keyA keyB -> go slot prev
+      (MappingSlot idA _, MappingSlot idB _)       | BS.length idB == 64 && BS.length idA == 64 && idsDontMatch idA idB  -> go slot prev
+      (MappingSlot idA keyA, MappingSlot idB keyB) | BS.length idB == 64 && BS.length idA == 64 && surelyNotEq keyA keyB -> go slot prev
 
       -- special case of array + map -> skip write
-      (ArraySlotWithOffset idA _, Keccak64Bytes) | BS.length idA /= 64 -> go slot prev
-      (ArraySlotZero idA, Keccak64Bytes)         | BS.length idA /= 64 -> go slot prev
+      (ArraySlotWithOffs idA _, Keccak   k)      | bufLength k == Lit 64 && BS.length idA == 32 -> go slot prev
+      (ArraySlotZero idA, Keccak k)              | bufLength k == Lit 64 && BS.length idA == 32 -> go slot prev
 
-      -- special case of array + map -> skip write
-      (Keccak64Bytes, ArraySlotWithOffset idA _) | BS.length idA /= 64 -> go slot prev
-      (Keccak64Bytes, ArraySlotZero idA)         | BS.length idA /= 64 -> go slot prev
+      -- special case of map + array -> skip write
+      (Keccak k, ArraySlotWithOffs idA _)      | bufLength k == Lit 64 && BS.length idA == 32 -> go slot prev
+      (ArraySlotWithOffs idA _, Keccak k)      | bufLength k == Lit 64 && BS.length idA == 32 -> go slot prev
 
       -- Fixed SMALL value will never match Keccak (well, it might, but that's VERY low chance)
       (Lit a, Keccak _) | a < 256 -> go slot prev
@@ -658,17 +665,17 @@ readStorage w st = go (simplify w) st
 
       -- case of array + array, but different id's or different concrete offsets
       -- zero offs vs zero offs
-      (ArraySlotZero idA, ArraySlotZero idB)                   | idA /= idB -> go slot prev
+      (ArraySlotZero idA, ArraySlotZero idB)                   | BS.length idA == 32, BS.length idB == 32, idA /= idB -> go slot prev
       -- zero offs vs non-zero offs
-      (ArraySlotZero idA, ArraySlotWithOffset idB _)           | idA /= idB -> go slot prev
-      (ArraySlotZero _, ArraySlotWithOffset _ (Lit offB))      | offB /= 0  -> go slot prev
+      (ArraySlotZero idA, ArraySlotWithOffs idB _)             | BS.length idA == 32, BS.length idB == 32, idA /= idB -> go slot prev
+      (ArraySlotZero idA, ArraySlotWithOffs idB (Lit offB))    | BS.length idA == 32, BS.length idB == 32, offB /= 0  -> go slot prev
       -- non-zero offs vs zero offs
-      (ArraySlotWithOffset idA _, ArraySlotZero idB)           | idA /= idB -> go slot prev
-      (ArraySlotWithOffset _ (Lit offA), ArraySlotZero _)      | offA /= 0  -> go slot prev
+      (ArraySlotWithOffs idA _, ArraySlotZero idB)             | BS.length idA == 32, BS.length idB == 32, idA /= idB -> go slot prev
+      (ArraySlotWithOffs idA (Lit offA), ArraySlotZero idB)    | BS.length idA == 32, BS.length idB == 32, offA /= 0  -> go slot prev
       -- non-zero offs vs non-zero offs
-      (ArraySlotWithOffset idA _, ArraySlotWithOffset idB _)   | idA /= idB -> go slot prev
+      (ArraySlotWithOffs idA _, ArraySlotWithOffs idB _)       | BS.length idA == 32, BS.length idB == 32, idA /= idB -> go slot prev
 
-      (ArraySlotWithOffset _ offA, ArraySlotWithOffset _ offB) | surelyNotEq offA offB -> go slot prev
+      (ArraySlotWithOffs idA offA, ArraySlotWithOffs idB offB) | BS.length idA == 32, BS.length idB == 32, surelyNotEq offA offB -> go slot prev
 
       -- we are unable to determine statically whether or not we can safely move deeper in the write chain, so return an abstract term
       _ -> Just $ SLoad slot s
@@ -687,16 +694,12 @@ readStorage w st = go (simplify w) st
 
 -- storage slots for maps are determined by (keccak (bytes32(key) ++ bytes32(id)))
 pattern MappingSlot :: ByteString -> Expr EWord -> Expr EWord
-pattern MappingSlot id key = Keccak (CopySlice (Lit 0) (Lit 0) (Lit 64) (WriteWord (Lit 0) key (ConcreteBuf id)) (ConcreteBuf ""))
-
--- keccak of any 64 bytes value
-pattern Keccak64Bytes :: Expr EWord
-pattern Keccak64Bytes <- Keccak (CopySlice (Lit 0) (Lit 0) (Lit 64) _ (ConcreteBuf ""))
+pattern MappingSlot idx key = Keccak (WriteWord (Lit 0) key (ConcreteBuf idx))
 
 -- storage slots for arrays are determined by (keccak(bytes32(id)) + offset)
 -- note that `normArgs` puts the Lit as the 2nd argument to `Add`
-pattern ArraySlotWithOffset :: ByteString -> Expr EWord -> Expr EWord
-pattern ArraySlotWithOffset id offset = Add (Keccak (ConcreteBuf id)) offset
+pattern ArraySlotWithOffs :: ByteString -> Expr EWord -> Expr EWord
+pattern ArraySlotWithOffs id offset = Add (Keccak (ConcreteBuf id)) offset
 
 -- special pattern to match the 0th element because the `Add` term gets simplified out
 pattern ArraySlotZero :: ByteString -> Expr EWord
@@ -721,7 +724,7 @@ structureArraySlots e = mapExpr go e
   where
     go :: Expr a -> Expr a
     go orig@(Lit key) = case litToArrayPreimage key of
-      Just (array, offset) -> ArraySlotWithOffset (slotPos array) (Lit offset)
+      Just (array, offset) -> ArraySlotWithOffs (slotPos array) (Lit offset)
       _ -> orig
     go a = a
 
@@ -777,23 +780,48 @@ getLogicalIdx (GVar _) = internalError "cannot determine addr of a GVar"
 data StorageType = SmallSlot | Array | Map | Mixed | UNK
   deriving (Show, Eq)
 
-safeToDecompose :: Expr a -> Bool
-safeToDecompose inp = result /= Mixed
+-- We can't currently decompose cases when the FULL returned state is equated
+-- This is because the decomposition would need to take into account that ALL
+-- maps/arrays/small-slots need to be equivalent. This could be done, but is left
+-- as a TODO. Currently this only affects equivalence checking as there is no
+-- EVM bytecode to compare the FULL state, so such comparison could only be
+-- generated via hevm itself
+safeToDecomposeProp :: Prop -> Bool
+safeToDecomposeProp p = isJust $ mapPropM' findPEqStore p
+  where
+    findPEqStore :: Prop -> Maybe Prop
+    findPEqStore = \case
+      (PEq (SStore {}) (SStore {})) -> Nothing
+      (PEq (AbstractStore {}) (SStore {})) -> Nothing
+      (PEq (SStore {}) (AbstractStore {})) -> Nothing
+      (PEq (AbstractStore {}) (AbstractStore {})) -> Nothing
+      a -> Just a
+
+-- This checks if the decomposition is possible by making sure there is no
+-- mixture of different types of accesses such as array/map/small-slot.
+safeToDecompose :: Expr a -> Maybe ()
+safeToDecompose inp = if result /= Mixed then Just () else Nothing
   where
     result = execState (safeToDecomposeRunner inp) UNK
 
     safeToDecomposeRunner :: forall a. Expr a -> State StorageType ()
     safeToDecomposeRunner a = go a
 
     go :: forall b. Expr b -> State StorageType ()
-    go e@(SLoad (MappingSlot {}) _) = setMap e
-    go e@(SLoad (ArraySlotZero {}) _) = setArray e
-    go e@(SLoad (ArraySlotWithOffset {}) _) = setArray e
+    go e@(SLoad (MappingSlot x _) _) = if BS.length x == 64 then setMap e else setMixed e
+    go e@(SLoad (Keccak x) _) = case bufLength x of
+                                  Lit 32 -> setArray e
+                                  Lit 64 -> setMap e
+                                  _ -> setMixed e
+    go e@(SLoad (ArraySlotWithOffs x _) _) = if BS.length x == 32 then setArray e else setMixed e
     go e@(SLoad (Lit x) _) | x < 256 = setSmall e
     go e@(SLoad _ _) = setMixed e
-    go e@(SStore (MappingSlot {}) _ _) = setMap e
-    go e@(SStore (ArraySlotZero {}) _ _) = setArray e
-    go e@(SStore (ArraySlotWithOffset {}) _ _) = setArray e
+    go e@(SStore (MappingSlot x _) _ _) = if BS.length x == 64 then setMap e else setMixed e
+    go e@(SStore (Keccak x) _ _) =  case bufLength x of
+                                  Lit 32 -> setArray e
+                                  Lit 64 -> setMap e
+                                  _ -> setMixed e
+    go e@(SStore (ArraySlotWithOffs x _) _ _) = if BS.length x == 32 then setArray e else setMixed e
     go e@(SStore (Lit x) _ _) | x < 256 = setSmall e
     go e@(SStore _ _ _) = setMixed e
     go _ = pure ()
@@ -828,9 +856,9 @@ safeToDecompose inp = result /= Mixed
       pure ()
 
 -- | Splits storage into logical sub-stores if (1) all SLoad->SStore* chains are one of:
---     (1a) Lit < 256, (1b) MappingSlot, (1c) ArraySlotWithOffset, (1d) ArraySlotZero
+--     (1a) Lit < 256, (1b) MappingSlot, (1c) ArraySlotWithOffs, (1d) ArraySlotZero
 --  and (2) there is no mixing of different types (e.g. Map with Array) within
---  the same SStore -> SLoad* chain, and (3) there is no mixing of different array/map slots.
+--  the same SStore -> SLoad* chain
 --
 --  Mixing (2) and (3) are attempted to be prevented (if possible) as part of the rewrites
 --  done by the `readStorage` function that is ran before this. If there is still mixing here,
@@ -843,7 +871,7 @@ decomposeStorage :: Expr a -> Maybe (Expr a)
 decomposeStorage = go
   where
     go :: Expr a -> Maybe (Expr a)
-    go e@(SLoad origKey store) = if Prelude.not (safeToDecompose e) then Nothing else tryRewrite origKey store
+    go (SLoad key store) = tryRewrite key store
     go e = Just e
 
     tryRewrite :: Expr EWord -> Expr Storage -> Maybe (Expr EWord)
@@ -859,8 +887,14 @@ decomposeStorage = go
     inferLogicalIdx = \case
       Lit a | a >= 256 -> Nothing
       Lit a -> Just (Nothing, Lit a)
+      -- maps
+      (Keccak (ConcreteBuf k)) | BS.length k == 64 -> do
+        let key = idxToWord (BS.take 32 k)
+            idx = Lit $ idxToWord (BS.drop 32 k)
+        Just (Just key, idx)
       (MappingSlot idx key) | BS.length idx == 64 -> Just (Just $ idxToWord idx, key)
-      (ArraySlotWithOffset idx offset) | BS.length idx == 32 -> Just (Just $ idxToWord64 idx, offset)
+      -- arrays
+      (ArraySlotWithOffs idx offset) | BS.length idx == 32 -> Just (Just $ idxToWord64 idx, offset)
       (ArraySlotZero idx) | BS.length idx == 32 -> Just (Just $ idxToWord64 idx, Lit 0)
       _ -> Nothing
 
@@ -873,12 +907,16 @@ decomposeStorage = go
     -- Updates the logical base store of the given expression if it is safe to do so
     setLogicalBase :: Maybe W256 -> Expr Storage -> Maybe (Expr Storage)
 
-    -- abstract bases get their logical idx set to the new value
-    setLogicalBase idx (AbstractStore addr _) = Just $ AbstractStore addr idx
-    setLogicalBase idx (SStore i v s) = do
-      b <- setLogicalBase idx s
-      (idx2, key2) <- inferLogicalIdx i
-      if idx == idx2 then Just (SStore key2 v b) else Nothing
+    setLogicalBase idx (AbstractStore addr Nothing) = Just $ AbstractStore addr idx
+    setLogicalBase idx (AbstractStore addr idx2) | idx == idx2 = Just $ AbstractStore addr idx
+    setLogicalBase _ (AbstractStore _ _) = internalError "we only rewrite idx once, on load"
+    setLogicalBase idx (SStore k v prevStorage) = do
+      (idx2, key2) <- inferLogicalIdx k
+      b <- setLogicalBase idx prevStorage
+      -- If it's not the same IDX, we can skip. This is possible because there are no
+      -- mixed arrays/maps/small-slots, as checked by safeToDecompose
+      if idx == idx2 then Just (SStore key2 v b)
+      else setLogicalBase idx b
 
     -- empty concrete base is safe to reuse without any rewriting
     setLogicalBase _ s@(ConcreteStore m) | Map.null m = Just s
@@ -909,9 +947,6 @@ simplify e = if (mapExpr go e == e)
     go (CopySlice (Lit 0x0) (Lit 0x0) (Lit 0x0) _ dst) = dst
 
     -- simplify storage
-    go (Keccak (ConcreteBuf bs)) = case BS.length bs == 64 of
-      True -> MappingSlot bs (Lit $ word bs)
-      False -> Keccak (ConcreteBuf bs)
     go (SLoad slot store) = readStorage' slot store
     go (SStore slot val store) = writeStorage slot val store
 
@@ -936,6 +971,10 @@ simplify e = if (mapExpr go e == e)
 
     go (WriteByte a b c) = writeByte a b c
 
+    -- eliminate a CopySlice if the resulting buffer is the same as the src buffer
+    go (CopySlice (Lit 0) (Lit 0) (Lit s) src (ConcreteBuf ""))
+      | bufLength src == (Lit s) = src
+
     -- truncate some concrete source buffers to the portion relevant for the CopySlice if we're copying a fully concrete region
     go orig@(CopySlice srcOff@(Lit n) dstOff size@(Lit sz)
         -- It doesn't matter what wOffs we write to, because only the first