From eacd25ffa7564b99caa85264a31121bf94fd4350 Mon Sep 17 00:00:00 2001 From: Mate Soos Date: Mon, 4 Mar 2024 14:10:15 +0100 Subject: [PATCH 1/7] Fixing decompose, adding nodecompose flag, more explanation, cleaner code --- CHANGELOG.md | 4 ++ cli/cli.hs | 4 ++ src/EVM/Effects.hs | 2 + src/EVM/Expr.hs | 101 +++++++++++++++++++++++++++++------------- src/EVM/SMT.hs | 10 ++++- src/EVM/Traversals.hs | 45 +++++++++++++++++++ test/test.hs | 53 +++++++++++++++++++--- 7 files changed, 181 insertions(+), 38 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6ccaf9eed..cc5a482b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,9 +6,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## Changed +- More complete and precise array/mapping slot rewrite, along with a copySlice improvement ## Added - More POr and PAnd rules +- Array/Map slot decomposition can be turned off via a flag ## Fixed - `concat` is a 2-ary, not an n-ary function in SMT2LIB, declare-const does not exist in QF_AUFBV, replacing @@ -16,6 +18,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - CVC5 needs `--incremental` flag to work properly in abstraction-refinement mode - cli.hs now uses with-utf8 so no release binary will have locale issues anymore - Took ideas for simplification rules from "Super-optimization of Smart Contracts" paper by Albert et al. +- Decomposition does not take place when entire states are compared, as that would necessitate + a different approach. ## [0.53.0] - 2024-02-23 diff --git a/cli/cli.hs b/cli/cli.hs index 7dfa31c92..f358298de 100644 --- a/cli/cli.hs +++ b/cli/cli.hs @@ -97,6 +97,7 @@ data Command w , loopDetectionHeuristic :: w ::: LoopHeuristic "StackBased" "Which heuristic should be used to determine if we are in a loop: StackBased (default) or Naive" , abstractArithmetic :: w ::: Bool "Use abstraction-refinement for complicated arithmetic functions such as MulMod. This runs the solver first with abstraction turned on, and if it returns a potential counterexample, the counterexample is refined to make sure it is a counterexample for the actual (not the abstracted) problem" , abstractMemory :: w ::: Bool "Use abstraction-refinement for Memory. This runs the solver first with abstraction turned on, and if it returns a potential counterexample, the counterexample is refined to make sure it is a counterexample for the actual (not the abstracted) problem" + , nodecompose :: w ::: Bool "Don't decompose storage slots into separate arrays" } | Equivalence -- prove equivalence between two programs { codeA :: w ::: ByteString "Bytecode of the first program" @@ -116,6 +117,7 @@ data Command w , loopDetectionHeuristic :: w ::: LoopHeuristic "StackBased" "Which heuristic should be used to determine if we are in a loop: StackBased (default) or Naive" , abstractArithmetic :: w ::: Bool "Use abstraction-refinement for complicated arithmetic functions such as MulMod. This runs the solver first with abstraction turned on, and if it returns a potential counterexample, the counterexample is refined to make sure it is a counterexample for the actual (not the abstracted) problem" , abstractMemory :: w ::: Bool "Use abstraction-refinement for Memory. This runs the solver first with abstraction turned on, and if it returns a potential counterexample, the counterexample is refined to make sure it is a counterexample for the actual (not the abstracted) problem" + , nodecompose :: w ::: Bool "Don't decompose storage slots into separate arrays" } | Exec -- Execute a given program with specified env & calldata { code :: w ::: Maybe ByteString "Program bytecode" @@ -163,6 +165,7 @@ data Command w , loopDetectionHeuristic :: w ::: LoopHeuristic "StackBased" "Which heuristic should be used to determine if we are in a loop: StackBased (default) or Naive" , abstractArithmetic :: w ::: Bool "Use abstraction-refinement for complicated arithmetic functions such as MulMod. This runs the solver first with abstraction turned on, and if it returns a potential counterexample, the counterexample is refined to make sure it is a counterexample for the actual (not the abstracted) problem" , abstractMemory :: w ::: Bool "Use abstraction-refinement for Memory. This runs the solver first with abstraction turned on, and if it returns a potential counterexample, the counterexample is refined to make sure it is a counterexample for the actual (not the abstracted) problem" + , nodecompose :: w ::: Bool "Don't decompose storage slots into separate arrays" , numCexFuzz :: w ::: Integer "3" "Number of fuzzing tries to do to generate a counterexample (default: 3)" , askSmtIterations :: w ::: Integer "1" "Number of times we may revisit a particular branching point before we consult the smt solver to check reachability (default: 1)" } @@ -207,6 +210,7 @@ main = withUtf8 $ do , abstRefineMem = cmd.abstractMemory , abstRefineArith = cmd.abstractArithmetic , dumpTrace = cmd.trace + , decomposeStorage = Prelude.not cmd.nodecompose } } case cmd of Version {} ->putStrLn getFullVersion diff --git a/src/EVM/Effects.hs b/src/EVM/Effects.hs index 17fe4ebd7..681ae8f40 100644 --- a/src/EVM/Effects.hs +++ b/src/EVM/Effects.hs @@ -59,6 +59,7 @@ data Config = Config -- and uses the fuzzer ONLY to try to find a counterexample. -- Returns Unknown if the Cex cannot be found via fuzzing , onlyCexFuzz :: Bool + , decomposeStorage:: Bool } deriving (Show, Eq) @@ -73,6 +74,7 @@ defaultConfig = Config , dumpTrace = False , numCexFuzz = 10 , onlyCexFuzz = False + , decomposeStorage = True } -- Write to the console diff --git a/src/EVM/Expr.hs b/src/EVM/Expr.hs index cae5f4ea2..8b3cda22e 100644 --- a/src/EVM/Expr.hs +++ b/src/EVM/Expr.hs @@ -607,6 +607,13 @@ readStorage' loc store = case readStorage loc store of -- no explicit writes to the requested slot. This makes implementing rpc -- storage lookups much easier. If the store is backed by an AbstractStore we -- always return a symbolic value. +-- +-- This does not strip writes that cannot possibly match a read, in case there are +-- some write(s) in between that it cannot statically determine to be removable, because +-- it will early-abort. So (load idx1 (store idx1 (store idx1 (store idx0)))) will not strip +-- the idx0 store, in case things in between cannot be stripped. See simplify-storage-map-todo +-- test for an example where this happens. Note that decomposition solves this, though late in +-- the simplification lifecycle (just before SMT generation, which can be too late) readStorage :: Expr EWord -> Expr Storage -> Maybe (Expr EWord) readStorage w st = go (simplify w) st where @@ -623,16 +630,16 @@ readStorage w st = go (simplify w) st (Lit _, Lit _) -> go slot prev -- slot is for a map + map -> skip write - (MappingSlot idA _, MappingSlot idB _) | idsDontMatch idA idB -> go slot prev - (MappingSlot _ keyA, MappingSlot _ keyB) | surelyNotEq keyA keyB -> go slot prev + (MappingSlot idA _, MappingSlot idB _) | BS.length idB == 64 && BS.length idA == 64 && idsDontMatch idA idB -> go slot prev + (MappingSlot idA keyA, MappingSlot idB keyB) | BS.length idB == 64 && BS.length idA == 64 && surelyNotEq keyA keyB -> go slot prev -- special case of array + map -> skip write - (ArraySlotWithOffset idA _, Keccak64Bytes) | BS.length idA /= 64 -> go slot prev - (ArraySlotZero idA, Keccak64Bytes) | BS.length idA /= 64 -> go slot prev + (ArraySlotWithOffset idA _, Keccak k) | bufLength k == Lit 64 && BS.length idA == 32 -> go slot prev + (ArraySlotZero idA, Keccak k) | bufLength k == Lit 64 && BS.length idA == 32 -> go slot prev - -- special case of array + map -> skip write - (Keccak64Bytes, ArraySlotWithOffset idA _) | BS.length idA /= 64 -> go slot prev - (Keccak64Bytes, ArraySlotZero idA) | BS.length idA /= 64 -> go slot prev + -- special case of map + array -> skip write + (Keccak k, ArraySlotWithOffset idA _) | bufLength k == Lit 64 && BS.length idA == 32 -> go slot prev + (ArraySlotWithOffset idA _, Keccak k) | bufLength k == Lit 64 && BS.length idA == 32 -> go slot prev -- Fixed SMALL value will never match Keccak (well, it might, but that's VERY low chance) (Lit a, Keccak _) | a < 256 -> go slot prev @@ -687,11 +694,7 @@ readStorage w st = go (simplify w) st -- storage slots for maps are determined by (keccak (bytes32(key) ++ bytes32(id))) pattern MappingSlot :: ByteString -> Expr EWord -> Expr EWord -pattern MappingSlot id key = Keccak (CopySlice (Lit 0) (Lit 0) (Lit 64) (WriteWord (Lit 0) key (ConcreteBuf id)) (ConcreteBuf "")) - --- keccak of any 64 bytes value -pattern Keccak64Bytes :: Expr EWord -pattern Keccak64Bytes <- Keccak (CopySlice (Lit 0) (Lit 0) (Lit 64) _ (ConcreteBuf "")) +pattern MappingSlot idx key = Keccak (WriteWord (Lit 0) key (ConcreteBuf idx)) -- storage slots for arrays are determined by (keccak(bytes32(id)) + offset) -- note that `normArgs` puts the Lit as the 2nd argument to `Add` @@ -777,8 +780,27 @@ getLogicalIdx (GVar _) = internalError "cannot determine addr of a GVar" data StorageType = SmallSlot | Array | Map | Mixed | UNK deriving (Show, Eq) -safeToDecompose :: Expr a -> Bool -safeToDecompose inp = result /= Mixed +-- We can't currently decompose cases when the FULL returned state is equated +-- This is because the decomposition would need to take into account that ALL +-- maps/arrays/small-slots need to be equivalent. This could be done, but is left +-- as a TODO. Currently this only affects equivalence checking as there is no +-- EVM bytecode to compare the FULL state, so such comparison could only be +-- generated via hevm itself +safeToDecomposeProp :: Prop -> Maybe () +safeToDecomposeProp p = void $ mapPropM' findPEqStore p + where + findPEqStore :: Prop -> Maybe Prop + findPEqStore = \case + (PEq (SStore {}) (SStore {})) -> Nothing + (PEq (AbstractStore {}) (SStore {})) -> Nothing + (PEq (SStore {}) (AbstractStore {})) -> Nothing + (PEq (AbstractStore {}) (AbstractStore {})) -> Nothing + a -> Just a + +-- This checks if the decomposition is possible by making sure there is no +-- mixture of different types of accesses such as array/map/small-slot. +safeToDecompose :: Expr a -> Maybe () +safeToDecompose inp = if result /= Mixed then Just () else Nothing where result = execState (safeToDecomposeRunner inp) UNK @@ -786,14 +808,20 @@ safeToDecompose inp = result /= Mixed safeToDecomposeRunner a = go a go :: forall b. Expr b -> State StorageType () - go e@(SLoad (MappingSlot {}) _) = setMap e - go e@(SLoad (ArraySlotZero {}) _) = setArray e - go e@(SLoad (ArraySlotWithOffset {}) _) = setArray e + go e@(SLoad (MappingSlot x _) _) = if BS.length x == 64 then setMap e else setMixed e + go e@(SLoad (Keccak x) _) = case bufLength x of + Lit 32 -> setArray e + Lit 64 -> setMap e + _ -> setMixed e + go e@(SLoad (ArraySlotWithOffset x _) _) = if BS.length x == 32 then setArray e else setMixed e go e@(SLoad (Lit x) _) | x < 256 = setSmall e go e@(SLoad _ _) = setMixed e - go e@(SStore (MappingSlot {}) _ _) = setMap e - go e@(SStore (ArraySlotZero {}) _ _) = setArray e - go e@(SStore (ArraySlotWithOffset {}) _ _) = setArray e + go e@(SStore (MappingSlot x _) _ _) = if BS.length x == 64 then setMap e else setMixed e + go e@(SStore (Keccak x) _ _) = case bufLength x of + Lit 32 -> setArray e + Lit 64 -> setMap e + _ -> setMixed e + go e@(SStore (ArraySlotWithOffset x _) _ _) = if BS.length x == 32 then setArray e else setMixed e go e@(SStore (Lit x) _ _) | x < 256 = setSmall e go e@(SStore _ _ _) = setMixed e go _ = pure () @@ -830,7 +858,7 @@ safeToDecompose inp = result /= Mixed -- | Splits storage into logical sub-stores if (1) all SLoad->SStore* chains are one of: -- (1a) Lit < 256, (1b) MappingSlot, (1c) ArraySlotWithOffset, (1d) ArraySlotZero -- and (2) there is no mixing of different types (e.g. Map with Array) within --- the same SStore -> SLoad* chain, and (3) there is no mixing of different array/map slots. +-- the same SStore -> SLoad* chain -- -- Mixing (2) and (3) are attempted to be prevented (if possible) as part of the rewrites -- done by the `readStorage` function that is ran before this. If there is still mixing here, @@ -843,7 +871,7 @@ decomposeStorage :: Expr a -> Maybe (Expr a) decomposeStorage = go where go :: Expr a -> Maybe (Expr a) - go e@(SLoad origKey store) = if Prelude.not (safeToDecompose e) then Nothing else tryRewrite origKey store + go (SLoad key store) = tryRewrite key store go e = Just e tryRewrite :: Expr EWord -> Expr Storage -> Maybe (Expr EWord) @@ -859,7 +887,13 @@ decomposeStorage = go inferLogicalIdx = \case Lit a | a >= 256 -> Nothing Lit a -> Just (Nothing, Lit a) + -- maps + (Keccak (ConcreteBuf k)) | BS.length k == 64 -> do + let key = idxToWord (BS.take 32 k) + idx = Lit $ idxToWord (BS.drop 32 k) + Just (Just key, idx) (MappingSlot idx key) | BS.length idx == 64 -> Just (Just $ idxToWord idx, key) + -- arrays (ArraySlotWithOffset idx offset) | BS.length idx == 32 -> Just (Just $ idxToWord64 idx, offset) (ArraySlotZero idx) | BS.length idx == 32 -> Just (Just $ idxToWord64 idx, Lit 0) _ -> Nothing @@ -873,12 +907,16 @@ decomposeStorage = go -- Updates the logical base store of the given expression if it is safe to do so setLogicalBase :: Maybe W256 -> Expr Storage -> Maybe (Expr Storage) - -- abstract bases get their logical idx set to the new value - setLogicalBase idx (AbstractStore addr _) = Just $ AbstractStore addr idx - setLogicalBase idx (SStore i v s) = do - b <- setLogicalBase idx s - (idx2, key2) <- inferLogicalIdx i - if idx == idx2 then Just (SStore key2 v b) else Nothing + setLogicalBase idx (AbstractStore addr Nothing) = Just $ AbstractStore addr idx + setLogicalBase idx (AbstractStore addr idx2) | idx == idx2 = Just $ AbstractStore addr idx + setLogicalBase _ (AbstractStore _ _) = internalError "we only rewrite idx once, on load" + setLogicalBase idx (SStore k v prevStorage) = do + (idx2, key2) <- inferLogicalIdx k + b <- setLogicalBase idx prevStorage + -- If it's not the same IDX, we can skip. This is possible because there are no + -- mixed arrays/maps/small-slots, as checked by safeToDecompose + if idx == idx2 then Just (SStore key2 v b) + else setLogicalBase idx b -- empty concrete base is safe to reuse without any rewriting setLogicalBase _ s@(ConcreteStore m) | Map.null m = Just s @@ -909,9 +947,6 @@ simplify e = if (mapExpr go e == e) go (CopySlice (Lit 0x0) (Lit 0x0) (Lit 0x0) _ dst) = dst -- simplify storage - go (Keccak (ConcreteBuf bs)) = case BS.length bs == 64 of - True -> MappingSlot bs (Lit $ word bs) - False -> Keccak (ConcreteBuf bs) go (SLoad slot store) = readStorage' slot store go (SStore slot val store) = writeStorage slot val store @@ -936,6 +971,10 @@ simplify e = if (mapExpr go e == e) go (WriteByte a b c) = writeByte a b c + -- eliminate a CopySlice if the resulting buffer is the same as the src buffer + go (CopySlice (Lit 0) (Lit 0) (Lit s) src (ConcreteBuf "")) + | bufLength src == (Lit s) = src + -- truncate some concrete source buffers to the portion relevant for the CopySlice if we're copying a fully concrete region go orig@(CopySlice srcOff@(Lit n) dstOff size@(Lit sz) -- It doesn't matter what wOffs we write to, because only the first diff --git a/src/EVM/SMT.hs b/src/EVM/SMT.hs index 22f2ed377..e635d28fb 100644 --- a/src/EVM/SMT.hs +++ b/src/EVM/SMT.hs @@ -17,7 +17,7 @@ import Data.List qualified as List import Data.List.NonEmpty (NonEmpty((:|))) import Data.List.NonEmpty qualified as NonEmpty import Data.String.Here -import Data.Maybe (fromJust, fromMaybe) +import Data.Maybe (fromJust, fromMaybe, isJust) import Data.Map.Strict (Map) import Data.Map.Strict qualified as Map import Data.Set (Set) @@ -210,7 +210,13 @@ assertProps :: Config -> [Prop] -> SMT2 assertProps conf ps = assertPropsNoSimp conf (decompose . Expr.simplifyProps $ ps) where decompose :: [Prop] -> [Prop] - decompose props = fromMaybe props (mapM (mapPropM Expr.decomposeStorage) props) + decompose props = if conf.decomposeStorage && (isJust $ sequence_ safeList1) && (isJust $ sequence_ safeList2) + then fromMaybe props (mapM (mapPropM Expr.decomposeStorage) props) + else props + where + -- All in these lists must be a `Just ()` or we cannot decompose + safeList1 = map (mapPropM_ Expr.safeToDecompose) props + safeList2 = map Expr.safeToDecomposeProp props -- Note: we need a version that does NOT call simplify, -- because we make use of it to verify the correctness of our simplification diff --git a/src/EVM/Traversals.hs b/src/EVM/Traversals.hs index 66bb1afa3..4e1e055ce 100644 --- a/src/EVM/Traversals.hs +++ b/src/EVM/Traversals.hs @@ -233,9 +233,44 @@ mapProp' f = \case POr a b -> f $ POr (mapProp' f a) (mapProp' f b) PImpl a b -> f $ PImpl (mapProp' f a) (mapProp' f b) + +mapPropM' :: forall m . (Monad m) => (Prop -> m Prop) -> Prop -> m Prop +mapPropM' f = \case + PBool b -> f $ PBool b + PEq a b -> f $ PEq a b + PLT a b -> f $ PLT a b + PGT a b -> f $ PGT a b + PLEq a b -> f $ PLEq a b + PGEq a b -> f $ PGEq a b + PNeg a -> do + x <-mapPropM' f a + f $ PNeg x + PAnd a b -> do + x <- mapPropM' f a + y <- (mapPropM' f b) + f $ PAnd x y + POr a b -> do + x <- mapPropM' f a + y <- (mapPropM' f b) + f $ POr x y + PImpl a b -> do + x <- mapPropM' f a + y <- (mapPropM' f b) + f $ PImpl x y + mapExpr :: (forall a . Expr a -> Expr a) -> Expr b -> Expr b mapExpr f expr = runIdentity (mapExprM (Identity . f) expr) +-- Like mapExprM but allows a function of type `Expr a -> m ()` to be passed +mapExprM_ :: Monad m => (forall a . Expr a -> m ()) -> Expr b -> m () +mapExprM_ f expr = void ret + where + ret = mapExprM (fUpd f) expr + fUpd :: Monad m => (Expr a -> m ()) -> (Expr a-> m (Expr a)) + fUpd action e = do + action e + pure e + mapExprM :: Monad m => (forall a . Expr a -> m (Expr a)) -> Expr b -> m (Expr b) mapExprM f expr = case expr of @@ -560,6 +595,16 @@ mapExprM f expr = case expr of a' <- mapExprM f a f (BufLength a') +-- Like mapPropM but allows a function of type `Expr a -> m ()` to be passed +mapPropM_ :: Monad m => (forall a . Expr a -> m ()) -> Prop -> m () +mapPropM_ f expr = void ret + where + ret = mapPropM (fUpd f) expr + fUpd :: Monad m => (Expr a -> m ()) -> (Expr a-> m (Expr a)) + fUpd action e = do + action e + pure e + mapPropM :: Monad m => (forall a . Expr a -> m (Expr a)) -> Prop -> m Prop mapPropM f = \case PBool b -> pure $ PBool b diff --git a/test/test.hs b/test/test.hs index f7102e7df..068ab7fe9 100644 --- a/test/test.hs +++ b/test/test.hs @@ -79,6 +79,7 @@ testEnv = Env { config = defaultConfig { , abstRefineArith = False , abstRefineMem = False , dumpTrace = False + , decomposeStorage = True } } putStrLnM :: (MonadUnliftIO m) => String -> m () @@ -185,6 +186,51 @@ tests = testGroup "hevm" |] expr <- withSolvers Z3 1 Nothing $ \s -> getExpr s c (Just (Sig "transfer(uint256)" [AbiUIntType 256])) [] (defaultVeriOpts { maxIter = Just 5 }) assertEqualM "Expression is not clean." (badStoresInExpr expr) False + , test "simplify-storage-map-newtest1" $ do + Just c <- solcRuntime "MyContract" + [i| + contract MyContract { + mapping (uint => uint) a; + mapping (uint => uint) b; + function fun(uint v, uint i) public { + require(i < 1000); + require(v < 1000); + b[i+v] = v+1; + a[i] = v; + b[i+1] = v+1; + assert(a[i] == v); + assert(b[i+1] == v+1); + } + } + |] + expr <- withSolvers Z3 1 Nothing $ \s -> getExpr s c (Just (Sig "fun(uint256,uint256)" [AbiUIntType 256, AbiUIntType 256])) [] defaultVeriOpts + assertEqualM "Expression is not clean." (badStoresInExpr expr) False + (_, [(Qed _)]) <- withSolvers Z3 1 Nothing $ \s -> checkAssert s [0x11] c (Just (Sig "fun(uint256,uint256)" [AbiUIntType 256, AbiUIntType 256])) [] defaultVeriOpts + liftIO $ putStrLn "OK" + , test "simplify-storage-map-todo" $ do + Just c <- solcRuntime "MyContract" + [i| + contract MyContract { + mapping (uint => uint) a; + mapping (uint => uint) b; + function fun(uint v, uint i) public { + require(i < 1000); + require(v < 1000); + a[i] = v; + b[i+1] = v+1; + b[i+v] = 55; // note: this can overwrite b[i+1], hence assert below can fail + assert(a[i] == v); + assert(b[i+1] == v+1); + } + } + |] + -- TODO: expression below contains (load idx1 (store idx1 (store idx1 (store idx0)))), and the idx0 + -- is not stripped. This is due to us not doing all we can in this case, see + -- note above readStorage. Decompose remedies this (when it can be decomposed) + -- expr <- withSolvers Z3 1 Nothing $ \s -> getExpr s c (Just (Sig "fun(uint256,uint256)" [AbiUIntType 256, AbiUIntType 256])) [] defaultVeriOpts + -- putStrLnM $ T.unpack $ formatExpr expr + (_, [Cex _]) <- withSolvers Z3 1 Nothing $ \s -> checkAssert s defaultPanicCodes c (Just (Sig "fun(uint256,uint256)" [AbiUIntType 256, AbiUIntType 256])) [] defaultVeriOpts + liftIO $ putStrLn "OK" , test "simplify-storage-array-loop-struct" $ do Just c <- solcRuntime "MyContract" [i| @@ -219,6 +265,7 @@ tests = testGroup "hevm" } |] expr <- withSolvers Z3 1 Nothing $ \s -> getExpr s c (Just (Sig "prove_mapping_access(address,address)" [AbiAddressType, AbiAddressType])) [] defaultVeriOpts + putStrLnM $ T.unpack $ formatExpr expr let simpExpr = mapExprM Expr.decomposeStorage expr -- putStrLnM $ T.unpack $ formatExpr (fromJust simpExpr) assertEqualM "Decompose did not succeed." (isJust simpExpr) True @@ -238,10 +285,7 @@ tests = testGroup "hevm" let simpExpr = mapExprM Expr.decomposeStorage expr -- putStrLnM $ T.unpack $ formatExpr (fromJust simpExpr) assertEqualM "Decompose did not succeed." (isJust simpExpr) True - -- NOTE: we can't do the rewrite below, because x could be very large, - -- which would allow us to overwrite *anything in the storage*, and that throws - -- everything off. - , expectFail $ test "decompose-3" $ do + , test "decompose-3" $ do Just c <- solcRuntime "MyContract" [i| contract MyContract { @@ -256,7 +300,6 @@ tests = testGroup "hevm" |] expr <- withSolvers Z3 1 Nothing $ \s -> getExpr s c (Just (Sig "prove_array(uint256,uint256,uint256,uint256)" [AbiUIntType 256, AbiUIntType 256, AbiUIntType 256, AbiUIntType 256])) [] defaultVeriOpts let simpExpr = mapExprM Expr.decomposeStorage expr - -- putStrLnM $ T.unpack $ formatExpr (fromJust simpExpr) assertEqualM "Decompose did not succeed." (isJust simpExpr) True , test "decompose-4-mixed" $ do Just c <- solcRuntime "MyContract" From 0766a6546a480164bbf131c12b15c586eb2edced Mon Sep 17 00:00:00 2001 From: Mate Soos Date: Mon, 11 Mar 2024 14:46:51 +0100 Subject: [PATCH 2/7] Fixing up use of sequence to `all` Thanks to @zoep for this --- src/EVM/SMT.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/EVM/SMT.hs b/src/EVM/SMT.hs index e635d28fb..09ff9944a 100644 --- a/src/EVM/SMT.hs +++ b/src/EVM/SMT.hs @@ -210,13 +210,13 @@ assertProps :: Config -> [Prop] -> SMT2 assertProps conf ps = assertPropsNoSimp conf (decompose . Expr.simplifyProps $ ps) where decompose :: [Prop] -> [Prop] - decompose props = if conf.decomposeStorage && (isJust $ sequence_ safeList1) && (isJust $ sequence_ safeList2) + decompose props = if conf.decomposeStorage && safeExprs && safeProps then fromMaybe props (mapM (mapPropM Expr.decomposeStorage) props) else props where -- All in these lists must be a `Just ()` or we cannot decompose - safeList1 = map (mapPropM_ Expr.safeToDecompose) props - safeList2 = map Expr.safeToDecomposeProp props + safeExprs = all (isJust . mapPropM_ Expr.safeToDecompose) props + safeProps = all (isJust . Expr.safeToDecomposeProp) props -- Note: we need a version that does NOT call simplify, -- because we make use of it to verify the correctness of our simplification From 494db132e1a5192f0117e23c4dfcef04fdc379f9 Mon Sep 17 00:00:00 2001 From: "Mate Soos @ Ethereum Foundation" <99662964+msooseth@users.noreply.github.com> Date: Mon, 11 Mar 2024 14:55:04 +0100 Subject: [PATCH 3/7] Update src/EVM/Traversals.hs Co-authored-by: Zoe Paraskevopoulou --- src/EVM/Traversals.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/EVM/Traversals.hs b/src/EVM/Traversals.hs index 4e1e055ce..64876ed48 100644 --- a/src/EVM/Traversals.hs +++ b/src/EVM/Traversals.hs @@ -243,7 +243,7 @@ mapPropM' f = \case PLEq a b -> f $ PLEq a b PGEq a b -> f $ PGEq a b PNeg a -> do - x <-mapPropM' f a + x <- mapPropM' f a f $ PNeg x PAnd a b -> do x <- mapPropM' f a From dc0000163fabe2f128f018ff87ba3f93aefce3d5 Mon Sep 17 00:00:00 2001 From: Mate Soos Date: Mon, 11 Mar 2024 14:56:06 +0100 Subject: [PATCH 4/7] Fixing use of parenthesis Thanks to @zoep for spotting this --- src/EVM/Traversals.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/EVM/Traversals.hs b/src/EVM/Traversals.hs index 64876ed48..1acc8c31e 100644 --- a/src/EVM/Traversals.hs +++ b/src/EVM/Traversals.hs @@ -247,15 +247,15 @@ mapPropM' f = \case f $ PNeg x PAnd a b -> do x <- mapPropM' f a - y <- (mapPropM' f b) + y <- mapPropM' f b f $ PAnd x y POr a b -> do x <- mapPropM' f a - y <- (mapPropM' f b) + y <- mapPropM' f b f $ POr x y PImpl a b -> do x <- mapPropM' f a - y <- (mapPropM' f b) + y <- mapPropM' f b f $ PImpl x y mapExpr :: (forall a . Expr a -> Expr a) -> Expr b -> Expr b From 24f531af4d4f2b433141ed90041f46b5b546a2e5 Mon Sep 17 00:00:00 2001 From: "Mate Soos @ Ethereum Foundation" <99662964+msooseth@users.noreply.github.com> Date: Mon, 11 Mar 2024 14:56:53 +0100 Subject: [PATCH 5/7] Update src/EVM/Traversals.hs Co-authored-by: Zoe Paraskevopoulou --- src/EVM/Traversals.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/EVM/Traversals.hs b/src/EVM/Traversals.hs index 1acc8c31e..3e0d55729 100644 --- a/src/EVM/Traversals.hs +++ b/src/EVM/Traversals.hs @@ -266,7 +266,7 @@ mapExprM_ :: Monad m => (forall a . Expr a -> m ()) -> Expr b -> m () mapExprM_ f expr = void ret where ret = mapExprM (fUpd f) expr - fUpd :: Monad m => (Expr a -> m ()) -> (Expr a-> m (Expr a)) + fUpd :: Monad m => (Expr a -> m ()) -> (Expr a -> m (Expr a)) fUpd action e = do action e pure e From 3f388df006af4b3cbddb8954d5117b6c1b6320bb Mon Sep 17 00:00:00 2001 From: Mate Soos Date: Tue, 12 Mar 2024 17:29:38 +0100 Subject: [PATCH 6/7] Fixing up as per @zoep's request --- src/EVM/Expr.hs | 34 +++++++++++++++++----------------- test/test.hs | 2 +- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/EVM/Expr.hs b/src/EVM/Expr.hs index 1e1ea30e6..62d981c62 100644 --- a/src/EVM/Expr.hs +++ b/src/EVM/Expr.hs @@ -634,12 +634,12 @@ readStorage w st = go (simplify w) st (MappingSlot idA keyA, MappingSlot idB keyB) | BS.length idB == 64 && BS.length idA == 64 && surelyNotEq keyA keyB -> go slot prev -- special case of array + map -> skip write - (ArraySlotWithOffset idA _, Keccak k) | bufLength k == Lit 64 && BS.length idA == 32 -> go slot prev + (ArraySlotWithOffs idA _, Keccak k) | bufLength k == Lit 64 && BS.length idA == 32 -> go slot prev (ArraySlotZero idA, Keccak k) | bufLength k == Lit 64 && BS.length idA == 32 -> go slot prev -- special case of map + array -> skip write - (Keccak k, ArraySlotWithOffset idA _) | bufLength k == Lit 64 && BS.length idA == 32 -> go slot prev - (ArraySlotWithOffset idA _, Keccak k) | bufLength k == Lit 64 && BS.length idA == 32 -> go slot prev + (Keccak k, ArraySlotWithOffs idA _) | bufLength k == Lit 64 && BS.length idA == 32 -> go slot prev + (ArraySlotWithOffs idA _, Keccak k) | bufLength k == Lit 64 && BS.length idA == 32 -> go slot prev -- Fixed SMALL value will never match Keccak (well, it might, but that's VERY low chance) (Lit a, Keccak _) | a < 256 -> go slot prev @@ -665,17 +665,17 @@ readStorage w st = go (simplify w) st -- case of array + array, but different id's or different concrete offsets -- zero offs vs zero offs - (ArraySlotZero idA, ArraySlotZero idB) | idA /= idB -> go slot prev + (ArraySlotZero idA, ArraySlotZero idB) | BS.length idA == 32, BS.length idB == 32, idA /= idB -> go slot prev -- zero offs vs non-zero offs - (ArraySlotZero idA, ArraySlotWithOffset idB _) | idA /= idB -> go slot prev - (ArraySlotZero _, ArraySlotWithOffset _ (Lit offB)) | offB /= 0 -> go slot prev + (ArraySlotZero idA, ArraySlotWithOffs idB _) | BS.length idA == 32, BS.length idB == 32, idA /= idB -> go slot prev + (ArraySlotZero idA, ArraySlotWithOffs idB (Lit offB)) | BS.length idA == 32, BS.length idB == 32, offB /= 0 -> go slot prev -- non-zero offs vs zero offs - (ArraySlotWithOffset idA _, ArraySlotZero idB) | idA /= idB -> go slot prev - (ArraySlotWithOffset _ (Lit offA), ArraySlotZero _) | offA /= 0 -> go slot prev + (ArraySlotWithOffs idA _, ArraySlotZero idB) | BS.length idA == 32, BS.length idB == 32, idA /= idB -> go slot prev + (ArraySlotWithOffs idA (Lit offA), ArraySlotZero idB) | BS.length idA == 32, BS.length idB == 32, offA /= 0 -> go slot prev -- non-zero offs vs non-zero offs - (ArraySlotWithOffset idA _, ArraySlotWithOffset idB _) | idA /= idB -> go slot prev + (ArraySlotWithOffs idA _, ArraySlotWithOffs idB _) | BS.length idA == 32, BS.length idB == 32, idA /= idB -> go slot prev - (ArraySlotWithOffset _ offA, ArraySlotWithOffset _ offB) | surelyNotEq offA offB -> go slot prev + (ArraySlotWithOffs idA offA, ArraySlotWithOffs idB offB) | BS.length idA == 32, BS.length idB == 32, surelyNotEq offA offB -> go slot prev -- we are unable to determine statically whether or not we can safely move deeper in the write chain, so return an abstract term _ -> Just $ SLoad slot s @@ -698,8 +698,8 @@ pattern MappingSlot idx key = Keccak (WriteWord (Lit 0) key (ConcreteBuf idx)) -- storage slots for arrays are determined by (keccak(bytes32(id)) + offset) -- note that `normArgs` puts the Lit as the 2nd argument to `Add` -pattern ArraySlotWithOffset :: ByteString -> Expr EWord -> Expr EWord -pattern ArraySlotWithOffset id offset = Add (Keccak (ConcreteBuf id)) offset +pattern ArraySlotWithOffs :: ByteString -> Expr EWord -> Expr EWord +pattern ArraySlotWithOffs id offset = Add (Keccak (ConcreteBuf id)) offset -- special pattern to match the 0th element because the `Add` term gets simplified out pattern ArraySlotZero :: ByteString -> Expr EWord @@ -724,7 +724,7 @@ structureArraySlots e = mapExpr go e where go :: Expr a -> Expr a go orig@(Lit key) = case litToArrayPreimage key of - Just (array, offset) -> ArraySlotWithOffset (slotPos array) (Lit offset) + Just (array, offset) -> ArraySlotWithOffs (slotPos array) (Lit offset) _ -> orig go a = a @@ -813,7 +813,7 @@ safeToDecompose inp = if result /= Mixed then Just () else Nothing Lit 32 -> setArray e Lit 64 -> setMap e _ -> setMixed e - go e@(SLoad (ArraySlotWithOffset x _) _) = if BS.length x == 32 then setArray e else setMixed e + go e@(SLoad (ArraySlotWithOffs x _) _) = if BS.length x == 32 then setArray e else setMixed e go e@(SLoad (Lit x) _) | x < 256 = setSmall e go e@(SLoad _ _) = setMixed e go e@(SStore (MappingSlot x _) _ _) = if BS.length x == 64 then setMap e else setMixed e @@ -821,7 +821,7 @@ safeToDecompose inp = if result /= Mixed then Just () else Nothing Lit 32 -> setArray e Lit 64 -> setMap e _ -> setMixed e - go e@(SStore (ArraySlotWithOffset x _) _ _) = if BS.length x == 32 then setArray e else setMixed e + go e@(SStore (ArraySlotWithOffs x _) _ _) = if BS.length x == 32 then setArray e else setMixed e go e@(SStore (Lit x) _ _) | x < 256 = setSmall e go e@(SStore _ _ _) = setMixed e go _ = pure () @@ -856,7 +856,7 @@ safeToDecompose inp = if result /= Mixed then Just () else Nothing pure () -- | Splits storage into logical sub-stores if (1) all SLoad->SStore* chains are one of: --- (1a) Lit < 256, (1b) MappingSlot, (1c) ArraySlotWithOffset, (1d) ArraySlotZero +-- (1a) Lit < 256, (1b) MappingSlot, (1c) ArraySlotWithOffs, (1d) ArraySlotZero -- and (2) there is no mixing of different types (e.g. Map with Array) within -- the same SStore -> SLoad* chain -- @@ -894,7 +894,7 @@ decomposeStorage = go Just (Just key, idx) (MappingSlot idx key) | BS.length idx == 64 -> Just (Just $ idxToWord idx, key) -- arrays - (ArraySlotWithOffset idx offset) | BS.length idx == 32 -> Just (Just $ idxToWord64 idx, offset) + (ArraySlotWithOffs idx offset) | BS.length idx == 32 -> Just (Just $ idxToWord64 idx, offset) (ArraySlotZero idx) | BS.length idx == 32 -> Just (Just $ idxToWord64 idx, Lit 0) _ -> Nothing diff --git a/test/test.hs b/test/test.hs index 068ab7fe9..43eee298a 100644 --- a/test/test.hs +++ b/test/test.hs @@ -4222,7 +4222,7 @@ genStorage sz = liftM3 SStore key val subStore genStorageKey :: Gen (Expr EWord) genStorageKey = frequency -- array slot - [ (4, liftM2 Expr.ArraySlotWithOffset (genByteStringKey 32) (genSmallLit 5)) + [ (4, liftM2 Expr.ArraySlotWithOffs (genByteStringKey 32) (genSmallLit 5)) , (4, fmap Expr.ArraySlotZero (genByteStringKey 32)) -- mapping slot , (8, liftM2 Expr.MappingSlot (genByteStringKey 64) (genSmallLit 5)) From 01ac760c25472bf007635d6d49da084b4c01eb32 Mon Sep 17 00:00:00 2001 From: Mate Soos Date: Tue, 12 Mar 2024 20:00:03 +0100 Subject: [PATCH 7/7] Thanks to @arcz for spotting these --- cli/cli.hs | 8 ++++---- src/EVM/Effects.hs | 20 ++++++++++---------- src/EVM/Expr.hs | 4 ++-- src/EVM/SMT.hs | 2 +- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/cli/cli.hs b/cli/cli.hs index f358298de..7b9679adf 100644 --- a/cli/cli.hs +++ b/cli/cli.hs @@ -97,7 +97,7 @@ data Command w , loopDetectionHeuristic :: w ::: LoopHeuristic "StackBased" "Which heuristic should be used to determine if we are in a loop: StackBased (default) or Naive" , abstractArithmetic :: w ::: Bool "Use abstraction-refinement for complicated arithmetic functions such as MulMod. This runs the solver first with abstraction turned on, and if it returns a potential counterexample, the counterexample is refined to make sure it is a counterexample for the actual (not the abstracted) problem" , abstractMemory :: w ::: Bool "Use abstraction-refinement for Memory. This runs the solver first with abstraction turned on, and if it returns a potential counterexample, the counterexample is refined to make sure it is a counterexample for the actual (not the abstracted) problem" - , nodecompose :: w ::: Bool "Don't decompose storage slots into separate arrays" + , noDecompose :: w ::: Bool "Don't decompose storage slots into separate arrays" } | Equivalence -- prove equivalence between two programs { codeA :: w ::: ByteString "Bytecode of the first program" @@ -117,7 +117,7 @@ data Command w , loopDetectionHeuristic :: w ::: LoopHeuristic "StackBased" "Which heuristic should be used to determine if we are in a loop: StackBased (default) or Naive" , abstractArithmetic :: w ::: Bool "Use abstraction-refinement for complicated arithmetic functions such as MulMod. This runs the solver first with abstraction turned on, and if it returns a potential counterexample, the counterexample is refined to make sure it is a counterexample for the actual (not the abstracted) problem" , abstractMemory :: w ::: Bool "Use abstraction-refinement for Memory. This runs the solver first with abstraction turned on, and if it returns a potential counterexample, the counterexample is refined to make sure it is a counterexample for the actual (not the abstracted) problem" - , nodecompose :: w ::: Bool "Don't decompose storage slots into separate arrays" + , noDecompose :: w ::: Bool "Don't decompose storage slots into separate arrays" } | Exec -- Execute a given program with specified env & calldata { code :: w ::: Maybe ByteString "Program bytecode" @@ -165,7 +165,7 @@ data Command w , loopDetectionHeuristic :: w ::: LoopHeuristic "StackBased" "Which heuristic should be used to determine if we are in a loop: StackBased (default) or Naive" , abstractArithmetic :: w ::: Bool "Use abstraction-refinement for complicated arithmetic functions such as MulMod. This runs the solver first with abstraction turned on, and if it returns a potential counterexample, the counterexample is refined to make sure it is a counterexample for the actual (not the abstracted) problem" , abstractMemory :: w ::: Bool "Use abstraction-refinement for Memory. This runs the solver first with abstraction turned on, and if it returns a potential counterexample, the counterexample is refined to make sure it is a counterexample for the actual (not the abstracted) problem" - , nodecompose :: w ::: Bool "Don't decompose storage slots into separate arrays" + , noDecompose :: w ::: Bool "Don't decompose storage slots into separate arrays" , numCexFuzz :: w ::: Integer "3" "Number of fuzzing tries to do to generate a counterexample (default: 3)" , askSmtIterations :: w ::: Integer "1" "Number of times we may revisit a particular branching point before we consult the smt solver to check reachability (default: 1)" } @@ -210,7 +210,7 @@ main = withUtf8 $ do , abstRefineMem = cmd.abstractMemory , abstRefineArith = cmd.abstractArithmetic , dumpTrace = cmd.trace - , decomposeStorage = Prelude.not cmd.nodecompose + , decomposeStorage = Prelude.not cmd.noDecompose } } case cmd of Version {} ->putStrLn getFullVersion diff --git a/src/EVM/Effects.hs b/src/EVM/Effects.hs index 681ae8f40..c0d3f8778 100644 --- a/src/EVM/Effects.hs +++ b/src/EVM/Effects.hs @@ -47,19 +47,19 @@ class Monad m => ReadConfig m where readConfig :: m Config data Config = Config - { dumpQueries :: Bool - , dumpExprs :: Bool - , dumpEndStates :: Bool - , debug :: Bool - , abstRefineArith :: Bool - , abstRefineMem :: Bool - , dumpTrace :: Bool - , numCexFuzz :: Integer + { dumpQueries :: Bool + , dumpExprs :: Bool + , dumpEndStates :: Bool + , debug :: Bool + , abstRefineArith :: Bool + , abstRefineMem :: Bool + , dumpTrace :: Bool + , numCexFuzz :: Integer -- Used to debug fuzzer in test.hs. It disables the SMT solver -- and uses the fuzzer ONLY to try to find a counterexample. -- Returns Unknown if the Cex cannot be found via fuzzing - , onlyCexFuzz :: Bool - , decomposeStorage:: Bool + , onlyCexFuzz :: Bool + , decomposeStorage :: Bool } deriving (Show, Eq) diff --git a/src/EVM/Expr.hs b/src/EVM/Expr.hs index 62d981c62..7ce07a56f 100644 --- a/src/EVM/Expr.hs +++ b/src/EVM/Expr.hs @@ -786,8 +786,8 @@ data StorageType = SmallSlot | Array | Map | Mixed | UNK -- as a TODO. Currently this only affects equivalence checking as there is no -- EVM bytecode to compare the FULL state, so such comparison could only be -- generated via hevm itself -safeToDecomposeProp :: Prop -> Maybe () -safeToDecomposeProp p = void $ mapPropM' findPEqStore p +safeToDecomposeProp :: Prop -> Bool +safeToDecomposeProp p = isJust $ mapPropM' findPEqStore p where findPEqStore :: Prop -> Maybe Prop findPEqStore = \case diff --git a/src/EVM/SMT.hs b/src/EVM/SMT.hs index 09ff9944a..5b7144fa2 100644 --- a/src/EVM/SMT.hs +++ b/src/EVM/SMT.hs @@ -216,7 +216,7 @@ assertProps conf ps = assertPropsNoSimp conf (decompose . Expr.simplifyProps $ p where -- All in these lists must be a `Just ()` or we cannot decompose safeExprs = all (isJust . mapPropM_ Expr.safeToDecompose) props - safeProps = all (isJust . Expr.safeToDecomposeProp) props + safeProps = all Expr.safeToDecomposeProp props -- Note: we need a version that does NOT call simplify, -- because we make use of it to verify the correctness of our simplification