diff --git a/src/4.1-data_struct.md b/src/4.1-data_struct.md index 8b44d2b..eecba98 100644 --- a/src/4.1-data_struct.md +++ b/src/4.1-data_struct.md @@ -1,5 +1,702 @@ # Chatper 4.1 Key Data Structures of Rust Compiler +## Toy Example + +**Source Rust code** +```rust +fn foo(x:i32) -> i32 { + x + 1 +} + +fn main() { + foo(1); +} +``` + +**HIR** +Obtain the HIR of the source code +``` +cargo rustc -- -Z unpretty=hir-tree +``` +``` +Crate { + owners: [ + Owner( + OwnerInfo { + nodes: OwnerNodes { + node: Some( + ParentedNode { + parent: 4294967040, + node: Crate( + Mod { + spans: ModSpans { + inner_span: src/main.rs:1:1: 7:2 (#0), + inject_use_span: no-location (#0), + }, + item_ids: [ + ItemId { + owner_id: DefId(0:1 ~ dangling_min[e946]::{use#0}), + }, + ItemId { + owner_id: DefId(0:2 ~ dangling_min[e946]::std), + }, + ItemId { + owner_id: DefId(0:3 ~ dangling_min[e946]::foo), + }, + ItemId { + owner_id: DefId(0:4 ~ dangling_min[e946]::main), + }, + ], + }, + ), + }, + ), + parents: [ + (0, Some(4294967040)), + ], + bodies: {}, + opt_hash_including_bodies: Some( + Fingerprint( + 15480548004602838890, + 12691971312735902839, + ), + ), + }, + parenting: { + DefId(0:2 ~ dangling_min[e946]::std): 0, + DefId(0:4 ~ dangling_min[e946]::main): 0, + DefId(0:1 ~ dangling_min[e946]::{use#0}): 0, + DefId(0:3 ~ dangling_min[e946]::foo): 0, + }, + attrs: AttributeMap { + map: {}, + opt_hash: Some( + Fingerprint( + 17025902295854411478, + 11375155654212205663, + ), + ), + }, + trait_map: {}, + }, + ), + Owner( + OwnerInfo { + nodes: OwnerNodes { + node: Some( + ParentedNode { + parent: 4294967040, + node: Item( + Item { + ident: #0, + owner_id: DefId(0:1 ~ dangling_min[e946]::{use#0}), + kind: Use( + Path { + span: no-location (#1), + res: [ + Err, + ], + segments: [ + PathSegment { + ident: std#1, + hir_id: HirId(DefId(0:1 ~ dangling_min[e946]::{use#0}).1), + res: Def( + Mod, + DefId(1:0 ~ std[7da8]), + ), + args: None, + infer_args: false, + }, + PathSegment { + ident: prelude#1, + hir_id: HirId(DefId(0:1 ~ dangling_min[e946]::{use#0}).2), + res: Def( + Mod, + DefId(1:47 ~ std[7da8]::prelude), + ), + args: None, + infer_args: false, + }, + PathSegment { + ident: rust_2021#1, + hir_id: HirId(DefId(0:1 ~ dangling_min[e946]::{use#0}).3), + res: Def( + Mod, + DefId(1:134 ~ std[7da8]::prelude::rust_2021), + ), + args: None, + infer_args: false, + }, + ], + }, + Glob, + ), + span: no-location (#1), + vis_span: no-location (#1), + }, + ), + }, + ), + parents: [ + (0, Some(4294967040)), + (1, Some(0)), + (2, Some(0)), + (3, Some(0)), + ], + bodies: {}, + opt_hash_including_bodies: Some( + Fingerprint( + 15533371358964327490, + 14426402185471474089, + ), + ), + }, + parenting: {}, + attrs: AttributeMap { + map: { + 0: [ + Attribute { + kind: Normal( + NormalAttr { + item: AttrItem { + path: Path { + span: no-location (#1), + segments: [ + PathSegment { + ident: prelude_import#1, + id: NodeId(2), + args: None, + }, + ], + tokens: None, + }, + args: Empty, + tokens: None, + }, + tokens: None, + }, + ), + id: AttrId(1), + style: Outer, + span: no-location (#1), + }, + ], + }, + opt_hash: Some( + Fingerprint( + 15938952493093053050, + 4411459381772721558, + ), + ), + }, + trait_map: {}, + }, + ), + Owner( + OwnerInfo { + nodes: OwnerNodes { + node: Some( + ParentedNode { + parent: 4294967040, + node: Item( + Item { + ident: std#1, + owner_id: DefId(0:2 ~ dangling_min[e946]::std), + kind: ExternCrate( + None, + ), + span: no-location (#1), + vis_span: no-location (#1), + }, + ), + }, + ), + parents: [ + (0, Some(4294967040)), + ], + bodies: {}, + opt_hash_including_bodies: Some( + Fingerprint( + 416651569354920430, + 15780430889442875071, + ), + ), + }, + parenting: {}, + attrs: AttributeMap { + map: { + 0: [ + Attribute { + kind: Normal( + NormalAttr { + item: AttrItem { + path: Path { + span: no-location (#1), + segments: [ + PathSegment { + ident: macro_use#1, + id: NodeId(7), + args: None, + }, + ], + tokens: None, + }, + args: Empty, + tokens: None, + }, + tokens: None, + }, + ), + id: AttrId(0), + style: Outer, + span: no-location (#1), + }, + ], + }, + opt_hash: Some( + Fingerprint( + 12847034408097978073, + 10435605145054749136, + ), + ), + }, + trait_map: {}, + }, + ), + Owner( + OwnerInfo { + nodes: OwnerNodes { + node: Some( + ParentedNode { + parent: 4294967040, + node: Item( + Item { + ident: foo#0, + owner_id: DefId(0:3 ~ dangling_min[e946]::foo), + kind: Fn( + FnSig { + header: FnHeader { + unsafety: Normal, + constness: NotConst, + asyncness: NotAsync, + abi: Rust, + }, + decl: FnDecl { + inputs: [ + Ty { + hir_id: HirId(DefId(0:3 ~ dangling_min[e946]::foo).9), + kind: Path( + Resolved( + None, + Path { + span: src/main.rs:1:10: 1:13 (#0), + res: PrimTy( + Int( + I32, + ), + ), + segments: [ + PathSegment { + ident: i32#0, + hir_id: HirId(DefId(0:3 ~ dangling_min[e946]::foo).10), + res: PrimTy( + Int( + I32, + ), + ), + args: None, + infer_args: false, + }, + ], + }, + ), + ), + span: src/main.rs:1:10: 1:13 (#0), + }, + ], + output: Return( + Ty { + hir_id: HirId(DefId(0:3 ~ dangling_min[e946]::foo).11), + kind: Path( + Resolved( + None, + Path { + span: src/main.rs:1:18: 1:21 (#0), + res: PrimTy( + Int( + I32, + ), + ), + segments: [ + PathSegment { + ident: i32#0, + hir_id: HirId(DefId(0:3 ~ dangling_min[e946]::foo).12), + res: PrimTy( + Int( + I32, + ), + ), + args: None, + infer_args: false, + }, + ], + }, + ), + ), + span: src/main.rs:1:18: 1:21 (#0), + }, + ), + c_variadic: false, + implicit_self: None, + lifetime_elision_allowed: false, + }, + span: src/main.rs:1:1: 1:21 (#0), + }, + Generics { + params: [], + predicates: [], + has_where_clause_predicates: false, + where_clause_span: src/main.rs:1:21: 1:21 (#0), + span: src/main.rs:1:7: 1:7 (#0), + }, + BodyId { + hir_id: HirId(DefId(0:3 ~ dangling_min[e946]::foo).8), + }, + ), + span: src/main.rs:1:1: 3:2 (#0), + vis_span: no-location (#0), + }, + ), + }, + ), + parents: [ + (0, Some(4294967040)), + (1, Some(0)), + (2, Some(1)), + (3, Some(7)), + (4, Some(3)), + (5, Some(4)), + (6, Some(3)), + (7, Some(8)), + (8, Some(0)), + (9, Some(0)), + (10, Some(9)), + (11, Some(0)), + (12, Some(11)), + ], + bodies: { + 8: Body { + params: [ + Param { + hir_id: HirId(DefId(0:3 ~ dangling_min[e946]::foo).1), + pat: Pat { + hir_id: HirId(DefId(0:3 ~ dangling_min[e946]::foo).2), + kind: Binding( + BindingAnnotation( + No, + Not, + ), + HirId(DefId(0:3 ~ dangling_min[e946]::foo).2), + x#0, + None, + ), + span: src/main.rs:1:8: 1:9 (#0), + default_binding_modes: true, + }, + ty_span: src/main.rs:1:10: 1:13 (#0), + span: src/main.rs:1:8: 1:13 (#0), + }, + ], + value: Expr { + hir_id: HirId(DefId(0:3 ~ dangling_min[e946]::foo).8), + kind: Block( + Block { + stmts: [], + expr: Some( + Expr { + hir_id: HirId(DefId(0:3 ~ dangling_min[e946]::foo).3), + kind: Binary( + Spanned { + node: Add, + span: src/main.rs:2:7: 2:8 (#0), + }, + Expr { + hir_id: HirId(DefId(0:3 ~ dangling_min[e946]::foo).4), + kind: Path( + Resolved( + None, + Path { + span: src/main.rs:2:5: 2:6 (#0), + res: Local( + HirId(DefId(0:3 ~ dangling_min[e946]::foo).2), + ), + segments: [ + PathSegment { + ident: x#0, + hir_id: HirId(DefId(0:3 ~ dangling_min[e946]::foo).5), + res: Local( + HirId(DefId(0:3 ~ dangling_min[e946]::foo).2), + ), + args: None, + infer_args: true, + }, + ], + }, + ), + ), + span: src/main.rs:2:5: 2:6 (#0), + }, + Expr { + hir_id: HirId(DefId(0:3 ~ dangling_min[e946]::foo).6), + kind: Lit( + Spanned { + node: Int( + 1, + Unsuffixed, + ), + span: src/main.rs:2:9: 2:10 (#0), + }, + ), + span: src/main.rs:2:9: 2:10 (#0), + }, + ), + span: src/main.rs:2:5: 2:10 (#0), + }, + ), + hir_id: HirId(DefId(0:3 ~ dangling_min[e946]::foo).7), + rules: DefaultBlock, + span: src/main.rs:1:22: 3:2 (#0), + targeted_by_break: false, + }, + None, + ), + span: src/main.rs:1:22: 3:2 (#0), + }, + generator_kind: None, + }, + }, + opt_hash_including_bodies: Some( + Fingerprint( + 16722617718150666664, + 17475955433999683263, + ), + ), + }, + parenting: {}, + attrs: AttributeMap { + map: {}, + opt_hash: Some( + Fingerprint( + 17025902295854411478, + 11375155654212205663, + ), + ), + }, + trait_map: {}, + }, + ), + Owner( + OwnerInfo { + nodes: OwnerNodes { + node: Some( + ParentedNode { + parent: 4294967040, + node: Item( + Item { + ident: main#0, + owner_id: DefId(0:4 ~ dangling_min[e946]::main), + kind: Fn( + FnSig { + header: FnHeader { + unsafety: Normal, + constness: NotConst, + asyncness: NotAsync, + abi: Rust, + }, + decl: FnDecl { + inputs: [], + output: DefaultReturn( + src/main.rs:5:10: 5:10 (#0), + ), + c_variadic: false, + implicit_self: None, + lifetime_elision_allowed: false, + }, + span: src/main.rs:5:1: 5:10 (#0), + }, + Generics { + params: [], + predicates: [], + has_where_clause_predicates: false, + where_clause_span: src/main.rs:5:10: 5:10 (#0), + span: src/main.rs:5:8: 5:8 (#0), + }, + BodyId { + hir_id: HirId(DefId(0:4 ~ dangling_min[e946]::main).7), + }, + ), + span: src/main.rs:5:1: 7:2 (#0), + vis_span: src/main.rs:5:1: 5:1 (#0), + }, + ), + }, + ), + parents: [ + (0, Some(4294967040)), + (1, Some(5)), + (2, Some(1)), + (3, Some(2)), + (4, Some(1)), + (5, Some(6)), + (6, Some(7)), + (7, Some(0)), + ], + bodies: { + 7: Body { + params: [], + value: Expr { + hir_id: HirId(DefId(0:4 ~ dangling_min[e946]::main).7), + kind: Block( + Block { + stmts: [ + Stmt { + hir_id: HirId(DefId(0:4 ~ dangling_min[e946]::main).5), + kind: Semi( + Expr { + hir_id: HirId(DefId(0:4 ~ dangling_min[e946]::main).1), + kind: Call( + Expr { + hir_id: HirId(DefId(0:4 ~ dangling_min[e946]::main).2), + kind: Path( + Resolved( + None, + Path { + span: src/main.rs:6:5: 6:8 (#0), + res: Def( + Fn, + DefId(0:3 ~ dangling_min[e946]::foo), + ), + segments: [ + PathSegment { + ident: foo#0, + hir_id: HirId(DefId(0:4 ~ dangling_min[e946]::main).3), + res: Def( + Fn, + DefId(0:3 ~ dangling_min[e946]::foo), + ), + args: None, + infer_args: true, + }, + ], + }, + ), + ), + span: src/main.rs:6:5: 6:8 (#0), + }, + [ + Expr { + hir_id: HirId(DefId(0:4 ~ dangling_min[e946]::main).4), + kind: Lit( + Spanned { + node: Int( + 1, + Unsuffixed, + ), + span: src/main.rs:6:9: 6:10 (#0), + }, + ), + span: src/main.rs:6:9: 6:10 (#0), + }, + ], + ), + span: src/main.rs:6:5: 6:11 (#0), + }, + ), + span: src/main.rs:6:5: 6:12 (#0), + }, + ], + expr: None, + hir_id: HirId(DefId(0:4 ~ dangling_min[e946]::main).6), + rules: DefaultBlock, + span: src/main.rs:5:11: 7:2 (#0), + targeted_by_break: false, + }, + None, + ), + span: src/main.rs:5:11: 7:2 (#0), + }, + generator_kind: None, + }, + }, + opt_hash_including_bodies: Some( + Fingerprint( + 10444852135074002406, + 10429703125625124906, + ), + ), + }, + parenting: {}, + attrs: AttributeMap { + map: {}, + opt_hash: Some( + Fingerprint( + 17025902295854411478, + 11375155654212205663, + ), + ), + }, + trait_map: {}, + }, + ), + ], + opt_hir_hash: Some( + Fingerprint( + 13530047477517720889, + 11555688454682141703, + ), + ), +} + +``` + + +**MIR** +Obtain the MIR of the source code +``` +cargo rustc -- -Zunpretty=mir +``` + +``` +fn foo(_1: i32) -> i32 { + debug x => _1; + let mut _0: i32; + let mut _2: (i32, bool); + + bb0: { + _2 = CheckedAdd(_1, const 1_i32); + assert(!move (_2.1: bool), "attempt to compute `{} + {}`, which would overflow", _1, const 1_i32) -> [success: bb1, unwind continue]; + } + + bb1: { + _0 = move (_2.0: i32); + return; + } +} + +fn main() -> () { + let mut _0: (); + let _1: i32; + + bb0: { + _1 = foo(const 1_i32) -> [return: bb1, unwind continue]; + } + + bb1: { + return; + } +} +``` + ## Terminalogies - [**TyCtxt**](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/struct.TyCtxt.html) is the central data structure of Rust compilers. We can obtain the hir or mir of a function based on the object. @@ -11,5 +708,3 @@ let mir = optimized_mir(def_id); // def_id is of type DefId - [**Local**](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/mir/struct.Local.html): - [**LocalDecl**](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/mir/struct.LocalDecl.html): -## MIR -