MCCFR training working on small shortdeck!!

krukah · krukah · commit f0f31302d323 · 2024-10-26T17:59:11.000-07:00
diff --git a/src/clustering/encoding.rs b/src/clustering/encoding.rs
@@ -104,11 +104,11 @@ impl Encoder {
     /// wrap the (Game, Bucket) in a Data
     pub fn encode(&self, game: Game, action: Action, past: &Vec<&Edge>) -> (Data, Edge) {
         let edge = Edge::from(action);
-        let chance = self.chance_abstraction(&game);
         let choice = self.action_abstraction(&past, &edge);
+        let chance = self.chance_abstraction(&game);
         let bucket = Bucket::from((choice, chance));
-        let choice = Data::from((game, bucket));
-        (choice, edge)
+        let data = Data::from((game, bucket));
+        (data, edge)
     }
 
     /// i like to think of this as "positional encoding"
@@ -118,25 +118,18 @@ impl Encoder {
     /// the cards we see at a Node are memoryless, but the
     /// Path represents "how we got here"
     ///
-    /// for 2-players, depth works okay but there are definitely tradeoffs:
-    /// - the same Card info at the same depth doesn't necessarily
-    /// allow for the same available actions. which is actually a breaking problem
-    /// since we assume all Nodes in the same Infoset have the same avaialble actions...
-    ///
     /// we need to assert that: any Nodes in the same Infoset have the
-    /// same available actions. in addition to depth, we should consider
-    /// whether we can Check, Raise, Fold, Call
+    /// same available actions. in addition to depth, we consider
+    /// whether or not we are in a Checkable or Foldable state.
     fn action_abstraction(&self, past: &Vec<&Edge>, edge: &Edge) -> Path {
-        match edge {
-            Edge::Random => Path::from(0),
-            Edge::Choice(_) => Path::from(
-                past.iter()
-                    .rev()
-                    .take_while(|edge| matches!(edge, Edge::Choice(_)))
-                    .count() as u64
-                    + 1,
-            ),
-        }
+        let mut round = past
+            .iter()
+            .chain(std::iter::once(&edge))
+            .rev()
+            .take_while(|e| e.is_choice());
+        let depth = round.clone().count();
+        let raise = round.any(|e| e.is_raise());
+        Path::from((depth, raise))
     }
 
     /// the compressed card information for an observation
@@ -191,16 +184,9 @@ impl From<Street> for Encoder {
 impl Encoder {
     /// indicates whether the abstraction table is already on disk
     pub fn done() -> bool {
-        [
-            "flop.abstraction.pgcopy",
-            "turn.abstraction.pgcopy",
-            "preflop.metric.pgcopy",
-            "flop.metric.pgcopy",
-            "turn.metric.pgcopy",
-            "river.metric.pgcopy",
-        ]
-        .iter()
-        .any(|file| std::path::Path::new(file).exists())
+        ["flop.abstraction.pgcopy", "turn.abstraction.pgcopy"]
+            .iter()
+            .any(|file| std::path::Path::new(file).exists())
     }
 
     /// pulls the entire pre-computed abstraction table
diff --git a/src/main.rs b/src/main.rs
@@ -6,7 +6,7 @@ fn main() {
     // The k-means earth mover's distance hand-clustering algorithm.
     clustering::encoding::Encoder::learn();
     // Monte Carlo counter-factual regret minimization. External sampling, alternating regret updates, linear weighting schedules.
-    mccfr::training::Blueprint::load().train();
+    mccfr::minimizer::Blueprint::load().train();
     // After 100s of CPU-days of training in the arena, the CPU is ready to see you.
     play::game::Game::play();
 }
diff --git a/src/mccfr/edge.rs b/src/mccfr/edge.rs
@@ -7,6 +7,19 @@ pub enum Edge {
     Random,
 }
 
+impl Edge {
+    pub fn is_raise(&self) -> bool {
+        if let Edge::Choice(action) = self {
+            matches!(action, Action::Raise(_) | Action::Shove(_))
+        } else {
+            false
+        }
+    }
+    pub fn is_choice(&self) -> bool {
+        matches!(self, Edge::Choice(_))
+    }
+}
+
 impl From<Action> for Edge {
     fn from(action: Action) -> Self {
         match action {
@@ -33,6 +46,7 @@ impl From<Edge> for u32 {
         }
     }
 }
+
 impl std::fmt::Display for Edge {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
diff --git a/src/mccfr/minimizer.rs b/src/mccfr/minimizer.rs
@@ -88,7 +88,6 @@ impl Blueprint {
     /// Build the Tree iteratively starting from the root node.
     /// This function uses a stack to simulate recursion and builds the tree in a depth-first manner.
     fn sample(&mut self) -> Sample {
-        log::info!("sampling tree");
         let mut tree = Tree::empty();
         let mut partition = Partition::new();
         let ref mut queue = Vec::new();
@@ -104,47 +103,39 @@ impl Blueprint {
             let head = tree.at(tail);
             self.visit(&head, queue, infos);
         }
-        println!("\n{}\n", self.profile);
-        println!("\n{}\n", tree);
         Sample(tree, partition)
     }
 
     /// Process a node: witness it for profile and partition if necessary,
     /// and add its children to the exploration queue.
+    /// under external sampling rules:
+    /// - explore ALL my options
+    /// - explore 1 of Chance
+    /// - explore 1 of Villain
     fn visit(&mut self, head: &Node, queue: &mut Vec<Branch>, infosets: &mut Partition) {
-        let explored = self.explore(head);
-        if head.player() == self.profile.walker() {
-            infosets.witness(head);
-        }
-        if head.player() != Player::chance() {
-            self.profile.witness(head, &explored);
-        }
-        for (tail, from) in explored {
-            queue.push((tail, from, head.index()));
-        }
-    }
-
-    /// generate children for a given node
-    /// under external sampling rules.
-    /// explore all MY options
-    /// but only 1 of Chance, 1 of Villain
-    fn explore(&self, node: &Node) -> Vec<(Data, Edge)> {
-        let children = self.children(node);
-        let walker = self.profile.walker();
         let chance = Player::chance();
-        let player = node.player();
-        if children.is_empty() {
+        let player = head.player();
+        let walker = self.profile.walker();
+        let children = self.children(head);
+        let sample = if children.is_empty() {
             vec![]
         } else if player == chance {
-            self.take_any(children, node)
-        } else if player == walker {
-            self.take_all(children, node)
+            self.sample_any(children, head)
         } else if player != walker {
-            self.take_one(children, node)
+            self.profile.witness(head, &children);
+            self.sample_one(children, head)
+        } else if player == walker {
+            infosets.witness(head);
+            self.profile.witness(head, &children);
+            self.sample_all(children, head)
         } else {
             panic!("at the disco")
+        };
+        for (tail, from) in sample {
+            queue.push((tail, from, head.index()));
         }
     }
+
     fn children(&self, node: &Node) -> Vec<(Data, Edge)> {
         const MAX_N_RAISE: usize = 2;
         let ref past = node.history();
@@ -173,14 +164,14 @@ impl Blueprint {
     // external sampling
 
     /// full exploration of my decision space Edges
-    fn take_all(&self, choices: Vec<(Data, Edge)>, _: &Node) -> Vec<(Data, Edge)> {
+    fn sample_all(&self, choices: Vec<(Data, Edge)>, _: &Node) -> Vec<(Data, Edge)> {
         assert!(choices
             .iter()
             .all(|(_, edge)| matches!(edge, Edge::Choice(_))));
         choices
     }
     /// uniform sampling of chance Edge
-    fn take_any(&self, mut choices: Vec<(Data, Edge)>, head: &Node) -> Vec<(Data, Edge)> {
+    fn sample_any(&self, mut choices: Vec<(Data, Edge)>, head: &Node) -> Vec<(Data, Edge)> {
         let ref mut rng = self.profile.rng(head);
         let n = choices.len();
         let choice = rng.gen_range(0..n);
@@ -189,7 +180,7 @@ impl Blueprint {
         vec![chosen]
     }
     /// Profile-weighted sampling of opponent Edge
-    fn take_one(&self, mut choices: Vec<(Data, Edge)>, head: &Node) -> Vec<(Data, Edge)> {
+    fn sample_one(&self, mut choices: Vec<(Data, Edge)>, head: &Node) -> Vec<(Data, Edge)> {
         let ref mut rng = self.profile.rng(head);
         let policy = choices
             .iter()
@@ -207,7 +198,7 @@ impl Blueprint {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::mccfr::training::Blueprint;
+    use crate::mccfr::minimizer::Blueprint;
     use petgraph::graph::NodeIndex;
 
     #[test]
diff --git a/src/mccfr/mod.rs b/src/mccfr/mod.rs
@@ -2,11 +2,11 @@ pub mod bucket;
 pub mod data;
 pub mod edge;
 pub mod info;
+pub mod minimizer;
 pub mod node;
 pub mod partition;
 pub mod path;
 pub mod player;
 pub mod profile;
 pub mod strategy;
-pub mod training;
 pub mod tree;
diff --git a/src/mccfr/node.rs b/src/mccfr/node.rs
@@ -110,12 +110,6 @@ impl<'tree> Node<'tree> {
                 .collect()
         }
     }
-    /// SAFETY:
-    /// we have logical assurance that lifetimes work out effectively:
-    /// 'info: 'node: 'tree
-    /// Info is created from a Node
-    /// Node is created from a Tree
-    /// Tree owns its Graph
     pub fn graph(&self) -> &'tree DiGraph<Data, Edge> {
         self.graph
     }
diff --git a/src/mccfr/path.rs b/src/mccfr/path.rs
@@ -1,6 +1,12 @@
 #[derive(Debug, Clone, Copy, Eq, Hash, PartialEq, Ord, PartialOrd)]
 pub struct Path(u64);
 
+impl From<(usize, bool)> for Path {
+    fn from((depth, raise): (usize, bool)) -> Self {
+        Path((depth as u64) << 1 | raise as u64)
+    }
+}
+
 impl From<u64> for Path {
     fn from(value: u64) -> Self {
         Path(value)
@@ -15,6 +21,6 @@ impl From<Path> for u64 {
 
 impl std::fmt::Display for Path {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "d{:02}", self.0)
+        write!(f, "H{:02}", self.0)
     }
 }
diff --git a/src/mccfr/profile.rs b/src/mccfr/profile.rs
@@ -13,6 +13,7 @@ use rand::rngs::SmallRng;
 use rand::SeedableRng;
 use std::collections::hash_map::DefaultHasher;
 use std::collections::BTreeMap;
+use std::collections::HashSet;
 use std::hash::Hash;
 use std::hash::Hasher;
 
@@ -42,30 +43,42 @@ impl Profile {
     /// increment Epoch counter
     /// and return current count
     pub fn next(&mut self) -> usize {
+        log::info!("{:>10}", self.iterations);
         self.iterations += 1;
         self.iterations
     }
     /// idempotent initialization of Profile
     /// at a given Node.
     ///
-    /// if we've already visited this Infoset,
-    /// then we can skip over it.
+    /// if we've already visited this Bucket,
+    /// then we just want to make sure that
+    /// the available outgoing Edges are consistent.
     ///
     /// otherwise, we initialize the strategy
     /// at this Node with uniform distribution
-    /// over its spawned support:
-    /// Data -> Vec<(Data, Edge)>.
+    /// over its outgoing Edges.
+    ///
+    /// @assertion: a known Bucket's outgoing Edges must match the given children.
     pub fn witness(&mut self, node: &Node, children: &Vec<(Data, Edge)>) {
-        let n = children.len();
-        let uniform = 1. / n as Probability;
         let bucket = node.bucket();
-        for (_, edge) in children {
-            self.strategies
-                .entry(bucket.clone())
-                .or_insert_with(BTreeMap::default)
-                .entry(edge.clone())
-                .or_insert_with(Strategy::default)
-                .policy = uniform;
+        match self.strategies.get(bucket) {
+            Some(strategy) => {
+                let expected = children.iter().map(|(_, e)| e).collect::<HashSet<_>>();
+                let observed = strategy.keys().collect::<HashSet<_>>();
+                assert!(observed == expected);
+            }
+            None => {
+                let n = children.len();
+                let uniform = 1. / n as Probability;
+                for (_, edge) in children {
+                    self.strategies
+                        .entry(bucket.clone())
+                        .or_insert_with(BTreeMap::default)
+                        .entry(edge.clone())
+                        .or_insert_with(Strategy::default)
+                        .policy = uniform;
+                }
+            }
         }
     }
 
@@ -89,10 +102,10 @@ impl Profile {
         let epochs = self.epochs();
         for (action, policy) in vector {
             let strategy = self.strategy(bucket, action);
-            strategy.policy = *policy;
             strategy.advice *= epochs as Probability;
             strategy.advice += policy;
             strategy.advice /= epochs as Probability + 1.;
+            strategy.policy = *policy;
         }
     }
 
@@ -142,7 +155,7 @@ impl Profile {
     /// division by 2 is used to allow each player
     /// one iteration to walk the Tree in a single Epoch
     pub fn epochs(&self) -> usize {
-        self.iterations
+        self.iterations / 2
     }
     /// which player is traversing the Tree on this Epoch?
     /// used extensively in assertions and utility calculations
@@ -158,7 +171,7 @@ impl Profile {
     /// emulate the "opponent" strategy. the opponent is just whoever is not
     /// the traverser
     pub fn policy(&self, node: &Node, edge: &Edge) -> Probability {
-        assert!(node.player() != Player::chance().to_owned());
+        assert!(node.player() != Player::chance());
         assert!(node.player() != self.walker());
         self.strategies
             .get(node.bucket())
@@ -413,7 +426,7 @@ impl Profile {
     }
 }
 impl std::fmt::Display for Profile {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
         write!(
             f,
             "{}",
diff --git a/src/mccfr/tree.rs b/src/mccfr/tree.rs
@@ -49,7 +49,7 @@ impl Tree {
                 .0
                 .edge_weight(self.0.find_edge(index, *child).unwrap())
                 .unwrap();
-            writeln!(f, "{}{}──{} ->   {}", prefix, stem, edge, head)?;
+            writeln!(f, "{}{}──{} → {}", prefix, stem, edge, head)?;
             self.draw(f, *child, &format!("{}{}", prefix, gaps))?;
         }
         Ok(())

Original file line number	Diff line number	Diff line change
`@@ -6,7 +6,7 @@ fn main() {`
`6`	`6`	`// The k-means earth mover's distance hand-clustering algorithm.`
`7`	`7`	`clustering::encoding::Encoder::learn();`
`8`	`8`	`// Monte Carlo counter-factual regret minimization. External sampling, alternating regret updates, linear weighting schedules.`
`9`		`- mccfr::training::Blueprint::load().train();`
	`9`	`+ mccfr::minimizer::Blueprint::load().train();`
`10`	`10`	`// After 100s of CPU-days of training in the arena, the CPU is ready to see you.`
`11`	`11`	`play::game::Game::play();`
`12`	`12`	`}`
Original file line number	Diff line number	Diff line change
`@@ -110,12 +110,6 @@ impl<'tree> Node<'tree> {`
`110`	`110`	`.collect()`
`111`	`111`	`}`
`112`	`112`	`}`
`113`		`- /// SAFETY:`
`114`		`- /// we have logical assurance that lifetimes work out effectively:`
`115`		`- /// 'info: 'node: 'tree`
`116`		`- /// Info is created from a Node`
`117`		`- /// Node is created from a Tree`
`118`		`- /// Tree owns its Graph`
`119`	`113`	`pub fn graph(&self) -> &'tree DiGraph<Data, Edge> {`
`120`	`114`	`self.graph`
`121`	`115`	`}`
Original file line number	Diff line number	Diff line change
`@@ -49,7 +49,7 @@ impl Tree {`
`49`	`49`	`.0`
`50`	`50`	`.edge_weight(self.0.find_edge(index, *child).unwrap())`
`51`	`51`	`.unwrap();`
`52`		`- writeln!(f, "{}{}──{} -> {}", prefix, stem, edge, head)?;`
	`52`	`+ writeln!(f, "{}{}──{} → {}", prefix, stem, edge, head)?;`
`53`	`53`	`self.draw(f, *child, &format!("{}{}", prefix, gaps))?;`
`54`	`54`	`}`
`55`	`55`	`Ok(())`