From a9b9771b92371f0cfe0d13dd0aec39c1f159ed42 Mon Sep 17 00:00:00 2001 From: Walter Behmann Date: Wed, 20 Aug 2025 00:32:49 +0200 Subject: [PATCH 01/18] Admin/XMover: Add XMover - CrateDB Shard Analyzer and Movement Tool --- CHANGES.md | 2 + cratedb_toolkit/admin/__init__.py | 0 cratedb_toolkit/admin/xmover/__init__.py | 10 + cratedb_toolkit/admin/xmover/analyzer.py | 1005 +++++++++++++++ cratedb_toolkit/admin/xmover/attic.py | 118 ++ cratedb_toolkit/admin/xmover/cli.py | 1431 ++++++++++++++++++++++ cratedb_toolkit/admin/xmover/database.py | 584 +++++++++ cratedb_toolkit/cli.py | 2 + doc/admin/index.md | 7 + doc/admin/xmover/handbook.md | 487 ++++++++ doc/admin/xmover/index.md | 29 + doc/admin/xmover/queries.md | 212 ++++ doc/admin/xmover/troubleshooting.md | 424 +++++++ doc/index.md | 1 + pyproject.toml | 2 + 15 files changed, 4314 insertions(+) create mode 100644 cratedb_toolkit/admin/__init__.py create mode 100644 cratedb_toolkit/admin/xmover/__init__.py create mode 100644 cratedb_toolkit/admin/xmover/analyzer.py create mode 100644 cratedb_toolkit/admin/xmover/attic.py create mode 100644 cratedb_toolkit/admin/xmover/cli.py create mode 100644 cratedb_toolkit/admin/xmover/database.py create mode 100644 doc/admin/index.md create mode 100644 doc/admin/xmover/handbook.md create mode 100644 doc/admin/xmover/index.md create mode 100644 doc/admin/xmover/queries.md create mode 100644 doc/admin/xmover/troubleshooting.md diff --git a/CHANGES.md b/CHANGES.md index d47196a2..7ef70478 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,8 @@ # Changelog ## Unreleased +- Admin: Added XMover - CrateDB Shard Analyzer and Movement Tool. + Thanks, @WalBeh. ## 2025/08/19 v0.0.41 - I/O: Updated to `influxio-0.6.0`. Thanks, @ZillKhan. 
diff --git a/cratedb_toolkit/admin/__init__.py b/cratedb_toolkit/admin/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cratedb_toolkit/admin/xmover/__init__.py b/cratedb_toolkit/admin/xmover/__init__.py new file mode 100644 index 00000000..b941f602 --- /dev/null +++ b/cratedb_toolkit/admin/xmover/__init__.py @@ -0,0 +1,10 @@ +""" +XMover - CrateDB Shard Analyzer and Movement Tool + +A tool for analyzing CrateDB shard distribution across nodes and availability zones, +and generating safe SQL commands for shard rebalancing. +""" + +__version__ = "0.1.0" +__author__ = "CrateDB Tools" +__description__ = "CrateDB shard analyzer and movement tool" \ No newline at end of file diff --git a/cratedb_toolkit/admin/xmover/analyzer.py b/cratedb_toolkit/admin/xmover/analyzer.py new file mode 100644 index 00000000..75af9090 --- /dev/null +++ b/cratedb_toolkit/admin/xmover/analyzer.py @@ -0,0 +1,1005 @@ +""" +Shard analysis and rebalancing logic for CrateDB +""" + +from typing import Dict, List, Optional, Set, Any, Tuple +from dataclasses import dataclass +from collections import defaultdict +import math + +from .database import CrateDBClient, NodeInfo, ShardInfo, RecoveryInfo + + +@dataclass +class MoveRecommendation: + """Recommendation for moving a shard""" + table_name: str + schema_name: str + shard_id: int + from_node: str + to_node: str + from_zone: str + to_zone: str + shard_type: str + size_gb: float + reason: str + + def to_sql(self) -> str: + """Generate the SQL command for this move""" + return (f'ALTER TABLE "{self.schema_name}"."{self.table_name}" ' + f"REROUTE MOVE SHARD {self.shard_id} " + f"FROM '{self.from_node}' TO '{self.to_node}';") + + @property + def safety_score(self) -> float: + """Calculate a safety score for this move (0-1, higher is safer)""" + score = 1.0 + + # Penalize if moving to same zone (not ideal for zone distribution) + if self.from_zone == self.to_zone: + score -= 0.3 + + # Bonus for zone balancing moves + if 
"rebalancing" in self.reason.lower(): + score += 0.2 + + # Ensure score stays in valid range + return max(0.0, min(1.0, score)) + + +@dataclass +class DistributionStats: + """Statistics about shard distribution""" + total_shards: int + total_size_gb: float + zones: Dict[str, int] + nodes: Dict[str, int] + zone_balance_score: float # 0-100, higher is better + node_balance_score: float # 0-100, higher is better + + +class ShardAnalyzer: + """Analyzer for CrateDB shard distribution and rebalancing""" + + def __init__(self, client: CrateDBClient): + self.client = client + self.nodes: List[NodeInfo] = [] + self.shards: List[ShardInfo] = [] + + # Initialize session-based caches for performance + self._zone_conflict_cache = {} + self._node_lookup_cache = {} + self._target_nodes_cache = {} + self._cache_hits = 0 + self._cache_misses = 0 + + self._refresh_data() + + def _refresh_data(self): + """Refresh node and shard data from the database""" + self.nodes = self.client.get_nodes_info() + # For analysis, get all shards regardless of state + self.shards = self.client.get_shards_info(for_analysis=True) + + def analyze_distribution(self, table_name: Optional[str] = None) -> DistributionStats: + """Analyze the current shard distribution""" + # Filter shards by table if specified + shards = self.shards + if table_name: + shards = [s for s in shards if s.table_name == table_name] + + if not shards: + return DistributionStats(0, 0.0, {}, {}, 100.0, 100.0) + + total_shards = len(shards) + total_size_gb = sum(s.size_gb for s in shards) + + # Count by zone and node + zone_counts = defaultdict(int) + node_counts = defaultdict(int) + + for shard in shards: + zone_counts[shard.zone] += 1 + node_counts[shard.node_name] += 1 + + # Calculate balance scores + zone_balance_score = self._calculate_balance_score(list(zone_counts.values())) + node_balance_score = self._calculate_balance_score(list(node_counts.values())) + + return DistributionStats( + total_shards=total_shards, + 
total_size_gb=total_size_gb, + zones=dict(zone_counts), + nodes=dict(node_counts), + zone_balance_score=zone_balance_score, + node_balance_score=node_balance_score + ) + + def _calculate_balance_score(self, counts: List[int]) -> float: + """Calculate a balance score (0-100) for a distribution""" + if not counts or len(counts) <= 1: + return 100.0 + + mean_count = sum(counts) / len(counts) + if mean_count == 0: + return 100.0 + + # Calculate coefficient of variation + variance = sum((count - mean_count) ** 2 for count in counts) / len(counts) + std_dev = math.sqrt(variance) + cv = std_dev / mean_count + + # Convert to score (lower CV = higher score) + # CV of 0 = 100%, CV of 1 = ~37%, CV of 2 = ~14% + score = max(0, 100 * math.exp(-cv)) + return round(score, 1) + + def find_moveable_shards(self, + min_size_gb: float = 40.0, + max_size_gb: float = 60.0, + table_name: Optional[str] = None) -> List[ShardInfo]: + """Find shards that are candidates for moving based on size + + Only returns healthy shards that are safe to move. + Prioritizes shards from nodes with less available space. 
+ """ + # Get only healthy shards (STARTED + 100% recovered) for safe operations + healthy_shards = self.client.get_shards_info( + table_name=table_name, + min_size_gb=min_size_gb, + max_size_gb=max_size_gb, + for_analysis=False # Only operational shards + ) + + + # Create a mapping of node names to available space + node_space_map = {node.name: node.available_space_gb for node in self.nodes} + + # Sort by node available space (ascending, so low space nodes first), then by shard size + healthy_shards.sort(key=lambda s: (node_space_map.get(s.node_name, float('inf')), s.size_gb)) + return healthy_shards + + def check_zone_balance(self, + table_name: Optional[str] = None, + tolerance_percent: float = 10.0) -> Dict[str, Dict[str, int]]: + """Check if zones are balanced within tolerance""" + # Filter shards by table if specified + shards = self.shards + if table_name: + shards = [s for s in shards if s.table_name == table_name] + + # Count shards by zone and type + zone_stats = defaultdict(lambda: {'PRIMARY': 0, 'REPLICA': 0, 'TOTAL': 0}) + + for shard in shards: + shard_type = shard.shard_type + zone_stats[shard.zone][shard_type] += 1 + zone_stats[shard.zone]['TOTAL'] += 1 + + return dict(zone_stats) + + def find_nodes_with_capacity(self, + required_space_gb: float, + exclude_zones: Optional[Set[str]] = None, + exclude_nodes: Optional[Set[str]] = None, + min_free_space_gb: float = 100.0, + max_disk_usage_percent: float = 85.0) -> List[NodeInfo]: + """Find nodes that have capacity for additional shards + + Args: + required_space_gb: Minimum space needed for the shard + exclude_zones: Zones to exclude from consideration + exclude_nodes: Specific nodes to exclude + min_free_space_gb: Additional buffer space required + max_disk_usage_percent: Maximum disk usage percentage allowed + """ + available_nodes = [] + + for node in self.nodes: + # Skip zones we want to exclude + if exclude_zones and node.zone in exclude_zones: + continue + + # Skip specific nodes we want to 
exclude + if exclude_nodes and node.name in exclude_nodes: + continue + + # Check disk usage threshold + if node.disk_usage_percent > max_disk_usage_percent: + continue + + # Check if node has enough free space + free_space_gb = node.available_space_gb + if free_space_gb >= (required_space_gb + min_free_space_gb): + available_nodes.append(node) + else: + continue + + # Sort by available space (most space first) - prioritize nodes with more free space + available_nodes.sort(key=lambda n: n.available_space_gb, reverse=True) + return available_nodes + + def generate_rebalancing_recommendations(self, + table_name: Optional[str] = None, + min_size_gb: float = 40.0, + max_size_gb: float = 60.0, + zone_tolerance_percent: float = 10.0, + min_free_space_gb: float = 100.0, + max_recommendations: int = 10, + prioritize_space: bool = False, + source_node: Optional[str] = None, + max_disk_usage_percent: float = 90.0) -> List[MoveRecommendation]: + """Generate recommendations for rebalancing shards + + Args: + prioritize_space: If True, prioritizes moving shards from nodes with less available space + regardless of zone balance. If False, prioritizes zone balancing first. 
+ source_node: If specified, only generate recommendations for shards on this node + max_disk_usage_percent: Maximum disk usage percentage for target nodes + """ + recommendations = [] + + # Get moveable shards (only healthy ones for actual operations) + moveable_shards = self.find_moveable_shards(min_size_gb, max_size_gb, table_name) + + print(f"Analyzing {len(moveable_shards)} candidate shards in size range {min_size_gb}-{max_size_gb}GB...") + + if not moveable_shards: + return recommendations + + # Analyze current zone balance + zone_stats = self.check_zone_balance(table_name, zone_tolerance_percent) + + # Calculate target distribution + total_shards = sum(stats['TOTAL'] for stats in zone_stats.values()) + zones = list(zone_stats.keys()) + target_per_zone = total_shards // len(zones) if zones else 0 + + # Find zones that are over/under capacity + overloaded_zones = [] + underloaded_zones = [] + + for zone, stats in zone_stats.items(): + current_count = stats['TOTAL'] + threshold_high = target_per_zone * (1 + zone_tolerance_percent / 100) + threshold_low = target_per_zone * (1 - zone_tolerance_percent / 100) + + if current_count > threshold_high: + overloaded_zones.append(zone) + elif current_count < threshold_low: + underloaded_zones.append(zone) + + # Optimize processing: if filtering by source node, only process those shards + if source_node: + processing_shards = [s for s in moveable_shards if s.node_name == source_node] + print(f"Focusing on {len(processing_shards)} shards from node {source_node}") + else: + processing_shards = moveable_shards + + # Generate move recommendations + safe_recommendations = 0 + total_evaluated = 0 + + for i, shard in enumerate(processing_shards): + if len(recommendations) >= max_recommendations: + break + + # Show progress every 50 shards when processing many + if len(processing_shards) > 100 and i > 0 and i % 50 == 0: + print(".", end="", flush=True) + + total_evaluated += 1 + + # Skip based on priority mode + if not 
prioritize_space: + # Zone balancing mode: only move shards from overloaded zones + if shard.zone not in overloaded_zones: + continue + # In space priority mode, consider all shards regardless of zone balance + + # Find target nodes, excluding the source node and prioritizing by available space (with caching) + target_nodes = self._find_nodes_with_capacity_cached( + required_space_gb=shard.size_gb, + exclude_nodes={shard.node_name}, # Don't move to same node + min_free_space_gb=min_free_space_gb, + max_disk_usage_percent=max_disk_usage_percent + ) + + # Quick pre-filter to avoid expensive safety validations + # Only check nodes in different zones (for zone balancing) + if not prioritize_space: + target_nodes = [node for node in target_nodes if node.zone != shard.zone] + + # Limit to top 3 candidates to reduce validation overhead + target_nodes = target_nodes[:3] + + # Filter target nodes to find safe candidates + safe_target_nodes = [] + for candidate_node in target_nodes: + # Create a temporary recommendation to test safety + temp_rec = MoveRecommendation( + table_name=shard.table_name, + schema_name=shard.schema_name, + shard_id=shard.shard_id, + from_node=shard.node_name, + to_node=candidate_node.name, + from_zone=shard.zone, + to_zone=candidate_node.zone, + shard_type=shard.shard_type, + size_gb=shard.size_gb, + reason="Safety validation" + ) + + # Check if this move would be safe + is_safe, safety_msg = self.validate_move_safety(temp_rec, max_disk_usage_percent) + if is_safe: + safe_target_nodes.append(candidate_node) + + if not safe_target_nodes: + continue # No safe targets found, skip this shard + + if prioritize_space: + # Space priority mode: choose node with most available space + target_node = safe_target_nodes[0] # Already sorted by available space (desc) + else: + # Zone balance mode: prefer underloaded zones, then available space + target_zones = set(underloaded_zones) - {shard.zone} + preferred_nodes = [n for n in safe_target_nodes if n.zone in 
target_zones] + other_nodes = [n for n in safe_target_nodes if n.zone not in target_zones] + + # Choose target node with intelligent priority: + # 1. If a node has significantly more space (2x) than zone-preferred nodes, prioritize space + # 2. Otherwise, prefer zone balancing first, then available space + target_node = None + + if preferred_nodes and other_nodes: + best_preferred = preferred_nodes[0] # Most space in preferred zones + best_other = other_nodes[0] # Most space in other zones + + # If the best "other" node has significantly more space (2x), choose it + if best_other.available_space_gb >= (best_preferred.available_space_gb * 2): + target_node = best_other + else: + target_node = best_preferred + elif preferred_nodes: + target_node = preferred_nodes[0] + elif other_nodes: + target_node = other_nodes[0] + else: + continue # No suitable target found + + # Determine the reason for the move + if prioritize_space: + if shard.zone == target_node.zone: + reason = f"Space optimization within {shard.zone}" + else: + reason = f"Space optimization: {shard.zone} -> {target_node.zone}" + else: + reason = f"Zone rebalancing: {shard.zone} -> {target_node.zone}" + if shard.zone == target_node.zone: + reason = f"Node balancing within {shard.zone}" + + recommendation = MoveRecommendation( + table_name=shard.table_name, + schema_name=shard.schema_name, + shard_id=shard.shard_id, + from_node=shard.node_name, + to_node=target_node.name, + from_zone=shard.zone, + to_zone=target_node.zone, + shard_type=shard.shard_type, + size_gb=shard.size_gb, + reason=reason + ) + + recommendations.append(recommendation) + + if len(processing_shards) > 100: + print() # New line after progress dots + print(f"Generated {len(recommendations)} move recommendations (evaluated {total_evaluated} shards)") + print(f"Performance: {self.get_cache_stats()}") + return recommendations + + def validate_move_safety(self, recommendation: MoveRecommendation, + max_disk_usage_percent: float = 90.0) -> 
Tuple[bool, str]: + """Validate that a move recommendation is safe to execute""" + # Find target node (with caching) + target_node = self._get_node_cached(recommendation.to_node) + + if not target_node: + return False, f"Target node '{recommendation.to_node}' not found" + + # Check for zone conflicts (same shard already exists in target zone) - with caching + zone_conflict = self._check_zone_conflict_cached(recommendation) + if zone_conflict: + return False, zone_conflict + + # Check available space + required_space_gb = recommendation.size_gb + 50 # 50GB buffer + if target_node.available_space_gb < required_space_gb: + return False, f"Insufficient space on target node (need {required_space_gb:.1f}GB, have {target_node.available_space_gb:.1f}GB)" + + # Check disk usage + if target_node.disk_usage_percent > max_disk_usage_percent: + return False, f"Target node disk usage too high ({target_node.disk_usage_percent:.1f}%)" + + return True, "Move appears safe" + + def _get_node_cached(self, node_name: str): + """Get node by name with caching""" + if node_name in self._node_lookup_cache: + self._cache_hits += 1 + return self._node_lookup_cache[node_name] + + # Find node (cache miss) + self._cache_misses += 1 + target_node = None + for node in self.nodes: + if node.name == node_name: + target_node = node + break + + self._node_lookup_cache[node_name] = target_node + return target_node + + def _check_zone_conflict_cached(self, recommendation: MoveRecommendation) -> Optional[str]: + """Check zone conflicts with caching""" + # Create cache key: table, shard, target zone + target_zone = self._get_node_zone(recommendation.to_node) + cache_key = (recommendation.table_name, recommendation.shard_id, target_zone) + + if cache_key in self._zone_conflict_cache: + self._cache_hits += 1 + return self._zone_conflict_cache[cache_key] + + # Cache miss - do expensive check + self._cache_misses += 1 + result = self._check_zone_conflict(recommendation) + self._zone_conflict_cache[cache_key] 
= result + return result + + def _get_node_zone(self, node_name: str) -> str: + """Get zone for a node name""" + node = self._get_node_cached(node_name) + return node.zone if node else "unknown" + + def get_cache_stats(self) -> str: + """Get cache performance statistics""" + total = self._cache_hits + self._cache_misses + if total == 0: + return "Cache stats: No operations yet" + + hit_rate = (self._cache_hits / total) * 100 + return f"Cache stats: {hit_rate:.1f}% hit rate ({self._cache_hits} hits, {self._cache_misses} misses)" + + def _find_nodes_with_capacity_cached(self, required_space_gb: float, exclude_nodes: set, + min_free_space_gb: float, max_disk_usage_percent: float) -> List[NodeInfo]: + """Find nodes with capacity using caching for repeated queries""" + # Create cache key based on parameters (rounded to avoid float precision issues) + cache_key = ( + round(required_space_gb, 1), + frozenset(exclude_nodes), + round(min_free_space_gb, 1), + round(max_disk_usage_percent, 1) + ) + + if cache_key in self._target_nodes_cache: + self._cache_hits += 1 + return self._target_nodes_cache[cache_key] + + # Cache miss - do expensive calculation + self._cache_misses += 1 + result = self.find_nodes_with_capacity( + required_space_gb=required_space_gb, + exclude_nodes=exclude_nodes, + min_free_space_gb=min_free_space_gb, + max_disk_usage_percent=max_disk_usage_percent + ) + + self._target_nodes_cache[cache_key] = result + return result + + def _check_zone_conflict(self, recommendation: MoveRecommendation) -> Optional[str]: + """Check if moving this shard would create a zone conflict + + Performs comprehensive zone safety analysis: + - Checks if target node already has a copy of this shard + - Checks if target zone already has copies + - Analyzes zone allocation limits and CrateDB's zone awareness rules + - Ensures move doesn't violate zone-awareness principles + """ + try: + # Query to get all copies of this shard across nodes and zones + query = """ + SELECT + 
s.node['id'] as node_id, + s.node['name'] as node_name, + n.attributes['zone'] as zone, + s."primary" as is_primary, + s.routing_state, + s.state + FROM sys.shards s + JOIN sys.nodes n ON s.node['id'] = n.id + WHERE s.table_name = ? + AND s.schema_name = ? + AND s.id = ? + ORDER BY s."primary" DESC, zone, node_name + """ + + result = self.client.execute_query(query, [ + recommendation.table_name, + recommendation.schema_name, + recommendation.shard_id + ]) + + if not result.get('rows'): + return f"Cannot find shard {recommendation.shard_id} for table {recommendation.schema_name}.{recommendation.table_name}" + + # Analyze current distribution + zones_with_copies = set() + nodes_with_copies = set() + current_location = None + healthy_copies = 0 + total_copies = 0 + target_node_id = None + + # Get target node ID for the recommendation + for node in self.nodes: + if node.name == recommendation.to_node: + target_node_id = node.id + break + + if not target_node_id: + return f"Target node {recommendation.to_node} not found in cluster" + + for row in result['rows']: + node_id, node_name, zone, is_primary, routing_state, state = row + zone = zone or 'unknown' + total_copies += 1 + + # Track the shard we're planning to move + if node_name == recommendation.from_node: + current_location = { + 'zone': zone, + 'is_primary': is_primary, + 'routing_state': routing_state, + 'state': state + } + + # Track all copies for conflict detection + nodes_with_copies.add(node_id) + if routing_state == 'STARTED' and state == 'STARTED': + healthy_copies += 1 + zones_with_copies.add(zone) + + # Validate the shard we're trying to move exists and is healthy + if not current_location: + return f"Shard not found on source node {recommendation.from_node}" + + if current_location['routing_state'] != 'STARTED': + return f"Source shard is not in STARTED state (current: {current_location['routing_state']})" + + # CRITICAL CHECK 1: Target node already has a copy of this shard + if target_node_id in 
nodes_with_copies: + return f"Node conflict: Target node {recommendation.to_node} already has a copy of shard {recommendation.shard_id}" + + # CRITICAL CHECK 2: Target zone already has a copy (zone allocation limits) + if recommendation.to_zone in zones_with_copies: + return f"Zone conflict: {recommendation.to_zone} already has a copy of shard {recommendation.shard_id}" + + # CRITICAL CHECK 3: Ensure we're not creating a single point of failure + if len(zones_with_copies) == 1 and current_location['zone'] in zones_with_copies: + # This is the only zone with this shard - moving it is good for zone distribution + pass + elif len(zones_with_copies) <= 1 and healthy_copies <= 1: + return f"Safety concern: Only {healthy_copies} healthy copy(ies) exist. Moving might risk data availability." + + # ADDITIONAL CHECK: Verify zone allocation constraints for this table + table_zone_query = """ + SELECT + n.attributes['zone'] as zone, + COUNT(*) as shard_count + FROM sys.shards s + JOIN sys.nodes n ON s.node['id'] = n.id + WHERE s.table_name = ? + AND s.schema_name = ? + AND s.id = ? 
+ AND s.routing_state = 'STARTED' + GROUP BY n.attributes['zone'] + ORDER BY zone + """ + + zone_result = self.client.execute_query(table_zone_query, [ + recommendation.table_name, + recommendation.schema_name, + recommendation.shard_id + ]) + + current_zone_counts = {} + for row in zone_result.get('rows', []): + zone_name, count = row + current_zone_counts[zone_name or 'unknown'] = count + + # Check if adding to target zone would violate balance + target_zone_count = current_zone_counts.get(recommendation.to_zone, 0) + if target_zone_count > 0: + return f"Zone allocation violation: {recommendation.to_zone} would have {target_zone_count + 1} copies after move" + + return None + + except Exception as e: + # If we can't check, err on the side of caution + return f"Cannot verify zone safety: {str(e)}" + + def get_cluster_overview(self) -> Dict[str, Any]: + """Get a comprehensive overview of the cluster""" + # Get cluster watermark settings + watermarks = self.client.get_cluster_watermarks() + + overview = { + 'nodes': len(self.nodes), + 'zones': len(set(node.zone for node in self.nodes)), + 'total_shards': len(self.shards), + 'primary_shards': len([s for s in self.shards if s.is_primary]), + 'replica_shards': len([s for s in self.shards if not s.is_primary]), + 'total_size_gb': sum(s.size_gb for s in self.shards), + 'zone_distribution': defaultdict(int), + 'node_health': [], + 'watermarks': watermarks + } + + # Zone distribution + for shard in self.shards: + overview['zone_distribution'][shard.zone] += 1 + overview['zone_distribution'] = dict(overview['zone_distribution']) + + # Node health with watermark calculations + for node in self.nodes: + node_shards = [s for s in self.shards if s.node_name == node.name] + watermark_info = self._calculate_node_watermark_remaining(node, watermarks) + + overview['node_health'].append({ + 'name': node.name, + 'zone': node.zone, + 'shards': len(node_shards), + 'size_gb': sum(s.size_gb for s in node_shards), + 'disk_usage_percent': 
node.disk_usage_percent, + 'heap_usage_percent': node.heap_usage_percent, + 'available_space_gb': node.available_space_gb, + 'remaining_to_low_watermark_gb': watermark_info['remaining_to_low_gb'], + 'remaining_to_high_watermark_gb': watermark_info['remaining_to_high_gb'] + }) + + return overview + + def _calculate_node_watermark_remaining(self, node: 'NodeInfo', watermarks: Dict[str, Any]) -> Dict[str, float]: + """Calculate remaining space until watermarks are reached""" + + # Parse watermark percentages + low_watermark = self._parse_watermark_percentage(watermarks.get('low', '85%')) + high_watermark = self._parse_watermark_percentage(watermarks.get('high', '90%')) + + # Calculate remaining space to each watermark + total_space_bytes = node.fs_total + current_used_bytes = node.fs_used + + # Space that would be used at each watermark + low_watermark_used_bytes = total_space_bytes * (low_watermark / 100.0) + high_watermark_used_bytes = total_space_bytes * (high_watermark / 100.0) + + # Remaining space until each watermark (negative if already exceeded) + remaining_to_low_gb = max(0, (low_watermark_used_bytes - current_used_bytes) / (1024**3)) + remaining_to_high_gb = max(0, (high_watermark_used_bytes - current_used_bytes) / (1024**3)) + + return { + 'remaining_to_low_gb': remaining_to_low_gb, + 'remaining_to_high_gb': remaining_to_high_gb + } + + def _parse_watermark_percentage(self, watermark_value: str) -> float: + """Parse watermark percentage from string like '85%' or '0.85'""" + if isinstance(watermark_value, str): + if watermark_value.endswith('%'): + return float(watermark_value[:-1]) + else: + # Handle decimal format like '0.85' + decimal_value = float(watermark_value) + if decimal_value <= 1.0: + return decimal_value * 100 + return decimal_value + elif isinstance(watermark_value, (int, float)): + if watermark_value <= 1.0: + return watermark_value * 100 + return watermark_value + else: + # Default to common values if parsing fails + return 85.0 # Default 
low watermark + + def plan_node_decommission(self, node_name: str, + min_free_space_gb: float = 100.0) -> Dict[str, Any]: + """Plan the decommissioning of a node by analyzing required shard moves + + Args: + node_name: Name of the node to decommission + min_free_space_gb: Minimum free space required on target nodes + + Returns: + Dictionary with decommission plan and analysis + """ + # Find the node to decommission + target_node = None + for node in self.nodes: + if node.name == node_name: + target_node = node + break + + if not target_node: + return { + 'error': f"Node {node_name} not found in cluster", + 'feasible': False + } + + # Get all shards on this node (only healthy ones for safety) + node_shards = [s for s in self.shards + if s.node_name == node_name + and s.routing_state == 'STARTED'] + + if not node_shards: + return { + 'node': node_name, + 'zone': target_node.zone, + 'feasible': True, + 'shards_to_move': 0, + 'total_size_gb': 0, + 'recommendations': [], + 'warnings': [], + 'message': 'Node has no healthy shards - safe to decommission' + } + + # Calculate space requirements + total_size_gb = sum(s.size_gb for s in node_shards) + + # Find potential target nodes for each shard + move_plan = [] + warnings = [] + infeasible_moves = [] + + for shard in node_shards: + # Find nodes that can accommodate this shard + potential_targets = self.find_nodes_with_capacity( + shard.size_gb, + exclude_nodes={node_name}, + min_free_space_gb=min_free_space_gb + ) + + if not potential_targets: + infeasible_moves.append({ + 'shard': f"{shard.schema_name}.{shard.table_name}[{shard.shard_id}]", + 'size_gb': shard.size_gb, + 'reason': 'No nodes with sufficient capacity' + }) + continue + + # Check for zone conflicts + safe_targets = [] + for target in potential_targets: + # Create a temporary recommendation to test zone safety + temp_rec = MoveRecommendation( + table_name=shard.table_name, + schema_name=shard.schema_name, + shard_id=shard.shard_id, + from_node=node_name, + 
to_node=target.name, + from_zone=shard.zone, + to_zone=target.zone, + shard_type=shard.shard_type, + size_gb=shard.size_gb, + reason=f"Node decommission: {node_name}" + ) + + zone_conflict = self._check_zone_conflict(temp_rec) + if not zone_conflict: + safe_targets.append(target) + else: + warnings.append(f"Zone conflict for {shard.schema_name}.{shard.table_name}[{shard.shard_id}]: {zone_conflict}") + + if safe_targets: + # Choose the target with most available space + best_target = safe_targets[0] + move_plan.append(MoveRecommendation( + table_name=shard.table_name, + schema_name=shard.schema_name, + shard_id=shard.shard_id, + from_node=node_name, + to_node=best_target.name, + from_zone=shard.zone, + to_zone=best_target.zone, + shard_type=shard.shard_type, + size_gb=shard.size_gb, + reason=f"Node decommission: {node_name}" + )) + else: + infeasible_moves.append({ + 'shard': f"{shard.schema_name}.{shard.table_name}[{shard.shard_id}]", + 'size_gb': shard.size_gb, + 'reason': 'Zone conflicts prevent safe move' + }) + + # Determine feasibility + feasible = len(infeasible_moves) == 0 + + # Add capacity warnings + if feasible: + # Check if remaining cluster capacity is sufficient after decommission + remaining_capacity = sum(n.available_space_gb for n in self.nodes if n.name != node_name) + if remaining_capacity < total_size_gb * 1.2: # 20% safety margin + warnings.append(f"Low remaining capacity after decommission. 
class RecoveryMonitor:
    """Monitor shard recovery operations on a CrateDB cluster.

    Read-only helper: it queries recovery state through the client and
    formats the results for display; it never issues REROUTE commands.
    """

    def __init__(self, client: CrateDBClient):
        self.client = client

    def get_cluster_recovery_status(self,
                                    table_name: Optional[str] = None,
                                    node_name: Optional[str] = None,
                                    recovery_type_filter: str = 'all',
                                    include_transitioning: bool = False) -> List[RecoveryInfo]:
        """Get comprehensive recovery status with minimal cluster impact.

        Args:
            table_name: Restrict results to a single table (None = all tables).
            node_name: Restrict results to a single node (None = all nodes).
            recovery_type_filter: 'all', or a specific recovery type such as
                'PEER'/'DISK'; compared case-insensitively.
            include_transitioning: Also include shards still transitioning.

        Returns:
            List of RecoveryInfo entries matching the filters.
        """
        # One combined query keeps monitoring overhead on the cluster low.
        recoveries = self.client.get_all_recovering_shards(table_name, node_name, include_transitioning)

        # Apply recovery type filter (case-insensitive).
        if recovery_type_filter != 'all':
            wanted = recovery_type_filter.upper()
            recoveries = [r for r in recoveries if r.recovery_type.upper() == wanted]

        return recoveries

    def get_recovery_summary(self, recoveries: List[RecoveryInfo]) -> Dict[str, Any]:
        """Aggregate recovery operations into counts, sizes and progress.

        Returns:
            Dict with keys: 'total_recoveries', 'by_type' (per-type 'count',
            'total_size_gb', 'avg_progress'), 'by_stage' (stage -> count),
            'avg_progress' and 'total_size_gb'.
        """
        if not recoveries:
            return {
                'total_recoveries': 0,
                'by_type': {},
                'by_stage': {},
                'avg_progress': 0.0,
                'total_size_gb': 0.0
            }

        by_type: Dict[str, Dict[str, Any]] = {}
        by_stage: Dict[str, int] = {}
        # Track per-type progress sums so the averages come out of a single
        # pass (the previous version re-filtered the whole list once per
        # type: O(n * types)).
        progress_by_type: Dict[str, float] = {}
        total_progress = 0.0
        total_size_gb = 0.0

        for recovery in recoveries:
            type_stats = by_type.setdefault(recovery.recovery_type, {
                'count': 0,
                'total_size_gb': 0.0,
                'avg_progress': 0.0
            })
            type_stats['count'] += 1
            type_stats['total_size_gb'] += recovery.size_gb
            progress_by_type[recovery.recovery_type] = (
                progress_by_type.get(recovery.recovery_type, 0.0) + recovery.overall_progress
            )

            by_stage[recovery.stage] = by_stage.get(recovery.stage, 0) + 1

            total_progress += recovery.overall_progress
            total_size_gb += recovery.size_gb

        # Finalize per-type averages (count is always >= 1 here).
        for type_name, type_stats in by_type.items():
            type_stats['avg_progress'] = progress_by_type[type_name] / type_stats['count']

        return {
            'total_recoveries': len(recoveries),
            'by_type': by_type,
            'by_stage': by_stage,
            'avg_progress': total_progress / len(recoveries),
            'total_size_gb': total_size_gb
        }

    def format_recovery_display(self, recoveries: List[RecoveryInfo]) -> str:
        """Render recovery information as a multi-section plain-text report.

        Groups recoveries by type (PEER / DISK / other), renders one table
        per group, and appends an overall summary.
        """
        if not recoveries:
            return "βœ… No active shard recoveries found"

        # Group by recovery type for sectioned output.
        peer_recoveries = [r for r in recoveries if r.recovery_type == 'PEER']
        disk_recoveries = [r for r in recoveries if r.recovery_type == 'DISK']
        other_recoveries = [r for r in recoveries if r.recovery_type not in ['PEER', 'DISK']]

        output = [f"\nπŸ”„ Active Shard Recoveries ({len(recoveries)} total)"]
        output.append("=" * 80)

        if peer_recoveries:
            output.append(f"\nπŸ“‘ PEER Recoveries ({len(peer_recoveries)})")
            output.append(self._format_recovery_table(peer_recoveries))

        if disk_recoveries:
            output.append(f"\nπŸ’Ύ DISK Recoveries ({len(disk_recoveries)})")
            output.append(self._format_recovery_table(disk_recoveries))

        if other_recoveries:
            output.append(f"\nπŸ”§ Other Recoveries ({len(other_recoveries)})")
            output.append(self._format_recovery_table(other_recoveries))

        # Add summary (plain strings here: no placeholders needed).
        summary = self.get_recovery_summary(recoveries)
        output.append("\nπŸ“Š Summary:")
        output.append(f"   Total size: {summary['total_size_gb']:.1f} GB")
        output.append(f"   Average progress: {summary['avg_progress']:.1f}%")

        return "\n".join(output)

    def _format_recovery_table(self, recoveries: List[RecoveryInfo]) -> str:
        """Format one group of recoveries as a fixed-width ASCII table."""
        if not recoveries:
            return "   No recoveries of this type"

        # Table headers; columns grow to fit the widest cell.
        headers = ["Table", "Shard", "Node", "Type", "Stage", "Progress", "Size(GB)", "Time(s)"]
        col_widths = [len(h) for h in headers]

        rows = []
        for recovery in recoveries:
            row = [
                f"{recovery.schema_name}.{recovery.table_name}",
                str(recovery.shard_id),
                recovery.node_name,
                recovery.shard_type,
                recovery.stage,
                f"{recovery.overall_progress:.1f}%",
                f"{recovery.size_gb:.1f}",
                f"{recovery.total_time_seconds:.1f}"
            ]
            rows.append(row)

            # Update column widths as we go.
            for i, cell in enumerate(row):
                col_widths[i] = max(col_widths[i], len(cell))

        output = []

        # Header row, then a dashed separator matching the table width
        # (minus the 3-character leading indent).
        header_row = "   " + " | ".join(h.ljust(w) for h, w in zip(headers, col_widths))
        output.append(header_row)
        output.append("   " + "-" * (len(header_row) - 3))

        for row in rows:
            output.append("   " + " | ".join(cell.ljust(w) for cell, w in zip(row, col_widths)))

        return "\n".join(output)
NODE_NAME: Name of the node to decommission +# """ +# client = ctx.obj['client'] +# analyzer = ShardAnalyzer(client) +# +# mode_text = "PLANNING MODE" if dry_run else "EXECUTION MODE" +# console.print(Panel.fit(f"[bold blue]Node Decommission Analysis[/bold blue] - [bold {'green' if dry_run else 'red'}]{mode_text}[/bold {'green' if dry_run else 'red'}]")) +# console.print(f"[dim]Analyzing decommission plan for node: {node_name}[/dim]") +# console.print() +# +# # Generate decommission plan +# plan = analyzer.plan_node_decommission(node_name, min_free_space) +# +# if 'error' in plan: +# console.print(f"[red]Error: {plan['error']}[/red]") +# return +# +# # Display plan summary +# summary_table = Table(title=f"Decommission Plan for {node_name}", box=box.ROUNDED) +# summary_table.add_column("Metric", style="cyan") +# summary_table.add_column("Value", style="magenta") +# +# summary_table.add_row("Node", plan['node']) +# summary_table.add_row("Zone", plan['zone']) +# summary_table.add_row("Feasible", "[green]βœ“ Yes[/green]" if plan['feasible'] else "[red]βœ— No[/red]") +# summary_table.add_row("Shards to Move", str(plan['shards_to_move'])) +# summary_table.add_row("Moveable Shards", str(plan['moveable_shards'])) +# summary_table.add_row("Total Data Size", format_size(plan['total_size_gb'])) +# summary_table.add_row("Estimated Time", f"{plan['estimated_time_hours']:.1f} hours") +# +# console.print(summary_table) +# console.print() +# +# # Show warnings if any +# if plan['warnings']: +# console.print("[bold yellow]⚠ Warnings:[/bold yellow]") +# for warning in plan['warnings']: +# console.print(f" β€’ [yellow]{warning}[/yellow]") +# console.print() +# +# # Show infeasible moves if any +# if plan['infeasible_moves']: +# console.print("[bold red]βœ— Cannot Move:[/bold red]") +# infeasible_table = Table(box=box.ROUNDED) +# infeasible_table.add_column("Shard", style="cyan") +# infeasible_table.add_column("Size", style="magenta") +# infeasible_table.add_column("Reason", 
style="red") +# +# for move in plan['infeasible_moves']: +# infeasible_table.add_row( +# move['shard'], +# format_size(move['size_gb']), +# move['reason'] +# ) +# console.print(infeasible_table) +# console.print() +# +# # Show move recommendations +# if plan['recommendations']: +# move_table = Table(title="Required Shard Moves", box=box.ROUNDED) +# move_table.add_column("Table", style="cyan") +# move_table.add_column("Shard", justify="right", style="magenta") +# move_table.add_column("Type", style="blue") +# move_table.add_column("Size", style="green") +# move_table.add_column("From Zone", style="yellow") +# move_table.add_column("To Node", style="cyan") +# move_table.add_column("To Zone", style="yellow") +# +# for rec in plan['recommendations']: +# move_table.add_row( +# f"{rec.schema_name}.{rec.table_name}", +# str(rec.shard_id), +# rec.shard_type, +# format_size(rec.size_gb), +# rec.from_zone, +# rec.to_node, +# rec.to_zone +# ) +# +# console.print(move_table) +# console.print() +# +# # Generate SQL commands if not in dry-run mode +# if not dry_run and plan['feasible']: +# console.print(Panel.fit("[bold green]Decommission SQL Commands[/bold green]")) +# console.print("[dim]# Execute these commands in order to prepare for node decommission[/dim]") +# console.print("[dim]# ALWAYS test in a non-production environment first![/dim]") +# console.print("[dim]# Monitor shard health after each move before proceeding[/dim]") +# console.print() +# +# for i, rec in enumerate(plan['recommendations'], 1): +# console.print(f"-- Move {i}: {rec.reason}") +# console.print(f"{rec.to_sql()}") +# console.print() +# +# console.print(f"-- After all moves complete, the node {node_name} can be safely removed") +# console.print(f"-- Total moves required: {len(plan['recommendations'])}") +# elif dry_run: +# console.print("[green]βœ“ Decommission plan ready. 
def format_size(size_gb: float) -> str:
    """Human-readable size string for a value given in gigabytes.

    Values of 1000 GB and above render as TB, 1 GB and above as GB,
    anything smaller as whole MB.
    """
    if size_gb < 1:
        return f"{size_gb*1000:.0f}MB"
    if size_gb < 1000:
        return f"{size_gb:.1f}GB"
    return f"{size_gb/1000:.1f}TB"


def format_percentage(value: float) -> str:
    """Rich-markup percentage colored by severity: >80 red, >70 yellow, else green."""
    if value > 80:
        color = "red"
    elif value > 70:
        color = "yellow"
    else:
        color = "green"
    return f"[{color}]{value:.1f}%[/{color}]"
recovery_info.translog_size_bytes + + # Only show if significant (>10MB for production) + if tl_bytes < 10 * 1024 * 1024: # 10MB for production + return "" + + tl_gb = recovery_info.translog_size_gb + + # Color coding based on size + if tl_gb >= 5.0: + color = "red" + elif tl_gb >= 1.0: + color = "yellow" + else: + color = "green" + + # Format size + if tl_gb >= 1.0: + size_str = f"{tl_gb:.1f}GB" + else: + size_str = f"{tl_gb*1000:.0f}MB" + + return f" [dim]([{color}]TL:{size_str}[/{color}])[/dim]" + + +@click.group() +@click.version_option() +@click.pass_context +def main(ctx): + """XMover - CrateDB Shard Analyzer and Movement Tool + + A tool for analyzing CrateDB shard distribution across nodes and availability zones, + and generating safe SQL commands for shard rebalancing. + """ + ctx.ensure_object(dict) + + # Test connection on startup + try: + client = CrateDBClient() + if not client.test_connection(): + console.print("[red]Error: Could not connect to CrateDB[/red]") + console.print("Please check your CRATE_CONNECTION_STRING in .env file") + sys.exit(1) + ctx.obj['client'] = client + except Exception as e: + console.print(f"[red]Error connecting to CrateDB: {e}[/red]") + sys.exit(1) + + +@main.command() +@click.option('--table', '-t', help='Analyze specific table only') +@click.pass_context +def analyze(ctx, table: Optional[str]): + """Analyze current shard distribution across nodes and zones""" + client = ctx.obj['client'] + analyzer = ShardAnalyzer(client) + + console.print(Panel.fit("[bold blue]CrateDB Cluster Analysis[/bold blue]")) + + # Get cluster overview (includes all shards for complete analysis) + overview = analyzer.get_cluster_overview() + + # Cluster summary table + summary_table = Table(title="Cluster Summary", box=box.ROUNDED) + summary_table.add_column("Metric", style="cyan") + summary_table.add_column("Value", style="magenta") + + summary_table.add_row("Nodes", str(overview['nodes'])) + summary_table.add_row("Availability Zones", 
str(overview['zones'])) + summary_table.add_row("Total Shards", str(overview['total_shards'])) + summary_table.add_row("Primary Shards", str(overview['primary_shards'])) + summary_table.add_row("Replica Shards", str(overview['replica_shards'])) + summary_table.add_row("Total Size", format_size(overview['total_size_gb'])) + + console.print(summary_table) + console.print() + + # Disk watermarks table + if overview.get('watermarks'): + watermarks_table = Table(title="Disk Allocation Watermarks", box=box.ROUNDED) + watermarks_table.add_column("Setting", style="cyan") + watermarks_table.add_column("Value", style="magenta") + + watermarks = overview['watermarks'] + watermarks_table.add_row("Low Watermark", str(watermarks.get('low', 'Not set'))) + watermarks_table.add_row("High Watermark", str(watermarks.get('high', 'Not set'))) + watermarks_table.add_row("Flood Stage", str(watermarks.get('flood_stage', 'Not set'))) + watermarks_table.add_row("Enable for Single Node", str(watermarks.get('enable_for_single_data_node', 'Not set'))) + + console.print(watermarks_table) + console.print() + + # Zone distribution table + zone_table = Table(title="Zone Distribution", box=box.ROUNDED) + zone_table.add_column("Zone", style="cyan") + zone_table.add_column("Shards", justify="right", style="magenta") + zone_table.add_column("Percentage", justify="right", style="green") + + total_shards = overview['total_shards'] + for zone, count in overview['zone_distribution'].items(): + percentage = (count / total_shards * 100) if total_shards > 0 else 0 + zone_table.add_row(zone, str(count), f"{percentage:.1f}%") + + console.print(zone_table) + console.print() + + # Node health table + node_table = Table(title="Node Health", box=box.ROUNDED) + node_table.add_column("Node", style="cyan") + node_table.add_column("Zone", style="blue") + node_table.add_column("Shards", justify="right", style="magenta") + node_table.add_column("Size", justify="right", style="green") + node_table.add_column("Disk 
@main.command()
@click.option('--table', '-t', help='Find candidates for specific table only')
@click.option('--min-size', default=40.0, help='Minimum shard size in GB (default: 40)')
@click.option('--max-size', default=60.0, help='Maximum shard size in GB (default: 60)')
@click.option('--limit', default=20, help='Maximum number of candidates to show (default: 20)')
@click.option('--node', help='Only show candidates from this specific source node (e.g., data-hot-4)')
@click.pass_context
def find_candidates(ctx, table: Optional[str], min_size: float, max_size: float, limit: int, node: Optional[str]):
    """Find shard candidates for movement based on size criteria

    Results are sorted by nodes with least available space first,
    then by shard size (smallest first) for easier moves.
    """
    # Reuse the CLI-wide client created in main(); build an analyzer on top.
    client = ctx.obj['client']
    analyzer = ShardAnalyzer(client)

    console.print(Panel.fit(f"[bold blue]Finding Moveable Shards ({min_size}-{max_size}GB)[/bold blue]"))

    if node:
        console.print(f"[dim]Filtering: Only showing candidates from source node '{node}'[/dim]")

    # Find moveable candidates (only healthy shards suitable for operations)
    candidates = analyzer.find_moveable_shards(min_size, max_size, table)

    # Filter by node if specified
    if node:
        candidates = [c for c in candidates if c.node_name == node]

    # Empty result: print a node-specific hint when --node was used,
    # otherwise a generic message, then bail out early.
    if not candidates:
        if node:
            console.print(f"[yellow]No moveable shards found on node '{node}' in the specified size range.[/yellow]")
            console.print(f"[dim]Tip: Try different size ranges or remove --node filter to see all candidates[/dim]")
        else:
            console.print("[yellow]No moveable shards found in the specified size range.[/yellow]")
        return

    # Show limited results
    shown_candidates = candidates[:limit]

    candidates_table = Table(title=f"Moveable Shard Candidates (showing {len(shown_candidates)} of {len(candidates)})", box=box.ROUNDED)
    candidates_table.add_column("Table", style="cyan")
    candidates_table.add_column("Shard ID", justify="right", style="magenta")
    candidates_table.add_column("Type", style="blue")
    candidates_table.add_column("Node", style="green")
    candidates_table.add_column("Zone", style="yellow")
    candidates_table.add_column("Size", justify="right", style="red")
    candidates_table.add_column("Node Free Space", justify="right", style="white")
    candidates_table.add_column("Documents", justify="right", style="dim")

    # Create a mapping of node names to available space for display.
    # NOTE(review): the comprehension variable `node` shadows the --node
    # option; harmless in Python 3 (comprehension scope is separate), but
    # worth renaming eventually.
    node_space_map = {node.name: node.available_space_gb for node in analyzer.nodes}

    for shard in shown_candidates:
        # Default to 0 free space if the shard's node is unknown to the analyzer.
        node_free_space = node_space_map.get(shard.node_name, 0)
        candidates_table.add_row(
            f"{shard.schema_name}.{shard.table_name}",
            str(shard.shard_id),
            shard.shard_type,
            shard.node_name,
            shard.zone,
            format_size(shard.size_gb),
            format_size(node_free_space),
            f"{shard.num_docs:,}"
        )

    console.print(candidates_table)

    # Tell the user how many candidates were truncated by --limit.
    if len(candidates) > limit:
        console.print(f"\n[dim]... and {len(candidates) - limit} more candidates[/dim]")
for confirmation)') +@click.option('--node', help='Only recommend moves from this specific source node (e.g., data-hot-4)') +@click.pass_context +def recommend(ctx, table: Optional[str], min_size: float, max_size: float, + zone_tolerance: float, min_free_space: float, max_moves: int, max_disk_usage: float, validate: bool, prioritize_space: bool, dry_run: bool, auto_execute: bool, node: Optional[str]): + """Generate shard movement recommendations for rebalancing""" + client = ctx.obj['client'] + analyzer = ShardAnalyzer(client) + + # Safety check for auto-execute + if auto_execute and dry_run: + console.print("[red]❌ Error: --auto-execute requires --execute flag[/red]") + console.print("[dim]Use: --execute --auto-execute[/dim]") + return + + mode_text = "DRY RUN - Analysis Only" if dry_run else "EXECUTION MODE" + console.print(Panel.fit(f"[bold blue]Generating Rebalancing Recommendations[/bold blue] - [bold {'green' if dry_run else 'red'}]{mode_text}[/bold {'green' if dry_run else 'red'}]")) + console.print("[dim]Note: Only analyzing healthy shards (STARTED + 100% recovered) for safe operations[/dim]") + console.print("[dim]Zone conflict detection: Prevents moves that would violate CrateDB's zone awareness[/dim]") + if prioritize_space: + console.print("[dim]Mode: Prioritizing available space over zone balancing[/dim]") + else: + console.print("[dim]Mode: Prioritizing zone balancing over available space[/dim]") + + if node: + console.print(f"[dim]Filtering: Only showing moves from source node '{node}'[/dim]") + + console.print(f"[dim]Safety thresholds: Max disk usage {max_disk_usage}%, Min free space {min_free_space}GB[/dim]") + + if dry_run: + console.print("[green]Running in DRY RUN mode - no SQL commands will be generated[/green]") + else: + console.print("[red]EXECUTION MODE - SQL commands will be generated for actual moves[/red]") + console.print() + + recommendations = analyzer.generate_rebalancing_recommendations( + table_name=table, + min_size_gb=min_size, + 
max_size_gb=max_size, + zone_tolerance_percent=zone_tolerance, + min_free_space_gb=min_free_space, + max_recommendations=max_moves, + prioritize_space=prioritize_space, + source_node=node, + max_disk_usage_percent=max_disk_usage + ) + + if not recommendations: + if node: + console.print(f"[yellow]No safe recommendations found for node '{node}'[/yellow]") + console.print(f"[dim]This could be due to:[/dim]") + console.print(f"[dim] β€’ Zone conflicts preventing safe moves[/dim]") + console.print(f"[dim] β€’ Target nodes exceeding {max_disk_usage}% disk usage threshold[/dim]") + console.print(f"[dim] β€’ Insufficient free space on target nodes (need {min_free_space}GB)[/dim]") + console.print(f"[dim] β€’ No shards in size range {min_size}-{max_size}GB[/dim]") + console.print(f"[dim]Suggestions:[/dim]") + console.print(f"[dim] β€’ Try: --max-disk-usage 95 (allow higher disk usage)[/dim]") + console.print(f"[dim] β€’ Try: --min-free-space 50 (reduce space requirements)[/dim]") + console.print(f"[dim] β€’ Try: different size ranges or remove --node filter[/dim]") + else: + console.print("[green]No rebalancing recommendations needed. 
Cluster appears well balanced![/green]") + return + + # Show recommendations table + rec_table = Table(title=f"Rebalancing Recommendations ({len(recommendations)} moves)", box=box.ROUNDED) + rec_table.add_column("Table", style="cyan") + rec_table.add_column("Shard", justify="right", style="magenta") + rec_table.add_column("Type", style="blue") + rec_table.add_column("From Node", style="red") + rec_table.add_column("To Node", style="green") + rec_table.add_column("Target Free Space", justify="right", style="cyan") + rec_table.add_column("Zone Change", style="yellow") + rec_table.add_column("Size", justify="right", style="white") + rec_table.add_column("Reason", style="dim") + if validate: + rec_table.add_column("Safety Check", style="bold") + + # Create a mapping of node names to available space for display + node_space_map = {node.name: node.available_space_gb for node in analyzer.nodes} + + for rec in recommendations: + zone_change = f"{rec.from_zone} β†’ {rec.to_zone}" if rec.from_zone != rec.to_zone else rec.from_zone + target_free_space = node_space_map.get(rec.to_node, 0) + + row = [ + f"{rec.schema_name}.{rec.table_name}", + str(rec.shard_id), + rec.shard_type, + rec.from_node, + rec.to_node, + format_size(target_free_space), + zone_change, + format_size(rec.size_gb), + rec.reason + ] + + if validate: + is_safe, safety_msg = analyzer.validate_move_safety(rec, max_disk_usage_percent=max_disk_usage) + safety_status = "[green]βœ“ SAFE[/green]" if is_safe else f"[red]βœ— {safety_msg}[/red]" + row.append(safety_status) + + rec_table.add_row(*row) + + console.print(rec_table) + console.print() + + # Generate SQL commands or show dry-run analysis + if dry_run: + console.print(Panel.fit("[bold yellow]Dry Run Analysis - No Commands Generated[/bold yellow]")) + console.print("[dim]# This is a dry run - showing what would be recommended[/dim]") + console.print("[dim]# Use --execute flag to generate actual SQL commands[/dim]") + console.print() + + safe_moves = 0 + 
zone_conflicts = 0 + space_issues = 0 + + for i, rec in enumerate(recommendations, 1): + if validate: + is_safe, safety_msg = analyzer.validate_move_safety(rec, max_disk_usage_percent=max_disk_usage) + if not is_safe: + if "zone conflict" in safety_msg.lower(): + zone_conflicts += 1 + console.print(f"[yellow]⚠ Move {i}: WOULD BE SKIPPED - {safety_msg}[/yellow]") + elif "space" in safety_msg.lower(): + space_issues += 1 + console.print(f"[yellow]⚠ Move {i}: WOULD BE SKIPPED - {safety_msg}[/yellow]") + else: + console.print(f"[yellow]⚠ Move {i}: WOULD BE SKIPPED - {safety_msg}[/yellow]") + continue + safe_moves += 1 + + console.print(f"[green]βœ“ Move {i}: WOULD EXECUTE - {rec.reason}[/green]") + console.print(f"[dim] Target SQL: {rec.to_sql()}[/dim]") + + console.print() + console.print(f"[bold]Dry Run Summary:[/bold]") + console.print(f" β€’ Safe moves that would execute: [green]{safe_moves}[/green]") + console.print(f" β€’ Zone conflicts prevented: [yellow]{zone_conflicts}[/yellow]") + console.print(f" β€’ Space-related issues: [yellow]{space_issues}[/yellow]") + if safe_moves > 0: + console.print(f"\n[green]βœ“ Ready to execute {safe_moves} safe moves. Use --execute to generate SQL commands.[/green]") + else: + console.print(f"\n[yellow]⚠ No safe moves identified. 
@main.command()
@click.option('--connection-string', help='Override connection string from .env')
@click.pass_context
def test_connection(ctx, connection_string: Optional[str]):
    """Test connection to CrateDB cluster

    Uses the optional --connection-string override; otherwise the client
    falls back to its default configuration (CRATE_CONNECTION_STRING).
    Exits with status 1 on any connection failure.
    """
    try:
        # Build a fresh client rather than reusing ctx.obj['client'], so the
        # override (if given) is actually exercised.
        if connection_string:
            client = CrateDBClient(connection_string)
        else:
            client = CrateDBClient()

        if client.test_connection():
            console.print("[green]βœ“ Connection successful![/green]")

            # Get basic cluster info
            nodes = client.get_nodes_info()
            console.print(f"Connected to cluster with {len(nodes)} nodes:")
            for node in nodes:
                console.print(f" β€’ {node.name} (zone: {node.zone})")
        else:
            console.print("[red]βœ— Connection failed[/red]")
            sys.exit(1)

    except Exception as e:
        # Broad catch is intentional here: any driver/network error should
        # surface as a friendly message plus non-zero exit, not a traceback.
        console.print(f"[red]βœ— Connection error: {e}[/red]")
        sys.exit(1)
zone_stats.items(): + total = stats['TOTAL'] + + if tolerance_range[0] <= total <= tolerance_range[1]: + status = "[green]βœ“ Balanced[/green]" + elif total < tolerance_range[0]: + status = f"[yellow]⚠ Under ({total - target_per_zone:+})[/yellow]" + else: + status = f"[red]⚠ Over ({total - target_per_zone:+})[/red]" + + balance_table.add_row( + zone, + str(stats['PRIMARY']), + str(stats['REPLICA']), + str(total), + status + ) + + console.print(balance_table) + + +@main.command() +@click.option('--table', '-t', help='Analyze zones for specific table only') +@click.option('--show-shards/--no-show-shards', default=False, help='Show individual shard details (default: False)') +@click.pass_context +def zone_analysis(ctx, table: Optional[str], show_shards: bool): + """Detailed analysis of zone distribution and potential conflicts""" + client = ctx.obj['client'] + + console.print(Panel.fit("[bold blue]Detailed Zone Analysis[/bold blue]")) + console.print("[dim]Comprehensive zone distribution analysis for CrateDB cluster[/dim]") + console.print() + + # Get all shards for analysis + shards = client.get_shards_info(table_name=table, for_analysis=True) + + if not shards: + console.print("[yellow]No shards found for analysis[/yellow]") + return + + # Organize by table and shard + tables = {} + for shard in shards: + table_key = f"{shard.schema_name}.{shard.table_name}" + if table_key not in tables: + tables[table_key] = {} + + shard_key = shard.shard_id + if shard_key not in tables[table_key]: + tables[table_key][shard_key] = [] + + tables[table_key][shard_key].append(shard) + + # Analyze each table + zone_conflicts = 0 + under_replicated = 0 + + for table_name, table_shards in tables.items(): + console.print(f"\n[bold cyan]Table: {table_name}[/bold cyan]") + + # Create analysis table + analysis_table = Table(title=f"Shard Distribution for {table_name}", box=box.ROUNDED) + analysis_table.add_column("Shard ID", justify="right", style="magenta") + 
analysis_table.add_column("Primary Zone", style="blue") + analysis_table.add_column("Replica Zones", style="green") + analysis_table.add_column("Total Copies", justify="right", style="cyan") + analysis_table.add_column("Status", style="bold") + + for shard_id, shard_copies in sorted(table_shards.items()): + primary_zone = "Unknown" + replica_zones = set() + total_copies = len(shard_copies) + zones_with_copies = set() + + for shard_copy in shard_copies: + zones_with_copies.add(shard_copy.zone) + if shard_copy.is_primary: + primary_zone = shard_copy.zone + else: + replica_zones.add(shard_copy.zone) + + # Determine status + status_parts = [] + if len(zones_with_copies) == 1: + zone_conflicts += 1 + status_parts.append("[red]⚠ ZONE CONFLICT[/red]") + + if total_copies < 2: # Assuming we want at least 1 replica + under_replicated += 1 + status_parts.append("[yellow]⚠ Under-replicated[/yellow]") + + if not status_parts: + status_parts.append("[green]βœ“ Good[/green]") + + replica_zones_str = ", ".join(sorted(replica_zones)) if replica_zones else "None" + + analysis_table.add_row( + str(shard_id), + primary_zone, + replica_zones_str, + str(total_copies), + " ".join(status_parts) + ) + + # Show individual shard details if requested + if show_shards: + for shard_copy in shard_copies: + health_indicator = "βœ“" if shard_copy.routing_state == 'STARTED' else "⚠" + console.print(f" {health_indicator} {shard_copy.shard_type} on {shard_copy.node_name} ({shard_copy.zone}) - {shard_copy.routing_state}") + + console.print(analysis_table) + + # Summary + console.print(f"\n[bold]Zone Analysis Summary:[/bold]") + console.print(f" β€’ Tables analyzed: [cyan]{len(tables)}[/cyan]") + console.print(f" β€’ Zone conflicts detected: [red]{zone_conflicts}[/red]") + console.print(f" β€’ Under-replicated shards: [yellow]{under_replicated}[/yellow]") + + if zone_conflicts > 0: + console.print(f"\n[red]⚠ Found {zone_conflicts} zone conflicts that need attention![/red]") + console.print("[dim]Zone 
conflicts occur when all copies of a shard are in the same zone.[/dim]") + console.print("[dim]This violates CrateDB's zone-awareness and creates availability risks.[/dim]") + + if under_replicated > 0: + console.print(f"\n[yellow]⚠ Found {under_replicated} under-replicated shards.[/yellow]") + console.print("[dim]Consider increasing replication for better availability.[/dim]") + + if zone_conflicts == 0 and under_replicated == 0: + console.print("\n[green]βœ“ No critical zone distribution issues detected![/green]") + + +# @main.command() +# @click.argument('node_name') +# @click.option('--min-free-space', default=100.0, help='Minimum free space required on target nodes in GB (default: 100)') +# @click.option('--dry-run/--execute', default=True, help='Show decommission plan without generating SQL commands (default: True)') +# @click.pass_context +# def decommission(ctx, node_name: str, min_free_space: float, dry_run: bool): +# """Plan decommissioning of a node by analyzing required shard moves +# +# NODE_NAME: Name of the node to decommission +# """ +# client = ctx.obj['client'] +# analyzer = ShardAnalyzer(client) +# +# mode_text = "PLANNING MODE" if dry_run else "EXECUTION MODE" +# console.print(Panel.fit(f"[bold blue]Node Decommission Analysis[/bold blue] - [bold {'green' if dry_run else 'red'}]{mode_text}[/bold {'green' if dry_run else 'red'}]")) +# console.print(f"[dim]Analyzing decommission plan for node: {node_name}[/dim]") +# console.print() +# +# # Generate decommission plan +# plan = analyzer.plan_node_decommission(node_name, min_free_space) +# +# if 'error' in plan: +# console.print(f"[red]Error: {plan['error']}[/red]") +# return +# +# # Display plan summary +# summary_table = Table(title=f"Decommission Plan for {node_name}", box=box.ROUNDED) +# summary_table.add_column("Metric", style="cyan") +# summary_table.add_column("Value", style="magenta") +# +# summary_table.add_row("Node", plan['node']) +# summary_table.add_row("Zone", plan['zone']) +# 
@main.command()
@click.argument('schema_table')
@click.argument('shard_id', type=int)
@click.argument('from_node')
@click.argument('to_node')
@click.option('--max-disk-usage', default=90.0, help='Maximum disk usage percentage for target node (default: 90)')
@click.pass_context
def validate_move(ctx, schema_table: str, shard_id: int, from_node: str, to_node: str, max_disk_usage: float):
    """Validate a specific shard move before execution

    SCHEMA_TABLE: Schema and table name (format: schema.table)
    SHARD_ID: Shard ID to move
    FROM_NODE: Source node name
    TO_NODE: Target node name

    Example: xmover validate-move CUROV.maddoxxFormfactor 4 data-hot-1 data-hot-3
    """
    client = ctx.obj['client']
    analyzer = ShardAnalyzer(client)

    # Parse schema and table
    if '.' not in schema_table:
        console.print("[red]Error: Schema and table must be in format 'schema.table'[/red]")
        return
    schema_name, table_name = schema_table.split('.', 1)

    console.print(Panel.fit("[bold blue]Validating Shard Move[/bold blue]"))
    console.print(f"[dim]Move: {schema_name}.{table_name}[{shard_id}] from {from_node} to {to_node}[/dim]")
    console.print()

    # Resolve node names against the cluster's node list.
    from_node_info = next((n for n in analyzer.nodes if n.name == from_node), None)
    to_node_info = next((n for n in analyzer.nodes if n.name == to_node), None)

    if not from_node_info:
        console.print(f"[red]βœ— Source node '{from_node}' not found in cluster[/red]")
        return
    if not to_node_info:
        console.print(f"[red]βœ— Target node '{to_node}' not found in cluster[/red]")
        return

    # Locate the specific shard copy on the source node.
    target_shard = next(
        (s for s in analyzer.shards
         if s.schema_name == schema_name
         and s.table_name == table_name
         and s.shard_id == shard_id
         and s.node_name == from_node),
        None,
    )
    if not target_shard:
        console.print(f"[red]βœ— Shard {shard_id} not found on node {from_node}[/red]")
        console.print("[dim]Use 'xmover find-candidates' to see available shards[/dim]")
        return

    # Create a move recommendation for validation
    recommendation = MoveRecommendation(
        table_name=table_name,
        schema_name=schema_name,
        shard_id=shard_id,
        from_node=from_node,
        to_node=to_node,
        from_zone=from_node_info.zone,
        to_zone=to_node_info.zone,
        shard_type=target_shard.shard_type,
        size_gb=target_shard.size_gb,
        reason="Manual validation"
    )

    # Display shard details
    details_table = Table(title="Shard Details", box=box.ROUNDED)
    details_table.add_column("Property", style="cyan")
    details_table.add_column("Value", style="magenta")

    details_table.add_row("Table", f"{schema_name}.{table_name}")
    details_table.add_row("Shard ID", str(shard_id))
    details_table.add_row("Type", target_shard.shard_type)
    details_table.add_row("Size", format_size(target_shard.size_gb))
    details_table.add_row("Documents", f"{target_shard.num_docs:,}")
    details_table.add_row("State", target_shard.state)
    details_table.add_row("Routing State", target_shard.routing_state)
    details_table.add_row("From Node", f"{from_node} ({from_node_info.zone})")
    details_table.add_row("To Node", f"{to_node} ({to_node_info.zone})")
    details_table.add_row("Zone Change", "Yes" if from_node_info.zone != to_node_info.zone else "No")

    console.print(details_table)
    console.print()

    # Perform comprehensive validation
    is_safe, safety_msg = analyzer.validate_move_safety(recommendation, max_disk_usage_percent=max_disk_usage)

    if is_safe:
        console.print("[green]βœ“ VALIDATION PASSED - Move appears safe[/green]")
        console.print(f"[green]βœ“ {safety_msg}[/green]")
        console.print()

        # Show the SQL command
        console.print(Panel.fit("[bold green]Ready to Execute[/bold green]"))
        console.print("[dim]# Copy and paste this command to execute the move[/dim]")
        console.print()
        console.print(f"{recommendation.to_sql()}")
        console.print()
        console.print("[dim]# Monitor shard health after execution[/dim]")
        # Bug fix: this hint was missing its f-prefix and printed the literal
        # text "{table_name}" / "{shard_id}" instead of the actual values.
        console.print(f"[dim]# Check with: SELECT * FROM sys.shards WHERE table_name = '{table_name}' AND id = {shard_id};[/dim]")
    else:
        console.print("[red]βœ— VALIDATION FAILED - Move not safe[/red]")
        console.print(f"[red]βœ— {safety_msg}[/red]")
        console.print()

        # Provide troubleshooting guidance keyed off the failure message.
        failure = safety_msg.lower()
        if "zone conflict" in failure:
            console.print("[yellow]πŸ’‘ Troubleshooting Zone Conflicts:[/yellow]")
            console.print(" β€’ Check current shard distribution: xmover zone-analysis --show-shards")
            console.print(" β€’ Try moving to a different zone")
            console.print(" β€’ Verify cluster has proper zone-awareness configuration")
        elif "node conflict" in failure:
            console.print("[yellow]πŸ’‘ Troubleshooting Node Conflicts:[/yellow]")
            console.print(" β€’ The target node already has a copy of this shard")
            console.print(" β€’ Choose a different target node")
            console.print(" β€’ Check shard distribution: xmover analyze")
        elif "space" in failure:
            console.print("[yellow]πŸ’‘ Troubleshooting Space Issues:[/yellow]")
            console.print(" β€’ Free up space on the target node")
            console.print(" β€’ Choose a node with more available capacity")
            console.print(" β€’ Check node capacity: xmover analyze")
        elif "usage" in failure:
            console.print("[yellow]πŸ’‘ Troubleshooting High Disk Usage:[/yellow]")
            console.print(" β€’ Wait for target node disk usage to decrease")
            console.print(" β€’ Choose a node with lower disk usage")
            console.print(" β€’ Check cluster health: xmover analyze")
            console.print(" β€’ Consider using --max-disk-usage option for urgent moves")
break + error_message = "\n".join(lines) + + if not error_message.strip(): + console.print("[yellow]No error message provided[/yellow]") + return + + console.print(f"[dim]Analyzing error message...[/dim]") + console.print() + + # Common CrateDB allocation error patterns and solutions + error_patterns = [ + { + "pattern": "a copy of this shard is already allocated to this node", + "title": "Node Already Has Shard Copy", + "explanation": "The target node already contains a copy (primary or replica) of this shard.", + "solutions": [ + "Choose a different target node that doesn't have this shard", + "Use 'xmover zone-analysis --show-shards' to see current distribution", + "Verify the shard ID and table name are correct" + ], + "prevention": "Always check current shard locations before moving" + }, + { + "pattern": "there are too many copies of the shard allocated to nodes with attribute", + "title": "Zone Allocation Limit Exceeded", + "explanation": "CrateDB's zone awareness prevents too many copies in the same zone.", + "solutions": [ + "Move the shard to a different availability zone", + "Check zone balance with 'xmover check-balance'", + "Ensure target zone doesn't already have copies of this shard" + ], + "prevention": "Use 'xmover recommend' which respects zone constraints" + }, + { + "pattern": "not enough disk space", + "title": "Insufficient Disk Space", + "explanation": "The target node doesn't have enough free disk space for the shard.", + "solutions": [ + "Free up space on the target node", + "Choose a node with more available capacity", + "Check available space with 'xmover analyze'" + ], + "prevention": "Use '--min-free-space' parameter in recommendations" + }, + { + "pattern": "shard recovery limit", + "title": "Recovery Limit Exceeded", + "explanation": "Too many shards are currently being moved/recovered simultaneously.", + "solutions": [ + "Wait for current recoveries to complete", + "Check recovery status in CrateDB admin UI", + "Reduce concurrent 
recoveries in cluster settings" + ], + "prevention": "Move shards gradually, monitor recovery progress" + }, + { + "pattern": "allocation is disabled", + "title": "Allocation Disabled", + "explanation": "Shard allocation is temporarily disabled in the cluster.", + "solutions": [ + "Re-enable allocation: PUT /_cluster/settings {\"persistent\":{\"cluster.routing.allocation.enable\":\"all\"}}", + "Check if allocation was disabled for maintenance", + "Verify cluster health before re-enabling" + ], + "prevention": "Check allocation status before performing moves" + } + ] + + # Find matching patterns + matches = [] + error_lower = error_message.lower() + + for pattern_info in error_patterns: + if pattern_info["pattern"].lower() in error_lower: + matches.append(pattern_info) + + if matches: + for i, match in enumerate(matches): + if i > 0: + console.print("\n" + "─" * 60 + "\n") + + console.print(f"[bold red]🚨 {match['title']}[/bold red]") + console.print(f"[yellow]πŸ“ Explanation:[/yellow] {match['explanation']}") + console.print() + + console.print("[green]πŸ’‘ Solutions:[/green]") + for j, solution in enumerate(match['solutions'], 1): + console.print(f" {j}. {solution}") + console.print() + + console.print(f"[blue]πŸ›‘οΈ Prevention:[/blue] {match['prevention']}") + else: + console.print("[yellow]⚠ No specific pattern match found[/yellow]") + console.print() + console.print("[bold]General Troubleshooting Steps:[/bold]") + console.print("1. Check current shard distribution: [cyan]xmover analyze[/cyan]") + console.print("2. Validate the specific move: [cyan]xmover validate-move schema.table shard_id from_node to_node[/cyan]") + console.print("3. Check zone conflicts: [cyan]xmover zone-analysis --show-shards[/cyan]") + console.print("4. Verify node capacity: [cyan]xmover analyze[/cyan]") + console.print("5. 
@main.command()
@click.option('--table', '-t', help='Monitor recovery for specific table only')
@click.option('--node', '-n', help='Monitor recovery on specific node only')
@click.option('--watch', '-w', is_flag=True, help='Continuously monitor (refresh every 10s)')
@click.option('--refresh-interval', default=10, help='Refresh interval for watch mode (seconds)')
@click.option('--recovery-type', type=click.Choice(['PEER', 'DISK', 'all']), default='all', help='Filter by recovery type')
@click.option('--include-transitioning', is_flag=True, help='Include completed recoveries still in transitioning state')
@click.pass_context
def monitor_recovery(ctx, table: str, node: str, watch: bool, refresh_interval: int, recovery_type: str, include_transitioning: bool):
    """Monitor active shard recovery operations on the cluster

    This command monitors ongoing shard recoveries by querying sys.allocations
    and sys.shards tables. It shows recovery progress, type (PEER/DISK), and timing.

    By default, only shows actively progressing recoveries. Use --include-transitioning
    to also see completed recoveries that haven't fully transitioned to STARTED state.

    Examples:
        xmover monitor-recovery                          # Show active recoveries only
        xmover monitor-recovery --include-transitioning  # Show active + transitioning
        xmover monitor-recovery --table myTable          # Monitor specific table
        xmover monitor-recovery --watch                  # Continuous monitoring
        xmover monitor-recovery --recovery-type PEER     # Only PEER recoveries
    """

    def _node_route(recovery) -> str:
        # Human-readable "source β†’ destination" suffix for a recovery line.
        if recovery.recovery_type == "PEER" and recovery.source_node_name:
            return f" {recovery.source_node_name} β†’ {recovery.node_name}"
        if recovery.recovery_type == "DISK":
            return f" disk β†’ {recovery.node_name}"
        return ""

    def _table_display(recovery) -> str:
        # Hide the default "doc" schema for brevity.
        if recovery.schema_name == "doc":
            return recovery.table_name
        return f"{recovery.schema_name}.{recovery.table_name}"

    try:
        client = ctx.obj['client']
        recovery_monitor = RecoveryMonitor(client)

        if watch:
            console.print(f"πŸ”„ Monitoring shard recoveries (refreshing every {refresh_interval}s)")
            console.print("Press Ctrl+C to stop")
            console.print()

            try:
                # Show header once
                console.print("πŸ“Š Recovery Progress Monitor")
                console.print("=" * 80)

                # Track previous state for change detection
                previous_recoveries = {}
                first_run = True

                while True:
                    recoveries = recovery_monitor.get_cluster_recovery_status(
                        table_name=table,
                        node_name=node,
                        recovery_type_filter=recovery_type,
                        include_transitioning=include_transitioning
                    )

                    current_time = datetime.now().strftime("%H:%M:%S")

                    changes = []
                    active_count = 0
                    completed_count = 0

                    for recovery in recoveries:
                        recovery_key = f"{recovery.schema_name}.{recovery.table_name}.{recovery.shard_id}.{recovery.node_name}"
                        table_display = _table_display(recovery)

                        # Count active vs completed
                        if recovery.stage == "DONE" and recovery.overall_progress >= 100.0:
                            completed_count += 1
                        else:
                            active_count += 1

                        # Check for changes since last update
                        if recovery_key in previous_recoveries:
                            prev = previous_recoveries[recovery_key]
                            if prev['progress'] != recovery.overall_progress:
                                diff = recovery.overall_progress - prev['progress']
                                node_route = _node_route(recovery)
                                translog_info = format_translog_info(recovery)
                                if diff > 0:
                                    changes.append(f"[green]πŸ“ˆ[/green] {table_display} S{recovery.shard_id} {recovery.overall_progress:.1f}% (+{diff:.1f}%) {recovery.size_gb:.1f}GB{translog_info}{node_route}")
                                else:
                                    changes.append(f"[yellow]πŸ“‰[/yellow] {table_display} S{recovery.shard_id} {recovery.overall_progress:.1f}% ({diff:.1f}%) {recovery.size_gb:.1f}GB{translog_info}{node_route}")
                            elif prev['stage'] != recovery.stage:
                                node_route = _node_route(recovery)
                                translog_info = format_translog_info(recovery)
                                changes.append(f"[blue]πŸ”„[/blue] {table_display} S{recovery.shard_id} {prev['stage']}β†’{recovery.stage} {recovery.size_gb:.1f}GB{translog_info}{node_route}")
                        else:
                            # New recovery - show based on include_transitioning flag or first run
                            if first_run or include_transitioning or (recovery.overall_progress < 100.0 or recovery.stage != "DONE"):
                                node_route = _node_route(recovery)
                                status_icon = "[cyan]πŸ†•[/cyan]" if not first_run else "[blue]πŸ“‹[/blue]"
                                translog_info = format_translog_info(recovery)
                                changes.append(f"{status_icon} {table_display} S{recovery.shard_id} {recovery.stage} {recovery.overall_progress:.1f}% {recovery.size_gb:.1f}GB{translog_info}{node_route}")

                        # Store current state for next comparison
                        previous_recoveries[recovery_key] = {
                            'progress': recovery.overall_progress,
                            'stage': recovery.stage
                        }

                    # Always show a status line
                    if not recoveries:
                        console.print(f"{current_time} | [green]No recoveries - cluster stable[/green]")
                        previous_recoveries.clear()
                    else:
                        status = ""
                        if active_count > 0:
                            status = f"{active_count} active"
                        if completed_count > 0:
                            status += f", {completed_count} done" if status else f"{completed_count} done"

                        if changes:
                            console.print(f"{current_time} | {status}")
                            for change in changes:
                                console.print(f" | {change}")
                        else:
                            # Show periodic status even without changes
                            if include_transitioning and completed_count > 0:
                                console.print(f"{current_time} | {status} (transitioning)")
                            elif active_count > 0:
                                console.print(f"{current_time} | {status} (no changes)")

                    first_run = False
                    time.sleep(refresh_interval)

            except KeyboardInterrupt:
                console.print("\n\n[yellow]⏹ Monitoring stopped by user[/yellow]")

                # Show final summary
                final_recoveries = recovery_monitor.get_cluster_recovery_status(
                    table_name=table,
                    node_name=node,
                    recovery_type_filter=recovery_type,
                    include_transitioning=include_transitioning
                )

                if final_recoveries:
                    console.print("\nπŸ“Š [bold]Final Recovery Summary:[/bold]")
                    summary = recovery_monitor.get_recovery_summary(final_recoveries)

                    active_count = len([r for r in final_recoveries if r.overall_progress < 100.0 or r.stage != "DONE"])
                    completed_count = len(final_recoveries) - active_count

                    console.print(f" Total recoveries: {summary['total_recoveries']}")
                    console.print(f" Active: {active_count}, Completed: {completed_count}")
                    console.print(f" Total size: {summary['total_size_gb']:.1f} GB")
                    console.print(f" Average progress: {summary['avg_progress']:.1f}%")

                    if summary['by_type']:
                        console.print(" By recovery type:")
                        for rec_type, stats in summary['by_type'].items():
                            console.print(f" {rec_type}: {stats['count']} recoveries, {stats['avg_progress']:.1f}% avg progress")
                else:
                    console.print("\n[green]βœ… No active recoveries at exit[/green]")

            return

        else:
            # Single status check
            recoveries = recovery_monitor.get_cluster_recovery_status(
                table_name=table,
                node_name=node,
                recovery_type_filter=recovery_type,
                include_transitioning=include_transitioning
            )

            display_output = recovery_monitor.format_recovery_display(recoveries)
            console.print(display_output)

            if not recoveries:
                if include_transitioning:
                    console.print("\n[green]βœ… No recoveries found (active or transitioning)[/green]")
                else:
                    console.print("\n[green]βœ… No active recoveries found[/green]")
                    console.print("[dim]πŸ’‘ Use --include-transitioning to see completed recoveries still transitioning[/dim]")
            else:
                summary = recovery_monitor.get_recovery_summary(recoveries)
                console.print("\nπŸ“Š [bold]Recovery Summary:[/bold]")
                console.print(f" Total recoveries: {summary['total_recoveries']}")
                console.print(f" Total size: {summary['total_size_gb']:.1f} GB")
                console.print(f" Average progress: {summary['avg_progress']:.1f}%")

                if summary['by_type']:
                    console.print("\n By recovery type:")
                    for rec_type, stats in summary['by_type'].items():
                        console.print(f" {rec_type}: {stats['count']} recoveries, {stats['avg_progress']:.1f}% avg progress")

                console.print("\n[dim]πŸ’‘ Use --watch flag for continuous monitoring[/dim]")

    except Exception as e:
        console.print(f"[red]❌ Error monitoring recoveries: {e}[/red]")
        if ctx.obj.get('debug'):
            raise
def _execute_recommendations_safely(client, recommendations, validate: bool):
    """Execute move recommendations with extensive safety measures.

    Optionally re-validates each recommendation, requires a double interactive
    confirmation, executes the generated ALTER TABLE ... REROUTE statements one
    by one, throttles on recovery capacity between moves, and prints a summary.
    """
    # Relative import: this module lives inside the cratedb_toolkit.admin.xmover
    # package, so the old standalone `xmover` package name no longer resolves.
    from .analyzer import ShardAnalyzer

    # Filter to only safe recommendations
    safe_recommendations = []
    if validate:
        analyzer = ShardAnalyzer(client)
        for rec in recommendations:
            is_safe, safety_msg = analyzer.validate_move_safety(rec, max_disk_usage_percent=95.0)
            if is_safe:
                safe_recommendations.append(rec)
    else:
        safe_recommendations = recommendations

    if not safe_recommendations:
        console.print("[yellow]⚠ No safe recommendations to execute[/yellow]")
        return

    console.print("\n[bold red]🚨 AUTO-EXECUTION MODE 🚨[/bold red]")
    console.print(f"About to execute {len(safe_recommendations)} shard moves automatically:")
    console.print()

    # Show what will be executed
    for i, rec in enumerate(safe_recommendations, 1):
        table_display = f"{rec.schema_name}.{rec.table_name}" if rec.schema_name != "doc" else rec.table_name
        console.print(f" {i}. {table_display} S{rec.shard_id} ({rec.size_gb:.1f}GB) {rec.from_node} β†’ {rec.to_node}")

    console.print()
    console.print("[bold yellow]⚠ SAFETY WARNINGS:[/bold yellow]")
    console.print(" β€’ These commands will immediately start shard movements")
    console.print(" β€’ Each move will temporarily impact cluster performance")
    console.print(" β€’ Recovery time depends on shard size and network speed")
    console.print(" β€’ You should monitor progress with: xmover monitor-recovery --watch")
    console.print()

    # Double confirmation before touching the cluster.
    try:
        response1 = input("Type 'EXECUTE' to proceed with automatic execution: ").strip()
        if response1 != "EXECUTE":
            console.print("[yellow]❌ Execution cancelled[/yellow]")
            return

        response2 = input(f"Confirm: Execute {len(safe_recommendations)} shard moves? (yes/no): ").strip().lower()
        if response2 not in ['yes', 'y']:
            console.print("[yellow]❌ Execution cancelled[/yellow]")
            return

    except KeyboardInterrupt:
        console.print("\n[yellow]❌ Execution cancelled by user[/yellow]")
        return

    console.print(f"\nπŸš€ [bold green]Executing {len(safe_recommendations)} shard moves...[/bold green]")
    console.print()

    successful_moves = 0
    failed_moves = 0

    for i, rec in enumerate(safe_recommendations, 1):
        table_display = f"{rec.schema_name}.{rec.table_name}" if rec.schema_name != "doc" else rec.table_name
        sql_command = rec.to_sql()

        console.print(f"[{i}/{len(safe_recommendations)}] Executing: {table_display} S{rec.shard_id} ({rec.size_gb:.1f}GB)")
        console.print(f" {rec.from_node} β†’ {rec.to_node}")

        try:
            # Execute the SQL command
            result = client.execute_query(sql_command)

            if result.get('rowcount', 0) >= 0:  # Success indicator for ALTER statements
                console.print(" [green]βœ… SUCCESS[/green] - Move initiated")
                successful_moves += 1

                # Smart delay: check active recoveries before next move
                if i < len(safe_recommendations):
                    _wait_for_recovery_capacity(client, max_concurrent_recoveries=5)
            else:
                console.print(f" [red]❌ FAILED[/red] - Unexpected result: {result}")
                failed_moves += 1

        except Exception as e:
            console.print(f" [red]❌ FAILED[/red] - Error: {e}")
            failed_moves += 1

            # Ask whether to continue after a failure
            if i < len(safe_recommendations):
                try:
                    continue_response = input(f" Continue with remaining {len(safe_recommendations) - i} moves? (yes/no): ").strip().lower()
                    if continue_response not in ['yes', 'y']:
                        console.print("[yellow]⏹ Execution stopped by user[/yellow]")
                        break
                except KeyboardInterrupt:
                    console.print("\n[yellow]⏹ Execution stopped by user[/yellow]")
                    break

        console.print()

    # Final summary
    console.print("πŸ“Š [bold]Execution Summary:[/bold]")
    console.print(f" Successful moves: [green]{successful_moves}[/green]")
    console.print(f" Failed moves: [red]{failed_moves}[/red]")
    console.print(f" Total attempted: {successful_moves + failed_moves}")

    if successful_moves > 0:
        console.print()
        console.print("[green]βœ… Shard moves initiated successfully![/green]")
        console.print("[dim]πŸ’‘ Monitor progress with:[/dim]")
        console.print("[dim] xmover monitor-recovery --watch[/dim]")
        console.print("[dim]πŸ’‘ Check cluster status with:[/dim]")
        console.print("[dim] xmover analyze[/dim]")

    if failed_moves > 0:
        console.print()
        console.print(f"[yellow]⚠ {failed_moves} moves failed - check cluster status and retry if needed[/yellow]")


if __name__ == '__main__':
    main()
about a CrateDB node""" + id: str + name: str + zone: str + heap_used: int + heap_max: int + fs_total: int + fs_used: int + fs_available: int + + @property + def heap_usage_percent(self) -> float: + return (self.heap_used / self.heap_max) * 100 if self.heap_max > 0 else 0 + + @property + def disk_usage_percent(self) -> float: + return (self.fs_used / self.fs_total) * 100 if self.fs_total > 0 else 0 + + @property + def available_space_gb(self) -> float: + return self.fs_available / (1024**3) + + +@dataclass +class ShardInfo: + """Information about a shard""" + table_name: str + schema_name: str + shard_id: int + node_id: str + node_name: str + zone: str + is_primary: bool + size_bytes: int + size_gb: float + num_docs: int + state: str + routing_state: str + + @property + def shard_type(self) -> str: + return "PRIMARY" if self.is_primary else "REPLICA" + + +@dataclass +class RecoveryInfo: + """Information about an active shard recovery""" + schema_name: str + table_name: str + shard_id: int + node_name: str + node_id: str + recovery_type: str # PEER, DISK, etc. + stage: str # INIT, INDEX, VERIFY_INDEX, TRANSLOG, FINALIZE, DONE + files_percent: float + bytes_percent: float + total_time_ms: int + routing_state: str # INITIALIZING, RELOCATING, etc. 
+ current_state: str # from allocations + is_primary: bool + size_bytes: int + source_node_name: Optional[str] = None # Source node for PEER recoveries + translog_size_bytes: int = 0 # Translog size in bytes + + @property + def overall_progress(self) -> float: + """Calculate overall progress percentage""" + return max(self.files_percent, self.bytes_percent) + + @property + def size_gb(self) -> float: + """Size in GB""" + return self.size_bytes / (1024**3) + + @property + def shard_type(self) -> str: + return "PRIMARY" if self.is_primary else "REPLICA" + + @property + def total_time_seconds(self) -> float: + """Total time in seconds""" + return self.total_time_ms / 1000.0 + + @property + def translog_size_gb(self) -> float: + """Translog size in GB""" + return self.translog_size_bytes / (1024**3) + + @property + def translog_percentage(self) -> float: + """Translog size as percentage of shard size""" + return (self.translog_size_bytes / self.size_bytes * 100) if self.size_bytes > 0 else 0 + + +class CrateDBClient: + """Client for connecting to CrateDB and executing queries""" + + def __init__(self, connection_string: Optional[str] = None): + load_dotenv() + + self.connection_string = connection_string or os.getenv('CRATE_CONNECTION_STRING') + if not self.connection_string: + raise ValueError("CRATE_CONNECTION_STRING not found in environment or provided") + + self.username = os.getenv('CRATE_USERNAME') + self.password = os.getenv('CRATE_PASSWORD') + self.ssl_verify = os.getenv('CRATE_SSL_VERIFY', 'true').lower() == 'true' + + # Ensure connection string ends with _sql endpoint + if not self.connection_string.endswith('/_sql'): + self.connection_string = self.connection_string.rstrip('/') + '/_sql' + + def execute_query(self, query: str, parameters: Optional[List] = None) -> Dict[str, Any]: + """Execute a SQL query against CrateDB""" + payload = { + 'stmt': query + } + + if parameters: + payload['args'] = parameters + + auth = None + if self.username and self.password: 
+ auth = (self.username, self.password) + + try: + response = requests.post( + self.connection_string, + json=payload, + auth=auth, + verify=self.ssl_verify, + timeout=30 + ) + response.raise_for_status() + return response.json() + except requests.exceptions.RequestException as e: + raise Exception(f"Failed to execute query: {e}") + + def get_nodes_info(self) -> List[NodeInfo]: + """Get information about all nodes in the cluster""" + query = """ + SELECT + id, + name, + attributes['zone'] as zone, + heap['used'] as heap_used, + heap['max'] as heap_max, + fs['total']['size'] as fs_total, + fs['total']['used'] as fs_used, + fs['total']['available'] as fs_available + FROM sys.nodes + WHERE name IS NOT NULL + ORDER BY name + """ + + result = self.execute_query(query) + nodes = [] + + for row in result.get('rows', []): + nodes.append(NodeInfo( + id=row[0], + name=row[1], + zone=row[2] or 'unknown', + heap_used=row[3] or 0, + heap_max=row[4] or 0, + fs_total=row[5] or 0, + fs_used=row[6] or 0, + fs_available=row[7] or 0 + )) + + return nodes + + def get_shards_info(self, table_name: Optional[str] = None, + min_size_gb: Optional[float] = None, + max_size_gb: Optional[float] = None, + for_analysis: bool = False) -> List[ShardInfo]: + """Get information about shards, optionally filtered by table and size + + Args: + table_name: Filter by specific table + min_size_gb: Minimum shard size in GB + max_size_gb: Maximum shard size in GB + for_analysis: If True, includes all shards regardless of state (for cluster analysis) + If False, only includes healthy shards suitable for operations + """ + + where_conditions = [] + if not for_analysis: + # For operations, only include healthy shards + where_conditions.extend([ + "s.routing_state = 'STARTED'", + "s.recovery['files']['percent'] = 100.0" + ]) + parameters = [] + + if table_name: + where_conditions.append("s.table_name = ?") + parameters.append(table_name) + + if min_size_gb is not None: + where_conditions.append("s.size >= ?") 
+ parameters.append(int(min_size_gb * 1024**3)) # Convert GB to bytes + + if max_size_gb is not None: + where_conditions.append("s.size <= ?") + parameters.append(int(max_size_gb * 1024**3)) # Convert GB to bytes + + where_clause = "" + if where_conditions: + where_clause = f"WHERE {' AND '.join(where_conditions)}" + + query = f""" + SELECT + s.table_name, + s.schema_name, + s.id as shard_id, + s.node['id'] as node_id, + s.node['name'] as node_name, + n.attributes['zone'] as zone, + s."primary" as is_primary, + s.size as size_bytes, + s.size / 1024.0^3 as size_gb, + s.num_docs, + s.state, + s.routing_state + FROM sys.shards s + JOIN sys.nodes n ON s.node['id'] = n.id + {where_clause} + ORDER BY s.table_name, s.schema_name, s.id, s."primary" DESC + """ + + result = self.execute_query(query, parameters) + shards = [] + + for row in result.get('rows', []): + shards.append(ShardInfo( + table_name=row[0], + schema_name=row[1], + shard_id=row[2], + node_id=row[3], + node_name=row[4], + zone=row[5] or 'unknown', + is_primary=row[6], + size_bytes=row[7] or 0, + size_gb=float(row[8] or 0), + num_docs=row[9] or 0, + state=row[10], + routing_state=row[11] + )) + + return shards + + def get_shard_distribution_summary(self, for_analysis: bool = True) -> Dict[str, Any]: + """Get a summary of shard distribution across nodes and zones + + Args: + for_analysis: If True, includes all shards for complete cluster analysis + If False, only includes operational shards + """ + where_clause = "" + if not for_analysis: + where_clause = """ + WHERE s.routing_state = 'STARTED' + AND s.recovery['files']['percent'] = 100.0""" + + query = f""" + SELECT + n.attributes['zone'] as zone, + s.node['name'] as node_name, + CASE WHEN s."primary" = true THEN 'PRIMARY' ELSE 'REPLICA' END as shard_type, + COUNT(*) as shard_count, + SUM(s.size) / 1024.0^3 as total_size_gb, + AVG(s.size) / 1024.0^3 as avg_size_gb + FROM sys.shards s + JOIN sys.nodes n ON s.node['id'] = n.id{where_clause} + GROUP BY 
n.attributes['zone'], s.node['name'], s."primary" + ORDER BY zone, node_name, shard_type DESC + """ + + result = self.execute_query(query) + + summary = { + 'by_zone': {}, + 'by_node': {}, + 'totals': {'primary': 0, 'replica': 0, 'total_size_gb': 0} + } + + for row in result.get('rows', []): + zone = row[0] or 'unknown' + node_name = row[1] + shard_type = row[2] + shard_count = row[3] + total_size_gb = float(row[4] or 0) + avg_size_gb = float(row[5] or 0) + + # By zone summary + if zone not in summary['by_zone']: + summary['by_zone'][zone] = {'PRIMARY': 0, 'REPLICA': 0, 'total_size_gb': 0} + summary['by_zone'][zone][shard_type] += shard_count + summary['by_zone'][zone]['total_size_gb'] += total_size_gb + + # By node summary + if node_name not in summary['by_node']: + summary['by_node'][node_name] = { + 'zone': zone, + 'PRIMARY': 0, + 'REPLICA': 0, + 'total_size_gb': 0 + } + summary['by_node'][node_name][shard_type] += shard_count + summary['by_node'][node_name]['total_size_gb'] += total_size_gb + + # Overall totals + if shard_type == 'PRIMARY': + summary['totals']['primary'] += shard_count + else: + summary['totals']['replica'] += shard_count + summary['totals']['total_size_gb'] += total_size_gb + + return summary + + def test_connection(self) -> bool: + """Test the connection to CrateDB""" + try: + result = self.execute_query("SELECT 1") + return result.get('rowcount', 0) >= 0 + except Exception: + return False + + def get_cluster_watermarks(self) -> Dict[str, Any]: + """Get cluster disk watermark settings""" + query = """ + SELECT settings['cluster']['routing']['allocation']['disk']['watermark'] + FROM sys.cluster + """ + + try: + result = self.execute_query(query) + if result.get('rows'): + watermarks = result['rows'][0][0] or {} + return { + 'low': watermarks.get('low', 'Not set'), + 'high': watermarks.get('high', 'Not set'), + 'flood_stage': watermarks.get('flood_stage', 'Not set'), + 'enable_for_single_data_node': watermarks.get('enable_for_single_data_node', 
'Not set') + } + return {} + except Exception: + return {} + + def get_active_recoveries(self, table_name: Optional[str] = None, + node_name: Optional[str] = None) -> List[Dict[str, Any]]: + """Get shards that are currently in recovery states from sys.allocations""" + + where_conditions = ["current_state != 'STARTED'"] + parameters = [] + + if table_name: + where_conditions.append("table_name = ?") + parameters.append(table_name) + + if node_name: + where_conditions.append("node_id = (SELECT id FROM sys.nodes WHERE name = ?)") + parameters.append(node_name) + + where_clause = f"WHERE {' AND '.join(where_conditions)}" + + query = f""" + SELECT + table_name, + shard_id, + current_state, + explanation, + node_id + FROM sys.allocations + {where_clause} + ORDER BY current_state, table_name, shard_id + """ + + result = self.execute_query(query, parameters) + + allocations = [] + for row in result.get('rows', []): + allocations.append({ + 'schema_name': 'doc', # Default schema since not available in sys.allocations + 'table_name': row[0], + 'shard_id': row[1], + 'current_state': row[2], + 'explanation': row[3], + 'node_id': row[4] + }) + + return allocations + + def get_recovery_details(self, schema_name: str, table_name: str, shard_id: int) -> Optional[Dict[str, Any]]: + """Get detailed recovery information for a specific shard from sys.shards""" + + # Query for shards that are actively recovering (not completed) + query = """ + SELECT + s.table_name, + s.schema_name, + s.id as shard_id, + s.node['name'] as node_name, + s.node['id'] as node_id, + s.routing_state, + s.state, + s.recovery, + s.size, + s."primary", + s.translog_stats['size'] as translog_size + FROM sys.shards s + WHERE s.table_name = ? AND s.id = ? 
+ AND (s.state = 'RECOVERING' OR s.routing_state IN ('INITIALIZING', 'RELOCATING')) + ORDER BY s.schema_name + LIMIT 1 + """ + + result = self.execute_query(query, [table_name, shard_id]) + + if not result.get('rows'): + return None + + row = result['rows'][0] + return { + 'table_name': row[0], + 'schema_name': row[1], + 'shard_id': row[2], + 'node_name': row[3], + 'node_id': row[4], + 'routing_state': row[5], + 'state': row[6], + 'recovery': row[7], + 'size': row[8], + 'primary': row[9], + 'translog_size': row[10] or 0 + } + + def get_all_recovering_shards(self, table_name: Optional[str] = None, + node_name: Optional[str] = None, + include_transitioning: bool = False) -> List[RecoveryInfo]: + """Get comprehensive recovery information by combining sys.allocations and sys.shards data""" + + # Step 1: Get active recoveries from allocations (efficient) + active_allocations = self.get_active_recoveries(table_name, node_name) + + if not active_allocations: + return [] + + recoveries = [] + + # Step 2: Get detailed recovery info for each active recovery + for allocation in active_allocations: + recovery_detail = self.get_recovery_details( + allocation['schema_name'], # This will be 'doc' default + allocation['table_name'], + allocation['shard_id'] + ) + + if recovery_detail and recovery_detail.get('recovery'): + # Update allocation with actual schema from sys.shards + allocation['schema_name'] = recovery_detail['schema_name'] + recovery_info = self._parse_recovery_info(allocation, recovery_detail) + + # Filter out completed recoveries unless include_transitioning is True + if include_transitioning or not self._is_recovery_completed(recovery_info): + recoveries.append(recovery_info) + + # Sort by recovery type, then by progress + return sorted(recoveries, key=lambda r: (r.recovery_type, -r.overall_progress)) + + def _parse_recovery_info(self, allocation: Dict[str, Any], + shard_detail: Dict[str, Any]) -> RecoveryInfo: + """Parse recovery information from allocation and 
shard data""" + + recovery = shard_detail.get('recovery', {}) + + # Extract recovery progress information + files_info = recovery.get('files', {}) + size_info = recovery.get('size', {}) + + files_percent = float(files_info.get('percent', 0.0)) + bytes_percent = float(size_info.get('percent', 0.0)) + + # Calculate actual progress based on recovered vs used + files_recovered = files_info.get('recovered', 0) + files_used = files_info.get('used', 1) # Avoid division by zero + size_recovered = size_info.get('recovered', 0) + size_used = size_info.get('used', 1) # Avoid division by zero + + # Use actual progress if different from reported percent + actual_files_percent = (files_recovered / files_used * 100.0) if files_used > 0 else files_percent + actual_size_percent = (size_recovered / size_used * 100.0) if size_used > 0 else bytes_percent + + # Use the more conservative (lower) progress value + final_files_percent = min(files_percent, actual_files_percent) + final_bytes_percent = min(bytes_percent, actual_size_percent) + + # Get source node for PEER recoveries + source_node = None + if recovery.get('type') == 'PEER': + source_node = self._find_source_node_for_recovery( + shard_detail['schema_name'], + shard_detail['table_name'], + shard_detail['shard_id'], + shard_detail['node_id'] + ) + + return RecoveryInfo( + schema_name=shard_detail['schema_name'], + table_name=shard_detail['table_name'], + shard_id=shard_detail['shard_id'], + node_name=shard_detail['node_name'], + node_id=shard_detail['node_id'], + recovery_type=recovery.get('type', 'UNKNOWN'), + stage=recovery.get('stage', 'UNKNOWN'), + files_percent=final_files_percent, + bytes_percent=final_bytes_percent, + total_time_ms=recovery.get('total_time', 0), + routing_state=shard_detail['routing_state'], + current_state=allocation['current_state'], + is_primary=shard_detail['primary'], + size_bytes=shard_detail.get('size', 0), + source_node_name=source_node, + translog_size_bytes=shard_detail.get('translog_size', 0) + 
) + + def _find_source_node_for_recovery(self, schema_name: str, table_name: str, shard_id: int, target_node_id: str) -> Optional[str]: + """Find source node for PEER recovery by looking for primary or other replicas""" + try: + # First try to find the primary shard of the same table/shard + query = """ + SELECT node['name'] as node_name + FROM sys.shards + WHERE schema_name = ? AND table_name = ? AND id = ? + AND state = 'STARTED' AND node['id'] != ? + AND "primary" = true + LIMIT 1 + """ + + result = self.execute_query(query, [schema_name, table_name, shard_id, target_node_id]) + + if result.get('rows'): + return result['rows'][0][0] + + # If no primary found, look for any started replica + query_replica = """ + SELECT node['name'] as node_name + FROM sys.shards + WHERE schema_name = ? AND table_name = ? AND id = ? + AND state = 'STARTED' AND node['id'] != ? + LIMIT 1 + """ + + result = self.execute_query(query_replica, [schema_name, table_name, shard_id, target_node_id]) + + if result.get('rows'): + return result['rows'][0][0] + + except Exception: + # If query fails, just return None + pass + + return None + + def _is_recovery_completed(self, recovery_info: RecoveryInfo) -> bool: + """Check if a recovery is completed but still transitioning""" + return (recovery_info.stage == 'DONE' and + recovery_info.files_percent >= 100.0 and + recovery_info.bytes_percent >= 100.0) \ No newline at end of file diff --git a/cratedb_toolkit/cli.py b/cratedb_toolkit/cli.py index 4e8e17c2..2410d5ec 100644 --- a/cratedb_toolkit/cli.py +++ b/cratedb_toolkit/cli.py @@ -3,6 +3,7 @@ from cratedb_toolkit.util.cli import boot_click +from .admin.xmover.cli import main as admin_xmover_cli from .adapter.rockset.cli import cli as rockset_cli from .cfr.cli import cli as cfr_cli from .cluster.cli import cli as cloud_cli @@ -27,6 +28,7 @@ def cli(ctx: click.Context, verbose: bool, debug: bool): return boot_click(ctx, verbose, debug) +cli.add_command(admin_xmover_cli, name="xmover") 
cli.add_command(info_cli, name="info") cli.add_command(cfr_cli, name="cfr") cli.add_command(cloud_cli, name="cluster") diff --git a/doc/admin/index.md b/doc/admin/index.md new file mode 100644 index 00000000..d36c00e1 --- /dev/null +++ b/doc/admin/index.md @@ -0,0 +1,7 @@ +# Administrative Utilities + +```{toctree} +:maxdepth: 1 + +xmover/index +``` diff --git a/doc/admin/xmover/handbook.md b/doc/admin/xmover/handbook.md new file mode 100644 index 00000000..c103c6f2 --- /dev/null +++ b/doc/admin/xmover/handbook.md @@ -0,0 +1,487 @@ +(xmover-handbook)= +# XMover Handbook + +## Installation + +Install using uv (recommended) or pip: +```bash +uv tool install cratedb-toolkit + +# Alternatively use `pip`. +# pip install --user cratedb-toolkit +``` + +Create an `.env` file with your CrateDB connection details: +```bash +CRATE_CONNECTION_STRING=https://your-cluster.cratedb.net:4200 +CRATE_USERNAME=your-username +CRATE_PASSWORD=your-password +CRATE_SSL_VERIFY=true +``` + +## Quick Start + +### Test Connection +```bash +xmover test-connection +``` + +### Analyze Cluster +```bash +# Complete cluster analysis +xmover analyze + +# Analyze specific table +xmover analyze --table my_table +``` + +### Find Movement Candidates +```bash +# Find shards that can be moved (40-60GB by default) +xmover find-candidates + +# Custom size range +xmover find-candidates --min-size 20 --max-size 100 +``` + +### Generate Recommendations +```bash +# Dry run (default) - shows what would be recommended +xmover recommend + +# Generate actual SQL commands +xmover recommend --execute + +# Prioritize space over zone balancing +xmover recommend --prioritize-space +``` + +### Zone Analysis +```bash +# Check zone balance +xmover check-balance + +# Detailed zone analysis with shard-level details +xmover zone-analysis --show-shards +``` + +### Advanced Troubleshooting +```bash +# Validate specific moves before execution +xmover validate-move SCHEMA.TABLE SHARD_ID FROM_NODE TO_NODE + +# Explain CrateDB error 
messages +xmover explain-error "your error message here" +``` + +## Commands Reference + +### `analyze` +Analyzes current shard distribution across nodes and zones. + +**Options:** +- `--table, -t`: Analyze specific table only + +**Example:** +```bash +xmover analyze --table events +``` + +### `find-candidates` +Finds shards suitable for movement based on size and health criteria. + +**Options:** +- `--table, -t`: Find candidates in specific table only +- `--min-size`: Minimum shard size in GB (default: 40) +- `--max-size`: Maximum shard size in GB (default: 60) +- `--node`: Only show candidates from this specific source node (e.g., data-hot-4) + +**Examples:** +```bash +# Find candidates in size range for specific table +xmover find-candidates --min-size 20 --max-size 50 --table logs + +# Find candidates on a specific node +xmover find-candidates --min-size 30 --max-size 60 --node data-hot-4 +``` + +### `recommend` +Generates intelligent shard movement recommendations for cluster rebalancing. 
+ +**Options:** +- `--table, -t`: Generate recommendations for specific table only +- `--min-size`: Minimum shard size in GB (default: 40) +- `--max-size`: Maximum shard size in GB (default: 60) +- `--zone-tolerance`: Zone balance tolerance percentage (default: 10) +- `--min-free-space`: Minimum free space required on target nodes in GB (default: 100) +- `--max-moves`: Maximum number of move recommendations (default: 10) +- `--max-disk-usage`: Maximum disk usage percentage for target nodes (default: 85) +- `--validate/--no-validate`: Validate move safety (default: True) +- `--prioritize-space/--prioritize-zones`: Prioritize available space over zone balancing (default: False) +- `--dry-run/--execute`: Show what would be done without generating SQL commands (default: True) +- `--node`: Only recommend moves from this specific source node (e.g., data-hot-4) + +**Examples:** +```bash +# Dry run with zone balancing priority +xmover recommend --prioritize-zones + +# Generate SQL for space optimization +xmover recommend --prioritize-space --execute + +# Focus on specific table with custom parameters +xmover recommend --table events --min-size 10 --max-size 30 --execute + +# Target space relief for a specific node +xmover recommend --prioritize-space --min-size 30 --max-size 60 --node data-hot-4 + +# Allow higher disk usage for urgent moves +xmover recommend --prioritize-space --max-disk-usage 90 +``` + +### `zone-analysis` +Provides detailed analysis of zone distribution and potential conflicts. + +**Options:** +- `--table, -t`: Analyze zones for specific table only +- `--show-shards/--no-show-shards`: Show individual shard details (default: False) + +**Example:** +```bash +xmover zone-analysis --show-shards --table critical_data +``` + +### `check-balance` +Checks zone balance for shards with configurable tolerance. 
+ +**Options:** +- `--table, -t`: Check balance for specific table only +- `--tolerance`: Zone balance tolerance percentage (default: 10) + +**Example:** +```bash +xmover check-balance --tolerance 15 +``` + + + +### `validate-move` +Validates a specific shard move before execution to prevent errors. + +**Arguments:** +- `SCHEMA_TABLE`: Schema and table name (format: schema.table) +- `SHARD_ID`: Shard ID to move +- `FROM_NODE`: Source node name +- `TO_NODE`: Target node name + +**Examples:** +```bash +# Standard validation +xmover validate-move CUROV.maddoxxxS 4 data-hot-1 data-hot-3 + +# Allow higher disk usage for urgent moves +xmover validate-move CUROV.tendedero 4 data-hot-1 data-hot-3 --max-disk-usage 90 +``` + +### `explain-error` +Explains CrateDB allocation error messages and provides troubleshooting guidance. + +**Arguments:** +- `ERROR_MESSAGE`: The CrateDB error message to analyze (optional - can be provided interactively) + +**Examples:** +```bash +# Interactive mode +xmover explain-error + +# Direct analysis +xmover explain-error "NO(a copy of this shard is already allocated to this node)" +``` + +### `monitor-recovery` +Monitors active shard recovery operations on the cluster. 
+ +**Options:** +- `--table, -t`: Monitor recovery for specific table only +- `--node, -n`: Monitor recovery on specific node only +- `--watch, -w`: Continuously monitor (refresh every 10s) +- `--refresh-interval`: Refresh interval for watch mode in seconds (default: 10) +- `--recovery-type`: Filter by recovery type - PEER, DISK, or all (default: all) +- `--include-transitioning`: Include recently completed recoveries (DONE stage) + +**Examples:** +```bash +# Check current recovery status +xmover monitor-recovery + +# Monitor specific table recoveries +xmover monitor-recovery --table PartioffD + +# Continuous monitoring with custom refresh rate +xmover monitor-recovery --watch --refresh-interval 5 + +# Monitor only PEER recoveries on specific node +xmover monitor-recovery --node data-hot-1 --recovery-type PEER + +# Include completed recoveries still transitioning +xmover monitor-recovery --watch --include-transitioning +``` + +**Recovery Types:** +- **PEER**: Copying shard data from another node (replication/relocation) +- **DISK**: Rebuilding shard from local data (after restart/disk issues) + +### `test-connection` +Tests the connection to CrateDB and displays basic cluster information. + +## Operation Modes + +### Analysis vs Operational Views + +XMover provides two distinct views of your cluster: + +1. **Analysis View** (`analyze`, `zone-analysis`): Includes ALL shards regardless of state for complete cluster visibility +2. **Operational View** (`find-candidates`, `recommend`): Only includes healthy shards (STARTED + 100% recovered) for safe operations + +### Prioritization Modes + +When generating recommendations, you can choose between two prioritization strategies: + +1. **Zone Balancing Priority** (default): Focuses on achieving optimal zone distribution first, then considers available space +2. 
**Space Priority**: Prioritizes moving shards to nodes with more available space, regardless of zone balance + +### Safety Features + +- **Zone Conflict Detection**: Prevents moves that would place multiple copies of the same shard in the same zone +- **Capacity Validation**: Ensures target nodes have sufficient free space +- **Health Checks**: Only operates on healthy shards (STARTED routing state + 100% recovery) +- **SQL Quoting**: Properly quotes schema and table names in generated SQL commands + +## Example Workflows + +### Regular Cluster Maintenance + +1. Analyze current state: +```bash +xmover analyze +``` + +2. Check for zone imbalances: +```bash +xmover check-balance +``` + +3. Generate and review recommendations: +```bash +xmover recommend --dry-run +``` + +4. Execute safe moves: +```bash +xmover recommend --execute +``` + +### Targeted Node Relief + +When a specific node is running low on space: + +1. Check which node needs relief: +```bash +xmover analyze +``` + +2. Generate recommendations for that specific node: +```bash +xmover recommend --prioritize-space --node data-hot-4 --dry-run +``` + +3. Execute the moves: +```bash +xmover recommend --prioritize-space --node data-hot-4 --execute +``` + +### Monitoring Shard Recovery Operations + +After executing shard moves, monitor the recovery progress: + +1. Execute moves and monitor recovery: +```bash +# Execute moves +xmover recommend --node data-hot-1 --execute + +# Monitor the resulting recoveries +xmover monitor-recovery --watch +``` + +2. Monitor specific table or node recovery: +```bash +# Monitor specific table +xmover monitor-recovery --table shipmentFormFieldData --watch + +# Monitor specific node +xmover monitor-recovery --node data-hot-4 --watch + +# Monitor including completed recoveries +xmover monitor-recovery --watch --include-transitioning +``` + +3. 
Check recovery after node maintenance: +```bash +# After bringing a node back online +xmover monitor-recovery --node data-hot-3 --recovery-type DISK +``` + +### Manual Shard Movement + +1. Validate the move first: +```bash +xmover validate-move SCHEMA.TABLE SHARD_ID FROM_NODE TO_NODE +``` + +2. Generate safe recommendations: +```bash +xmover recommend --prioritize-space --execute +``` + +3. Monitor shard health after moves + +### Troubleshooting Zone Conflicts + +1. Identify conflicts: +```bash +xmover zone-analysis --show-shards +``` + +2. Generate targeted fixes: +```bash +xmover recommend --prioritize-zones --execute +``` + +## Configuration + +### Environment Variables + +- `CRATE_CONNECTION_STRING`: CrateDB HTTP endpoint (required) +- `CRATE_USERNAME`: Username for authentication (optional) +- `CRATE_PASSWORD`: Password for authentication (optional) +- `CRATE_SSL_VERIFY`: Enable SSL certificate verification (default: true) + +### Connection String Format + +``` +https://hostname:port +``` + +The tool automatically appends `/_sql` to the endpoint. + +## Safety Considerations + +⚠️ **Important Safety Notes:** + +1. **Always test in non-production environments first** +2. **Monitor shard health after each move before proceeding with additional moves** +3. **Ensure adequate cluster capacity before decommissioning nodes** +4. **Verify zone distribution after rebalancing operations** +5. **Keep backups current before performing large-scale moves** + +## Troubleshooting + +XMover provides comprehensive troubleshooting tools to help diagnose and resolve shard movement issues. 
+ +### Quick Diagnosis Commands + +```bash +# Validate a specific move before execution +xmover validate-move SCHEMA.TABLE SHARD_ID FROM_NODE TO_NODE + +# Explain CrateDB error messages +xmover explain-error "your error message here" + +# Check zone distribution for conflicts +xmover zone-analysis --show-shards + +# Verify overall cluster health +xmover analyze +``` + +### Common Issues and Solutions + +1. **Zone Conflicts** + ``` + Error: "NO(a copy of this shard is already allocated to this node)" + ``` + - **Cause**: Target node already has a copy of the shard + - **Solution**: Use `xmover zone-analysis --show-shards` to find alternative targets + - **Prevention**: Always use `xmover validate-move` before executing moves + +2. **Zone Allocation Limits** + ``` + Error: "too many copies of the shard allocated to nodes with attribute [zone]" + ``` + - **Cause**: CrateDB's zone awareness prevents too many copies in same zone + - **Solution**: Move shard to a different availability zone + - **Prevention**: Use `xmover recommend` which respects zone constraints + +3. **Insufficient Space** + ``` + Error: "not enough disk space" + ``` + - **Cause**: Target node lacks sufficient free space + - **Solution**: Choose node with more capacity or free up space + - **Check**: `xmover analyze` to see available space per node + +4. **High Disk Usage Blocking Moves** + ``` + Error: "Target node disk usage too high (85.3%)" + ``` + - **Cause**: Target node exceeds default 85% disk usage threshold + - **Solution**: Use `--max-disk-usage` to allow higher usage for urgent moves + - **Example**: `xmover recommend --max-disk-usage 90 --prioritize-space` + +5. 
**No Recommendations Generated** + - **Cause**: Cluster may already be well balanced + - **Solution**: Adjust size filters or check `xmover check-balance` + - **Try**: `--prioritize-space` mode for capacity-based moves + +### Error Message Decoder + +Use the built-in error decoder for complex CrateDB messages: + +```bash +# Interactive mode - paste your error message +xmover explain-error + +# Direct analysis +xmover explain-error "NO(a copy of this shard is already allocated to this node)" +``` + +### Configurable Safety Thresholds + +XMover uses configurable safety thresholds to prevent risky moves: + +**Disk Usage Threshold (default: 85%)** +```bash +# Allow moves to nodes with higher disk usage +xmover recommend --max-disk-usage 90 --prioritize-space + +# For urgent space relief +xmover validate-move SCHEMA.TABLE SHARD_ID FROM TO --max-disk-usage 95 +``` + +**When to Adjust Thresholds:** +- **Emergency situations**: Increase to 90-95% for critical space relief +- **Conservative operations**: Decrease to 75-80% for safer moves +- **Staging environments**: Can be more aggressive (90%+) +- **Production**: Keep conservative (80-85%) + +### Advanced Troubleshooting + +For detailed troubleshooting procedures, see {ref}`xmover-troubleshooting` which covers: +- Step-by-step diagnostic procedures +- Emergency recovery procedures +- Best practices for safe operations +- Complete error reference guide + +### Debug Information + +All commands provide detailed safety validation messages and explanations for any issues detected. diff --git a/doc/admin/xmover/index.md b/doc/admin/xmover/index.md new file mode 100644 index 00000000..7b522310 --- /dev/null +++ b/doc/admin/xmover/index.md @@ -0,0 +1,29 @@ +# XMover + +:::{div} sd-text-muted +CrateDB Shard Analyzer and Movement Tool. +::: + +A comprehensive looking-glass utility for analyzing CrateDB shard +distribution across nodes and availability zones. 
It generates safe +SQL commands for shard rebalancing and node decommissioning. + +## Features + +- **Cluster Analysis**: Complete overview of shard distribution across nodes and zones +- **Shard Movement Recommendations**: Intelligent suggestions for rebalancing with safety validation +- **Recovery Monitoring**: Track ongoing shard recovery operations with progress details +- **Zone Conflict Detection**: Prevents moves that would violate CrateDB's zone awareness +- **Node Decommissioning**: Plan safe node removal with automated shard relocation +- **Dry Run Mode**: Test recommendations without generating actual SQL commands +- **Safety Validation**: Comprehensive checks to ensure data availability during moves + +## Documentation + +```{toctree} +:maxdepth: 1 + +Handbook +Troubleshooting +Query gallery +``` diff --git a/doc/admin/xmover/queries.md b/doc/admin/xmover/queries.md new file mode 100644 index 00000000..4600038c --- /dev/null +++ b/doc/admin/xmover/queries.md @@ -0,0 +1,212 @@ +(xmover-queries)= +# XMover Query Gallery + +## Shard Distribution over Nodes + +```sql +select node['name'], sum(size) / 1024^3, count(id) from sys.shards group by 1 order by 1 asc; ++--------------+-----------------------------+-----------+ +| node['name'] | (sum(size) / 1.073741824E9) | count(id) | ++--------------+-----------------------------+-----------+ +| data-hot-0 | 1862.5866614403203 | 680 | +| data-hot-1 | 1866.0331328986213 | 684 | +| data-hot-2 | 1856.6581886671484 | 1043 | +| data-hot-3 | 1208.932889252901 | 477 | +| data-hot-4 | 1861.7727940855548 | 674 | +| data-hot-5 | 1863.4315695902333 | 744 | +| data-hot-6 | 1851.3522544233128 | 948 | +| NULL | 0.0 | 35 | ++--------------+-----------------------------+-----------+ +SELECT 8 rows in set (0.061 sec) +``` +## Shard Distribution PRIMARY/REPLICAS over nodes + +```sql + +select node['name'], primary, sum(size) / 1024^3, count(id) from sys.shards group by 1,2 order by 1 asc; 
++--------------+---------+-----------------------------+-----------+
+| node['name'] | primary | (sum(size) / 1.073741824E9) | count(id) |
++--------------+---------+-----------------------------+-----------+
+| data-hot-0 | TRUE | 1459.3267894154415 | 447 |
+| data-hot-0 | FALSE | 403.25987202487886 | 233 |
+| data-hot-1 | TRUE | 1209.6781993638724 | 374 |
+| data-hot-1 | FALSE | 656.3549335347489 | 310 |
+| data-hot-2 | TRUE | 1624.9012612393126 | 995 |
+| data-hot-2 | FALSE | 231.5014410642907 | 48 |
+| data-hot-3 | TRUE | 6.339549297466874 | 58 |
+| data-hot-3 | FALSE | 1202.486775631085 | 419 |
+| data-hot-4 | FALSE | 838.5498185381293 | 225 |
+| data-hot-4 | TRUE | 1023.1511942362413 | 449 |
+| data-hot-5 | FALSE | 1002.365406149067 | 422 |
+| data-hot-5 | TRUE | 860.9174101138487 | 322 |
+| data-hot-6 | FALSE | 1850.3959310995415 | 940 |
+| data-hot-6 | TRUE | 0.9159421799704432 | 8 |
+| NULL | FALSE | 0.0 | 35 |
++--------------+---------+-----------------------------+-----------+
+
+```
+
+## Available Disk Space per Node
+
+```sql
++------------+--------------------+-----------------------------------------------+
+| name | attributes['zone'] | (fs[1]['disks']['available'] / 1.073741824E9) |
++------------+--------------------+-----------------------------------------------+
+| data-hot-5 | us-west-2a | 142.3342628479004 |
+| data-hot-0 | us-west-2a | 142.03089141845703 |
+| data-hot-6 | us-west-2b | 159.68728256225586 |
+| data-hot-3 | us-west-2b | 798.8147850036621 |
+| data-hot-2 | us-west-2b | 156.79160690307617 |
+| data-hot-1 | us-west-2c | 145.73613739013672 |
+| data-hot-4 | us-west-2c | 148.39511108398438 |
++------------+--------------------+-----------------------------------------------+
+```
+
+## List the Biggest Shards on a Particular Node
+
+```sql
+select node['name'], table_name, schema_name, id, sum(size) / 1024^3 from sys.shards
+ where node['name'] = 'data-hot-2'
+ AND routing_state = 'STARTED'
+ AND recovery['files']['percent'] = 0
+ group by 
1,2,3,4 order by 5 desc limit 8;
++--------------+-----------------------+-------------+----+-----------------------------+
+| node['name'] | table_name | schema_name | id | (sum(size) / 1.073741824E9) |
++--------------+-----------------------+-------------+----+-----------------------------+
+| data-hot-2 | bottleFieldData | curvo | 5 | 135.568662205711 |
+| data-hot-2 | bottleFieldData | curvo | 8 | 134.813782049343 |
+| data-hot-2 | bottleFieldData | curvo | 3 | 133.43549298401922 |
+| data-hot-2 | bottleFieldData | curvo | 11 | 130.10448653809726 |
+| data-hot-2 | turtleFieldData | curvo | 31 | 54.642812703736126 |
+| data-hot-2 | turtleFieldData | curvo | 29 | 54.06101848650724 |
+| data-hot-2 | turtleFieldData | curvo | 5 | 53.96749582327902 |
+| data-hot-2 | turtleFieldData | curvo | 21 | 53.72262619435787 |
++--------------+-----------------------+-------------+----+-----------------------------+
+SELECT 8 rows in set (0.062 sec)
+```
+
+## Move REROUTE
+```sql
+
+alter table "curvo"."bottleFieldData" reroute move shard 21 from 'data-hot-2' to 'data-hot-3';
+```
+---
+
+```sql
+
+WITH shard_summary AS (
+    SELECT
+        node['name'] AS node_name,
+        table_name,
+        schema_name,
+        CASE
+            WHEN "primary" = true THEN 'PRIMARY'
+            ELSE 'REPLICA'
+        END AS shard_type,
+        COUNT(*) AS shard_count,
+        SUM(size) / 1024^3 AS total_size_gb
+    FROM sys.shards
+    WHERE table_name = 'orderffD'
+      AND routing_state = 'STARTED'
+      AND recovery['files']['percent'] = 0
+    GROUP BY node['name'], table_name, schema_name, "primary"
+)
+SELECT
+    node_name,
+    table_name,
+    schema_name,
+    shard_type,
+    shard_count,
+    ROUND(total_size_gb, 2) AS total_size_gb,
+    ROUND(total_size_gb / shard_count, 2) AS avg_shard_size_gb
+FROM shard_summary
+ORDER BY node_name, shard_type DESC, total_size_gb DESC;
+```
+
+```sql
+-- Comprehensive shard distribution showing both node and zone details
+SELECT
+    n.attributes['zone'] AS zone,
+    s.node['name'] AS node_name,
+    s.table_name,
+    s.schema_name,
+    CASE
+        WHEN 
s."primary" = true THEN 'PRIMARY' + ELSE 'REPLICA' + END AS shard_type, + s.id AS shard_id, + s.size / 1024^3 AS shard_size_gb, + s.num_docs, + s.state +FROM sys.shards s +JOIN sys.nodes n ON s.node['id'] = n.id +WHERE s.table_name = 'your_table_name' -- Replace with your specific table name + AND s.routing_state = 'STARTED' + AND s.recovery['files']['percent'] = 0 +ORDER BY + n.attributes['zone'], + s.node['name'], + s."primary" DESC, -- Primary shards first + s.id; + +-- Summary by zone and shard type +SELECT + n.attributes['zone'] AS zone, + CASE + WHEN s."primary" = true THEN 'PRIMARY' + ELSE 'REPLICA' + END AS shard_type, + COUNT(*) AS shard_count, + COUNT(DISTINCT s.node['name']) AS nodes_with_shards, + ROUND(SUM(s.size) / 1024^3, 2) AS total_size_gb, + ROUND(AVG(s.size) / 1024^3, 3) AS avg_shard_size_gb, + SUM(s.num_docs) AS total_documents +FROM sys.shards s +JOIN sys.nodes n ON s.node['id'] = n.id +WHERE s.table_name = 'orderffD' -- Replace with your specific table name + AND s.routing_state = 'STARTED' + AND s.recovery['files']['percent'] = 0 +GROUP BY n.attributes['zone'], s."primary" +ORDER BY zone, shard_type DESC; + +``` + +## Relocation + +```sql +SELECT + table_name, + shard_id, + current_state, + explanation, + node_id + FROM sys.allocations + WHERE current_state != 'STARTED' and table_name = 'dispatchio' and shard_id = 19 + ORDER BY current_state, table_name, shard_id; + ++-----------------------+----------+---------------+-------------+------------------------+ +| table_name | shard_id | current_state | explanation | node_id | ++-----------------------+----------+---------------+-------------+------------------------+ +| dispatchio | 19 | RELOCATING | NULL | ZH6fBanGSjanGqeSh-sw0A | ++-----------------------+----------+---------------+-------------+------------------------+ +``` + +```sql +SELECT + COUNT(*) as recovering_shards + FROM sys.shards + WHERE state = 'RECOVERING' OR routing_state IN ('INITIALIZING', 'RELOCATING'); + +``` + +```sql 
+SELECT + table_name, + shard_id, + current_state, + explanation, + node_id + FROM sys.allocations + WHERE current_state != 'STARTED' and table_name = 'dispatchio' and shard_id = 19 + ORDER BY current_state, table_name, shard_id; +``` diff --git a/doc/admin/xmover/troubleshooting.md b/doc/admin/xmover/troubleshooting.md new file mode 100644 index 00000000..14567586 --- /dev/null +++ b/doc/admin/xmover/troubleshooting.md @@ -0,0 +1,424 @@ +(xmover-troubleshooting)= +# Troubleshooting CrateDB using XMover + +This guide helps you diagnose and resolve common issues when using XMover for CrateDB shard management. + +## Quick Diagnosis Commands + +Before troubleshooting, run these commands to understand your cluster state: + +```bash +# Check overall cluster health +xmover analyze + +# Check zone distribution for conflicts +xmover zone-analysis --show-shards + +# Validate a specific move before execution +xmover validate-move SCHEMA.TABLE SHARD_ID FROM_NODE TO_NODE + +# Explain CrateDB error messages +xmover explain-error "your error message here" +``` + +## Common Issues and Solutions + +### 1. 
Zone Conflicts + +#### Symptoms +- Error: `NO(a copy of this shard is already allocated to this node)` +- Error: `NO(there are too many copies of the shard allocated to nodes with attribute [zone])` +- Recommendations show zone conflicts in safety validation + +#### Root Causes +- Target node already has a copy of the shard (primary or replica) +- Target zone already has copies, violating CrateDB's zone awareness +- Incorrect understanding of current shard distribution + +#### Solutions + +**Step 1: Analyze Current Distribution** +```bash +# See exactly where shard copies are located +xmover zone-analysis --show-shards --table YOUR_TABLE + +# Check overall zone balance +xmover check-balance +``` + +**Step 2: Find Alternative Targets** +```bash +# Find nodes with available capacity in different zones +xmover analyze + +# Get movement candidates with size filters +xmover find-candidates --min-size 20 --max-size 30 +``` + +**Step 3: Validate Before Moving** +```bash +# Always validate moves before execution +xmover validate-move SCHEMA.TABLE SHARD_ID FROM_NODE TO_NODE +``` + +#### Prevention +- Always use `xmover recommend` instead of manual moves +- Enable dry-run mode by default: `xmover recommend --dry-run` +- Check zone distribution before planning moves + +### 2. 
Insufficient Space Issues + +#### Symptoms +- Error: `not enough disk space` +- Safety validation fails with space warnings +- High disk usage percentages in cluster analysis + +#### Root Causes +- Target node doesn't have enough free space for the shard +- High disk usage on target nodes (>85%) +- Insufficient buffer space for safe operations + +#### Solutions + +**Step 1: Check Available Space** +```bash +# Review node capacity and usage +xmover analyze + +# Look for nodes with more available space +xmover find-candidates --min-size 0 --max-size 100 +``` + +**Step 2: Adjust Parameters** +```bash +# Increase minimum free space requirement +xmover recommend --min-free-space 200 + +# Focus on smaller shards +xmover recommend --max-size 50 +``` + +**Step 3: Free Up Space** +- Delete old snapshots and unused data +- Move other shards away from constrained nodes +- Consider adding nodes to the cluster + +#### Prevention +- Monitor disk usage regularly with `xmover analyze` +- Set conservative `--min-free-space` values (default: 100GB) +- Plan capacity expansion before reaching 80% disk usage + +### 3. 
Node Performance Issues + +#### Symptoms +- Error: `shard recovery limit` +- High heap usage warnings +- Slow shard movement operations + +#### Root Causes +- Too many concurrent shard movements +- High heap usage on target nodes (>80%) +- Resource contention during moves + +#### Solutions + +**Step 1: Check Node Health** +```bash +# Review heap and disk usage +xmover analyze + +# Check for overloaded nodes +xmover check-balance +``` + +**Step 2: Reduce Concurrent Operations** +```bash +# Move fewer shards at once +xmover recommend --max-moves 3 + +# Wait between moves for recovery completion +# Monitor with CrateDB Admin UI +``` + +**Step 3: Target Less Loaded Nodes** +```bash +# Prioritize nodes with better resources +xmover recommend --prioritize-space +``` + +#### Prevention +- Move shards gradually (5-10 at a time) +- Monitor heap usage and wait for recovery completion +- Avoid moves during high-traffic periods + +### 4. Zone Imbalance Issues + +#### Symptoms +- `check-balance` shows zones marked as "Over" or "Under" +- Zone distribution is uneven +- Some zones have significantly more shards + +#### Root Causes +- Historical data distribution patterns +- Node additions/removals without rebalancing +- Tables created with poor initial distribution + +#### Solutions + +**Step 1: Assess Imbalance** +```bash +# Check current zone balance +xmover check-balance --tolerance 15 + +# Get detailed zone analysis +xmover zone-analysis +``` + +**Step 2: Generate Rebalancing Plan** +```bash +# Prioritize zone balancing +xmover recommend --prioritize-zones --dry-run + +# Review recommendations carefully +xmover recommend --prioritize-zones --max-moves 10 +``` + +**Step 3: Execute Gradually** +```bash +# Execute in small batches +xmover recommend --prioritize-zones --max-moves 5 --execute + +# Monitor progress and repeat +``` + +#### Prevention +- Run regular balance checks: `xmover check-balance` +- Use zone-aware table creation with proper shard allocation +- Plan 
rebalancing during maintenance windows + +### 5. Connection and Authentication Issues + +#### Symptoms +- "Connection failed" errors +- Authentication failures +- SSL/TLS errors + +#### Root Causes +- Incorrect connection string in `.env` +- Wrong credentials +- Network connectivity issues +- SSL certificate problems + +#### Solutions + +**Step 1: Verify Connection** +```bash +# Test basic connectivity +xmover test-connection +``` + +**Step 2: Check Configuration** +```bash +# Verify .env file contents +cat .env + +# Example correct format: +CRATE_CONNECTION_STRING=https://cluster.cratedb.net:4200 +CRATE_USERNAME=admin +CRATE_PASSWORD=your-password +CRATE_SSL_VERIFY=true +``` + +**Step 3: Test Network Access** +```bash +# Test HTTP connectivity +curl -u username:password https://your-cluster:4200/_sql -d '{"stmt":"SELECT 1"}' +``` + +#### Prevention +- Use `.env.example` as a template +- Verify credentials with CrateDB admin +- Test connectivity from deployment environment + +## Error Message Decoder + +### CrateDB Allocation Errors + +Use `xmover explain-error` to decode complex CrateDB error messages: + +```bash +# Interactive mode +xmover explain-error + +# Direct analysis +xmover explain-error "your error message here" +``` + +### Common Error Patterns + +| Error Pattern | Meaning | Quick Fix | +|---------------|---------|-----------| +| `copy of this shard is already allocated` | Node already has shard | Choose different target node | +| `too many copies...with attribute [zone]` | Zone limit exceeded | Move to different zone | +| `not enough disk space` | Insufficient space | Free space or choose different node | +| `shard recovery limit` | Too many concurrent moves | Wait and retry with fewer moves | +| `allocation is disabled` | Cluster allocation disabled | Re-enable allocation settings | + +## Best Practices for Safe Operations + +### Pre-Move Checklist + +1. **Analyze cluster state** + ```bash + xmover analyze + ``` + +2. 
**Check zone distribution**
+   ```bash
+   xmover zone-analysis
+   ```
+
+3. **Generate recommendations**
+   ```bash
+   xmover recommend --dry-run
+   ```
+
+4. **Validate specific moves**
+   ```bash
+   xmover validate-move SCHEMA.TABLE SHARD_ID FROM TO
+   ```
+
+5. **Execute gradually**
+   ```bash
+   xmover recommend --max-moves 5 --execute
+   ```
+
+### During Operations
+
+1. **Monitor shard health**
+   - Check CrateDB Admin UI for recovery progress
+   - Watch for failed or stuck shards
+   - Verify routing state changes to STARTED
+
+2. **Track resource usage**
+   - Monitor disk and heap usage on target nodes
+   - Watch for network saturation during moves
+   - Check cluster performance metrics
+
+3. **Maintain documentation**
+   - Record moves performed and reasons
+   - Note any issues encountered
+   - Document lessons learned
+
+### Post-Move Verification
+
+1. **Verify shard health**
+   ```sql
+   SELECT table_name, id, "primary", node['name'], routing_state
+   FROM sys.shards
+   WHERE table_name = 'your_table' AND routing_state != 'STARTED';
+   ```
+
+2. **Check zone balance**
+   ```bash
+   xmover check-balance
+   ```
+
+3. **Monitor cluster performance**
+   - Query response times
+   - Resource utilization
+   - Error rates
+
+## Emergency Procedures
+
+### Stuck Shard Recovery
+
+If a shard gets stuck during movement:
+
+1. **Check shard status**
+   ```sql
+   SELECT * FROM sys.shards WHERE routing_state != 'STARTED';
+   ```
+
+2. **Cancel problematic moves**
+   ```sql
+   ALTER TABLE "schema"."table" REROUTE CANCEL SHARD <shard_id> ON '<node_id>';
+   ```
+
+3. **Retry allocation**
+   ```sql
+   ALTER TABLE "schema"."table" REROUTE RETRY FAILED;
+   ```
+
+### Cluster Health Issues
+
+If moves cause cluster problems:
+
+1. **Disable allocation temporarily**
+   ```text
+   PUT /_cluster/settings
+   {
+     "persistent": {
+       "cluster.routing.allocation.enable": "primaries"
+     }
+   }
+   ```
+
+2. **Wait for stabilization**
+   - Monitor cluster health
+   - Check node resource usage
+   - Verify no failed shards
+
+3. 
**Re-enable allocation** + ```text + PUT /_cluster/settings + { + "persistent": { + "cluster.routing.allocation.enable": "all" + } + } + ``` + +## Getting Help + +### Built-in Help + +```bash +# Command help +xmover --help +xmover COMMAND --help + +# Error explanation +xmover explain-error + +# Move validation +xmover validate-move SCHEMA.TABLE SHARD_ID FROM TO +``` + +### Additional Resources + +- **CrateDB Documentation**: https://crate.io/docs/ +- **Shard Allocation Guide**: https://crate.io/docs/crate/reference/en/latest/admin/system-information.html +- **Cluster Settings**: https://crate.io/docs/crate/reference/en/latest/config/cluster.html + +### Reporting Issues + +When reporting issues, include: + +1. **XMover version and command used** +2. **Complete error message** +3. **Cluster information** (`xmover analyze` output) +4. **Zone analysis** (`xmover zone-analysis` output) +5. **CrateDB version and configuration** + +### Support Checklist + +Before contacting support: + +- [ ] Tried `xmover validate-move` for the specific operation +- [ ] Checked zone distribution with `xmover zone-analysis` +- [ ] Reviewed cluster health with `xmover analyze` +- [ ] Used `xmover explain-error` to decode error messages +- [ ] Verified connection and authentication with `xmover test-connection` +- [ ] Read through this troubleshooting guide +- [ ] Checked CrateDB documentation for allocation settings diff --git a/doc/index.md b/doc/index.md index 56b849ec..17c55514 100644 --- a/doc/index.md +++ b/doc/index.md @@ -30,6 +30,7 @@ changes :caption: Diagnostics :hidden: +admin/index Cluster Information Cluster Flight Recorder (CFR) ``` diff --git a/pyproject.toml b/pyproject.toml index ca6c86cc..c12d4c32 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -110,6 +110,7 @@ dependencies = [ "python-slugify<9", "pyyaml<7", "requests>=2.28,<3", + "rich<14", "sqlalchemy-cratedb>=0.41.0", "sqlparse<0.6", "tqdm<5", @@ -263,6 +264,7 @@ scripts.cratedb-retention = 
"cratedb_toolkit.retention.cli:cli" scripts.cratedb-toolkit = "cratedb_toolkit.cli:cli" scripts.ctk = "cratedb_toolkit.cli:cli" scripts.migr8 = "cratedb_toolkit.io.mongodb.cli:main" +scripts.xmover = "cratedb_toolkit.admin.xmover.cli:main" entry-points.pytest11.cratedb_service = "cratedb_toolkit.testing.pytest" [tool.setuptools.packages.find] From 70a3bf6f7e776887bed0070c6b19419640caf9b2 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Wed, 20 Aug 2025 00:50:43 +0200 Subject: [PATCH 02/18] Admin/XMover: Code formatting and linting --- cratedb_toolkit/admin/xmover/__init__.py | 2 +- cratedb_toolkit/admin/xmover/analyzer.py | 524 +++++++++--------- cratedb_toolkit/admin/xmover/cli.py | 667 +++++++++++------------ cratedb_toolkit/admin/xmover/database.py | 495 ++++++++--------- cratedb_toolkit/cli.py | 2 +- pyproject.toml | 9 +- 6 files changed, 836 insertions(+), 863 deletions(-) diff --git a/cratedb_toolkit/admin/xmover/__init__.py b/cratedb_toolkit/admin/xmover/__init__.py index b941f602..92e9ee84 100644 --- a/cratedb_toolkit/admin/xmover/__init__.py +++ b/cratedb_toolkit/admin/xmover/__init__.py @@ -7,4 +7,4 @@ __version__ = "0.1.0" __author__ = "CrateDB Tools" -__description__ = "CrateDB shard analyzer and movement tool" \ No newline at end of file +__description__ = "CrateDB shard analyzer and movement tool" diff --git a/cratedb_toolkit/admin/xmover/analyzer.py b/cratedb_toolkit/admin/xmover/analyzer.py index 75af9090..36d43618 100644 --- a/cratedb_toolkit/admin/xmover/analyzer.py +++ b/cratedb_toolkit/admin/xmover/analyzer.py @@ -2,17 +2,18 @@ Shard analysis and rebalancing logic for CrateDB """ -from typing import Dict, List, Optional, Set, Any, Tuple -from dataclasses import dataclass -from collections import defaultdict import math +from collections import defaultdict +from dataclasses import dataclass +from typing import Any, Dict, List, Optional, Set, Tuple -from .database import CrateDBClient, NodeInfo, ShardInfo, RecoveryInfo +from .database import 
CrateDBClient, NodeInfo, RecoveryInfo, ShardInfo @dataclass class MoveRecommendation: """Recommendation for moving a shard""" + table_name: str schema_name: str shard_id: int @@ -26,9 +27,11 @@ class MoveRecommendation: def to_sql(self) -> str: """Generate the SQL command for this move""" - return (f'ALTER TABLE "{self.schema_name}"."{self.table_name}" ' - f"REROUTE MOVE SHARD {self.shard_id} " - f"FROM '{self.from_node}' TO '{self.to_node}';") + return ( + f'ALTER TABLE "{self.schema_name}"."{self.table_name}" ' + f"REROUTE MOVE SHARD {self.shard_id} " + f"FROM '{self.from_node}' TO '{self.to_node}';" + ) @property def safety_score(self) -> float: @@ -50,6 +53,7 @@ def safety_score(self) -> float: @dataclass class DistributionStats: """Statistics about shard distribution""" + total_shards: int total_size_gb: float zones: Dict[str, int] @@ -65,14 +69,14 @@ def __init__(self, client: CrateDBClient): self.client = client self.nodes: List[NodeInfo] = [] self.shards: List[ShardInfo] = [] - + # Initialize session-based caches for performance self._zone_conflict_cache = {} self._node_lookup_cache = {} self._target_nodes_cache = {} self._cache_hits = 0 self._cache_misses = 0 - + self._refresh_data() def _refresh_data(self): @@ -112,7 +116,7 @@ def analyze_distribution(self, table_name: Optional[str] = None) -> Distribution zones=dict(zone_counts), nodes=dict(node_counts), zone_balance_score=zone_balance_score, - node_balance_score=node_balance_score + node_balance_score=node_balance_score, ) def _calculate_balance_score(self, counts: List[int]) -> float: @@ -134,10 +138,9 @@ def _calculate_balance_score(self, counts: List[int]) -> float: score = max(0, 100 * math.exp(-cv)) return round(score, 1) - def find_moveable_shards(self, - min_size_gb: float = 40.0, - max_size_gb: float = 60.0, - table_name: Optional[str] = None) -> List[ShardInfo]: + def find_moveable_shards( + self, min_size_gb: float = 40.0, max_size_gb: float = 60.0, table_name: Optional[str] = None + ) -> 
List[ShardInfo]: """Find shards that are candidates for moving based on size Only returns healthy shards that are safe to move. @@ -148,20 +151,19 @@ def find_moveable_shards(self, table_name=table_name, min_size_gb=min_size_gb, max_size_gb=max_size_gb, - for_analysis=False # Only operational shards + for_analysis=False, # Only operational shards ) - # Create a mapping of node names to available space node_space_map = {node.name: node.available_space_gb for node in self.nodes} # Sort by node available space (ascending, so low space nodes first), then by shard size - healthy_shards.sort(key=lambda s: (node_space_map.get(s.node_name, float('inf')), s.size_gb)) + healthy_shards.sort(key=lambda s: (node_space_map.get(s.node_name, float("inf")), s.size_gb)) return healthy_shards - def check_zone_balance(self, - table_name: Optional[str] = None, - tolerance_percent: float = 10.0) -> Dict[str, Dict[str, int]]: + def check_zone_balance( + self, table_name: Optional[str] = None, tolerance_percent: float = 10.0 + ) -> Dict[str, Dict[str, int]]: """Check if zones are balanced within tolerance""" # Filter shards by table if specified shards = self.shards @@ -169,21 +171,23 @@ def check_zone_balance(self, shards = [s for s in shards if s.table_name == table_name] # Count shards by zone and type - zone_stats = defaultdict(lambda: {'PRIMARY': 0, 'REPLICA': 0, 'TOTAL': 0}) + zone_stats = defaultdict(lambda: {"PRIMARY": 0, "REPLICA": 0, "TOTAL": 0}) for shard in shards: shard_type = shard.shard_type zone_stats[shard.zone][shard_type] += 1 - zone_stats[shard.zone]['TOTAL'] += 1 + zone_stats[shard.zone]["TOTAL"] += 1 return dict(zone_stats) - def find_nodes_with_capacity(self, - required_space_gb: float, - exclude_zones: Optional[Set[str]] = None, - exclude_nodes: Optional[Set[str]] = None, - min_free_space_gb: float = 100.0, - max_disk_usage_percent: float = 85.0) -> List[NodeInfo]: + def find_nodes_with_capacity( + self, + required_space_gb: float, + exclude_zones: 
Optional[Set[str]] = None, + exclude_nodes: Optional[Set[str]] = None, + min_free_space_gb: float = 100.0, + max_disk_usage_percent: float = 85.0, + ) -> List[NodeInfo]: """Find nodes that have capacity for additional shards Args: @@ -219,16 +223,18 @@ def find_nodes_with_capacity(self, available_nodes.sort(key=lambda n: n.available_space_gb, reverse=True) return available_nodes - def generate_rebalancing_recommendations(self, - table_name: Optional[str] = None, - min_size_gb: float = 40.0, - max_size_gb: float = 60.0, - zone_tolerance_percent: float = 10.0, - min_free_space_gb: float = 100.0, - max_recommendations: int = 10, - prioritize_space: bool = False, - source_node: Optional[str] = None, - max_disk_usage_percent: float = 90.0) -> List[MoveRecommendation]: + def generate_rebalancing_recommendations( + self, + table_name: Optional[str] = None, + min_size_gb: float = 40.0, + max_size_gb: float = 60.0, + zone_tolerance_percent: float = 10.0, + min_free_space_gb: float = 100.0, + max_recommendations: int = 10, + prioritize_space: bool = False, + source_node: Optional[str] = None, + max_disk_usage_percent: float = 90.0, + ) -> List[MoveRecommendation]: """Generate recommendations for rebalancing shards Args: @@ -241,7 +247,7 @@ def generate_rebalancing_recommendations(self, # Get moveable shards (only healthy ones for actual operations) moveable_shards = self.find_moveable_shards(min_size_gb, max_size_gb, table_name) - + print(f"Analyzing {len(moveable_shards)} candidate shards in size range {min_size_gb}-{max_size_gb}GB...") if not moveable_shards: @@ -251,7 +257,7 @@ def generate_rebalancing_recommendations(self, zone_stats = self.check_zone_balance(table_name, zone_tolerance_percent) # Calculate target distribution - total_shards = sum(stats['TOTAL'] for stats in zone_stats.values()) + total_shards = sum(stats["TOTAL"] for stats in zone_stats.values()) zones = list(zone_stats.keys()) target_per_zone = total_shards // len(zones) if zones else 0 @@ -260,10 
+266,10 @@ def generate_rebalancing_recommendations(self, underloaded_zones = [] for zone, stats in zone_stats.items(): - current_count = stats['TOTAL'] + current_count = stats["TOTAL"] threshold_high = target_per_zone * (1 + zone_tolerance_percent / 100) threshold_low = target_per_zone * (1 - zone_tolerance_percent / 100) - + if current_count > threshold_high: overloaded_zones.append(zone) elif current_count < threshold_low: @@ -277,9 +283,9 @@ def generate_rebalancing_recommendations(self, processing_shards = moveable_shards # Generate move recommendations - safe_recommendations = 0 + safe_recommendations = 0 # noqa: F841 total_evaluated = 0 - + for i, shard in enumerate(processing_shards): if len(recommendations) >= max_recommendations: break @@ -287,9 +293,9 @@ def generate_rebalancing_recommendations(self, # Show progress every 50 shards when processing many if len(processing_shards) > 100 and i > 0 and i % 50 == 0: print(".", end="", flush=True) - + total_evaluated += 1 - + # Skip based on priority mode if not prioritize_space: # Zone balancing mode: only move shards from overloaded zones @@ -302,14 +308,14 @@ def generate_rebalancing_recommendations(self, required_space_gb=shard.size_gb, exclude_nodes={shard.node_name}, # Don't move to same node min_free_space_gb=min_free_space_gb, - max_disk_usage_percent=max_disk_usage_percent + max_disk_usage_percent=max_disk_usage_percent, ) # Quick pre-filter to avoid expensive safety validations # Only check nodes in different zones (for zone balancing) if not prioritize_space: target_nodes = [node for node in target_nodes if node.zone != shard.zone] - + # Limit to top 3 candidates to reduce validation overhead target_nodes = target_nodes[:3] @@ -327,9 +333,9 @@ def generate_rebalancing_recommendations(self, to_zone=candidate_node.zone, shard_type=shard.shard_type, size_gb=shard.size_gb, - reason="Safety validation" + reason="Safety validation", ) - + # Check if this move would be safe is_safe, safety_msg = 
self.validate_move_safety(temp_rec, max_disk_usage_percent) if is_safe: @@ -389,7 +395,7 @@ def generate_rebalancing_recommendations(self, to_zone=target_node.zone, shard_type=shard.shard_type, size_gb=shard.size_gb, - reason=reason + reason=reason, ) recommendations.append(recommendation) @@ -400,8 +406,9 @@ def generate_rebalancing_recommendations(self, print(f"Performance: {self.get_cache_stats()}") return recommendations - def validate_move_safety(self, recommendation: MoveRecommendation, - max_disk_usage_percent: float = 90.0) -> Tuple[bool, str]: + def validate_move_safety( + self, recommendation: MoveRecommendation, max_disk_usage_percent: float = 90.0 + ) -> Tuple[bool, str]: """Validate that a move recommendation is safe to execute""" # Find target node (with caching) target_node = self._get_node_cached(recommendation.to_node) @@ -417,20 +424,24 @@ def validate_move_safety(self, recommendation: MoveRecommendation, # Check available space required_space_gb = recommendation.size_gb + 50 # 50GB buffer if target_node.available_space_gb < required_space_gb: - return False, f"Insufficient space on target node (need {required_space_gb:.1f}GB, have {target_node.available_space_gb:.1f}GB)" + return ( + False, + f"Insufficient space on target node (need {required_space_gb:.1f}GB, " + f"have {target_node.available_space_gb:.1f}GB)", + ) # Check disk usage if target_node.disk_usage_percent > max_disk_usage_percent: return False, f"Target node disk usage too high ({target_node.disk_usage_percent:.1f}%)" return True, "Move appears safe" - + def _get_node_cached(self, node_name: str): """Get node by name with caching""" if node_name in self._node_lookup_cache: self._cache_hits += 1 return self._node_lookup_cache[node_name] - + # Find node (cache miss) self._cache_misses += 1 target_node = None @@ -438,64 +449,65 @@ def _get_node_cached(self, node_name: str): if node.name == node_name: target_node = node break - + self._node_lookup_cache[node_name] = target_node return 
target_node - + def _check_zone_conflict_cached(self, recommendation: MoveRecommendation) -> Optional[str]: """Check zone conflicts with caching""" # Create cache key: table, shard, target zone target_zone = self._get_node_zone(recommendation.to_node) cache_key = (recommendation.table_name, recommendation.shard_id, target_zone) - + if cache_key in self._zone_conflict_cache: self._cache_hits += 1 return self._zone_conflict_cache[cache_key] - + # Cache miss - do expensive check self._cache_misses += 1 result = self._check_zone_conflict(recommendation) self._zone_conflict_cache[cache_key] = result return result - + def _get_node_zone(self, node_name: str) -> str: """Get zone for a node name""" node = self._get_node_cached(node_name) return node.zone if node else "unknown" - + def get_cache_stats(self) -> str: """Get cache performance statistics""" total = self._cache_hits + self._cache_misses if total == 0: return "Cache stats: No operations yet" - + hit_rate = (self._cache_hits / total) * 100 return f"Cache stats: {hit_rate:.1f}% hit rate ({self._cache_hits} hits, {self._cache_misses} misses)" - - def _find_nodes_with_capacity_cached(self, required_space_gb: float, exclude_nodes: set, - min_free_space_gb: float, max_disk_usage_percent: float) -> List[NodeInfo]: + + def _find_nodes_with_capacity_cached( + self, required_space_gb: float, exclude_nodes: set, min_free_space_gb: float, max_disk_usage_percent: float + ) -> List[NodeInfo]: """Find nodes with capacity using caching for repeated queries""" # Create cache key based on parameters (rounded to avoid float precision issues) cache_key = ( round(required_space_gb, 1), frozenset(exclude_nodes), round(min_free_space_gb, 1), - round(max_disk_usage_percent, 1) + round(max_disk_usage_percent, 1), ) - + if cache_key in self._target_nodes_cache: self._cache_hits += 1 return self._target_nodes_cache[cache_key] - + # Cache miss - do expensive calculation self._cache_misses += 1 result = self.find_nodes_with_capacity( 
required_space_gb=required_space_gb, exclude_nodes=exclude_nodes, min_free_space_gb=min_free_space_gb, - max_disk_usage_percent=max_disk_usage_percent + max_disk_usage_percent=max_disk_usage_percent, ) - + self._target_nodes_cache[cache_key] = result return result @@ -526,14 +538,15 @@ def _check_zone_conflict(self, recommendation: MoveRecommendation) -> Optional[s ORDER BY s."primary" DESC, zone, node_name """ - result = self.client.execute_query(query, [ - recommendation.table_name, - recommendation.schema_name, - recommendation.shard_id - ]) + result = self.client.execute_query( + query, [recommendation.table_name, recommendation.schema_name, recommendation.shard_id] + ) - if not result.get('rows'): - return f"Cannot find shard {recommendation.shard_id} for table {recommendation.schema_name}.{recommendation.table_name}" + if not result.get("rows"): + return ( + f"Cannot find shard {recommendation.shard_id} " + f"for table {recommendation.schema_name}.{recommendation.table_name}" + ) # Analyze current distribution zones_with_copies = set() @@ -552,23 +565,23 @@ def _check_zone_conflict(self, recommendation: MoveRecommendation) -> Optional[s if not target_node_id: return f"Target node {recommendation.to_node} not found in cluster" - for row in result['rows']: + for row in result["rows"]: node_id, node_name, zone, is_primary, routing_state, state = row - zone = zone or 'unknown' + zone = zone or "unknown" total_copies += 1 # Track the shard we're planning to move if node_name == recommendation.from_node: current_location = { - 'zone': zone, - 'is_primary': is_primary, - 'routing_state': routing_state, - 'state': state + "zone": zone, + "is_primary": is_primary, + "routing_state": routing_state, + "state": state, } # Track all copies for conflict detection nodes_with_copies.add(node_id) - if routing_state == 'STARTED' and state == 'STARTED': + if routing_state == "STARTED" and state == "STARTED": healthy_copies += 1 zones_with_copies.add(zone) @@ -576,23 +589,29 @@ 
def _check_zone_conflict(self, recommendation: MoveRecommendation) -> Optional[s if not current_location: return f"Shard not found on source node {recommendation.from_node}" - if current_location['routing_state'] != 'STARTED': + if current_location["routing_state"] != "STARTED": return f"Source shard is not in STARTED state (current: {current_location['routing_state']})" # CRITICAL CHECK 1: Target node already has a copy of this shard if target_node_id in nodes_with_copies: - return f"Node conflict: Target node {recommendation.to_node} already has a copy of shard {recommendation.shard_id}" + return ( + f"Node conflict: Target node {recommendation.to_node} " + f"already has a copy of shard {recommendation.shard_id}" + ) # CRITICAL CHECK 2: Target zone already has a copy (zone allocation limits) if recommendation.to_zone in zones_with_copies: return f"Zone conflict: {recommendation.to_zone} already has a copy of shard {recommendation.shard_id}" # CRITICAL CHECK 3: Ensure we're not creating a single point of failure - if len(zones_with_copies) == 1 and current_location['zone'] in zones_with_copies: + if len(zones_with_copies) == 1 and current_location["zone"] in zones_with_copies: # This is the only zone with this shard - moving it is good for zone distribution pass elif len(zones_with_copies) <= 1 and healthy_copies <= 1: - return f"Safety concern: Only {healthy_copies} healthy copy(ies) exist. Moving might risk data availability." + return ( + f"Safety concern: Only {healthy_copies} healthy copy(ies) exist. " + f"Moving might risk data availability." 
+ ) # ADDITIONAL CHECK: Verify zone allocation constraints for this table table_zone_query = """ @@ -609,21 +628,22 @@ def _check_zone_conflict(self, recommendation: MoveRecommendation) -> Optional[s ORDER BY zone """ - zone_result = self.client.execute_query(table_zone_query, [ - recommendation.table_name, - recommendation.schema_name, - recommendation.shard_id - ]) + zone_result = self.client.execute_query( + table_zone_query, [recommendation.table_name, recommendation.schema_name, recommendation.shard_id] + ) current_zone_counts = {} - for row in zone_result.get('rows', []): + for row in zone_result.get("rows", []): zone_name, count = row - current_zone_counts[zone_name or 'unknown'] = count + current_zone_counts[zone_name or "unknown"] = count # Check if adding to target zone would violate balance target_zone_count = current_zone_counts.get(recommendation.to_zone, 0) if target_zone_count > 0: - return f"Zone allocation violation: {recommendation.to_zone} would have {target_zone_count + 1} copies after move" + return ( + f"Zone allocation violation: {recommendation.to_zone} " + f"would have {target_zone_count + 1} copies after move." 
+ ) return None @@ -635,71 +655,70 @@ def get_cluster_overview(self) -> Dict[str, Any]: """Get a comprehensive overview of the cluster""" # Get cluster watermark settings watermarks = self.client.get_cluster_watermarks() - + overview = { - 'nodes': len(self.nodes), - 'zones': len(set(node.zone for node in self.nodes)), - 'total_shards': len(self.shards), - 'primary_shards': len([s for s in self.shards if s.is_primary]), - 'replica_shards': len([s for s in self.shards if not s.is_primary]), - 'total_size_gb': sum(s.size_gb for s in self.shards), - 'zone_distribution': defaultdict(int), - 'node_health': [], - 'watermarks': watermarks + "nodes": len(self.nodes), + "zones": len({node.zone for node in self.nodes}), + "total_shards": len(self.shards), + "primary_shards": len([s for s in self.shards if s.is_primary]), + "replica_shards": len([s for s in self.shards if not s.is_primary]), + "total_size_gb": sum(s.size_gb for s in self.shards), + "zone_distribution": defaultdict(int), + "node_health": [], + "watermarks": watermarks, } # Zone distribution for shard in self.shards: - overview['zone_distribution'][shard.zone] += 1 - overview['zone_distribution'] = dict(overview['zone_distribution']) + overview["zone_distribution"][shard.zone] += 1 + overview["zone_distribution"] = dict(overview["zone_distribution"]) # Node health with watermark calculations for node in self.nodes: node_shards = [s for s in self.shards if s.node_name == node.name] watermark_info = self._calculate_node_watermark_remaining(node, watermarks) - - overview['node_health'].append({ - 'name': node.name, - 'zone': node.zone, - 'shards': len(node_shards), - 'size_gb': sum(s.size_gb for s in node_shards), - 'disk_usage_percent': node.disk_usage_percent, - 'heap_usage_percent': node.heap_usage_percent, - 'available_space_gb': node.available_space_gb, - 'remaining_to_low_watermark_gb': watermark_info['remaining_to_low_gb'], - 'remaining_to_high_watermark_gb': watermark_info['remaining_to_high_gb'] - }) + + 
overview["node_health"].append( + { + "name": node.name, + "zone": node.zone, + "shards": len(node_shards), + "size_gb": sum(s.size_gb for s in node_shards), + "disk_usage_percent": node.disk_usage_percent, + "heap_usage_percent": node.heap_usage_percent, + "available_space_gb": node.available_space_gb, + "remaining_to_low_watermark_gb": watermark_info["remaining_to_low_gb"], + "remaining_to_high_watermark_gb": watermark_info["remaining_to_high_gb"], + } + ) return overview - def _calculate_node_watermark_remaining(self, node: 'NodeInfo', watermarks: Dict[str, Any]) -> Dict[str, float]: + def _calculate_node_watermark_remaining(self, node: "NodeInfo", watermarks: Dict[str, Any]) -> Dict[str, float]: """Calculate remaining space until watermarks are reached""" - + # Parse watermark percentages - low_watermark = self._parse_watermark_percentage(watermarks.get('low', '85%')) - high_watermark = self._parse_watermark_percentage(watermarks.get('high', '90%')) - + low_watermark = self._parse_watermark_percentage(watermarks.get("low", "85%")) + high_watermark = self._parse_watermark_percentage(watermarks.get("high", "90%")) + # Calculate remaining space to each watermark total_space_bytes = node.fs_total current_used_bytes = node.fs_used - + # Space that would be used at each watermark low_watermark_used_bytes = total_space_bytes * (low_watermark / 100.0) high_watermark_used_bytes = total_space_bytes * (high_watermark / 100.0) - + # Remaining space until each watermark (negative if already exceeded) remaining_to_low_gb = max(0, (low_watermark_used_bytes - current_used_bytes) / (1024**3)) remaining_to_high_gb = max(0, (high_watermark_used_bytes - current_used_bytes) / (1024**3)) - - return { - 'remaining_to_low_gb': remaining_to_low_gb, - 'remaining_to_high_gb': remaining_to_high_gb - } - + + return {"remaining_to_low_gb": remaining_to_low_gb, "remaining_to_high_gb": remaining_to_high_gb} + def _parse_watermark_percentage(self, watermark_value: str) -> float: """Parse 
watermark percentage from string like '85%' or '0.85'""" if isinstance(watermark_value, str): - if watermark_value.endswith('%'): + if watermark_value.endswith("%"): return float(watermark_value[:-1]) else: # Handle decimal format like '0.85' @@ -715,8 +734,7 @@ def _parse_watermark_percentage(self, watermark_value: str) -> float: # Default to common values if parsing fails return 85.0 # Default low watermark - def plan_node_decommission(self, node_name: str, - min_free_space_gb: float = 100.0) -> Dict[str, Any]: + def plan_node_decommission(self, node_name: str, min_free_space_gb: float = 100.0) -> Dict[str, Any]: """Plan the decommissioning of a node by analyzing required shard moves Args: @@ -734,26 +752,21 @@ def plan_node_decommission(self, node_name: str, break if not target_node: - return { - 'error': f"Node {node_name} not found in cluster", - 'feasible': False - } + return {"error": f"Node {node_name} not found in cluster", "feasible": False} # Get all shards on this node (only healthy ones for safety) - node_shards = [s for s in self.shards - if s.node_name == node_name - and s.routing_state == 'STARTED'] + node_shards = [s for s in self.shards if s.node_name == node_name and s.routing_state == "STARTED"] if not node_shards: return { - 'node': node_name, - 'zone': target_node.zone, - 'feasible': True, - 'shards_to_move': 0, - 'total_size_gb': 0, - 'recommendations': [], - 'warnings': [], - 'message': 'Node has no healthy shards - safe to decommission' + "node": node_name, + "zone": target_node.zone, + "feasible": True, + "shards_to_move": 0, + "total_size_gb": 0, + "recommendations": [], + "warnings": [], + "message": "Node has no healthy shards - safe to decommission", } # Calculate space requirements @@ -767,17 +780,17 @@ def plan_node_decommission(self, node_name: str, for shard in node_shards: # Find nodes that can accommodate this shard potential_targets = self.find_nodes_with_capacity( - shard.size_gb, - exclude_nodes={node_name}, - 
min_free_space_gb=min_free_space_gb + shard.size_gb, exclude_nodes={node_name}, min_free_space_gb=min_free_space_gb ) if not potential_targets: - infeasible_moves.append({ - 'shard': f"{shard.schema_name}.{shard.table_name}[{shard.shard_id}]", - 'size_gb': shard.size_gb, - 'reason': 'No nodes with sufficient capacity' - }) + infeasible_moves.append( + { + "shard": f"{shard.schema_name}.{shard.table_name}[{shard.shard_id}]", + "size_gb": shard.size_gb, + "reason": "No nodes with sufficient capacity", + } + ) continue # Check for zone conflicts @@ -794,36 +807,42 @@ def plan_node_decommission(self, node_name: str, to_zone=target.zone, shard_type=shard.shard_type, size_gb=shard.size_gb, - reason=f"Node decommission: {node_name}" + reason=f"Node decommission: {node_name}", ) zone_conflict = self._check_zone_conflict(temp_rec) if not zone_conflict: safe_targets.append(target) else: - warnings.append(f"Zone conflict for {shard.schema_name}.{shard.table_name}[{shard.shard_id}]: {zone_conflict}") + warnings.append( + f"Zone conflict for {shard.schema_name}.{shard.table_name}[{shard.shard_id}]: {zone_conflict}" + ) if safe_targets: # Choose the target with most available space best_target = safe_targets[0] - move_plan.append(MoveRecommendation( - table_name=shard.table_name, - schema_name=shard.schema_name, - shard_id=shard.shard_id, - from_node=node_name, - to_node=best_target.name, - from_zone=shard.zone, - to_zone=best_target.zone, - shard_type=shard.shard_type, - size_gb=shard.size_gb, - reason=f"Node decommission: {node_name}" - )) + move_plan.append( + MoveRecommendation( + table_name=shard.table_name, + schema_name=shard.schema_name, + shard_id=shard.shard_id, + from_node=node_name, + to_node=best_target.name, + from_zone=shard.zone, + to_zone=best_target.zone, + shard_type=shard.shard_type, + size_gb=shard.size_gb, + reason=f"Node decommission: {node_name}", + ) + ) else: - infeasible_moves.append({ - 'shard': 
f"{shard.schema_name}.{shard.table_name}[{shard.shard_id}]", - 'size_gb': shard.size_gb, - 'reason': 'Zone conflicts prevent safe move' - }) + infeasible_moves.append( + { + "shard": f"{shard.schema_name}.{shard.table_name}[{shard.shard_id}]", + "size_gb": shard.size_gb, + "reason": "Zone conflicts prevent safe move", + } + ) # Determine feasibility feasible = len(infeasible_moves) == 0 @@ -833,144 +852,139 @@ def plan_node_decommission(self, node_name: str, # Check if remaining cluster capacity is sufficient after decommission remaining_capacity = sum(n.available_space_gb for n in self.nodes if n.name != node_name) if remaining_capacity < total_size_gb * 1.2: # 20% safety margin - warnings.append(f"Low remaining capacity after decommission. Only {remaining_capacity:.1f}GB available for {total_size_gb:.1f}GB of data") + warnings.append( + f"Low remaining capacity after decommission. " + f"Only {remaining_capacity:.1f}GB available for {total_size_gb:.1f}GB of data" + ) return { - 'node': node_name, - 'zone': target_node.zone, - 'feasible': feasible, - 'shards_to_move': len(node_shards), - 'moveable_shards': len(move_plan), - 'total_size_gb': total_size_gb, - 'recommendations': move_plan, - 'infeasible_moves': infeasible_moves, - 'warnings': warnings, - 'estimated_time_hours': len(move_plan) * 0.1, # Rough estimate: 6 minutes per move - 'message': 'Decommission plan generated' if feasible else 'Decommission not currently feasible' + "node": node_name, + "zone": target_node.zone, + "feasible": feasible, + "shards_to_move": len(node_shards), + "moveable_shards": len(move_plan), + "total_size_gb": total_size_gb, + "recommendations": move_plan, + "infeasible_moves": infeasible_moves, + "warnings": warnings, + "estimated_time_hours": len(move_plan) * 0.1, # Rough estimate: 6 minutes per move + "message": "Decommission plan generated" if feasible else "Decommission not currently feasible", } class RecoveryMonitor: """Monitor shard recovery operations""" - + def 
__init__(self, client: CrateDBClient): self.client = client - - def get_cluster_recovery_status(self, - table_name: Optional[str] = None, - node_name: Optional[str] = None, - recovery_type_filter: str = 'all', - include_transitioning: bool = False) -> List[RecoveryInfo]: + + def get_cluster_recovery_status( + self, + table_name: Optional[str] = None, + node_name: Optional[str] = None, + recovery_type_filter: str = "all", + include_transitioning: bool = False, + ) -> List[RecoveryInfo]: """Get comprehensive recovery status with minimal cluster impact""" - + # Get all recovering shards using the efficient combined query recoveries = self.client.get_all_recovering_shards(table_name, node_name, include_transitioning) - + # Apply recovery type filter - if recovery_type_filter != 'all': + if recovery_type_filter != "all": recoveries = [r for r in recoveries if r.recovery_type.upper() == recovery_type_filter.upper()] - + return recoveries - + def get_recovery_summary(self, recoveries: List[RecoveryInfo]) -> Dict[str, Any]: """Generate a summary of recovery operations""" - + if not recoveries: - return { - 'total_recoveries': 0, - 'by_type': {}, - 'by_stage': {}, - 'avg_progress': 0.0, - 'total_size_gb': 0.0 - } - + return {"total_recoveries": 0, "by_type": {}, "by_stage": {}, "avg_progress": 0.0, "total_size_gb": 0.0} + # Group by recovery type by_type = {} by_stage = {} total_progress = 0.0 total_size_gb = 0.0 - + for recovery in recoveries: # By type if recovery.recovery_type not in by_type: - by_type[recovery.recovery_type] = { - 'count': 0, - 'total_size_gb': 0.0, - 'avg_progress': 0.0 - } - by_type[recovery.recovery_type]['count'] += 1 - by_type[recovery.recovery_type]['total_size_gb'] += recovery.size_gb - + by_type[recovery.recovery_type] = {"count": 0, "total_size_gb": 0.0, "avg_progress": 0.0} + by_type[recovery.recovery_type]["count"] += 1 + by_type[recovery.recovery_type]["total_size_gb"] += recovery.size_gb + # By stage if recovery.stage not in by_stage: 
by_stage[recovery.stage] = 0 by_stage[recovery.stage] += 1 - + # Totals total_progress += recovery.overall_progress total_size_gb += recovery.size_gb - + # Calculate averages for type_name, rec_type in by_type.items(): - if rec_type['count'] > 0: + if rec_type["count"] > 0: type_recoveries = [r for r in recoveries if r.recovery_type == type_name] if type_recoveries: - rec_type['avg_progress'] = sum(r.overall_progress for r in type_recoveries) / len(type_recoveries) - + rec_type["avg_progress"] = sum(r.overall_progress for r in type_recoveries) / len(type_recoveries) + return { - 'total_recoveries': len(recoveries), - 'by_type': by_type, - 'by_stage': by_stage, - 'avg_progress': total_progress / len(recoveries) if recoveries else 0.0, - 'total_size_gb': total_size_gb + "total_recoveries": len(recoveries), + "by_type": by_type, + "by_stage": by_stage, + "avg_progress": total_progress / len(recoveries) if recoveries else 0.0, + "total_size_gb": total_size_gb, } - + def format_recovery_display(self, recoveries: List[RecoveryInfo]) -> str: """Format recovery information for display""" - + if not recoveries: return "βœ… No active shard recoveries found" - + # Group by recovery type - peer_recoveries = [r for r in recoveries if r.recovery_type == 'PEER'] - disk_recoveries = [r for r in recoveries if r.recovery_type == 'DISK'] - other_recoveries = [r for r in recoveries if r.recovery_type not in ['PEER', 'DISK']] - + peer_recoveries = [r for r in recoveries if r.recovery_type == "PEER"] + disk_recoveries = [r for r in recoveries if r.recovery_type == "DISK"] + other_recoveries = [r for r in recoveries if r.recovery_type not in ["PEER", "DISK"]] + output = [f"\nπŸ”„ Active Shard Recoveries ({len(recoveries)} total)"] output.append("=" * 80) - + if peer_recoveries: output.append(f"\nπŸ“‘ PEER Recoveries ({len(peer_recoveries)})") output.append(self._format_recovery_table(peer_recoveries)) - + if disk_recoveries: output.append(f"\nπŸ’Ύ DISK Recoveries 
({len(disk_recoveries)})") output.append(self._format_recovery_table(disk_recoveries)) - + if other_recoveries: output.append(f"\nπŸ”§ Other Recoveries ({len(other_recoveries)})") output.append(self._format_recovery_table(other_recoveries)) - + # Add summary summary = self.get_recovery_summary(recoveries) - output.append(f"\nπŸ“Š Summary:") + output.append("\nπŸ“Š Summary:") output.append(f" Total size: {summary['total_size_gb']:.1f} GB") output.append(f" Average progress: {summary['avg_progress']:.1f}%") - + return "\n".join(output) - + def _format_recovery_table(self, recoveries: List[RecoveryInfo]) -> str: """Format a table of recovery information""" - + if not recoveries: return " No recoveries of this type" - + # Table headers headers = ["Table", "Shard", "Node", "Type", "Stage", "Progress", "Size(GB)", "Time(s)"] - + # Calculate column widths col_widths = [len(h) for h in headers] - + rows = [] for recovery in recoveries: row = [ @@ -981,25 +995,25 @@ def _format_recovery_table(self, recoveries: List[RecoveryInfo]) -> str: recovery.stage, f"{recovery.overall_progress:.1f}%", f"{recovery.size_gb:.1f}", - f"{recovery.total_time_seconds:.1f}" + f"{recovery.total_time_seconds:.1f}", ] rows.append(row) - + # Update column widths for i, cell in enumerate(row): col_widths[i] = max(col_widths[i], len(cell)) - + # Format table output = [] - + # Header row header_row = " " + " | ".join(h.ljust(w) for h, w in zip(headers, col_widths)) output.append(header_row) output.append(" " + "-" * (len(header_row) - 3)) - + # Data rows for row in rows: data_row = " " + " | ".join(cell.ljust(w) for cell, w in zip(row, col_widths)) output.append(data_row) - + return "\n".join(output) diff --git a/cratedb_toolkit/admin/xmover/cli.py b/cratedb_toolkit/admin/xmover/cli.py index e4f5800d..2ce29fdf 100644 --- a/cratedb_toolkit/admin/xmover/cli.py +++ b/cratedb_toolkit/admin/xmover/cli.py @@ -4,23 +4,16 @@ import sys import time -import os from typing import Optional -try: - import click - 
from rich.console import Console - from rich.table import Table - from rich.panel import Panel - from rich.text import Text - from rich import box -except ImportError as e: - print(f"Missing required dependency: {e}") - print("Please install dependencies with: pip install -e .") - sys.exit(1) -from .database import CrateDBClient -from .analyzer import ShardAnalyzer, RecoveryMonitor +import click +from rich import box +from rich.console import Console +from rich.panel import Panel +from rich.table import Table +from .analyzer import MoveRecommendation, RecoveryMonitor, ShardAnalyzer +from .database import CrateDBClient console = Console() @@ -28,11 +21,11 @@ def format_size(size_gb: float) -> str: """Format size in GB with appropriate precision""" if size_gb >= 1000: - return f"{size_gb/1000:.1f}TB" + return f"{size_gb / 1000:.1f}TB" elif size_gb >= 1: return f"{size_gb:.1f}GB" else: - return f"{size_gb*1000:.0f}MB" + return f"{size_gb * 1000:.0f}MB" def format_percentage(value: float) -> str: @@ -48,13 +41,13 @@ def format_percentage(value: float) -> str: def format_translog_info(recovery_info) -> str: """Format translog size information with color coding""" tl_bytes = recovery_info.translog_size_bytes - + # Only show if significant (>10MB for production) if tl_bytes < 10 * 1024 * 1024: # 10MB for production return "" - + tl_gb = recovery_info.translog_size_gb - + # Color coding based on size if tl_gb >= 5.0: color = "red" @@ -62,13 +55,13 @@ def format_translog_info(recovery_info) -> str: color = "yellow" else: color = "green" - + # Format size if tl_gb >= 1.0: size_str = f"{tl_gb:.1f}GB" else: - size_str = f"{tl_gb*1000:.0f}MB" - + size_str = f"{tl_gb * 1000:.0f}MB" + return f" [dim]([{color}]TL:{size_str}[/{color}])[/dim]" @@ -90,18 +83,18 @@ def main(ctx): console.print("[red]Error: Could not connect to CrateDB[/red]") console.print("Please check your CRATE_CONNECTION_STRING in .env file") sys.exit(1) - ctx.obj['client'] = client + ctx.obj["client"] = client 
except Exception as e: console.print(f"[red]Error connecting to CrateDB: {e}[/red]") sys.exit(1) @main.command() -@click.option('--table', '-t', help='Analyze specific table only') +@click.option("--table", "-t", help="Analyze specific table only") @click.pass_context def analyze(ctx, table: Optional[str]): """Analyze current shard distribution across nodes and zones""" - client = ctx.obj['client'] + client = ctx.obj["client"] analyzer = ShardAnalyzer(client) console.print(Panel.fit("[bold blue]CrateDB Cluster Analysis[/bold blue]")) @@ -114,27 +107,29 @@ def analyze(ctx, table: Optional[str]): summary_table.add_column("Metric", style="cyan") summary_table.add_column("Value", style="magenta") - summary_table.add_row("Nodes", str(overview['nodes'])) - summary_table.add_row("Availability Zones", str(overview['zones'])) - summary_table.add_row("Total Shards", str(overview['total_shards'])) - summary_table.add_row("Primary Shards", str(overview['primary_shards'])) - summary_table.add_row("Replica Shards", str(overview['replica_shards'])) - summary_table.add_row("Total Size", format_size(overview['total_size_gb'])) + summary_table.add_row("Nodes", str(overview["nodes"])) + summary_table.add_row("Availability Zones", str(overview["zones"])) + summary_table.add_row("Total Shards", str(overview["total_shards"])) + summary_table.add_row("Primary Shards", str(overview["primary_shards"])) + summary_table.add_row("Replica Shards", str(overview["replica_shards"])) + summary_table.add_row("Total Size", format_size(overview["total_size_gb"])) console.print(summary_table) console.print() # Disk watermarks table - if overview.get('watermarks'): + if overview.get("watermarks"): watermarks_table = Table(title="Disk Allocation Watermarks", box=box.ROUNDED) watermarks_table.add_column("Setting", style="cyan") watermarks_table.add_column("Value", style="magenta") - watermarks = overview['watermarks'] - watermarks_table.add_row("Low Watermark", str(watermarks.get('low', 'Not set'))) - 
watermarks_table.add_row("High Watermark", str(watermarks.get('high', 'Not set'))) - watermarks_table.add_row("Flood Stage", str(watermarks.get('flood_stage', 'Not set'))) - watermarks_table.add_row("Enable for Single Node", str(watermarks.get('enable_for_single_data_node', 'Not set'))) + watermarks = overview["watermarks"] + watermarks_table.add_row("Low Watermark", str(watermarks.get("low", "Not set"))) + watermarks_table.add_row("High Watermark", str(watermarks.get("high", "Not set"))) + watermarks_table.add_row("Flood Stage", str(watermarks.get("flood_stage", "Not set"))) + watermarks_table.add_row( + "Enable for Single Node", str(watermarks.get("enable_for_single_data_node", "Not set")) + ) console.print(watermarks_table) console.print() @@ -145,8 +140,8 @@ def analyze(ctx, table: Optional[str]): zone_table.add_column("Shards", justify="right", style="magenta") zone_table.add_column("Percentage", justify="right", style="green") - total_shards = overview['total_shards'] - for zone, count in overview['zone_distribution'].items(): + total_shards = overview["total_shards"] + for zone, count in overview["zone_distribution"].items(): percentage = (count / total_shards * 100) if total_shards > 0 else 0 zone_table.add_row(zone, str(count), f"{percentage:.1f}%") @@ -164,20 +159,28 @@ def analyze(ctx, table: Optional[str]): node_table.add_column("Until Low WM", justify="right", style="yellow") node_table.add_column("Until High WM", justify="right", style="red") - for node_info in overview['node_health']: + for node_info in overview["node_health"]: # Format watermark remaining capacity - low_wm_remaining = format_size(node_info['remaining_to_low_watermark_gb']) if node_info['remaining_to_low_watermark_gb'] > 0 else "[red]Exceeded[/red]" - high_wm_remaining = format_size(node_info['remaining_to_high_watermark_gb']) if node_info['remaining_to_high_watermark_gb'] > 0 else "[red]Exceeded[/red]" + low_wm_remaining = ( + format_size(node_info["remaining_to_low_watermark_gb"]) 
+ if node_info["remaining_to_low_watermark_gb"] > 0 + else "[red]Exceeded[/red]" + ) + high_wm_remaining = ( + format_size(node_info["remaining_to_high_watermark_gb"]) + if node_info["remaining_to_high_watermark_gb"] > 0 + else "[red]Exceeded[/red]" + ) node_table.add_row( - node_info['name'], - node_info['zone'], - str(node_info['shards']), - format_size(node_info['size_gb']), - format_percentage(node_info['disk_usage_percent']), - format_size(node_info['available_space_gb']), + node_info["name"], + node_info["zone"], + str(node_info["shards"]), + format_size(node_info["size_gb"]), + format_percentage(node_info["disk_usage_percent"]), + format_size(node_info["available_space_gb"]), low_wm_remaining, - high_wm_remaining + high_wm_remaining, ) console.print(node_table) @@ -202,11 +205,11 @@ def analyze(ctx, table: Optional[str]): @main.command() -@click.option('--table', '-t', help='Find candidates for specific table only') -@click.option('--min-size', default=40.0, help='Minimum shard size in GB (default: 40)') -@click.option('--max-size', default=60.0, help='Maximum shard size in GB (default: 60)') -@click.option('--limit', default=20, help='Maximum number of candidates to show (default: 20)') -@click.option('--node', help='Only show candidates from this specific source node (e.g., data-hot-4)') +@click.option("--table", "-t", help="Find candidates for specific table only") +@click.option("--min-size", default=40.0, help="Minimum shard size in GB (default: 40)") +@click.option("--max-size", default=60.0, help="Maximum shard size in GB (default: 60)") +@click.option("--limit", default=20, help="Maximum number of candidates to show (default: 20)") +@click.option("--node", help="Only show candidates from this specific source node (e.g., data-hot-4)") @click.pass_context def find_candidates(ctx, table: Optional[str], min_size: float, max_size: float, limit: int, node: Optional[str]): """Find shard candidates for movement based on size criteria @@ -214,7 +217,7 @@ def 
find_candidates(ctx, table: Optional[str], min_size: float, max_size: float, Results are sorted by nodes with least available space first, then by shard size (smallest first) for easier moves. """ - client = ctx.obj['client'] + client = ctx.obj["client"] analyzer = ShardAnalyzer(client) console.print(Panel.fit(f"[bold blue]Finding Moveable Shards ({min_size}-{max_size}GB)[/bold blue]")) @@ -232,7 +235,7 @@ def find_candidates(ctx, table: Optional[str], min_size: float, max_size: float, if not candidates: if node: console.print(f"[yellow]No moveable shards found on node '{node}' in the specified size range.[/yellow]") - console.print(f"[dim]Tip: Try different size ranges or remove --node filter to see all candidates[/dim]") + console.print("[dim]Tip: Try different size ranges or remove --node filter to see all candidates[/dim]") else: console.print("[yellow]No moveable shards found in the specified size range.[/yellow]") return @@ -240,7 +243,9 @@ def find_candidates(ctx, table: Optional[str], min_size: float, max_size: float, # Show limited results shown_candidates = candidates[:limit] - candidates_table = Table(title=f"Moveable Shard Candidates (showing {len(shown_candidates)} of {len(candidates)})", box=box.ROUNDED) + candidates_table = Table( + title=f"Moveable Shard Candidates (showing {len(shown_candidates)} of {len(candidates)})", box=box.ROUNDED + ) candidates_table.add_column("Table", style="cyan") candidates_table.add_column("Shard ID", justify="right", style="magenta") candidates_table.add_column("Type", style="blue") @@ -263,7 +268,7 @@ def find_candidates(ctx, table: Optional[str], min_size: float, max_size: float, shard.zone, format_size(shard.size_gb), format_size(node_free_space), - f"{shard.num_docs:,}" + f"{shard.num_docs:,}", ) console.print(candidates_table) @@ -273,26 +278,51 @@ def find_candidates(ctx, table: Optional[str], min_size: float, max_size: float, @main.command() -@click.option('--table', '-t', help='Generate recommendations for 
specific table only') -@click.option('--min-size', default=40.0, help='Minimum shard size in GB (default: 40)') -@click.option('--max-size', default=60.0, help='Maximum shard size in GB (default: 60)') -@click.option('--zone-tolerance', default=10.0, help='Zone balance tolerance percentage (default: 10)') -@click.option('--min-free-space', default=100.0, help='Minimum free space required on target nodes in GB (default: 100)') -@click.option('--max-moves', default=10, help='Maximum number of move recommendations (default: 10)') -@click.option('--max-disk-usage', default=90.0, help='Maximum disk usage percentage for target nodes (default: 90)') - -@click.option('--validate/--no-validate', default=True, help='Validate move safety (default: True)') -@click.option('--prioritize-space/--prioritize-zones', default=False, help='Prioritize available space over zone balancing (default: False)') -@click.option('--dry-run/--execute', default=True, help='Show what would be done without generating SQL commands (default: True)') -@click.option('--auto-execute', is_flag=True, default=False, help='DANGER: Automatically execute the SQL commands (requires --execute, asks for confirmation)') -@click.option('--node', help='Only recommend moves from this specific source node (e.g., data-hot-4)') +@click.option("--table", "-t", help="Generate recommendations for specific table only") +@click.option("--min-size", default=40.0, help="Minimum shard size in GB (default: 40)") +@click.option("--max-size", default=60.0, help="Maximum shard size in GB (default: 60)") +@click.option("--zone-tolerance", default=10.0, help="Zone balance tolerance percentage (default: 10)") +@click.option( + "--min-free-space", default=100.0, help="Minimum free space required on target nodes in GB (default: 100)" +) +@click.option("--max-moves", default=10, help="Maximum number of move recommendations (default: 10)") +@click.option("--max-disk-usage", default=90.0, help="Maximum disk usage percentage for target 
nodes (default: 90)") +@click.option("--validate/--no-validate", default=True, help="Validate move safety (default: True)") +@click.option( + "--prioritize-space/--prioritize-zones", + default=False, + help="Prioritize available space over zone balancing (default: False)", +) +@click.option( + "--dry-run/--execute", default=True, help="Show what would be done without generating SQL commands (default: True)" +) +@click.option( + "--auto-execute", + is_flag=True, + default=False, + help="DANGER: Automatically execute the SQL commands (requires --execute, asks for confirmation)", +) +@click.option("--node", help="Only recommend moves from this specific source node (e.g., data-hot-4)") @click.pass_context -def recommend(ctx, table: Optional[str], min_size: float, max_size: float, - zone_tolerance: float, min_free_space: float, max_moves: int, max_disk_usage: float, validate: bool, prioritize_space: bool, dry_run: bool, auto_execute: bool, node: Optional[str]): +def recommend( + ctx, + table: Optional[str], + min_size: float, + max_size: float, + zone_tolerance: float, + min_free_space: float, + max_moves: int, + max_disk_usage: float, + validate: bool, + prioritize_space: bool, + dry_run: bool, + auto_execute: bool, + node: Optional[str], +): """Generate shard movement recommendations for rebalancing""" - client = ctx.obj['client'] + client = ctx.obj["client"] analyzer = ShardAnalyzer(client) - + # Safety check for auto-execute if auto_execute and dry_run: console.print("[red]❌ Error: --auto-execute requires --execute flag[/red]") @@ -300,7 +330,12 @@ def recommend(ctx, table: Optional[str], min_size: float, max_size: float, return mode_text = "DRY RUN - Analysis Only" if dry_run else "EXECUTION MODE" - console.print(Panel.fit(f"[bold blue]Generating Rebalancing Recommendations[/bold blue] - [bold {'green' if dry_run else 'red'}]{mode_text}[/bold {'green' if dry_run else 'red'}]")) + console.print( + Panel.fit( + f"[bold blue]Generating Rebalancing 
Recommendations[/bold blue] - " + f"[bold {'green' if dry_run else 'red'}]{mode_text}[/bold {'green' if dry_run else 'red'}]" + ) + ) console.print("[dim]Note: Only analyzing healthy shards (STARTED + 100% recovered) for safe operations[/dim]") console.print("[dim]Zone conflict detection: Prevents moves that would violate CrateDB's zone awareness[/dim]") if prioritize_space: @@ -328,21 +363,21 @@ def recommend(ctx, table: Optional[str], min_size: float, max_size: float, max_recommendations=max_moves, prioritize_space=prioritize_space, source_node=node, - max_disk_usage_percent=max_disk_usage + max_disk_usage_percent=max_disk_usage, ) if not recommendations: if node: console.print(f"[yellow]No safe recommendations found for node '{node}'[/yellow]") - console.print(f"[dim]This could be due to:[/dim]") - console.print(f"[dim] β€’ Zone conflicts preventing safe moves[/dim]") + console.print("[dim]This could be due to:[/dim]") + console.print("[dim] β€’ Zone conflicts preventing safe moves[/dim]") console.print(f"[dim] β€’ Target nodes exceeding {max_disk_usage}% disk usage threshold[/dim]") console.print(f"[dim] β€’ Insufficient free space on target nodes (need {min_free_space}GB)[/dim]") console.print(f"[dim] β€’ No shards in size range {min_size}-{max_size}GB[/dim]") - console.print(f"[dim]Suggestions:[/dim]") - console.print(f"[dim] β€’ Try: --max-disk-usage 95 (allow higher disk usage)[/dim]") - console.print(f"[dim] β€’ Try: --min-free-space 50 (reduce space requirements)[/dim]") - console.print(f"[dim] β€’ Try: different size ranges or remove --node filter[/dim]") + console.print("[dim]Suggestions:[/dim]") + console.print("[dim] β€’ Try: --max-disk-usage 95 (allow higher disk usage)[/dim]") + console.print("[dim] β€’ Try: --min-free-space 50 (reduce space requirements)[/dim]") + console.print("[dim] β€’ Try: different size ranges or remove --node filter[/dim]") else: console.print("[green]No rebalancing recommendations needed. 
Cluster appears well balanced![/green]") return @@ -377,7 +412,7 @@ def recommend(ctx, table: Optional[str], min_size: float, max_size: float, format_size(target_free_space), zone_change, format_size(rec.size_gb), - rec.reason + rec.reason, ] if validate: @@ -420,14 +455,16 @@ def recommend(ctx, table: Optional[str], min_size: float, max_size: float, console.print(f"[dim] Target SQL: {rec.to_sql()}[/dim]") console.print() - console.print(f"[bold]Dry Run Summary:[/bold]") + console.print("[bold]Dry Run Summary:[/bold]") console.print(f" β€’ Safe moves that would execute: [green]{safe_moves}[/green]") console.print(f" β€’ Zone conflicts prevented: [yellow]{zone_conflicts}[/yellow]") console.print(f" β€’ Space-related issues: [yellow]{space_issues}[/yellow]") if safe_moves > 0: - console.print(f"\n[green]βœ“ Ready to execute {safe_moves} safe moves. Use --execute to generate SQL commands.[/green]") + console.print( + f"\n[green]βœ“ Ready to execute {safe_moves} safe moves. Use --execute to generate SQL commands.[/green]" + ) else: - console.print(f"\n[yellow]⚠ No safe moves identified. Review cluster balance or adjust parameters.[/yellow]") + console.print("\n[yellow]⚠ No safe moves identified. 
Review cluster balance or adjust parameters.[/yellow]") else: console.print(Panel.fit("[bold green]Generated SQL Commands[/bold green]")) console.print("[dim]# Copy and paste these commands to execute the moves[/dim]") @@ -445,7 +482,7 @@ def recommend(ctx, table: Optional[str], min_size: float, max_size: float, if "Zone conflict" in safety_msg: zone_conflicts += 1 console.print(f"-- Move {i}: SKIPPED - {safety_msg}") - console.print(f"-- Tip: Try moving to a different zone or check existing shard distribution") + console.print("-- Tip: Try moving to a different zone or check existing shard distribution") else: console.print(f"-- Move {i}: SKIPPED - {safety_msg}") continue @@ -462,12 +499,14 @@ def recommend(ctx, table: Optional[str], min_size: float, max_size: float, if validate and safe_moves < len(recommendations): if zone_conflicts > 0: console.print(f"[yellow]Warning: {zone_conflicts} moves skipped due to zone conflicts[/yellow]") - console.print(f"[yellow]Tip: Use 'find-candidates' to see current shard distribution across zones[/yellow]") - console.print(f"[yellow]Warning: Only {safe_moves} of {len(recommendations)} moves passed safety validation[/yellow]") + console.print("[yellow]Tip: Use 'find-candidates' to see current shard distribution across zones[/yellow]") + console.print( + f"[yellow]Warning: Only {safe_moves} of {len(recommendations)} moves passed safety validation[/yellow]" + ) @main.command() -@click.option('--connection-string', help='Override connection string from .env') +@click.option("--connection-string", help="Override connection string from .env") @click.pass_context def test_connection(ctx, connection_string: Optional[str]): """Test connection to CrateDB cluster""" @@ -495,12 +534,12 @@ def test_connection(ctx, connection_string: Optional[str]): @main.command() -@click.option('--table', '-t', help='Check balance for specific table only') -@click.option('--tolerance', default=10.0, help='Zone balance tolerance percentage (default: 10)') 
+@click.option("--table", "-t", help="Check balance for specific table only") +@click.option("--tolerance", default=10.0, help="Zone balance tolerance percentage (default: 10)") @click.pass_context def check_balance(ctx, table: Optional[str], tolerance: float): """Check zone balance for shards""" - client = ctx.obj['client'] + client = ctx.obj["client"] analyzer = ShardAnalyzer(client) console.print(Panel.fit("[bold blue]Zone Balance Check[/bold blue]")) @@ -514,13 +553,10 @@ def check_balance(ctx, table: Optional[str], tolerance: float): return # Calculate totals and targets - total_shards = sum(stats['TOTAL'] for stats in zone_stats.values()) + total_shards = sum(stats["TOTAL"] for stats in zone_stats.values()) zones = list(zone_stats.keys()) target_per_zone = total_shards // len(zones) if zones else 0 - tolerance_range = ( - target_per_zone * (1 - tolerance / 100), - target_per_zone * (1 + tolerance / 100) - ) + tolerance_range = (target_per_zone * (1 - tolerance / 100), target_per_zone * (1 + tolerance / 100)) balance_table = Table(title=f"Zone Balance Analysis (Target: {target_per_zone} Β±{tolerance}%)", box=box.ROUNDED) balance_table.add_column("Zone", style="cyan") @@ -530,7 +566,7 @@ def check_balance(ctx, table: Optional[str], tolerance: float): balance_table.add_column("Status", style="bold") for zone, stats in zone_stats.items(): - total = stats['TOTAL'] + total = stats["TOTAL"] if tolerance_range[0] <= total <= tolerance_range[1]: status = "[green]βœ“ Balanced[/green]" @@ -539,24 +575,18 @@ def check_balance(ctx, table: Optional[str], tolerance: float): else: status = f"[red]⚠ Over ({total - target_per_zone:+})[/red]" - balance_table.add_row( - zone, - str(stats['PRIMARY']), - str(stats['REPLICA']), - str(total), - status - ) + balance_table.add_row(zone, str(stats["PRIMARY"]), str(stats["REPLICA"]), str(total), status) console.print(balance_table) @main.command() -@click.option('--table', '-t', help='Analyze zones for specific table only') 
-@click.option('--show-shards/--no-show-shards', default=False, help='Show individual shard details (default: False)') +@click.option("--table", "-t", help="Analyze zones for specific table only") +@click.option("--show-shards/--no-show-shards", default=False, help="Show individual shard details (default: False)") @click.pass_context def zone_analysis(ctx, table: Optional[str], show_shards: bool): """Detailed analysis of zone distribution and potential conflicts""" - client = ctx.obj['client'] + client = ctx.obj["client"] console.print(Panel.fit("[bold blue]Detailed Zone Analysis[/bold blue]")) console.print("[dim]Comprehensive zone distribution analysis for CrateDB cluster[/dim]") @@ -626,23 +656,22 @@ def zone_analysis(ctx, table: Optional[str], show_shards: bool): replica_zones_str = ", ".join(sorted(replica_zones)) if replica_zones else "None" analysis_table.add_row( - str(shard_id), - primary_zone, - replica_zones_str, - str(total_copies), - " ".join(status_parts) + str(shard_id), primary_zone, replica_zones_str, str(total_copies), " ".join(status_parts) ) # Show individual shard details if requested if show_shards: for shard_copy in shard_copies: - health_indicator = "βœ“" if shard_copy.routing_state == 'STARTED' else "⚠" - console.print(f" {health_indicator} {shard_copy.shard_type} on {shard_copy.node_name} ({shard_copy.zone}) - {shard_copy.routing_state}") + health_indicator = "βœ“" if shard_copy.routing_state == "STARTED" else "⚠" + console.print( + f" {health_indicator} {shard_copy.shard_type} " + f"on {shard_copy.node_name} ({shard_copy.zone}) - {shard_copy.routing_state}" + ) console.print(analysis_table) # Summary - console.print(f"\n[bold]Zone Analysis Summary:[/bold]") + console.print("\n[bold]Zone Analysis Summary:[/bold]") console.print(f" β€’ Tables analyzed: [cyan]{len(tables)}[/cyan]") console.print(f" β€’ Zone conflicts detected: [red]{zone_conflicts}[/red]") console.print(f" β€’ Under-replicated shards: [yellow]{under_replicated}[/yellow]") @@ 
-660,131 +689,12 @@ def zone_analysis(ctx, table: Optional[str], show_shards: bool): console.print("\n[green]βœ“ No critical zone distribution issues detected![/green]") -# @main.command() -# @click.argument('node_name') -# @click.option('--min-free-space', default=100.0, help='Minimum free space required on target nodes in GB (default: 100)') -# @click.option('--dry-run/--execute', default=True, help='Show decommission plan without generating SQL commands (default: True)') -# @click.pass_context -# def decommission(ctx, node_name: str, min_free_space: float, dry_run: bool): -# """Plan decommissioning of a node by analyzing required shard moves -# -# NODE_NAME: Name of the node to decommission -# """ -# client = ctx.obj['client'] -# analyzer = ShardAnalyzer(client) -# -# mode_text = "PLANNING MODE" if dry_run else "EXECUTION MODE" -# console.print(Panel.fit(f"[bold blue]Node Decommission Analysis[/bold blue] - [bold {'green' if dry_run else 'red'}]{mode_text}[/bold {'green' if dry_run else 'red'}]")) -# console.print(f"[dim]Analyzing decommission plan for node: {node_name}[/dim]") -# console.print() -# -# # Generate decommission plan -# plan = analyzer.plan_node_decommission(node_name, min_free_space) -# -# if 'error' in plan: -# console.print(f"[red]Error: {plan['error']}[/red]") -# return -# -# # Display plan summary -# summary_table = Table(title=f"Decommission Plan for {node_name}", box=box.ROUNDED) -# summary_table.add_column("Metric", style="cyan") -# summary_table.add_column("Value", style="magenta") -# -# summary_table.add_row("Node", plan['node']) -# summary_table.add_row("Zone", plan['zone']) -# summary_table.add_row("Feasible", "[green]βœ“ Yes[/green]" if plan['feasible'] else "[red]βœ— No[/red]") -# summary_table.add_row("Shards to Move", str(plan['shards_to_move'])) -# summary_table.add_row("Moveable Shards", str(plan['moveable_shards'])) -# summary_table.add_row("Total Data Size", format_size(plan['total_size_gb'])) -# summary_table.add_row("Estimated 
Time", f"{plan['estimated_time_hours']:.1f} hours") -# -# console.print(summary_table) -# console.print() -# -# # Show warnings if any -# if plan['warnings']: -# console.print("[bold yellow]⚠ Warnings:[/bold yellow]") -# for warning in plan['warnings']: -# console.print(f" β€’ [yellow]{warning}[/yellow]") -# console.print() -# -# # Show infeasible moves if any -# if plan['infeasible_moves']: -# console.print("[bold red]βœ— Cannot Move:[/bold red]") -# infeasible_table = Table(box=box.ROUNDED) -# infeasible_table.add_column("Shard", style="cyan") -# infeasible_table.add_column("Size", style="magenta") -# infeasible_table.add_column("Reason", style="red") -# -# for move in plan['infeasible_moves']: -# infeasible_table.add_row( -# move['shard'], -# format_size(move['size_gb']), -# move['reason'] -# ) -# console.print(infeasible_table) -# console.print() -# -# # Show move recommendations -# if plan['recommendations']: -# move_table = Table(title="Required Shard Moves", box=box.ROUNDED) -# move_table.add_column("Table", style="cyan") -# move_table.add_column("Shard", justify="right", style="magenta") -# move_table.add_column("Type", style="blue") -# move_table.add_column("Size", style="green") -# move_table.add_column("From Zone", style="yellow") -# move_table.add_column("To Node", style="cyan") -# move_table.add_column("To Zone", style="yellow") -# -# for rec in plan['recommendations']: -# move_table.add_row( -# f"{rec.schema_name}.{rec.table_name}", -# str(rec.shard_id), -# rec.shard_type, -# format_size(rec.size_gb), -# rec.from_zone, -# rec.to_node, -# rec.to_zone -# ) -# -# console.print(move_table) -# console.print() -# -# # Generate SQL commands if not in dry-run mode -# if not dry_run and plan['feasible']: -# console.print(Panel.fit("[bold green]Decommission SQL Commands[/bold green]")) -# console.print("[dim]# Execute these commands in order to prepare for node decommission[/dim]") -# console.print("[dim]# ALWAYS test in a non-production environment 
first![/dim]") -# console.print("[dim]# Monitor shard health after each move before proceeding[/dim]") -# console.print() -# -# for i, rec in enumerate(plan['recommendations'], 1): -# console.print(f"-- Move {i}: {rec.reason}") -# console.print(f"{rec.to_sql()}") -# console.print() -# -# console.print(f"-- After all moves complete, the node {node_name} can be safely removed") -# console.print(f"-- Total moves required: {len(plan['recommendations'])}") -# elif dry_run: -# console.print("[green]βœ“ Decommission plan ready. Use --execute to generate SQL commands.[/green]") -# -# # Final status -# if not plan['feasible']: -# console.print(f"[red]⚠ Node {node_name} cannot be safely decommissioned at this time.[/red]") -# console.print("[dim]Address the issues above before attempting decommission.[/dim]") -# elif plan['shards_to_move'] == 0: -# console.print(f"[green]βœ“ Node {node_name} is ready for immediate decommission (no shards to move).[/green]") -# else: -# console.print(f"[green]βœ“ Node {node_name} can be safely decommissioned after moving {len(plan['recommendations'])} shards.[/green]") - - @main.command() -@click.argument('schema_table') -@click.argument('shard_id', type=int) -@click.argument('from_node') -@click.argument('to_node') -@click.option('--max-disk-usage', default=90.0, help='Maximum disk usage percentage for target node (default: 90)') - +@click.argument("schema_table") +@click.argument("shard_id", type=int) +@click.argument("from_node") +@click.argument("to_node") +@click.option("--max-disk-usage", default=90.0, help="Maximum disk usage percentage for target node (default: 90)") @click.pass_context def validate_move(ctx, schema_table: str, shard_id: int, from_node: str, to_node: str, max_disk_usage: float): """Validate a specific shard move before execution @@ -796,17 +706,17 @@ def validate_move(ctx, schema_table: str, shard_id: int, from_node: str, to_node Example: xmover validate-move CUROV.maddoxxFormfactor 4 data-hot-1 data-hot-3 """ - 
client = ctx.obj['client'] + client = ctx.obj["client"] analyzer = ShardAnalyzer(client) # Parse schema and table - if '.' not in schema_table: + if "." not in schema_table: console.print("[red]Error: Schema and table must be in format 'schema.table'[/red]") return - schema_name, table_name = schema_table.split('.', 1) + schema_name, table_name = schema_table.split(".", 1) - console.print(Panel.fit(f"[bold blue]Validating Shard Move[/bold blue]")) + console.print(Panel.fit("[bold blue]Validating Shard Move[/bold blue]")) console.print(f"[dim]Move: {schema_name}.{table_name}[{shard_id}] from {from_node} to {to_node}[/dim]") console.print() @@ -830,16 +740,18 @@ def validate_move(ctx, schema_table: str, shard_id: int, from_node: str, to_node # Find the specific shard target_shard = None for shard in analyzer.shards: - if (shard.schema_name == schema_name and - shard.table_name == table_name and - shard.shard_id == shard_id and - shard.node_name == from_node): + if ( + shard.schema_name == schema_name + and shard.table_name == table_name + and shard.shard_id == shard_id + and shard.node_name == from_node + ): target_shard = shard break if not target_shard: console.print(f"[red]βœ— Shard {shard_id} not found on node {from_node}[/red]") - console.print(f"[dim]Use 'xmover find-candidates' to see available shards[/dim]") + console.print("[dim]Use 'xmover find-candidates' to see available shards[/dim]") return # Create a move recommendation for validation @@ -853,7 +765,7 @@ def validate_move(ctx, schema_table: str, shard_id: int, from_node: str, to_node to_zone=to_node_info.zone, shard_type=target_shard.shard_type, size_gb=target_shard.size_gb, - reason="Manual validation" + reason="Manual validation", ) # Display shard details @@ -890,7 +802,9 @@ def validate_move(ctx, schema_table: str, shard_id: int, from_node: str, to_node console.print(f"{recommendation.to_sql()}") console.print() console.print("[dim]# Monitor shard health after execution[/dim]") - 
console.print("[dim]# Check with: SELECT * FROM sys.shards WHERE table_name = '{table_name}' AND id = {shard_id};[/dim]") + console.print( + "[dim]# Check with: SELECT * FROM sys.shards WHERE table_name = '{table_name}' AND id = {shard_id};[/dim]" + ) else: console.print("[red]βœ— VALIDATION FAILED - Move not safe[/red]") console.print(f"[red]βœ— {safety_msg}[/red]") @@ -921,7 +835,7 @@ def validate_move(ctx, schema_table: str, shard_id: int, from_node: str, to_node @main.command() -@click.argument('error_message', required=False) +@click.argument("error_message", required=False) @click.pass_context def explain_error(ctx, error_message: Optional[str]): """Explain CrateDB allocation error messages and provide solutions @@ -951,7 +865,7 @@ def explain_error(ctx, error_message: Optional[str]): console.print("[yellow]No error message provided[/yellow]") return - console.print(f"[dim]Analyzing error message...[/dim]") + console.print("[dim]Analyzing error message...[/dim]") console.print() # Common CrateDB allocation error patterns and solutions @@ -963,9 +877,9 @@ def explain_error(ctx, error_message: Optional[str]): "solutions": [ "Choose a different target node that doesn't have this shard", "Use 'xmover zone-analysis --show-shards' to see current distribution", - "Verify the shard ID and table name are correct" + "Verify the shard ID and table name are correct", ], - "prevention": "Always check current shard locations before moving" + "prevention": "Always check current shard locations before moving", }, { "pattern": "there are too many copies of the shard allocated to nodes with attribute", @@ -974,9 +888,9 @@ def explain_error(ctx, error_message: Optional[str]): "solutions": [ "Move the shard to a different availability zone", "Check zone balance with 'xmover check-balance'", - "Ensure target zone doesn't already have copies of this shard" + "Ensure target zone doesn't already have copies of this shard", ], - "prevention": "Use 'xmover recommend' which respects 
zone constraints" + "prevention": "Use 'xmover recommend' which respects zone constraints", }, { "pattern": "not enough disk space", @@ -985,9 +899,9 @@ def explain_error(ctx, error_message: Optional[str]): "solutions": [ "Free up space on the target node", "Choose a node with more available capacity", - "Check available space with 'xmover analyze'" + "Check available space with 'xmover analyze'", ], - "prevention": "Use '--min-free-space' parameter in recommendations" + "prevention": "Use '--min-free-space' parameter in recommendations", }, { "pattern": "shard recovery limit", @@ -996,21 +910,22 @@ def explain_error(ctx, error_message: Optional[str]): "solutions": [ "Wait for current recoveries to complete", "Check recovery status in CrateDB admin UI", - "Reduce concurrent recoveries in cluster settings" + "Reduce concurrent recoveries in cluster settings", ], - "prevention": "Move shards gradually, monitor recovery progress" + "prevention": "Move shards gradually, monitor recovery progress", }, { "pattern": "allocation is disabled", "title": "Allocation Disabled", "explanation": "Shard allocation is temporarily disabled in the cluster.", "solutions": [ - "Re-enable allocation: PUT /_cluster/settings {\"persistent\":{\"cluster.routing.allocation.enable\":\"all\"}}", + "Re-enable allocation: PUT /_cluster/settings " + '{"persistent":{"cluster.routing.allocation.enable":"all"}}', "Check if allocation was disabled for maintenance", - "Verify cluster health before re-enabling" + "Verify cluster health before re-enabling", ], - "prevention": "Check allocation status before performing moves" - } + "prevention": "Check allocation status before performing moves", + }, ] # Find matching patterns @@ -1031,7 +946,7 @@ def explain_error(ctx, error_message: Optional[str]): console.print() console.print("[green]πŸ’‘ Solutions:[/green]") - for j, solution in enumerate(match['solutions'], 1): + for j, solution in enumerate(match["solutions"], 1): console.print(f" {j}. 
{solution}") console.print() @@ -1041,25 +956,33 @@ def explain_error(ctx, error_message: Optional[str]): console.print() console.print("[bold]General Troubleshooting Steps:[/bold]") console.print("1. Check current shard distribution: [cyan]xmover analyze[/cyan]") - console.print("2. Validate the specific move: [cyan]xmover validate-move schema.table shard_id from_node to_node[/cyan]") + console.print( + "2. Validate the specific move: [cyan]xmover validate-move schema.table shard_id from_node to_node[/cyan]" + ) console.print("3. Check zone conflicts: [cyan]xmover zone-analysis --show-shards[/cyan]") console.print("4. Verify node capacity: [cyan]xmover analyze[/cyan]") console.print("5. Review CrateDB documentation on shard allocation") console.print() console.print("[dim]πŸ’‘ Tip: Use 'xmover validate-move' to check moves before execution[/dim]") - console.print("[dim]πŸ“š For more help: https://crate.io/docs/crate/reference/en/latest/admin/system-information.html[/dim]") + console.print( + "[dim]πŸ“š For more help: https://crate.io/docs/crate/reference/en/latest/admin/system-information.html[/dim]" + ) @main.command() -@click.option('--table', '-t', help='Monitor recovery for specific table only') -@click.option('--node', '-n', help='Monitor recovery on specific node only') -@click.option('--watch', '-w', is_flag=True, help='Continuously monitor (refresh every 10s)') -@click.option('--refresh-interval', default=10, help='Refresh interval for watch mode (seconds)') -@click.option('--recovery-type', type=click.Choice(['PEER', 'DISK', 'all']), default='all', help='Filter by recovery type') -@click.option('--include-transitioning', is_flag=True, help='Include completed recoveries still in transitioning state') +@click.option("--table", "-t", help="Monitor recovery for specific table only") +@click.option("--node", "-n", help="Monitor recovery on specific node only") +@click.option("--watch", "-w", is_flag=True, help="Continuously monitor (refresh every 10s)") 
+@click.option("--refresh-interval", default=10, help="Refresh interval for watch mode (seconds)") +@click.option( + "--recovery-type", type=click.Choice(["PEER", "DISK", "all"]), default="all", help="Filter by recovery type" +) +@click.option("--include-transitioning", is_flag=True, help="Include completed recoveries still in transitioning state") @click.pass_context -def monitor_recovery(ctx, table: str, node: str, watch: bool, refresh_interval: int, recovery_type: str, include_transitioning: bool): +def monitor_recovery( + ctx, table: str, node: str, watch: bool, refresh_interval: int, recovery_type: str, include_transitioning: bool +): """Monitor active shard recovery operations on the cluster This command monitors ongoing shard recoveries by querying sys.allocations @@ -1076,11 +999,10 @@ def monitor_recovery(ctx, table: str, node: str, watch: bool, refresh_interval: xmover monitor-recovery --recovery-type PEER # Only PEER recoveries """ try: - client = ctx.obj['client'] + client = ctx.obj["client"] recovery_monitor = RecoveryMonitor(client) if watch: - console.print(f"πŸ”„ Monitoring shard recoveries (refreshing every {refresh_interval}s)") console.print("Press Ctrl+C to stop") console.print() @@ -1101,11 +1023,12 @@ def monitor_recovery(ctx, table: str, node: str, watch: bool, refresh_interval: table_name=table, node_name=node, recovery_type_filter=recovery_type, - include_transitioning=include_transitioning + include_transitioning=include_transitioning, ) # Display current time from datetime import datetime + current_time = datetime.now().strftime("%H:%M:%S") # Check for any changes @@ -1114,7 +1037,9 @@ def monitor_recovery(ctx, table: str, node: str, watch: bool, refresh_interval: completed_count = 0 for recovery in recoveries: - recovery_key = f"{recovery.schema_name}.{recovery.table_name}.{recovery.shard_id}.{recovery.node_name}" + recovery_key = ( + f"{recovery.schema_name}.{recovery.table_name}.{recovery.shard_id}.{recovery.node_name}" + ) # Create 
complete table name if recovery.schema_name == "doc": @@ -1131,8 +1056,8 @@ def monitor_recovery(ctx, table: str, node: str, watch: bool, refresh_interval: # Check for changes since last update if recovery_key in previous_recoveries: prev = previous_recoveries[recovery_key] - if prev['progress'] != recovery.overall_progress: - diff = recovery.overall_progress - prev['progress'] + if prev["progress"] != recovery.overall_progress: + diff = recovery.overall_progress - prev["progress"] # Create node route display node_route = "" if recovery.recovery_type == "PEER" and recovery.source_node_name: @@ -1142,12 +1067,20 @@ def monitor_recovery(ctx, table: str, node: str, watch: bool, refresh_interval: # Add translog info translog_info = format_translog_info(recovery) - + if diff > 0: - changes.append(f"[green]πŸ“ˆ[/green] {table_display} S{recovery.shard_id} {recovery.overall_progress:.1f}% (+{diff:.1f}%) {recovery.size_gb:.1f}GB{translog_info}{node_route}") + changes.append( + f"[green]πŸ“ˆ[/green] {table_display} S{recovery.shard_id} " + f"{recovery.overall_progress:.1f}% (+{diff:.1f}%) " + f"{recovery.size_gb:.1f}GB{translog_info}{node_route}" + ) else: - changes.append(f"[yellow]πŸ“‰[/yellow] {table_display} S{recovery.shard_id} {recovery.overall_progress:.1f}% ({diff:.1f}%) {recovery.size_gb:.1f}GB{translog_info}{node_route}") - elif prev['stage'] != recovery.stage: + changes.append( + f"[yellow]πŸ“‰[/yellow] {table_display} S{recovery.shard_id} " + f"{recovery.overall_progress:.1f}% ({diff:.1f}%) " + f"{recovery.size_gb:.1f}GB{translog_info}{node_route}" + ) + elif prev["stage"] != recovery.stage: # Create node route display node_route = "" if recovery.recovery_type == "PEER" and recovery.source_node_name: @@ -1157,11 +1090,19 @@ def monitor_recovery(ctx, table: str, node: str, watch: bool, refresh_interval: # Add translog info translog_info = format_translog_info(recovery) - - changes.append(f"[blue]πŸ”„[/blue] {table_display} S{recovery.shard_id} 
{prev['stage']}β†’{recovery.stage} {recovery.size_gb:.1f}GB{translog_info}{node_route}") + + changes.append( + f"[blue]πŸ”„[/blue] {table_display} S{recovery.shard_id} " + f"{prev['stage']}β†’{recovery.stage} " + f"{recovery.size_gb:.1f}GB{translog_info}{node_route}" + ) else: # New recovery - show based on include_transitioning flag or first run - if first_run or include_transitioning or (recovery.overall_progress < 100.0 or recovery.stage != "DONE"): + if ( + first_run + or include_transitioning + or (recovery.overall_progress < 100.0 or recovery.stage != "DONE") + ): # Create node route display node_route = "" if recovery.recovery_type == "PEER" and recovery.source_node_name: @@ -1172,13 +1113,17 @@ def monitor_recovery(ctx, table: str, node: str, watch: bool, refresh_interval: status_icon = "[cyan]πŸ†•[/cyan]" if not first_run else "[blue]πŸ“‹[/blue]" # Add translog info translog_info = format_translog_info(recovery) - - changes.append(f"{status_icon} {table_display} S{recovery.shard_id} {recovery.stage} {recovery.overall_progress:.1f}% {recovery.size_gb:.1f}GB{translog_info}{node_route}") + + changes.append( + f"{status_icon} {table_display} S{recovery.shard_id} " + f"{recovery.stage} {recovery.overall_progress:.1f}% " + f"{recovery.size_gb:.1f}GB{translog_info}{node_route}" + ) # Store current state for next comparison previous_recoveries[recovery_key] = { - 'progress': recovery.overall_progress, - 'stage': recovery.stage + "progress": recovery.overall_progress, + "stage": recovery.stage, } # Always show a status line @@ -1205,7 +1150,7 @@ def monitor_recovery(ctx, table: str, node: str, watch: bool, refresh_interval: elif active_count > 0: console.print(f"{current_time} | {status} (no changes)") - previous_timestamp = current_time + previous_timestamp = current_time # noqa: F841 first_run = False time.sleep(refresh_interval) @@ -1217,7 +1162,7 @@ def monitor_recovery(ctx, table: str, node: str, watch: bool, refresh_interval: table_name=table, node_name=node, 
recovery_type_filter=recovery_type, - include_transitioning=include_transitioning + include_transitioning=include_transitioning, ) if final_recoveries: @@ -1233,10 +1178,13 @@ def monitor_recovery(ctx, table: str, node: str, watch: bool, refresh_interval: console.print(f" Total size: {summary['total_size_gb']:.1f} GB") console.print(f" Average progress: {summary['avg_progress']:.1f}%") - if summary['by_type']: - console.print(f" By recovery type:") - for rec_type, stats in summary['by_type'].items(): - console.print(f" {rec_type}: {stats['count']} recoveries, {stats['avg_progress']:.1f}% avg progress") + if summary["by_type"]: + console.print(" By recovery type:") + for rec_type, stats in summary["by_type"].items(): + console.print( + f" {rec_type}: {stats['count']} recoveries, " + f"{stats['avg_progress']:.1f}% avg progress" + ) else: console.print("\n[green]βœ… No active recoveries at exit[/green]") @@ -1248,7 +1196,7 @@ def monitor_recovery(ctx, table: str, node: str, watch: bool, refresh_interval: table_name=table, node_name=node, recovery_type_filter=recovery_type, - include_transitioning=include_transitioning + include_transitioning=include_transitioning, ) display_output = recovery_monitor.format_recovery_display(recoveries) @@ -1259,62 +1207,65 @@ def monitor_recovery(ctx, table: str, node: str, watch: bool, refresh_interval: console.print("\n[green]βœ… No recoveries found (active or transitioning)[/green]") else: console.print("\n[green]βœ… No active recoveries found[/green]") - console.print("[dim]πŸ’‘ Use --include-transitioning to see completed recoveries still transitioning[/dim]") + console.print( + "[dim]πŸ’‘ Use --include-transitioning to see completed recoveries still transitioning[/dim]" + ) else: # Show summary summary = recovery_monitor.get_recovery_summary(recoveries) - console.print(f"\nπŸ“Š [bold]Recovery Summary:[/bold]") + console.print("\nπŸ“Š [bold]Recovery Summary:[/bold]") console.print(f" Total recoveries: 
{summary['total_recoveries']}") console.print(f" Total size: {summary['total_size_gb']:.1f} GB") console.print(f" Average progress: {summary['avg_progress']:.1f}%") # Show breakdown by type - if summary['by_type']: - console.print(f"\n By recovery type:") - for rec_type, stats in summary['by_type'].items(): - console.print(f" {rec_type}: {stats['count']} recoveries, {stats['avg_progress']:.1f}% avg progress") + if summary["by_type"]: + console.print("\n By recovery type:") + for rec_type, stats in summary["by_type"].items(): + console.print( + f" {rec_type}: {stats['count']} recoveries, {stats['avg_progress']:.1f}% avg progress" + ) - console.print(f"\n[dim]πŸ’‘ Use --watch flag for continuous monitoring[/dim]") + console.print("\n[dim]πŸ’‘ Use --watch flag for continuous monitoring[/dim]") except Exception as e: console.print(f"[red]❌ Error monitoring recoveries: {e}[/red]") - if ctx.obj.get('debug'): + if ctx.obj.get("debug"): raise def _wait_for_recovery_capacity(client, max_concurrent_recoveries: int = 5): """Wait until active recovery count is below threshold""" - from xmover.analyzer import RecoveryMonitor from time import sleep - + + from .analyzer import RecoveryMonitor + recovery_monitor = RecoveryMonitor(client) wait_time = 0 - + while True: # Check active recoveries (including transitioning) recoveries = recovery_monitor.get_cluster_recovery_status(include_transitioning=True) active_count = len([r for r in recoveries if r.overall_progress < 100.0 or r.stage != "DONE"]) - + status = f"{active_count}/{max_concurrent_recoveries}" if active_count < max_concurrent_recoveries: if wait_time > 0: - console.print(f" [green]βœ“ Recovery capacity available ({active_count}/{max_concurrent_recoveries} active)[/green]") + console.print(f" [green]βœ“ Recovery capacity available ({status} active)[/green]") break - else: - if wait_time == 0: - console.print(f" [yellow]⏳ Waiting for recovery capacity... 
({active_count}/{max_concurrent_recoveries} active)[/yellow]") - elif wait_time % 30 == 0: # Update every 30 seconds - console.print(f" [yellow]⏳ Still waiting... ({active_count}/{max_concurrent_recoveries} active)[/yellow]") - - sleep(10) # Check every 10 seconds - wait_time += 10 + if wait_time == 0: + console.print(f" [yellow]⏳ Waiting for recovery capacity... ({status} active)[/yellow]") + elif wait_time % 30 == 0: # Update every 30 seconds + console.print(f" [yellow]⏳ Still waiting... ({status} active)[/yellow]") + + sleep(10) # Check every 10 seconds + wait_time += 10 def _execute_recommendations_safely(client, recommendations, validate: bool): """Execute recommendations with extensive safety measures""" - from time import sleep - import sys - from xmover.analyzer import ShardAnalyzer - + + from .analyzer import ShardAnalyzer + # Filter to only safe recommendations safe_recommendations = [] if validate: @@ -1325,20 +1276,20 @@ def _execute_recommendations_safely(client, recommendations, validate: bool): safe_recommendations.append(rec) else: safe_recommendations = recommendations - + if not safe_recommendations: console.print("[yellow]⚠ No safe recommendations to execute[/yellow]") return - - console.print(f"\n[bold red]🚨 AUTO-EXECUTION MODE 🚨[/bold red]") + + console.print("\n[bold red]🚨 AUTO-EXECUTION MODE 🚨[/bold red]") console.print(f"About to execute {len(safe_recommendations)} shard moves automatically:") console.print() - + # Show what will be executed for i, rec in enumerate(safe_recommendations, 1): table_display = f"{rec.schema_name}.{rec.table_name}" if rec.schema_name != "doc" else rec.table_name console.print(f" {i}. 
{table_display} S{rec.shard_id} ({rec.size_gb:.1f}GB) {rec.from_node} β†’ {rec.to_node}") - + console.print() console.print("[bold yellow]⚠ SAFETY WARNINGS:[/bold yellow]") console.print(" β€’ These commands will immediately start shard movements") @@ -1346,74 +1297,80 @@ def _execute_recommendations_safely(client, recommendations, validate: bool): console.print(" β€’ Recovery time depends on shard size and network speed") console.print(" β€’ You should monitor progress with: xmover monitor-recovery --watch") console.print() - + # Double confirmation try: response1 = input("Type 'EXECUTE' to proceed with automatic execution: ").strip() if response1 != "EXECUTE": console.print("[yellow]❌ Execution cancelled[/yellow]") return - + response2 = input(f"Confirm: Execute {len(safe_recommendations)} shard moves? (yes/no): ").strip().lower() - if response2 not in ['yes', 'y']: + if response2 not in ["yes", "y"]: console.print("[yellow]❌ Execution cancelled[/yellow]") return - + except KeyboardInterrupt: console.print("\n[yellow]❌ Execution cancelled by user[/yellow]") return - + console.print(f"\nπŸš€ [bold green]Executing {len(safe_recommendations)} shard moves...[/bold green]") console.print() - + successful_moves = 0 failed_moves = 0 - + for i, rec in enumerate(safe_recommendations, 1): table_display = f"{rec.schema_name}.{rec.table_name}" if rec.schema_name != "doc" else rec.table_name sql_command = rec.to_sql() - - console.print(f"[{i}/{len(safe_recommendations)}] Executing: {table_display} S{rec.shard_id} ({rec.size_gb:.1f}GB)") + + console.print( + f"[{i}/{len(safe_recommendations)}] Executing: {table_display} S{rec.shard_id} ({rec.size_gb:.1f}GB)" + ) console.print(f" {rec.from_node} β†’ {rec.to_node}") - + try: # Execute the SQL command result = client.execute_query(sql_command) - - if result.get('rowcount', 0) >= 0: # Success indicator for ALTER statements - console.print(f" [green]βœ… SUCCESS[/green] - Move initiated") + + if result.get("rowcount", 0) >= 0: # 
Success indicator for ALTER statements + console.print(" [green]βœ… SUCCESS[/green] - Move initiated") successful_moves += 1 - + # Smart delay: check active recoveries before next move if i < len(safe_recommendations): _wait_for_recovery_capacity(client, max_concurrent_recoveries=5) else: console.print(f" [red]❌ FAILED[/red] - Unexpected result: {result}") failed_moves += 1 - + except Exception as e: console.print(f" [red]❌ FAILED[/red] - Error: {e}") failed_moves += 1 - + # Ask whether to continue after a failure if i < len(safe_recommendations): try: - continue_response = input(f" Continue with remaining {len(safe_recommendations) - i} moves? (yes/no): ").strip().lower() - if continue_response not in ['yes', 'y']: + continue_response = ( + input(f" Continue with remaining {len(safe_recommendations) - i} moves? (yes/no): ") + .strip() + .lower() + ) + if continue_response not in ["yes", "y"]: console.print("[yellow]⏹ Execution stopped by user[/yellow]") break except KeyboardInterrupt: console.print("\n[yellow]⏹ Execution stopped by user[/yellow]") break - + console.print() - + # Final summary - console.print(f"πŸ“Š [bold]Execution Summary:[/bold]") + console.print("πŸ“Š [bold]Execution Summary:[/bold]") console.print(f" Successful moves: [green]{successful_moves}[/green]") console.print(f" Failed moves: [red]{failed_moves}[/red]") console.print(f" Total attempted: {successful_moves + failed_moves}") - + if successful_moves > 0: console.print() console.print("[green]βœ… Shard moves initiated successfully![/green]") @@ -1421,11 +1378,11 @@ def _execute_recommendations_safely(client, recommendations, validate: bool): console.print("[dim] xmover monitor-recovery --watch[/dim]") console.print("[dim]πŸ’‘ Check cluster status with:[/dim]") console.print("[dim] xmover analyze[/dim]") - + if failed_moves > 0: console.print() console.print(f"[yellow]⚠ {failed_moves} moves failed - check cluster status and retry if needed[/yellow]") -if __name__ == '__main__': +if __name__ 
== "__main__": main() diff --git a/cratedb_toolkit/admin/xmover/database.py b/cratedb_toolkit/admin/xmover/database.py index ec3a0098..a6e2d35f 100644 --- a/cratedb_toolkit/admin/xmover/database.py +++ b/cratedb_toolkit/admin/xmover/database.py @@ -2,17 +2,21 @@ Database connection and query functions for CrateDB """ +import logging import os -import json -import requests -from typing import Dict, List, Optional, Any from dataclasses import dataclass +from typing import Any, Dict, List, Optional + +import requests from dotenv import load_dotenv +logger = logging.getLogger(__name__) + @dataclass class NodeInfo: """Information about a CrateDB node""" + id: str name: str zone: str @@ -21,15 +25,15 @@ class NodeInfo: fs_total: int fs_used: int fs_available: int - + @property def heap_usage_percent(self) -> float: return (self.heap_used / self.heap_max) * 100 if self.heap_max > 0 else 0 - + @property def disk_usage_percent(self) -> float: return (self.fs_used / self.fs_total) * 100 if self.fs_total > 0 else 0 - + @property def available_space_gb(self) -> float: return self.fs_available / (1024**3) @@ -38,6 +42,7 @@ def available_space_gb(self) -> float: @dataclass class ShardInfo: """Information about a shard""" + table_name: str schema_name: str shard_id: int @@ -50,7 +55,7 @@ class ShardInfo: num_docs: int state: str routing_state: str - + @property def shard_type(self) -> str: return "PRIMARY" if self.is_primary else "REPLICA" @@ -59,6 +64,7 @@ def shard_type(self) -> str: @dataclass class RecoveryInfo: """Information about an active shard recovery""" + schema_name: str table_name: str shard_id: int @@ -75,31 +81,31 @@ class RecoveryInfo: size_bytes: int source_node_name: Optional[str] = None # Source node for PEER recoveries translog_size_bytes: int = 0 # Translog size in bytes - + @property def overall_progress(self) -> float: """Calculate overall progress percentage""" return max(self.files_percent, self.bytes_percent) - + @property def size_gb(self) -> float: 
"""Size in GB""" return self.size_bytes / (1024**3) - + @property def shard_type(self) -> str: return "PRIMARY" if self.is_primary else "REPLICA" - + @property def total_time_seconds(self) -> float: """Total time in seconds""" return self.total_time_ms / 1000.0 - + @property def translog_size_gb(self) -> float: """Translog size in GB""" return self.translog_size_bytes / (1024**3) - + @property def translog_percentage(self) -> float: """Translog size as percentage of shard size""" @@ -108,52 +114,46 @@ def translog_percentage(self) -> float: class CrateDBClient: """Client for connecting to CrateDB and executing queries""" - + def __init__(self, connection_string: Optional[str] = None): load_dotenv() - - self.connection_string = connection_string or os.getenv('CRATE_CONNECTION_STRING') + + self.connection_string = connection_string or os.getenv("CRATE_CONNECTION_STRING") if not self.connection_string: raise ValueError("CRATE_CONNECTION_STRING not found in environment or provided") - - self.username = os.getenv('CRATE_USERNAME') - self.password = os.getenv('CRATE_PASSWORD') - self.ssl_verify = os.getenv('CRATE_SSL_VERIFY', 'true').lower() == 'true' - + + self.username = os.getenv("CRATE_USERNAME") + self.password = os.getenv("CRATE_PASSWORD") + self.ssl_verify = os.getenv("CRATE_SSL_VERIFY", "true").lower() == "true" + # Ensure connection string ends with _sql endpoint - if not self.connection_string.endswith('/_sql'): - self.connection_string = self.connection_string.rstrip('/') + '/_sql' - + if not self.connection_string.endswith("/_sql"): + self.connection_string = self.connection_string.rstrip("/") + "/_sql" + def execute_query(self, query: str, parameters: Optional[List] = None) -> Dict[str, Any]: """Execute a SQL query against CrateDB""" - payload = { - 'stmt': query - } - + payload = {"stmt": query} + if parameters: - payload['args'] = parameters - + payload["args"] = parameters + auth = None if self.username and self.password: auth = (self.username, 
self.password) - + try: response = requests.post( - self.connection_string, - json=payload, - auth=auth, - verify=self.ssl_verify, - timeout=30 + self.connection_string, json=payload, auth=auth, verify=self.ssl_verify, timeout=30 ) response.raise_for_status() return response.json() except requests.exceptions.RequestException as e: - raise Exception(f"Failed to execute query: {e}") - + raise Exception(f"Failed to execute query: {e}") from e + def get_nodes_info(self) -> List[NodeInfo]: """Get information about all nodes in the cluster""" query = """ - SELECT + SELECT id, name, attributes['zone'] as zone, @@ -166,30 +166,35 @@ def get_nodes_info(self) -> List[NodeInfo]: WHERE name IS NOT NULL ORDER BY name """ - + result = self.execute_query(query) nodes = [] - - for row in result.get('rows', []): - nodes.append(NodeInfo( - id=row[0], - name=row[1], - zone=row[2] or 'unknown', - heap_used=row[3] or 0, - heap_max=row[4] or 0, - fs_total=row[5] or 0, - fs_used=row[6] or 0, - fs_available=row[7] or 0 - )) - + + for row in result.get("rows", []): + nodes.append( + NodeInfo( + id=row[0], + name=row[1], + zone=row[2] or "unknown", + heap_used=row[3] or 0, + heap_max=row[4] or 0, + fs_total=row[5] or 0, + fs_used=row[6] or 0, + fs_available=row[7] or 0, + ) + ) + return nodes - - def get_shards_info(self, table_name: Optional[str] = None, - min_size_gb: Optional[float] = None, - max_size_gb: Optional[float] = None, - for_analysis: bool = False) -> List[ShardInfo]: + + def get_shards_info( + self, + table_name: Optional[str] = None, + min_size_gb: Optional[float] = None, + max_size_gb: Optional[float] = None, + for_analysis: bool = False, + ) -> List[ShardInfo]: """Get information about shards, optionally filtered by table and size - + Args: table_name: Filter by specific table min_size_gb: Minimum shard size in GB @@ -197,34 +202,31 @@ def get_shards_info(self, table_name: Optional[str] = None, for_analysis: If True, includes all shards regardless of state (for cluster 
analysis) If False, only includes healthy shards suitable for operations """ - + where_conditions = [] if not for_analysis: # For operations, only include healthy shards - where_conditions.extend([ - "s.routing_state = 'STARTED'", - "s.recovery['files']['percent'] = 100.0" - ]) + where_conditions.extend(["s.routing_state = 'STARTED'", "s.recovery['files']['percent'] = 100.0"]) parameters = [] - + if table_name: where_conditions.append("s.table_name = ?") parameters.append(table_name) - + if min_size_gb is not None: where_conditions.append("s.size >= ?") parameters.append(int(min_size_gb * 1024**3)) # Convert GB to bytes - + if max_size_gb is not None: where_conditions.append("s.size <= ?") parameters.append(int(max_size_gb * 1024**3)) # Convert GB to bytes - + where_clause = "" if where_conditions: where_clause = f"WHERE {' AND '.join(where_conditions)}" - + query = f""" - SELECT + SELECT s.table_name, s.schema_name, s.id as shard_id, @@ -241,32 +243,34 @@ def get_shards_info(self, table_name: Optional[str] = None, JOIN sys.nodes n ON s.node['id'] = n.id {where_clause} ORDER BY s.table_name, s.schema_name, s.id, s."primary" DESC - """ - + """ # noqa: S608 + result = self.execute_query(query, parameters) shards = [] - - for row in result.get('rows', []): - shards.append(ShardInfo( - table_name=row[0], - schema_name=row[1], - shard_id=row[2], - node_id=row[3], - node_name=row[4], - zone=row[5] or 'unknown', - is_primary=row[6], - size_bytes=row[7] or 0, - size_gb=float(row[8] or 0), - num_docs=row[9] or 0, - state=row[10], - routing_state=row[11] - )) - + + for row in result.get("rows", []): + shards.append( + ShardInfo( + table_name=row[0], + schema_name=row[1], + shard_id=row[2], + node_id=row[3], + node_name=row[4], + zone=row[5] or "unknown", + is_primary=row[6], + size_bytes=row[7] or 0, + size_gb=float(row[8] or 0), + num_docs=row[9] or 0, + state=row[10], + routing_state=row[11], + ) + ) + return shards - + def get_shard_distribution_summary(self, 
for_analysis: bool = True) -> Dict[str, Any]: """Get a summary of shard distribution across nodes and zones - + Args: for_analysis: If True, includes all shards for complete cluster analysis If False, only includes operational shards @@ -276,9 +280,9 @@ def get_shard_distribution_summary(self, for_analysis: bool = True) -> Dict[str, where_clause = """ WHERE s.routing_state = 'STARTED' AND s.recovery['files']['percent'] = 100.0""" - + query = f""" - SELECT + SELECT n.attributes['zone'] as zone, s.node['name'] as node_name, CASE WHEN s."primary" = true THEN 'PRIMARY' ELSE 'REPLICA' END as shard_type, @@ -289,98 +293,90 @@ def get_shard_distribution_summary(self, for_analysis: bool = True) -> Dict[str, JOIN sys.nodes n ON s.node['id'] = n.id{where_clause} GROUP BY n.attributes['zone'], s.node['name'], s."primary" ORDER BY zone, node_name, shard_type DESC - """ - + """ # noqa: S608 + result = self.execute_query(query) - - summary = { - 'by_zone': {}, - 'by_node': {}, - 'totals': {'primary': 0, 'replica': 0, 'total_size_gb': 0} - } - - for row in result.get('rows', []): - zone = row[0] or 'unknown' + + summary = {"by_zone": {}, "by_node": {}, "totals": {"primary": 0, "replica": 0, "total_size_gb": 0}} + + for row in result.get("rows", []): + zone = row[0] or "unknown" node_name = row[1] shard_type = row[2] shard_count = row[3] total_size_gb = float(row[4] or 0) - avg_size_gb = float(row[5] or 0) - + avg_size_gb = float(row[5] or 0) # noqa: F841 + # By zone summary - if zone not in summary['by_zone']: - summary['by_zone'][zone] = {'PRIMARY': 0, 'REPLICA': 0, 'total_size_gb': 0} - summary['by_zone'][zone][shard_type] += shard_count - summary['by_zone'][zone]['total_size_gb'] += total_size_gb - + if zone not in summary["by_zone"]: + summary["by_zone"][zone] = {"PRIMARY": 0, "REPLICA": 0, "total_size_gb": 0} + summary["by_zone"][zone][shard_type] += shard_count + summary["by_zone"][zone]["total_size_gb"] += total_size_gb + # By node summary - if node_name not in 
summary['by_node']: - summary['by_node'][node_name] = { - 'zone': zone, - 'PRIMARY': 0, - 'REPLICA': 0, - 'total_size_gb': 0 - } - summary['by_node'][node_name][shard_type] += shard_count - summary['by_node'][node_name]['total_size_gb'] += total_size_gb - + if node_name not in summary["by_node"]: + summary["by_node"][node_name] = {"zone": zone, "PRIMARY": 0, "REPLICA": 0, "total_size_gb": 0} + summary["by_node"][node_name][shard_type] += shard_count + summary["by_node"][node_name]["total_size_gb"] += total_size_gb + # Overall totals - if shard_type == 'PRIMARY': - summary['totals']['primary'] += shard_count + if shard_type == "PRIMARY": + summary["totals"]["primary"] += shard_count else: - summary['totals']['replica'] += shard_count - summary['totals']['total_size_gb'] += total_size_gb - + summary["totals"]["replica"] += shard_count + summary["totals"]["total_size_gb"] += total_size_gb + return summary - + def test_connection(self) -> bool: """Test the connection to CrateDB""" try: result = self.execute_query("SELECT 1") - return result.get('rowcount', 0) >= 0 + return result.get("rowcount", 0) >= 0 except Exception: return False - + def get_cluster_watermarks(self) -> Dict[str, Any]: """Get cluster disk watermark settings""" query = """ SELECT settings['cluster']['routing']['allocation']['disk']['watermark'] FROM sys.cluster """ - + try: result = self.execute_query(query) - if result.get('rows'): - watermarks = result['rows'][0][0] or {} + if result.get("rows"): + watermarks = result["rows"][0][0] or {} return { - 'low': watermarks.get('low', 'Not set'), - 'high': watermarks.get('high', 'Not set'), - 'flood_stage': watermarks.get('flood_stage', 'Not set'), - 'enable_for_single_data_node': watermarks.get('enable_for_single_data_node', 'Not set') + "low": watermarks.get("low", "Not set"), + "high": watermarks.get("high", "Not set"), + "flood_stage": watermarks.get("flood_stage", "Not set"), + "enable_for_single_data_node": 
watermarks.get("enable_for_single_data_node", "Not set"), } return {} except Exception: return {} - - def get_active_recoveries(self, table_name: Optional[str] = None, - node_name: Optional[str] = None) -> List[Dict[str, Any]]: + + def get_active_recoveries( + self, table_name: Optional[str] = None, node_name: Optional[str] = None + ) -> List[Dict[str, Any]]: """Get shards that are currently in recovery states from sys.allocations""" - + where_conditions = ["current_state != 'STARTED'"] parameters = [] - + if table_name: where_conditions.append("table_name = ?") parameters.append(table_name) - + if node_name: where_conditions.append("node_id = (SELECT id FROM sys.nodes WHERE name = ?)") parameters.append(node_name) - + where_clause = f"WHERE {' AND '.join(where_conditions)}" - + query = f""" - SELECT + SELECT table_name, shard_id, current_state, @@ -389,29 +385,31 @@ def get_active_recoveries(self, table_name: Optional[str] = None, FROM sys.allocations {where_clause} ORDER BY current_state, table_name, shard_id - """ - + """ # noqa: S608 + result = self.execute_query(query, parameters) - + allocations = [] - for row in result.get('rows', []): - allocations.append({ - 'schema_name': 'doc', # Default schema since not available in sys.allocations - 'table_name': row[0], - 'shard_id': row[1], - 'current_state': row[2], - 'explanation': row[3], - 'node_id': row[4] - }) - + for row in result.get("rows", []): + allocations.append( + { + "schema_name": "doc", # Default schema since not available in sys.allocations + "table_name": row[0], + "shard_id": row[1], + "current_state": row[2], + "explanation": row[3], + "node_id": row[4], + } + ) + return allocations - + def get_recovery_details(self, schema_name: str, table_name: str, shard_id: int) -> Optional[Dict[str, Any]]: """Get detailed recovery information for a specific shard from sys.shards""" - + # Query for shards that are actively recovering (not completed) query = """ - SELECT + SELECT s.table_name, s.schema_name, 
s.id as shard_id, @@ -429,117 +427,118 @@ def get_recovery_details(self, schema_name: str, table_name: str, shard_id: int) ORDER BY s.schema_name LIMIT 1 """ - + result = self.execute_query(query, [table_name, shard_id]) - - if not result.get('rows'): + + if not result.get("rows"): return None - - row = result['rows'][0] + + row = result["rows"][0] return { - 'table_name': row[0], - 'schema_name': row[1], - 'shard_id': row[2], - 'node_name': row[3], - 'node_id': row[4], - 'routing_state': row[5], - 'state': row[6], - 'recovery': row[7], - 'size': row[8], - 'primary': row[9], - 'translog_size': row[10] or 0 + "table_name": row[0], + "schema_name": row[1], + "shard_id": row[2], + "node_name": row[3], + "node_id": row[4], + "routing_state": row[5], + "state": row[6], + "recovery": row[7], + "size": row[8], + "primary": row[9], + "translog_size": row[10] or 0, } - - def get_all_recovering_shards(self, table_name: Optional[str] = None, - node_name: Optional[str] = None, - include_transitioning: bool = False) -> List[RecoveryInfo]: + + def get_all_recovering_shards( + self, table_name: Optional[str] = None, node_name: Optional[str] = None, include_transitioning: bool = False + ) -> List[RecoveryInfo]: """Get comprehensive recovery information by combining sys.allocations and sys.shards data""" - + # Step 1: Get active recoveries from allocations (efficient) active_allocations = self.get_active_recoveries(table_name, node_name) - + if not active_allocations: return [] - + recoveries = [] - + # Step 2: Get detailed recovery info for each active recovery for allocation in active_allocations: recovery_detail = self.get_recovery_details( - allocation['schema_name'], # This will be 'doc' default - allocation['table_name'], - allocation['shard_id'] + allocation["schema_name"], # This will be 'doc' default + allocation["table_name"], + allocation["shard_id"], ) - - if recovery_detail and recovery_detail.get('recovery'): + + if recovery_detail and recovery_detail.get("recovery"): 
# Update allocation with actual schema from sys.shards - allocation['schema_name'] = recovery_detail['schema_name'] + allocation["schema_name"] = recovery_detail["schema_name"] recovery_info = self._parse_recovery_info(allocation, recovery_detail) - + # Filter out completed recoveries unless include_transitioning is True if include_transitioning or not self._is_recovery_completed(recovery_info): recoveries.append(recovery_info) - + # Sort by recovery type, then by progress return sorted(recoveries, key=lambda r: (r.recovery_type, -r.overall_progress)) - - def _parse_recovery_info(self, allocation: Dict[str, Any], - shard_detail: Dict[str, Any]) -> RecoveryInfo: + + def _parse_recovery_info(self, allocation: Dict[str, Any], shard_detail: Dict[str, Any]) -> RecoveryInfo: """Parse recovery information from allocation and shard data""" - - recovery = shard_detail.get('recovery', {}) - + + recovery = shard_detail.get("recovery", {}) + # Extract recovery progress information - files_info = recovery.get('files', {}) - size_info = recovery.get('size', {}) - - files_percent = float(files_info.get('percent', 0.0)) - bytes_percent = float(size_info.get('percent', 0.0)) - + files_info = recovery.get("files", {}) + size_info = recovery.get("size", {}) + + files_percent = float(files_info.get("percent", 0.0)) + bytes_percent = float(size_info.get("percent", 0.0)) + # Calculate actual progress based on recovered vs used - files_recovered = files_info.get('recovered', 0) - files_used = files_info.get('used', 1) # Avoid division by zero - size_recovered = size_info.get('recovered', 0) - size_used = size_info.get('used', 1) # Avoid division by zero - + files_recovered = files_info.get("recovered", 0) + files_used = files_info.get("used", 1) # Avoid division by zero + size_recovered = size_info.get("recovered", 0) + size_used = size_info.get("used", 1) # Avoid division by zero + # Use actual progress if different from reported percent actual_files_percent = (files_recovered / 
files_used * 100.0) if files_used > 0 else files_percent actual_size_percent = (size_recovered / size_used * 100.0) if size_used > 0 else bytes_percent - + # Use the more conservative (lower) progress value final_files_percent = min(files_percent, actual_files_percent) final_bytes_percent = min(bytes_percent, actual_size_percent) - + # Get source node for PEER recoveries source_node = None - if recovery.get('type') == 'PEER': + if recovery.get("type") == "PEER": source_node = self._find_source_node_for_recovery( - shard_detail['schema_name'], - shard_detail['table_name'], - shard_detail['shard_id'], - shard_detail['node_id'] + shard_detail["schema_name"], + shard_detail["table_name"], + shard_detail["shard_id"], + shard_detail["node_id"], ) return RecoveryInfo( - schema_name=shard_detail['schema_name'], - table_name=shard_detail['table_name'], - shard_id=shard_detail['shard_id'], - node_name=shard_detail['node_name'], - node_id=shard_detail['node_id'], - recovery_type=recovery.get('type', 'UNKNOWN'), - stage=recovery.get('stage', 'UNKNOWN'), + schema_name=shard_detail["schema_name"], + table_name=shard_detail["table_name"], + shard_id=shard_detail["shard_id"], + node_name=shard_detail["node_name"], + node_id=shard_detail["node_id"], + recovery_type=recovery.get("type", "UNKNOWN"), + stage=recovery.get("stage", "UNKNOWN"), files_percent=final_files_percent, bytes_percent=final_bytes_percent, - total_time_ms=recovery.get('total_time', 0), - routing_state=shard_detail['routing_state'], - current_state=allocation['current_state'], - is_primary=shard_detail['primary'], - size_bytes=shard_detail.get('size', 0), + total_time_ms=recovery.get("total_time", 0), + routing_state=shard_detail["routing_state"], + current_state=allocation["current_state"], + is_primary=shard_detail["primary"], + size_bytes=shard_detail.get("size", 0), source_node_name=source_node, - translog_size_bytes=shard_detail.get('translog_size', 0) + translog_size_bytes=shard_detail.get("translog_size", 
0), ) - - def _find_source_node_for_recovery(self, schema_name: str, table_name: str, shard_id: int, target_node_id: str) -> Optional[str]: + + def _find_source_node_for_recovery( + self, schema_name: str, table_name: str, shard_id: int, target_node_id: str + ) -> Optional[str]: """Find source node for PEER recovery by looking for primary or other replicas""" try: # First try to find the primary shard of the same table/shard @@ -551,12 +550,12 @@ def _find_source_node_for_recovery(self, schema_name: str, table_name: str, shar AND "primary" = true LIMIT 1 """ - + result = self.execute_query(query, [schema_name, table_name, shard_id, target_node_id]) - - if result.get('rows'): - return result['rows'][0][0] - + + if result.get("rows"): + return result["rows"][0][0] + # If no primary found, look for any started replica query_replica = """ SELECT node['name'] as node_name @@ -565,20 +564,22 @@ def _find_source_node_for_recovery(self, schema_name: str, table_name: str, shar AND state = 'STARTED' AND node['id'] != ? 
LIMIT 1 """ - + result = self.execute_query(query_replica, [schema_name, table_name, shard_id, target_node_id]) - - if result.get('rows'): - return result['rows'][0][0] - + + if result.get("rows"): + return result["rows"][0][0] + except Exception: # If query fails, just return None - pass - + logger.warning("Failed to find source node for recovery", exc_info=True) + return None def _is_recovery_completed(self, recovery_info: RecoveryInfo) -> bool: """Check if a recovery is completed but still transitioning""" - return (recovery_info.stage == 'DONE' and - recovery_info.files_percent >= 100.0 and - recovery_info.bytes_percent >= 100.0) \ No newline at end of file + return ( + recovery_info.stage == "DONE" + and recovery_info.files_percent >= 100.0 + and recovery_info.bytes_percent >= 100.0 + ) diff --git a/cratedb_toolkit/cli.py b/cratedb_toolkit/cli.py index 2410d5ec..80e0b395 100644 --- a/cratedb_toolkit/cli.py +++ b/cratedb_toolkit/cli.py @@ -3,8 +3,8 @@ from cratedb_toolkit.util.cli import boot_click -from .admin.xmover.cli import main as admin_xmover_cli from .adapter.rockset.cli import cli as rockset_cli +from .admin.xmover.cli import main as admin_xmover_cli from .cfr.cli import cli as cfr_cli from .cluster.cli import cli as cloud_cli from .cmd.tail.cli import cli as tail_cli diff --git a/pyproject.toml b/pyproject.toml index c12d4c32..bd3fa6be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -324,11 +324,12 @@ lint.extend-ignore = [ "S108", ] -lint.per-file-ignores."cratedb_toolkit/retention/cli.py" = [ "T201" ] # Allow `print` -lint.per-file-ignores."cratedb_toolkit/sqlalchemy/__init__.py" = [ "F401" ] # Allow `moduleΒ΄ imported but unused +lint.per-file-ignores."cratedb_toolkit/admin/xmover/analyzer.py" = [ "T201" ] # Allow `print` +lint.per-file-ignores."cratedb_toolkit/retention/cli.py" = [ "T201" ] # Allow `print` +lint.per-file-ignores."cratedb_toolkit/sqlalchemy/__init__.py" = [ "F401" ] # Allow `moduleΒ΄ imported but unused 
lint.per-file-ignores."doc/conf.py" = [ "A001", "ERA001" ] -lint.per-file-ignores."examples/*" = [ "ERA001", "F401", "T201", "T203" ] # Allow `print` and `pprint` -lint.per-file-ignores."tests/*" = [ "S101" ] # Allow use of `assert`, and `print`. +lint.per-file-ignores."examples/*" = [ "ERA001", "F401", "T201", "T203" ] # Allow `print` and `pprint` +lint.per-file-ignores."tests/*" = [ "S101" ] # Allow use of `assert`, and `print`. lint.per-file-ignores."tests/adapter/test_rockset.py" = [ "E402" ] lint.per-file-ignores."tests/info/test_http.py" = [ "E402" ] From 4efd1edea73e65930ef2075ed1eff1fcb81ab342 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Wed, 20 Aug 2025 11:16:17 +0200 Subject: [PATCH 03/18] Admin/XMover: Type checking --- cratedb_toolkit/admin/xmover/analyzer.py | 30 +++++++++++++++--------- cratedb_toolkit/admin/xmover/cli.py | 12 +++++----- cratedb_toolkit/admin/xmover/database.py | 16 +++++++++---- 3 files changed, 36 insertions(+), 22 deletions(-) diff --git a/cratedb_toolkit/admin/xmover/analyzer.py b/cratedb_toolkit/admin/xmover/analyzer.py index 36d43618..e160d21a 100644 --- a/cratedb_toolkit/admin/xmover/analyzer.py +++ b/cratedb_toolkit/admin/xmover/analyzer.py @@ -2,13 +2,16 @@ Shard analysis and rebalancing logic for CrateDB """ +import logging import math from collections import defaultdict from dataclasses import dataclass -from typing import Any, Dict, List, Optional, Set, Tuple +from typing import Any, Dict, List, Optional, Set, Tuple, Union from .database import CrateDBClient, NodeInfo, RecoveryInfo, ShardInfo +logger = logging.getLogger(__name__) + @dataclass class MoveRecommendation: @@ -70,10 +73,10 @@ def __init__(self, client: CrateDBClient): self.nodes: List[NodeInfo] = [] self.shards: List[ShardInfo] = [] - # Initialize session-based caches for performance - self._zone_conflict_cache = {} - self._node_lookup_cache = {} - self._target_nodes_cache = {} + # Initialize session-based caches for performance. 
+ self._zone_conflict_cache: Dict[Tuple[str, int, str], Union[str, None]] = {} + self._node_lookup_cache: Dict[str, Union[NodeInfo, None]] = {} + self._target_nodes_cache: Dict[Tuple[float, frozenset[Any], float, float], List[NodeInfo]] = {} self._cache_hits = 0 self._cache_misses = 0 @@ -99,8 +102,8 @@ def analyze_distribution(self, table_name: Optional[str] = None) -> Distribution total_size_gb = sum(s.size_gb for s in shards) # Count by zone and node - zone_counts = defaultdict(int) - node_counts = defaultdict(int) + zone_counts: Dict[str, int] = defaultdict(int) + node_counts: Dict[str, int] = defaultdict(int) for shard in shards: zone_counts[shard.zone] += 1 @@ -171,7 +174,7 @@ def check_zone_balance( shards = [s for s in shards if s.table_name == table_name] # Count shards by zone and type - zone_stats = defaultdict(lambda: {"PRIMARY": 0, "REPLICA": 0, "TOTAL": 0}) + zone_stats: Dict[str, Dict] = defaultdict(lambda: {"PRIMARY": 0, "REPLICA": 0, "TOTAL": 0}) for shard in shards: shard_type = shard.shard_type @@ -243,7 +246,7 @@ def generate_rebalancing_recommendations( source_node: If specified, only generate recommendations for shards on this node max_disk_usage_percent: Maximum disk usage percentage for target nodes """ - recommendations = [] + recommendations: List[MoveRecommendation] = [] # Get moveable shards (only healthy ones for actual operations) moveable_shards = self.find_moveable_shards(min_size_gb, max_size_gb, table_name) @@ -287,7 +290,12 @@ def generate_rebalancing_recommendations( total_evaluated = 0 for i, shard in enumerate(processing_shards): + if shard is None: + logger.info(f"Shard not found: {i}") + continue + if len(recommendations) >= max_recommendations: + logger.info(f"Found {len(recommendations)} recommendations for shard: {shard.shard_id}") break # Show progress every 50 shards when processing many @@ -344,6 +352,7 @@ def generate_rebalancing_recommendations( if not safe_target_nodes: continue # No safe targets found, skip this 
shard + target_node: NodeInfo if prioritize_space: # Space priority mode: choose node with most available space target_node = safe_target_nodes[0] # Already sorted by available space (desc) @@ -356,7 +365,6 @@ def generate_rebalancing_recommendations( # Choose target node with intelligent priority: # 1. If a node has significantly more space (2x) than zone-preferred nodes, prioritize space # 2. Otherwise, prefer zone balancing first, then available space - target_node = None if preferred_nodes and other_nodes: best_preferred = preferred_nodes[0] # Most space in preferred zones @@ -656,7 +664,7 @@ def get_cluster_overview(self) -> Dict[str, Any]: # Get cluster watermark settings watermarks = self.client.get_cluster_watermarks() - overview = { + overview: Dict[str, Any] = { "nodes": len(self.nodes), "zones": len({node.zone for node in self.nodes}), "total_shards": len(self.shards), diff --git a/cratedb_toolkit/admin/xmover/cli.py b/cratedb_toolkit/admin/xmover/cli.py index 2ce29fdf..18f4c86f 100644 --- a/cratedb_toolkit/admin/xmover/cli.py +++ b/cratedb_toolkit/admin/xmover/cli.py @@ -4,7 +4,7 @@ import sys import time -from typing import Optional +from typing import Any, Dict, List, Optional, cast import click from rich import box @@ -13,7 +13,7 @@ from rich.table import Table from .analyzer import MoveRecommendation, RecoveryMonitor, ShardAnalyzer -from .database import CrateDBClient +from .database import CrateDBClient, ShardInfo console = Console() @@ -600,7 +600,7 @@ def zone_analysis(ctx, table: Optional[str], show_shards: bool): return # Organize by table and shard - tables = {} + tables: Dict[str, Dict[str, List[ShardInfo]]] = {} for shard in shards: table_key = f"{shard.schema_name}.{shard.table_name}" if table_key not in tables: @@ -850,7 +850,7 @@ def explain_error(ctx, error_message: Optional[str]): if not error_message: console.print("Please paste the CrateDB error message (press Enter twice when done):") - lines = [] + lines: List[str] = [] while True: 
try: line = input() @@ -933,7 +933,7 @@ def explain_error(ctx, error_message: Optional[str]): error_lower = error_message.lower() for pattern_info in error_patterns: - if pattern_info["pattern"].lower() in error_lower: + if cast(str, pattern_info["pattern"]).lower() in error_lower: matches.append(pattern_info) if matches: @@ -1013,7 +1013,7 @@ def monitor_recovery( console.print("=" * 80) # Track previous state for change detection - previous_recoveries = {} + previous_recoveries: Dict[str, Dict[str, Any]] = {} previous_timestamp = None first_run = True diff --git a/cratedb_toolkit/admin/xmover/database.py b/cratedb_toolkit/admin/xmover/database.py index a6e2d35f..55a6c194 100644 --- a/cratedb_toolkit/admin/xmover/database.py +++ b/cratedb_toolkit/admin/xmover/database.py @@ -5,7 +5,7 @@ import logging import os from dataclasses import dataclass -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Union import requests from dotenv import load_dotenv @@ -118,7 +118,9 @@ class CrateDBClient: def __init__(self, connection_string: Optional[str] = None): load_dotenv() - self.connection_string = connection_string or os.getenv("CRATE_CONNECTION_STRING") + self.connection_string: str = ( + connection_string or os.getenv("CRATE_CONNECTION_STRING") or "http://localhost:4200" + ) if not self.connection_string: raise ValueError("CRATE_CONNECTION_STRING not found in environment or provided") @@ -132,7 +134,7 @@ def __init__(self, connection_string: Optional[str] = None): def execute_query(self, query: str, parameters: Optional[List] = None) -> Dict[str, Any]: """Execute a SQL query against CrateDB""" - payload = {"stmt": query} + payload: Dict[str, Any] = {"stmt": query} if parameters: payload["args"] = parameters @@ -207,7 +209,7 @@ def get_shards_info( if not for_analysis: # For operations, only include healthy shards where_conditions.extend(["s.routing_state = 'STARTED'", "s.recovery['files']['percent'] = 100.0"]) - parameters = [] + 
parameters: List[Union[str, int, Dict]] = [] if table_name: where_conditions.append("s.table_name = ?") @@ -297,7 +299,11 @@ def get_shard_distribution_summary(self, for_analysis: bool = True) -> Dict[str, result = self.execute_query(query) - summary = {"by_zone": {}, "by_node": {}, "totals": {"primary": 0, "replica": 0, "total_size_gb": 0}} + summary: Dict[str, Any] = { + "by_zone": {}, + "by_node": {}, + "totals": {"primary": 0, "replica": 0, "total_size_gb": 0}, + } for row in result.get("rows", []): zone = row[0] or "unknown" From 163001e459131b5c0a751920bad62ca22fb88b06 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Wed, 20 Aug 2025 11:24:14 +0200 Subject: [PATCH 04/18] Admin/XMover: Implement suggestions by CodeRabbit --- CHANGES.md | 3 +-- doc/admin/xmover/handbook.md | 12 ++++++------ doc/admin/xmover/index.md | 2 +- doc/admin/xmover/queries.md | 12 +++++++++--- doc/admin/xmover/troubleshooting.md | 7 +++++-- 5 files changed, 22 insertions(+), 14 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 7ef70478..bbc19439 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,8 +1,7 @@ # Changelog ## Unreleased -- Admin: Added XMover - CrateDB Shard Analyzer and Movement Tool. - Thanks, @WalBeh. +- Admin: Added XMover - CrateDB shard analyzer and movement tool. Thanks, @WalBeh. ## 2025/08/19 v0.0.41 - I/O: Updated to `influxio-0.6.0`. Thanks, @ZillKhan. diff --git a/doc/admin/xmover/handbook.md b/doc/admin/xmover/handbook.md index c103c6f2..cf9b4abe 100644 --- a/doc/admin/xmover/handbook.md +++ b/doc/admin/xmover/handbook.md @@ -368,7 +368,7 @@ xmover recommend --prioritize-zones --execute ### Connection String Format -``` +```text https://hostname:port ``` @@ -407,7 +407,7 @@ xmover analyze ### Common Issues and Solutions 1. 
**Zone Conflicts** - ``` + ```text Error: "NO(a copy of this shard is already allocated to this node)" ``` - **Cause**: Target node already has a copy of the shard @@ -415,7 +415,7 @@ xmover analyze - **Prevention**: Always use `xmover validate-move` before executing moves 2. **Zone Allocation Limits** - ``` + ```text Error: "too many copies of the shard allocated to nodes with attribute [zone]" ``` - **Cause**: CrateDB's zone awareness prevents too many copies in same zone @@ -423,7 +423,7 @@ xmover analyze - **Prevention**: Use `xmover recommend` which respects zone constraints 3. **Insufficient Space** - ``` + ```text Error: "not enough disk space" ``` - **Cause**: Target node lacks sufficient free space @@ -431,7 +431,7 @@ xmover analyze - **Check**: `xmover analyze` to see available space per node 4. **High Disk Usage Blocking Moves** - ``` + ```text Error: "Target node disk usage too high (85.3%)" ``` - **Cause**: Target node exceeds default 85% disk usage threshold @@ -465,7 +465,7 @@ XMover uses configurable safety thresholds to prevent risky moves: xmover recommend --max-disk-usage 90 --prioritize-space # For urgent space relief -xmover validate-move SCHEMA.TABLE SHARD_ID FROM TO --max-disk-usage 95 +xmover validate-move --max-disk-usage 95 ``` **When to Adjust Thresholds:** diff --git a/doc/admin/xmover/index.md b/doc/admin/xmover/index.md index 7b522310..affa4825 100644 --- a/doc/admin/xmover/index.md +++ b/doc/admin/xmover/index.md @@ -25,5 +25,5 @@ SQL commands for shard rebalancing and node decommissioning. 
Handbook Troubleshooting -Query gallery +Query Gallery ``` diff --git a/doc/admin/xmover/queries.md b/doc/admin/xmover/queries.md index 4600038c..27bd89e6 100644 --- a/doc/admin/xmover/queries.md +++ b/doc/admin/xmover/queries.md @@ -47,8 +47,15 @@ select node['name'], primary, sum(size) / 1024^3, count(id) from sys.shards g ``` ## Nodes available Space - ```sql +SELECT + name, + attributes['zone'] AS zone, + fs['total']['available'] / power(1024, 3) AS available_gb +FROM sys.nodes +ORDER BY name; +``` +```text +------------+--------------------+-----------------------------------------------+ | name | attributes['zone'] | (fs[1]['disks']['available'] / 1.073741824E9) | +------------+--------------------+-----------------------------------------------+ @@ -87,8 +94,7 @@ SELECT 8 rows in set (0.062 sec) ## Move REROUTE ```sql - -alter table "curvo"."bottlefieldData" reroute move shard 21 from 'data-hot-2' to 'data-hot-3'; +ALTER TABLE curvo.bottlefielddata REROUTE MOVE SHARD 21 FROM 'data-hot-2' TO 'data-hot-3'; ``` --- diff --git a/doc/admin/xmover/troubleshooting.md b/doc/admin/xmover/troubleshooting.md index 14567586..1afa477a 100644 --- a/doc/admin/xmover/troubleshooting.md +++ b/doc/admin/xmover/troubleshooting.md @@ -232,7 +232,10 @@ CRATE_SSL_VERIFY=true **Step 3: Test Network Access** ```bash # Test HTTP connectivity -curl -u username:password https://your-cluster:4200/_sql -d '{"stmt":"SELECT 1"}' +curl -u 'username:password' \ + -H 'Content-Type: application/json' \ + 'https://your-cluster:4200/_sql' \ + -d '{"stmt":"SELECT 1"}' ``` #### Prevention @@ -285,7 +288,7 @@ xmover explain-error "your error message here" 4. **Validate specific moves** ```bash - xmover validate-move SCHEMA.TABLE SHARD_ID FROM TO + xmover validate-move ``` 5. 
**Execute gradually** From de9ba97831f5a520a1ce884ff28fe8315bc927a4 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Wed, 20 Aug 2025 13:24:55 +0200 Subject: [PATCH 05/18] Admin/XMover: Add software tests --- tests/admin/__init__.py | 0 tests/admin/test_cli.py | 67 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 tests/admin/__init__.py create mode 100644 tests/admin/test_cli.py diff --git a/tests/admin/__init__.py b/tests/admin/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/admin/test_cli.py b/tests/admin/test_cli.py new file mode 100644 index 00000000..60e8d810 --- /dev/null +++ b/tests/admin/test_cli.py @@ -0,0 +1,67 @@ +import pytest +from click.testing import CliRunner + +from cratedb_toolkit.admin.xmover.cli import main as cli + + +@pytest.mark.parametrize( + "subcommand", + [ + "analyze", + "check-balance", + "explain-error", + "find-candidates", + "monitor-recovery", + "recommend", + "test-connection", + "zone-analysis", + ], +) +def test_xmover_all(cratedb, subcommand): + """ + CLI test: Invoke `xmover <subcommand>`. + """ + http_url = cratedb.get_http_url() + runner = CliRunner() + + result = runner.invoke( + cli, + args=subcommand, + env={"CRATE_CONNECTION_STRING": http_url}, + catch_exceptions=False, + ) + assert result.exit_code == 0 + + +def test_xmover_validate_move_success(cratedb): + """ + CLI test: Invoke `xmover validate-move`. + """ + http_url = cratedb.get_http_url() + runner = CliRunner() + + result = runner.invoke( + cli, + args=["validate-move", "doc.demo", "1", "42", "84"], + env={"CRATE_CONNECTION_STRING": http_url}, + catch_exceptions=False, + ) + assert result.exit_code == 0 + assert "Source node '42' not found in cluster" in result.output + + +def test_xmover_validate_move_failure(cratedb): + """ + CLI test: Invoke `xmover validate-move`. 
+ """ + http_url = cratedb.get_http_url() + runner = CliRunner() + + result = runner.invoke( + cli, + args=["validate-move"], + env={"CRATE_CONNECTION_STRING": http_url}, + catch_exceptions=False, + ) + assert result.exit_code == 2 + assert "Error: Missing argument 'SCHEMA_TABLE'." in result.output From 0d7fcb5a9ec6e4e73dd2df5f52c8c7c6f230a278 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Wed, 20 Aug 2025 19:19:56 +0200 Subject: [PATCH 06/18] Admin/XMover: Refactor -- "recovery" --- cratedb_toolkit/admin/xmover/analyzer.py | 150 +-------- cratedb_toolkit/admin/xmover/cli.py | 313 ++---------------- cratedb_toolkit/admin/xmover/database.py | 102 +----- cratedb_toolkit/admin/xmover/model.py | 101 ++++++ cratedb_toolkit/admin/xmover/recovery.py | 384 +++++++++++++++++++++++ cratedb_toolkit/admin/xmover/util.py | 45 +++ 6 files changed, 555 insertions(+), 540 deletions(-) create mode 100644 cratedb_toolkit/admin/xmover/model.py create mode 100644 cratedb_toolkit/admin/xmover/recovery.py create mode 100644 cratedb_toolkit/admin/xmover/util.py diff --git a/cratedb_toolkit/admin/xmover/analyzer.py b/cratedb_toolkit/admin/xmover/analyzer.py index e160d21a..f9b8d6a9 100644 --- a/cratedb_toolkit/admin/xmover/analyzer.py +++ b/cratedb_toolkit/admin/xmover/analyzer.py @@ -8,7 +8,8 @@ from dataclasses import dataclass from typing import Any, Dict, List, Optional, Set, Tuple, Union -from .database import CrateDBClient, NodeInfo, RecoveryInfo, ShardInfo +from .database import CrateDBClient +from .model import NodeInfo, ShardInfo logger = logging.getLogger(__name__) @@ -878,150 +879,3 @@ def plan_node_decommission(self, node_name: str, min_free_space_gb: float = 100. 
"estimated_time_hours": len(move_plan) * 0.1, # Rough estimate: 6 minutes per move "message": "Decommission plan generated" if feasible else "Decommission not currently feasible", } - - -class RecoveryMonitor: - """Monitor shard recovery operations""" - - def __init__(self, client: CrateDBClient): - self.client = client - - def get_cluster_recovery_status( - self, - table_name: Optional[str] = None, - node_name: Optional[str] = None, - recovery_type_filter: str = "all", - include_transitioning: bool = False, - ) -> List[RecoveryInfo]: - """Get comprehensive recovery status with minimal cluster impact""" - - # Get all recovering shards using the efficient combined query - recoveries = self.client.get_all_recovering_shards(table_name, node_name, include_transitioning) - - # Apply recovery type filter - if recovery_type_filter != "all": - recoveries = [r for r in recoveries if r.recovery_type.upper() == recovery_type_filter.upper()] - - return recoveries - - def get_recovery_summary(self, recoveries: List[RecoveryInfo]) -> Dict[str, Any]: - """Generate a summary of recovery operations""" - - if not recoveries: - return {"total_recoveries": 0, "by_type": {}, "by_stage": {}, "avg_progress": 0.0, "total_size_gb": 0.0} - - # Group by recovery type - by_type = {} - by_stage = {} - total_progress = 0.0 - total_size_gb = 0.0 - - for recovery in recoveries: - # By type - if recovery.recovery_type not in by_type: - by_type[recovery.recovery_type] = {"count": 0, "total_size_gb": 0.0, "avg_progress": 0.0} - by_type[recovery.recovery_type]["count"] += 1 - by_type[recovery.recovery_type]["total_size_gb"] += recovery.size_gb - - # By stage - if recovery.stage not in by_stage: - by_stage[recovery.stage] = 0 - by_stage[recovery.stage] += 1 - - # Totals - total_progress += recovery.overall_progress - total_size_gb += recovery.size_gb - - # Calculate averages - for type_name, rec_type in by_type.items(): - if rec_type["count"] > 0: - type_recoveries = [r for r in recoveries if 
r.recovery_type == type_name] - if type_recoveries: - rec_type["avg_progress"] = sum(r.overall_progress for r in type_recoveries) / len(type_recoveries) - - return { - "total_recoveries": len(recoveries), - "by_type": by_type, - "by_stage": by_stage, - "avg_progress": total_progress / len(recoveries) if recoveries else 0.0, - "total_size_gb": total_size_gb, - } - - def format_recovery_display(self, recoveries: List[RecoveryInfo]) -> str: - """Format recovery information for display""" - - if not recoveries: - return "βœ… No active shard recoveries found" - - # Group by recovery type - peer_recoveries = [r for r in recoveries if r.recovery_type == "PEER"] - disk_recoveries = [r for r in recoveries if r.recovery_type == "DISK"] - other_recoveries = [r for r in recoveries if r.recovery_type not in ["PEER", "DISK"]] - - output = [f"\nπŸ”„ Active Shard Recoveries ({len(recoveries)} total)"] - output.append("=" * 80) - - if peer_recoveries: - output.append(f"\nπŸ“‘ PEER Recoveries ({len(peer_recoveries)})") - output.append(self._format_recovery_table(peer_recoveries)) - - if disk_recoveries: - output.append(f"\nπŸ’Ύ DISK Recoveries ({len(disk_recoveries)})") - output.append(self._format_recovery_table(disk_recoveries)) - - if other_recoveries: - output.append(f"\nπŸ”§ Other Recoveries ({len(other_recoveries)})") - output.append(self._format_recovery_table(other_recoveries)) - - # Add summary - summary = self.get_recovery_summary(recoveries) - output.append("\nπŸ“Š Summary:") - output.append(f" Total size: {summary['total_size_gb']:.1f} GB") - output.append(f" Average progress: {summary['avg_progress']:.1f}%") - - return "\n".join(output) - - def _format_recovery_table(self, recoveries: List[RecoveryInfo]) -> str: - """Format a table of recovery information""" - - if not recoveries: - return " No recoveries of this type" - - # Table headers - headers = ["Table", "Shard", "Node", "Type", "Stage", "Progress", "Size(GB)", "Time(s)"] - - # Calculate column widths - col_widths 
= [len(h) for h in headers] - - rows = [] - for recovery in recoveries: - row = [ - f"{recovery.schema_name}.{recovery.table_name}", - str(recovery.shard_id), - recovery.node_name, - recovery.shard_type, - recovery.stage, - f"{recovery.overall_progress:.1f}%", - f"{recovery.size_gb:.1f}", - f"{recovery.total_time_seconds:.1f}", - ] - rows.append(row) - - # Update column widths - for i, cell in enumerate(row): - col_widths[i] = max(col_widths[i], len(cell)) - - # Format table - output = [] - - # Header row - header_row = " " + " | ".join(h.ljust(w) for h, w in zip(headers, col_widths)) - output.append(header_row) - output.append(" " + "-" * (len(header_row) - 3)) - - # Data rows - for row in rows: - data_row = " " + " | ".join(cell.ljust(w) for cell, w in zip(row, col_widths)) - output.append(data_row) - - return "\n".join(output) diff --git a/cratedb_toolkit/admin/xmover/cli.py b/cratedb_toolkit/admin/xmover/cli.py index 18f4c86f..2ad88d15 100644 --- a/cratedb_toolkit/admin/xmover/cli.py +++ b/cratedb_toolkit/admin/xmover/cli.py @@ -4,7 +4,7 @@ import sys import time -from typing import Any, Dict, List, Optional, cast +from typing import Dict, List, Optional, cast import click from rich import box @@ -12,57 +12,14 @@ from rich.panel import Panel from rich.table import Table -from .analyzer import MoveRecommendation, RecoveryMonitor, ShardAnalyzer -from .database import CrateDBClient, ShardInfo +from cratedb_toolkit.admin.xmover.model import ShardInfo -console = Console() - - -def format_size(size_gb: float) -> str: - """Format size in GB with appropriate precision""" - if size_gb >= 1000: - return f"{size_gb / 1000:.1f}TB" - elif size_gb >= 1: - return f"{size_gb:.1f}GB" - else: - return f"{size_gb * 1000:.0f}MB" - - -def format_percentage(value: float) -> str: - """Format percentage with color coding""" - color = "green" - if value > 80: - color = "red" - elif value > 70: - color = "yellow" - return f"[{color}]{value:.1f}%[/{color}]" - - -def 
format_translog_info(recovery_info) -> str: - """Format translog size information with color coding""" - tl_bytes = recovery_info.translog_size_bytes - - # Only show if significant (>10MB for production) - if tl_bytes < 10 * 1024 * 1024: # 10MB for production - return "" - - tl_gb = recovery_info.translog_size_gb +from .analyzer import MoveRecommendation, ShardAnalyzer +from .database import CrateDBClient +from .recovery import RecoveryMonitor, RecoveryOptions +from .util import format_percentage, format_size - # Color coding based on size - if tl_gb >= 5.0: - color = "red" - elif tl_gb >= 1.0: - color = "yellow" - else: - color = "green" - - # Format size - if tl_gb >= 1.0: - size_str = f"{tl_gb:.1f}GB" - else: - size_str = f"{tl_gb * 1000:.0f}MB" - - return f" [dim]([{color}]TL:{size_str}[/{color}])[/dim]" +console = Console() @click.group() @@ -998,254 +955,28 @@ def monitor_recovery( xmover monitor-recovery --watch # Continuous monitoring xmover monitor-recovery --recovery-type PEER # Only PEER recoveries """ - try: - client = ctx.obj["client"] - recovery_monitor = RecoveryMonitor(client) - - if watch: - console.print(f"πŸ”„ Monitoring shard recoveries (refreshing every {refresh_interval}s)") - console.print("Press Ctrl+C to stop") - console.print() - - try: - # Show header once - console.print("πŸ“Š Recovery Progress Monitor") - console.print("=" * 80) - - # Track previous state for change detection - previous_recoveries: Dict[str, Dict[str, Any]] = {} - previous_timestamp = None - first_run = True - - while True: - # Get current recovery status - recoveries = recovery_monitor.get_cluster_recovery_status( - table_name=table, - node_name=node, - recovery_type_filter=recovery_type, - include_transitioning=include_transitioning, - ) - - # Display current time - from datetime import datetime - - current_time = datetime.now().strftime("%H:%M:%S") - - # Check for any changes - changes = [] - active_count = 0 - completed_count = 0 - - for recovery in recoveries: - 
recovery_key = ( - f"{recovery.schema_name}.{recovery.table_name}.{recovery.shard_id}.{recovery.node_name}" - ) - - # Create complete table name - if recovery.schema_name == "doc": - table_display = recovery.table_name - else: - table_display = f"{recovery.schema_name}.{recovery.table_name}" - - # Count active vs completed - if recovery.stage == "DONE" and recovery.overall_progress >= 100.0: - completed_count += 1 - else: - active_count += 1 - - # Check for changes since last update - if recovery_key in previous_recoveries: - prev = previous_recoveries[recovery_key] - if prev["progress"] != recovery.overall_progress: - diff = recovery.overall_progress - prev["progress"] - # Create node route display - node_route = "" - if recovery.recovery_type == "PEER" and recovery.source_node_name: - node_route = f" {recovery.source_node_name} β†’ {recovery.node_name}" - elif recovery.recovery_type == "DISK": - node_route = f" disk β†’ {recovery.node_name}" - - # Add translog info - translog_info = format_translog_info(recovery) - - if diff > 0: - changes.append( - f"[green]πŸ“ˆ[/green] {table_display} S{recovery.shard_id} " - f"{recovery.overall_progress:.1f}% (+{diff:.1f}%) " - f"{recovery.size_gb:.1f}GB{translog_info}{node_route}" - ) - else: - changes.append( - f"[yellow]πŸ“‰[/yellow] {table_display} S{recovery.shard_id} " - f"{recovery.overall_progress:.1f}% ({diff:.1f}%) " - f"{recovery.size_gb:.1f}GB{translog_info}{node_route}" - ) - elif prev["stage"] != recovery.stage: - # Create node route display - node_route = "" - if recovery.recovery_type == "PEER" and recovery.source_node_name: - node_route = f" {recovery.source_node_name} β†’ {recovery.node_name}" - elif recovery.recovery_type == "DISK": - node_route = f" disk β†’ {recovery.node_name}" - - # Add translog info - translog_info = format_translog_info(recovery) - - changes.append( - f"[blue]πŸ”„[/blue] {table_display} S{recovery.shard_id} " - f"{prev['stage']}β†’{recovery.stage} " - 
f"{recovery.size_gb:.1f}GB{translog_info}{node_route}" - ) - else: - # New recovery - show based on include_transitioning flag or first run - if ( - first_run - or include_transitioning - or (recovery.overall_progress < 100.0 or recovery.stage != "DONE") - ): - # Create node route display - node_route = "" - if recovery.recovery_type == "PEER" and recovery.source_node_name: - node_route = f" {recovery.source_node_name} β†’ {recovery.node_name}" - elif recovery.recovery_type == "DISK": - node_route = f" disk β†’ {recovery.node_name}" - - status_icon = "[cyan]πŸ†•[/cyan]" if not first_run else "[blue]πŸ“‹[/blue]" - # Add translog info - translog_info = format_translog_info(recovery) - - changes.append( - f"{status_icon} {table_display} S{recovery.shard_id} " - f"{recovery.stage} {recovery.overall_progress:.1f}% " - f"{recovery.size_gb:.1f}GB{translog_info}{node_route}" - ) - - # Store current state for next comparison - previous_recoveries[recovery_key] = { - "progress": recovery.overall_progress, - "stage": recovery.stage, - } - - # Always show a status line - if not recoveries: - console.print(f"{current_time} | [green]No recoveries - cluster stable[/green]") - previous_recoveries.clear() - else: - # Build status message - status = "" - if active_count > 0: - status = f"{active_count} active" - if completed_count > 0: - status += f", {completed_count} done" if status else f"{completed_count} done" - - # Show status line with changes or periodic update - if changes: - console.print(f"{current_time} | {status}") - for change in changes: - console.print(f" | {change}") - else: - # Show periodic status even without changes - if include_transitioning and completed_count > 0: - console.print(f"{current_time} | {status} (transitioning)") - elif active_count > 0: - console.print(f"{current_time} | {status} (no changes)") - - previous_timestamp = current_time # noqa: F841 - first_run = False - time.sleep(refresh_interval) - - except KeyboardInterrupt: - 
console.print("\n\n[yellow]⏹ Monitoring stopped by user[/yellow]") - - # Show final summary - final_recoveries = recovery_monitor.get_cluster_recovery_status( - table_name=table, - node_name=node, - recovery_type_filter=recovery_type, - include_transitioning=include_transitioning, - ) - - if final_recoveries: - console.print("\nπŸ“Š [bold]Final Recovery Summary:[/bold]") - summary = recovery_monitor.get_recovery_summary(final_recoveries) - - # Count active vs completed - active_count = len([r for r in final_recoveries if r.overall_progress < 100.0 or r.stage != "DONE"]) - completed_count = len(final_recoveries) - active_count - - console.print(f" Total recoveries: {summary['total_recoveries']}") - console.print(f" Active: {active_count}, Completed: {completed_count}") - console.print(f" Total size: {summary['total_size_gb']:.1f} GB") - console.print(f" Average progress: {summary['avg_progress']:.1f}%") - - if summary["by_type"]: - console.print(" By recovery type:") - for rec_type, stats in summary["by_type"].items(): - console.print( - f" {rec_type}: {stats['count']} recoveries, " - f"{stats['avg_progress']:.1f}% avg progress" - ) - else: - console.print("\n[green]βœ… No active recoveries at exit[/green]") - - return - - else: - # Single status check - recoveries = recovery_monitor.get_cluster_recovery_status( - table_name=table, - node_name=node, - recovery_type_filter=recovery_type, - include_transitioning=include_transitioning, - ) - - display_output = recovery_monitor.format_recovery_display(recoveries) - console.print(display_output) - - if not recoveries: - if include_transitioning: - console.print("\n[green]βœ… No recoveries found (active or transitioning)[/green]") - else: - console.print("\n[green]βœ… No active recoveries found[/green]") - console.print( - "[dim]πŸ’‘ Use --include-transitioning to see completed recoveries still transitioning[/dim]" - ) - else: - # Show summary - summary = recovery_monitor.get_recovery_summary(recoveries) - 
console.print("\nπŸ“Š [bold]Recovery Summary:[/bold]") - console.print(f" Total recoveries: {summary['total_recoveries']}") - console.print(f" Total size: {summary['total_size_gb']:.1f} GB") - console.print(f" Average progress: {summary['avg_progress']:.1f}%") - - # Show breakdown by type - if summary["by_type"]: - console.print("\n By recovery type:") - for rec_type, stats in summary["by_type"].items(): - console.print( - f" {rec_type}: {stats['count']} recoveries, {stats['avg_progress']:.1f}% avg progress" - ) - - console.print("\n[dim]πŸ’‘ Use --watch flag for continuous monitoring[/dim]") - - except Exception as e: - console.print(f"[red]❌ Error monitoring recoveries: {e}[/red]") - if ctx.obj.get("debug"): - raise + recovery_monitor = RecoveryMonitor( + client=ctx.obj["client"], + options=RecoveryOptions( + table=table, + node=node, + refresh_interval=refresh_interval, + recovery_type=recovery_type, + include_transitioning=include_transitioning, + ), + ) + recovery_monitor.start(watch=watch, debug=ctx.obj.get("debug")) def _wait_for_recovery_capacity(client, max_concurrent_recoveries: int = 5): """Wait until active recovery count is below threshold""" - from time import sleep - from .analyzer import RecoveryMonitor - - recovery_monitor = RecoveryMonitor(client) + recovery_monitor = RecoveryMonitor(client, RecoveryOptions(include_transitioning=True)) wait_time = 0 while True: # Check active recoveries (including transitioning) - recoveries = recovery_monitor.get_cluster_recovery_status(include_transitioning=True) + recoveries = recovery_monitor.get_cluster_recovery_status() active_count = len([r for r in recoveries if r.overall_progress < 100.0 or r.stage != "DONE"]) status = f"{active_count}/{max_concurrent_recoveries}" if active_count < max_concurrent_recoveries: @@ -1257,15 +988,13 @@ def _wait_for_recovery_capacity(client, max_concurrent_recoveries: int = 5): elif wait_time % 30 == 0: # Update every 30 seconds console.print(f" [yellow]⏳ Still waiting... 
({status} active)[/yellow]") - sleep(10) # Check every 10 seconds + time.sleep(10) # Check every 10 seconds wait_time += 10 def _execute_recommendations_safely(client, recommendations, validate: bool): """Execute recommendations with extensive safety measures""" - from .analyzer import ShardAnalyzer - # Filter to only safe recommendations safe_recommendations = [] if validate: diff --git a/cratedb_toolkit/admin/xmover/database.py b/cratedb_toolkit/admin/xmover/database.py index 55a6c194..1cb16bb1 100644 --- a/cratedb_toolkit/admin/xmover/database.py +++ b/cratedb_toolkit/admin/xmover/database.py @@ -4,112 +4,14 @@ import logging import os -from dataclasses import dataclass from typing import Any, Dict, List, Optional, Union import requests from dotenv import load_dotenv -logger = logging.getLogger(__name__) - +from cratedb_toolkit.admin.xmover.model import NodeInfo, RecoveryInfo, ShardInfo -@dataclass -class NodeInfo: - """Information about a CrateDB node""" - - id: str - name: str - zone: str - heap_used: int - heap_max: int - fs_total: int - fs_used: int - fs_available: int - - @property - def heap_usage_percent(self) -> float: - return (self.heap_used / self.heap_max) * 100 if self.heap_max > 0 else 0 - - @property - def disk_usage_percent(self) -> float: - return (self.fs_used / self.fs_total) * 100 if self.fs_total > 0 else 0 - - @property - def available_space_gb(self) -> float: - return self.fs_available / (1024**3) - - -@dataclass -class ShardInfo: - """Information about a shard""" - - table_name: str - schema_name: str - shard_id: int - node_id: str - node_name: str - zone: str - is_primary: bool - size_bytes: int - size_gb: float - num_docs: int - state: str - routing_state: str - - @property - def shard_type(self) -> str: - return "PRIMARY" if self.is_primary else "REPLICA" - - -@dataclass -class RecoveryInfo: - """Information about an active shard recovery""" - - schema_name: str - table_name: str - shard_id: int - node_name: str - node_id: str - 
"""Value objects shared by the XMover analyzer, recommender, and recovery monitor."""

from dataclasses import dataclass
from typing import Optional


@dataclass
class NodeInfo:
    """Identity plus heap/filesystem telemetry for a single CrateDB node."""

    id: str
    name: str
    zone: str
    heap_used: int
    heap_max: int
    fs_total: int
    fs_used: int
    fs_available: int

    @property
    def heap_usage_percent(self) -> float:
        """Heap utilisation in percent; 0 when the heap limit is unknown."""
        if self.heap_max <= 0:
            return 0
        return (self.heap_used / self.heap_max) * 100

    @property
    def disk_usage_percent(self) -> float:
        """Filesystem utilisation in percent; 0 when total capacity is unknown."""
        if self.fs_total <= 0:
            return 0
        return (self.fs_used / self.fs_total) * 100

    @property
    def available_space_gb(self) -> float:
        """Free filesystem space converted from bytes to binary gigabytes."""
        return self.fs_available / (1024**3)


@dataclass
class ShardInfo:
    """A single shard copy as reported by sys.shards, with placement and size."""

    table_name: str
    schema_name: str
    shard_id: int
    node_id: str
    node_name: str
    zone: str
    is_primary: bool
    size_bytes: int
    size_gb: float
    num_docs: int
    state: str
    routing_state: str

    @property
    def shard_type(self) -> str:
        """Human-readable role of this copy."""
        if self.is_primary:
            return "PRIMARY"
        return "REPLICA"


@dataclass
class RecoveryInfo:
    """Progress snapshot of one in-flight shard recovery."""

    schema_name: str
    table_name: str
    shard_id: int
    node_name: str
    node_id: str
    recovery_type: str  # PEER, DISK, etc.
    stage: str  # INIT, INDEX, VERIFY_INDEX, TRANSLOG, FINALIZE, DONE
    files_percent: float
    bytes_percent: float
    total_time_ms: int
    routing_state: str  # INITIALIZING, RELOCATING, etc.
    current_state: str  # from allocations
    is_primary: bool
    size_bytes: int
    source_node_name: Optional[str] = None  # Source node for PEER recoveries
    translog_size_bytes: int = 0  # Translog size in bytes

    @property
    def overall_progress(self) -> float:
        """Overall progress in percent: the larger of file and byte progress."""
        return max(self.files_percent, self.bytes_percent)

    @property
    def size_gb(self) -> float:
        """Shard size converted from bytes to binary gigabytes."""
        return self.size_bytes / (1024**3)

    @property
    def shard_type(self) -> str:
        """Human-readable role of the recovering copy."""
        if self.is_primary:
            return "PRIMARY"
        return "REPLICA"

    @property
    def total_time_seconds(self) -> float:
        """Elapsed recovery time converted from milliseconds to seconds."""
        return self.total_time_ms / 1000.0

    @property
    def translog_size_gb(self) -> float:
        """Translog size converted from bytes to binary gigabytes."""
        return self.translog_size_bytes / (1024**3)

    @property
    def translog_percentage(self) -> float:
        """Translog size relative to the shard size; 0 for an empty shard."""
        if self.size_bytes <= 0:
            return 0
        return self.translog_size_bytes / self.size_bytes * 100
"""Live monitoring of CrateDB shard recoveries (one-shot report or watch loop)."""

import dataclasses
import time
from datetime import datetime
from typing import Any, Dict, List, Optional

from rich.console import Console

from cratedb_toolkit.admin.xmover.database import CrateDBClient
from cratedb_toolkit.admin.xmover.model import RecoveryInfo
from cratedb_toolkit.admin.xmover.util import format_translog_info

console = Console()


@dataclasses.dataclass
class RecoveryOptions:
    """Filter and refresh settings for a monitoring session."""

    table: Optional[str] = None  # restrict to one table
    node: Optional[str] = None  # restrict to one node
    refresh_interval: int = 10  # seconds between polls in watch mode
    include_transitioning: bool = False  # also show recoveries already at DONE/100%
    recovery_type: Optional[str] = None  # e.g. PEER or DISK (case-insensitive)


class RecoveryMonitor:
    """Monitor shard recovery operations"""

    def __init__(self, client: CrateDBClient, options: RecoveryOptions):
        self.client = client
        self.options = options

    def get_cluster_recovery_status(self) -> List[RecoveryInfo]:
        """Get comprehensive recovery status with minimal cluster impact"""

        # Get all recovering shards using the efficient combined query
        recoveries = self.client.get_all_recovering_shards(
            self.options.table, self.options.node, self.options.include_transitioning
        )

        # Apply recovery type filter (case-insensitive match against e.g. PEER/DISK)
        if self.options.recovery_type is not None:
            recoveries = [r for r in recoveries if r.recovery_type.upper() == self.options.recovery_type.upper()]

        return recoveries

    def get_recovery_summary(self, recoveries: List[RecoveryInfo]) -> Dict[str, Any]:
        """Generate a summary of recovery operations

        Returns a dict with total count, per-type and per-stage breakdowns,
        mean overall progress, and the summed shard size in GB.
        """

        if not recoveries:
            return {"total_recoveries": 0, "by_type": {}, "by_stage": {}, "avg_progress": 0.0, "total_size_gb": 0.0}

        # Group by recovery type
        by_type = {}
        by_stage = {}
        total_progress = 0.0
        total_size_gb = 0.0

        for recovery in recoveries:
            # By type
            if recovery.recovery_type not in by_type:
                by_type[recovery.recovery_type] = {"count": 0, "total_size_gb": 0.0, "avg_progress": 0.0}
            by_type[recovery.recovery_type]["count"] += 1
            by_type[recovery.recovery_type]["total_size_gb"] += recovery.size_gb

            # By stage
            if recovery.stage not in by_stage:
                by_stage[recovery.stage] = 0
            by_stage[recovery.stage] += 1

            # Totals
            total_progress += recovery.overall_progress
            total_size_gb += recovery.size_gb

        # Calculate averages (second pass re-selects each type's recoveries)
        for type_name, rec_type in by_type.items():
            if rec_type["count"] > 0:
                type_recoveries = [r for r in recoveries if r.recovery_type == type_name]
                if type_recoveries:
                    rec_type["avg_progress"] = sum(r.overall_progress for r in type_recoveries) / len(type_recoveries)

        return {
            "total_recoveries": len(recoveries),
            "by_type": by_type,
            "by_stage": by_stage,
            "avg_progress": total_progress / len(recoveries) if recoveries else 0.0,
            "total_size_gb": total_size_gb,
        }

    def format_recovery_display(self, recoveries: List[RecoveryInfo]) -> str:
        """Format recovery information for display

        Groups recoveries into PEER / DISK / other sections, each rendered as a
        plain-text table, followed by a short summary footer.
        """

        if not recoveries:
            return "βœ… No active shard recoveries found"

        # Group by recovery type
        peer_recoveries = [r for r in recoveries if r.recovery_type == "PEER"]
        disk_recoveries = [r for r in recoveries if r.recovery_type == "DISK"]
        other_recoveries = [r for r in recoveries if r.recovery_type not in ["PEER", "DISK"]]

        output = [f"\nπŸ”„ Active Shard Recoveries ({len(recoveries)} total)"]
        output.append("=" * 80)

        if peer_recoveries:
            output.append(f"\nπŸ“‘ PEER Recoveries ({len(peer_recoveries)})")
            output.append(self._format_recovery_table(peer_recoveries))

        if disk_recoveries:
            output.append(f"\nπŸ’Ύ DISK Recoveries ({len(disk_recoveries)})")
            output.append(self._format_recovery_table(disk_recoveries))

        if other_recoveries:
            output.append(f"\nπŸ”§ Other Recoveries ({len(other_recoveries)})")
            output.append(self._format_recovery_table(other_recoveries))

        # Add summary
        summary = self.get_recovery_summary(recoveries)
        output.append("\nπŸ“Š Summary:")
        output.append(f" Total size: {summary['total_size_gb']:.1f} GB")
        output.append(f" Average progress: {summary['avg_progress']:.1f}%")

        return "\n".join(output)

    def _format_recovery_table(self, recoveries: List[RecoveryInfo]) -> str:
        """Format a table of recovery information

        Column widths are computed from the widest cell per column so the
        output aligns without any external table library.
        """

        if not recoveries:
            return " No recoveries of this type"

        # Table headers
        headers = ["Table", "Shard", "Node", "Type", "Stage", "Progress", "Size(GB)", "Time(s)"]

        # Calculate column widths
        col_widths = [len(h) for h in headers]

        rows = []
        for recovery in recoveries:
            row = [
                f"{recovery.schema_name}.{recovery.table_name}",
                str(recovery.shard_id),
                recovery.node_name,
                recovery.shard_type,
                recovery.stage,
                f"{recovery.overall_progress:.1f}%",
                f"{recovery.size_gb:.1f}",
                f"{recovery.total_time_seconds:.1f}",
            ]
            rows.append(row)

            # Update column widths
            for i, cell in enumerate(row):
                col_widths[i] = max(col_widths[i], len(cell))

        # Format table
        output = []

        # Header row
        header_row = " " + " | ".join(h.ljust(w) for h, w in zip(headers, col_widths))
        output.append(header_row)
        output.append(" " + "-" * (len(header_row) - 3))

        # Data rows
        for row in rows:
            data_row = " " + " | ".join(cell.ljust(w) for cell, w in zip(row, col_widths))
            output.append(data_row)

        return "\n".join(output)

    def start(self, watch: bool, debug: bool = False):
        """Run the monitor.

        With ``watch=True`` this polls the cluster every ``refresh_interval``
        seconds and prints only *changes* (progress deltas, stage changes, new
        recoveries) until interrupted with Ctrl+C, at which point a final
        summary is printed. Without ``watch`` it prints a one-shot report.
        Exceptions are reported; they re-raise only when ``debug`` is set.
        """
        try:
            if watch:
                console.print(f"πŸ”„ Monitoring shard recoveries (refreshing every {self.options.refresh_interval}s)")
                console.print("Press Ctrl+C to stop")
                console.print()

                try:
                    # Show header once
                    console.print("πŸ“Š Recovery Progress Monitor")
                    console.print("=" * 80)

                    # Track previous state for change detection
                    previous_recoveries: Dict[str, Dict[str, Any]] = {}
                    previous_timestamp = None
                    first_run = True

                    while True:
                        # Get current recovery status
                        recoveries = self.get_cluster_recovery_status()

                        # Display current time
                        current_time = datetime.now().strftime("%H:%M:%S")

                        # Check for any changes
                        changes = []
                        active_count = 0
                        completed_count = 0

                        for recovery in recoveries:
                            # Key identifies one shard copy on one node across polls.
                            recovery_key = (
                                f"{recovery.schema_name}.{recovery.table_name}.{recovery.shard_id}.{recovery.node_name}"
                            )

                            # Create complete table name (the default "doc" schema is elided)
                            if recovery.schema_name == "doc":
                                table_display = recovery.table_name
                            else:
                                table_display = f"{recovery.schema_name}.{recovery.table_name}"

                            # Count active vs completed
                            if recovery.stage == "DONE" and recovery.overall_progress >= 100.0:
                                completed_count += 1
                            else:
                                active_count += 1

                            # Check for changes since last update
                            if recovery_key in previous_recoveries:
                                prev = previous_recoveries[recovery_key]
                                if prev["progress"] != recovery.overall_progress:
                                    diff = recovery.overall_progress - prev["progress"]
                                    # Create node route display
                                    node_route = ""
                                    if recovery.recovery_type == "PEER" and recovery.source_node_name:
                                        node_route = f" {recovery.source_node_name} β†’ {recovery.node_name}"
                                    elif recovery.recovery_type == "DISK":
                                        node_route = f" disk β†’ {recovery.node_name}"

                                    # Add translog info
                                    translog_info = format_translog_info(recovery)

                                    if diff > 0:
                                        changes.append(
                                            f"[green]πŸ“ˆ[/green] {table_display} S{recovery.shard_id} "
                                            f"{recovery.overall_progress:.1f}% (+{diff:.1f}%) "
                                            f"{recovery.size_gb:.1f}GB{translog_info}{node_route}"
                                        )
                                    else:
                                        changes.append(
                                            f"[yellow]πŸ“‰[/yellow] {table_display} S{recovery.shard_id} "
                                            f"{recovery.overall_progress:.1f}% ({diff:.1f}%) "
                                            f"{recovery.size_gb:.1f}GB{translog_info}{node_route}"
                                        )
                                elif prev["stage"] != recovery.stage:
                                    # Create node route display
                                    node_route = ""
                                    if recovery.recovery_type == "PEER" and recovery.source_node_name:
                                        node_route = f" {recovery.source_node_name} β†’ {recovery.node_name}"
                                    elif recovery.recovery_type == "DISK":
                                        node_route = f" disk β†’ {recovery.node_name}"

                                    # Add translog info
                                    translog_info = format_translog_info(recovery)

                                    changes.append(
                                        f"[blue]πŸ”„[/blue] {table_display} S{recovery.shard_id} "
                                        f"{prev['stage']}β†’{recovery.stage} "
                                        f"{recovery.size_gb:.1f}GB{translog_info}{node_route}"
                                    )
                            else:
                                # New recovery - show based on include_transitioning flag or first run
                                if (
                                    first_run
                                    or self.options.include_transitioning
                                    or (recovery.overall_progress < 100.0 or recovery.stage != "DONE")
                                ):
                                    # Create node route display
                                    node_route = ""
                                    if recovery.recovery_type == "PEER" and recovery.source_node_name:
                                        node_route = f" {recovery.source_node_name} β†’ {recovery.node_name}"
                                    elif recovery.recovery_type == "DISK":
                                        node_route = f" disk β†’ {recovery.node_name}"

                                    status_icon = "[cyan]πŸ†•[/cyan]" if not first_run else "[blue]πŸ“‹[/blue]"
                                    # Add translog info
                                    translog_info = format_translog_info(recovery)

                                    changes.append(
                                        f"{status_icon} {table_display} S{recovery.shard_id} "
                                        f"{recovery.stage} {recovery.overall_progress:.1f}% "
                                        f"{recovery.size_gb:.1f}GB{translog_info}{node_route}"
                                    )

                            # Store current state for next comparison
                            previous_recoveries[recovery_key] = {
                                "progress": recovery.overall_progress,
                                "stage": recovery.stage,
                            }

                        # Always show a status line
                        if not recoveries:
                            console.print(f"{current_time} | [green]No recoveries - cluster stable[/green]")
                            previous_recoveries.clear()
                        else:
                            # Build status message
                            status = ""
                            if active_count > 0:
                                status = f"{active_count} active"
                            if completed_count > 0:
                                status += f", {completed_count} done" if status else f"{completed_count} done"

                            # Show status line with changes or periodic update
                            if changes:
                                console.print(f"{current_time} | {status}")
                                for change in changes:
                                    console.print(f" | {change}")
                            else:
                                # Show periodic status even without changes
                                if self.options.include_transitioning and completed_count > 0:
                                    console.print(f"{current_time} | {status} (transitioning)")
                                elif active_count > 0:
                                    console.print(f"{current_time} | {status} (no changes)")

                        previous_timestamp = current_time  # noqa: F841
                        first_run = False
                        time.sleep(self.options.refresh_interval)

                except KeyboardInterrupt:
                    console.print("\n\n[yellow]⏹ Monitoring stopped by user[/yellow]")

                    # Show final summary
                    final_recoveries = self.get_cluster_recovery_status()

                    if final_recoveries:
                        console.print("\nπŸ“Š [bold]Final Recovery Summary:[/bold]")
                        summary = self.get_recovery_summary(final_recoveries)

                        # Count active vs completed
                        active_count = len(
                            [r for r in final_recoveries if r.overall_progress < 100.0 or r.stage != "DONE"]
                        )
                        completed_count = len(final_recoveries) - active_count

                        console.print(f" Total recoveries: {summary['total_recoveries']}")
                        console.print(f" Active: {active_count}, Completed: {completed_count}")
                        console.print(f" Total size: {summary['total_size_gb']:.1f} GB")
                        console.print(f" Average progress: {summary['avg_progress']:.1f}%")

                        if summary["by_type"]:
                            console.print(" By recovery type:")
                            for rec_type, stats in summary["by_type"].items():
                                console.print(
                                    f" {rec_type}: {stats['count']} recoveries, "
                                    f"{stats['avg_progress']:.1f}% avg progress"
                                )
                    else:
                        console.print("\n[green]βœ… No active recoveries at exit[/green]")

                    return

            else:
                # Single status check
                recoveries = self.get_cluster_recovery_status()

                display_output = self.format_recovery_display(recoveries)
                console.print(display_output)

                if not recoveries:
                    if self.options.include_transitioning:
                        console.print("\n[green]βœ… No recoveries found (active or transitioning)[/green]")
                    else:
                        console.print("\n[green]βœ… No active recoveries found[/green]")
                        console.print(
                            "[dim]πŸ’‘ Use --include-transitioning to see completed recoveries still transitioning[/dim]"
                        )
                else:
                    # Show summary
                    summary = self.get_recovery_summary(recoveries)
                    console.print("\nπŸ“Š [bold]Recovery Summary:[/bold]")
                    console.print(f" Total recoveries: {summary['total_recoveries']}")
                    console.print(f" Total size: {summary['total_size_gb']:.1f} GB")
                    console.print(f" Average progress: {summary['avg_progress']:.1f}%")

                    # Show breakdown by type
                    if summary["by_type"]:
                        console.print("\n By recovery type:")
                        for rec_type, stats in summary["by_type"].items():
                            console.print(
                                f" {rec_type}: {stats['count']} recoveries, "
                                f"{stats['avg_progress']:.1f}% avg progress"
                            )

                    console.print("\n[dim]πŸ’‘ Use --watch flag for continuous monitoring[/dim]")

        except Exception as e:
            console.print(f"[red]❌ Error monitoring recoveries: {e}[/red]")
            if debug:
                raise
def format_size(size_gb: float) -> str:
    """Render a size given in GB with the most natural unit (MB/GB/TB)."""
    if size_gb >= 1000:
        return f"{size_gb / 1000:.1f}TB"
    if size_gb >= 1:
        return f"{size_gb:.1f}GB"
    return f"{size_gb * 1000:.0f}MB"


def format_percentage(value: float) -> str:
    """Colorize a percentage for Rich output: red above 80, yellow above 70, else green."""
    if value > 80:
        color = "red"
    elif value > 70:
        color = "yellow"
    else:
        color = "green"
    return f"[{color}]{value:.1f}%[/{color}]"


def format_translog_info(recovery_info) -> str:
    """Render a dimmed, color-coded translog size tag, or '' when below 10MB.

    Sizes of 5GB or more are red, 1GB or more yellow, anything smaller green.
    """
    tl_bytes = recovery_info.translog_size_bytes

    # Translogs under 10MB are noise in production; suppress them entirely.
    if tl_bytes < 10 * 1024 * 1024:
        return ""

    tl_gb = recovery_info.translog_size_gb

    if tl_gb >= 5.0:
        color = "red"
    elif tl_gb >= 1.0:
        color = "yellow"
    else:
        color = "green"

    size_str = f"{tl_gb:.1f}GB" if tl_gb >= 1.0 else f"{tl_gb * 1000:.0f}MB"
    return f" [dim]([{color}]TL:{size_str}[/{color}])[/dim]"
Refactor -- "recommender" --- cratedb_toolkit/admin/xmover/analyzer.py | 103 ++---- cratedb_toolkit/admin/xmover/cli.py | 355 ++----------------- cratedb_toolkit/admin/xmover/model.py | 68 +++- cratedb_toolkit/admin/xmover/recommender.py | 366 ++++++++++++++++++++ 4 files changed, 476 insertions(+), 416 deletions(-) create mode 100644 cratedb_toolkit/admin/xmover/recommender.py diff --git a/cratedb_toolkit/admin/xmover/analyzer.py b/cratedb_toolkit/admin/xmover/analyzer.py index f9b8d6a9..311b1a33 100644 --- a/cratedb_toolkit/admin/xmover/analyzer.py +++ b/cratedb_toolkit/admin/xmover/analyzer.py @@ -5,67 +5,14 @@ import logging import math from collections import defaultdict -from dataclasses import dataclass from typing import Any, Dict, List, Optional, Set, Tuple, Union from .database import CrateDBClient -from .model import NodeInfo, ShardInfo +from .model import DistributionStats, MoveRecommendation, NodeInfo, RecommendationConstraints, ShardInfo logger = logging.getLogger(__name__) -@dataclass -class MoveRecommendation: - """Recommendation for moving a shard""" - - table_name: str - schema_name: str - shard_id: int - from_node: str - to_node: str - from_zone: str - to_zone: str - shard_type: str - size_gb: float - reason: str - - def to_sql(self) -> str: - """Generate the SQL command for this move""" - return ( - f'ALTER TABLE "{self.schema_name}"."{self.table_name}" ' - f"REROUTE MOVE SHARD {self.shard_id} " - f"FROM '{self.from_node}' TO '{self.to_node}';" - ) - - @property - def safety_score(self) -> float: - """Calculate a safety score for this move (0-1, higher is safer)""" - score = 1.0 - - # Penalize if moving to same zone (not ideal for zone distribution) - if self.from_zone == self.to_zone: - score -= 0.3 - - # Bonus for zone balancing moves - if "rebalancing" in self.reason.lower(): - score += 0.2 - - # Ensure score stays in valid range - return max(0.0, min(1.0, score)) - - -@dataclass -class DistributionStats: - """Statistics about shard 
distribution""" - - total_shards: int - total_size_gb: float - zones: Dict[str, int] - nodes: Dict[str, int] - zone_balance_score: float # 0-100, higher is better - node_balance_score: float # 0-100, higher is better - - class ShardAnalyzer: """Analyzer for CrateDB shard distribution and rebalancing""" @@ -227,18 +174,7 @@ def find_nodes_with_capacity( available_nodes.sort(key=lambda n: n.available_space_gb, reverse=True) return available_nodes - def generate_rebalancing_recommendations( - self, - table_name: Optional[str] = None, - min_size_gb: float = 40.0, - max_size_gb: float = 60.0, - zone_tolerance_percent: float = 10.0, - min_free_space_gb: float = 100.0, - max_recommendations: int = 10, - prioritize_space: bool = False, - source_node: Optional[str] = None, - max_disk_usage_percent: float = 90.0, - ) -> List[MoveRecommendation]: + def generate_rebalancing_recommendations(self, constraints: RecommendationConstraints) -> List[MoveRecommendation]: """Generate recommendations for rebalancing shards Args: @@ -250,15 +186,18 @@ def generate_rebalancing_recommendations( recommendations: List[MoveRecommendation] = [] # Get moveable shards (only healthy ones for actual operations) - moveable_shards = self.find_moveable_shards(min_size_gb, max_size_gb, table_name) + moveable_shards = self.find_moveable_shards(constraints.min_size, constraints.max_size, constraints.table_name) - print(f"Analyzing {len(moveable_shards)} candidate shards in size range {min_size_gb}-{max_size_gb}GB...") + print( + f"Analyzing {len(moveable_shards)} candidate shards " + f"in size range {constraints.min_size}-{constraints.max_size}GB..." 
+ ) if not moveable_shards: return recommendations # Analyze current zone balance - zone_stats = self.check_zone_balance(table_name, zone_tolerance_percent) + zone_stats = self.check_zone_balance(constraints.table_name, constraints.zone_tolerance) # Calculate target distribution total_shards = sum(stats["TOTAL"] for stats in zone_stats.values()) @@ -271,8 +210,8 @@ def generate_rebalancing_recommendations( for zone, stats in zone_stats.items(): current_count = stats["TOTAL"] - threshold_high = target_per_zone * (1 + zone_tolerance_percent / 100) - threshold_low = target_per_zone * (1 - zone_tolerance_percent / 100) + threshold_high = target_per_zone * (1 + constraints.zone_tolerance / 100) + threshold_low = target_per_zone * (1 - constraints.zone_tolerance / 100) if current_count > threshold_high: overloaded_zones.append(zone) @@ -280,9 +219,9 @@ def generate_rebalancing_recommendations( underloaded_zones.append(zone) # Optimize processing: if filtering by source node, only process those shards - if source_node: - processing_shards = [s for s in moveable_shards if s.node_name == source_node] - print(f"Focusing on {len(processing_shards)} shards from node {source_node}") + if constraints.source_node: + processing_shards = [s for s in moveable_shards if s.node_name == constraints.source_node] + print(f"Focusing on {len(processing_shards)} shards from node {constraints.source_node}") else: processing_shards = moveable_shards @@ -295,7 +234,7 @@ def generate_rebalancing_recommendations( logger.info(f"Shard not found: {i}") continue - if len(recommendations) >= max_recommendations: + if len(recommendations) >= constraints.max_recommendations: logger.info(f"Found {len(recommendations)} recommendations for shard: {shard.shard_id}") break @@ -306,7 +245,7 @@ def generate_rebalancing_recommendations( total_evaluated += 1 # Skip based on priority mode - if not prioritize_space: + if not constraints.prioritize_space: # Zone balancing mode: only move shards from overloaded 
zones if shard.zone not in overloaded_zones: continue @@ -316,13 +255,13 @@ def generate_rebalancing_recommendations( target_nodes = self._find_nodes_with_capacity_cached( required_space_gb=shard.size_gb, exclude_nodes={shard.node_name}, # Don't move to same node - min_free_space_gb=min_free_space_gb, - max_disk_usage_percent=max_disk_usage_percent, + min_free_space_gb=constraints.min_free_space, + max_disk_usage_percent=constraints.max_disk_usage, ) # Quick pre-filter to avoid expensive safety validations # Only check nodes in different zones (for zone balancing) - if not prioritize_space: + if not constraints.prioritize_space: target_nodes = [node for node in target_nodes if node.zone != shard.zone] # Limit to top 3 candidates to reduce validation overhead @@ -346,7 +285,7 @@ def generate_rebalancing_recommendations( ) # Check if this move would be safe - is_safe, safety_msg = self.validate_move_safety(temp_rec, max_disk_usage_percent) + is_safe, safety_msg = self.validate_move_safety(temp_rec, constraints.max_disk_usage) if is_safe: safe_target_nodes.append(candidate_node) @@ -354,7 +293,7 @@ def generate_rebalancing_recommendations( continue # No safe targets found, skip this shard target_node: NodeInfo - if prioritize_space: + if constraints.prioritize_space: # Space priority mode: choose node with most available space target_node = safe_target_nodes[0] # Already sorted by available space (desc) else: @@ -384,7 +323,7 @@ def generate_rebalancing_recommendations( continue # No suitable target found # Determine the reason for the move - if prioritize_space: + if constraints.prioritize_space: if shard.zone == target_node.zone: reason = f"Space optimization within {shard.zone}" else: diff --git a/cratedb_toolkit/admin/xmover/cli.py b/cratedb_toolkit/admin/xmover/cli.py index 2ad88d15..065b89be 100644 --- a/cratedb_toolkit/admin/xmover/cli.py +++ b/cratedb_toolkit/admin/xmover/cli.py @@ -1,9 +1,10 @@ """ -Command line interface for XMover - CrateDB Shard Analyzer 
def recommend(
    ctx,
    table: Optional[str],
    node: Optional[str],
    min_size: float,
    max_size: float,
    zone_tolerance: float,
    min_free_space: float,
    max_moves: int,
    max_disk_usage: float,
    prioritize_space: bool,
    validate: bool,
    dry_run: bool,
    auto_execute: bool,
):
    """Generate shard movement recommendations for rebalancing.

    Thin CLI wrapper: the analysis, safety validation, and output formerly
    inlined here now live in Recommender. This function only packs the CLI
    options into a RecommendationConstraints value and delegates.
    NOTE(review): click passes these parameters by keyword, so the reordering
    of the signature relative to the CLI options should be safe -- confirm
    against the option decorators above.
    """
    recommender = Recommender(
        client=ctx.obj["client"],
        constraints=RecommendationConstraints(
            table_name=table,
            source_node=node,
            min_size=min_size,
            max_size=max_size,
            zone_tolerance=zone_tolerance,
            min_free_space=min_free_space,
            max_recommendations=max_moves,
            max_disk_usage=max_disk_usage,
            prioritize_space=prioritize_space,
        ),
    )
    # Execution-side switches (dry-run analysis vs. SQL generation vs. auto-execute)
    # are handled inside the recommender, not encoded in the constraints.
    recommender.start(auto_execute=auto_execute, validate=validate, dry_run=dry_run)
count is below threshold""" - - recovery_monitor = RecoveryMonitor(client, RecoveryOptions(include_transitioning=True)) - wait_time = 0 - - while True: - # Check active recoveries (including transitioning) - recoveries = recovery_monitor.get_cluster_recovery_status() - active_count = len([r for r in recoveries if r.overall_progress < 100.0 or r.stage != "DONE"]) - status = f"{active_count}/{max_concurrent_recoveries}" - if active_count < max_concurrent_recoveries: - if wait_time > 0: - console.print(f" [green]βœ“ Recovery capacity available ({status} active)[/green]") - break - if wait_time == 0: - console.print(f" [yellow]⏳ Waiting for recovery capacity... ({status} active)[/yellow]") - elif wait_time % 30 == 0: # Update every 30 seconds - console.print(f" [yellow]⏳ Still waiting... ({status} active)[/yellow]") - - time.sleep(10) # Check every 10 seconds - wait_time += 10 - - -def _execute_recommendations_safely(client, recommendations, validate: bool): - """Execute recommendations with extensive safety measures""" - - # Filter to only safe recommendations - safe_recommendations = [] - if validate: - analyzer = ShardAnalyzer(client) - for rec in recommendations: - is_safe, safety_msg = analyzer.validate_move_safety(rec, max_disk_usage_percent=95.0) - if is_safe: - safe_recommendations.append(rec) - else: - safe_recommendations = recommendations - - if not safe_recommendations: - console.print("[yellow]⚠ No safe recommendations to execute[/yellow]") - return - - console.print("\n[bold red]🚨 AUTO-EXECUTION MODE 🚨[/bold red]") - console.print(f"About to execute {len(safe_recommendations)} shard moves automatically:") - console.print() - - # Show what will be executed - for i, rec in enumerate(safe_recommendations, 1): - table_display = f"{rec.schema_name}.{rec.table_name}" if rec.schema_name != "doc" else rec.table_name - console.print(f" {i}. 
{table_display} S{rec.shard_id} ({rec.size_gb:.1f}GB) {rec.from_node} β†’ {rec.to_node}") - - console.print() - console.print("[bold yellow]⚠ SAFETY WARNINGS:[/bold yellow]") - console.print(" β€’ These commands will immediately start shard movements") - console.print(" β€’ Each move will temporarily impact cluster performance") - console.print(" β€’ Recovery time depends on shard size and network speed") - console.print(" β€’ You should monitor progress with: xmover monitor-recovery --watch") - console.print() - - # Double confirmation - try: - response1 = input("Type 'EXECUTE' to proceed with automatic execution: ").strip() - if response1 != "EXECUTE": - console.print("[yellow]❌ Execution cancelled[/yellow]") - return - - response2 = input(f"Confirm: Execute {len(safe_recommendations)} shard moves? (yes/no): ").strip().lower() - if response2 not in ["yes", "y"]: - console.print("[yellow]❌ Execution cancelled[/yellow]") - return - - except KeyboardInterrupt: - console.print("\n[yellow]❌ Execution cancelled by user[/yellow]") - return - - console.print(f"\nπŸš€ [bold green]Executing {len(safe_recommendations)} shard moves...[/bold green]") - console.print() - - successful_moves = 0 - failed_moves = 0 - - for i, rec in enumerate(safe_recommendations, 1): - table_display = f"{rec.schema_name}.{rec.table_name}" if rec.schema_name != "doc" else rec.table_name - sql_command = rec.to_sql() - - console.print( - f"[{i}/{len(safe_recommendations)}] Executing: {table_display} S{rec.shard_id} ({rec.size_gb:.1f}GB)" - ) - console.print(f" {rec.from_node} β†’ {rec.to_node}") - - try: - # Execute the SQL command - result = client.execute_query(sql_command) - - if result.get("rowcount", 0) >= 0: # Success indicator for ALTER statements - console.print(" [green]βœ… SUCCESS[/green] - Move initiated") - successful_moves += 1 - - # Smart delay: check active recoveries before next move - if i < len(safe_recommendations): - _wait_for_recovery_capacity(client, 
max_concurrent_recoveries=5) - else: - console.print(f" [red]❌ FAILED[/red] - Unexpected result: {result}") - failed_moves += 1 - - except Exception as e: - console.print(f" [red]❌ FAILED[/red] - Error: {e}") - failed_moves += 1 - - # Ask whether to continue after a failure - if i < len(safe_recommendations): - try: - continue_response = ( - input(f" Continue with remaining {len(safe_recommendations) - i} moves? (yes/no): ") - .strip() - .lower() - ) - if continue_response not in ["yes", "y"]: - console.print("[yellow]⏹ Execution stopped by user[/yellow]") - break - except KeyboardInterrupt: - console.print("\n[yellow]⏹ Execution stopped by user[/yellow]") - break - - console.print() - - # Final summary - console.print("πŸ“Š [bold]Execution Summary:[/bold]") - console.print(f" Successful moves: [green]{successful_moves}[/green]") - console.print(f" Failed moves: [red]{failed_moves}[/red]") - console.print(f" Total attempted: {successful_moves + failed_moves}") - - if successful_moves > 0: - console.print() - console.print("[green]βœ… Shard moves initiated successfully![/green]") - console.print("[dim]πŸ’‘ Monitor progress with:[/dim]") - console.print("[dim] xmover monitor-recovery --watch[/dim]") - console.print("[dim]πŸ’‘ Check cluster status with:[/dim]") - console.print("[dim] xmover analyze[/dim]") - - if failed_moves > 0: - console.print() - console.print(f"[yellow]⚠ {failed_moves} moves failed - check cluster status and retry if needed[/yellow]") - - if __name__ == "__main__": main() diff --git a/cratedb_toolkit/admin/xmover/model.py b/cratedb_toolkit/admin/xmover/model.py index 4d271445..12286597 100644 --- a/cratedb_toolkit/admin/xmover/model.py +++ b/cratedb_toolkit/admin/xmover/model.py @@ -1,5 +1,6 @@ +import dataclasses from dataclasses import dataclass -from typing import Optional +from typing import Dict, Optional @dataclass @@ -99,3 +100,68 @@ def translog_size_gb(self) -> float: def translog_percentage(self) -> float: """Translog size as percentage 
of shard size""" return (self.translog_size_bytes / self.size_bytes * 100) if self.size_bytes > 0 else 0 + + +@dataclass +class MoveRecommendation: + """Recommendation for moving a shard""" + + table_name: str + schema_name: str + shard_id: int + from_node: str + to_node: str + from_zone: str + to_zone: str + shard_type: str + size_gb: float + reason: str + + def to_sql(self) -> str: + """Generate the SQL command for this move""" + return ( + f'ALTER TABLE "{self.schema_name}"."{self.table_name}" ' + f"REROUTE MOVE SHARD {self.shard_id} " + f"FROM '{self.from_node}' TO '{self.to_node}';" + ) + + @property + def safety_score(self) -> float: + """Calculate a safety score for this move (0-1, higher is safer)""" + score = 1.0 + + # Penalize if moving to same zone (not ideal for zone distribution) + if self.from_zone == self.to_zone: + score -= 0.3 + + # Bonus for zone balancing moves + if "rebalancing" in self.reason.lower(): + score += 0.2 + + # Ensure score stays in valid range + return max(0.0, min(1.0, score)) + + +@dataclass +class DistributionStats: + """Statistics about shard distribution""" + + total_shards: int + total_size_gb: float + zones: Dict[str, int] + nodes: Dict[str, int] + zone_balance_score: float # 0-100, higher is better + node_balance_score: float # 0-100, higher is better + + +@dataclasses.dataclass +class RecommendationConstraints: + min_size: float = 40.0 + max_size: float = 60.0 + table_name: Optional[str] = None + source_node: Optional[str] = None + zone_tolerance: float = 10.0 + min_free_space: float = 100.0 + max_recommendations: int = 10 + max_disk_usage: float = 90.0 + prioritize_space: bool = False diff --git a/cratedb_toolkit/admin/xmover/recommender.py b/cratedb_toolkit/admin/xmover/recommender.py new file mode 100644 index 00000000..7e780600 --- /dev/null +++ b/cratedb_toolkit/admin/xmover/recommender.py @@ -0,0 +1,366 @@ +import time + +from rich import box +from rich.console import Console +from rich.panel import Panel +from 
rich.table import Table + +from .analyzer import ShardAnalyzer +from .database import CrateDBClient +from .model import RecommendationConstraints +from .recovery import RecoveryMonitor, RecoveryOptions +from .util import format_size + +console = Console() + + +class Recommender: + def __init__(self, client: CrateDBClient, constraints: RecommendationConstraints): + self.client = client + self.constraints = constraints + self.analyzer = ShardAnalyzer(self.client) + + def start( + self, + auto_execute: bool, + validate: bool, + dry_run: bool, + ): + # Safety check for auto-execute + if auto_execute and dry_run: + console.print("[red]❌ Error: --auto-execute requires --execute flag[/red]") + console.print("[dim]Use: --execute --auto-execute[/dim]") + return + + mode_text = "DRY RUN - Analysis Only" if dry_run else "EXECUTION MODE" + console.print( + Panel.fit( + f"[bold blue]Generating Rebalancing Recommendations[/bold blue] - " + f"[bold {'green' if dry_run else 'red'}]{mode_text}[/bold {'green' if dry_run else 'red'}]" + ) + ) + console.print("[dim]Note: Only analyzing healthy shards (STARTED + 100% recovered) for safe operations[/dim]") + console.print("[dim]Zone conflict detection: Prevents moves that would violate CrateDB's zone awareness[/dim]") + if self.constraints.prioritize_space: + console.print("[dim]Mode: Prioritizing available space over zone balancing[/dim]") + else: + console.print("[dim]Mode: Prioritizing zone balancing over available space[/dim]") + + if self.constraints.source_node: + console.print(f"[dim]Filtering: Only showing moves from source node '{self.constraints.source_node}'[/dim]") + + console.print( + f"[dim]Safety thresholds: Max disk usage {self.constraints.max_disk_usage}%, " + f"Min free space {self.constraints.min_free_space}GB[/dim]" + ) + + if dry_run: + console.print("[green]Running in DRY RUN mode - no SQL commands will be generated[/green]") + else: + console.print("[red]EXECUTION MODE - SQL commands will be generated for actual 
moves[/red]") + console.print() + + recommendations = self.analyzer.generate_rebalancing_recommendations(constraints=self.constraints) + + if not recommendations: + if self.constraints.source_node: + console.print( + f"[yellow]No safe recommendations found for node '{self.constraints.source_node}'[/yellow]" + ) + console.print("[dim]This could be due to:[/dim]") + console.print("[dim] β€’ Zone conflicts preventing safe moves[/dim]") + console.print( + f"[dim] β€’ Target nodes exceeding {self.constraints.max_disk_usage}% disk usage threshold[/dim]" + ) + console.print( + f"[dim] β€’ Insufficient free space on target nodes (need {self.constraints.min_free_space}GB)[/dim]" + ) + console.print( + f"[dim] β€’ No shards in size range {self.constraints.min_size}-{self.constraints.max_size}GB[/dim]" + ) + console.print("[dim]Suggestions:[/dim]") + console.print("[dim] β€’ Try: --max-disk-usage 95 (allow higher disk usage)[/dim]") + console.print("[dim] β€’ Try: --min-free-space 50 (reduce space requirements)[/dim]") + console.print("[dim] β€’ Try: different size ranges or remove --node filter[/dim]") + else: + console.print("[green]No rebalancing recommendations needed. 
Cluster appears well balanced![/green]") + return + + # Show recommendations table + rec_table = Table(title=f"Rebalancing Recommendations ({len(recommendations)} moves)", box=box.ROUNDED) + rec_table.add_column("Table", style="cyan") + rec_table.add_column("Shard", justify="right", style="magenta") + rec_table.add_column("Type", style="blue") + rec_table.add_column("From Node", style="red") + rec_table.add_column("To Node", style="green") + rec_table.add_column("Target Free Space", justify="right", style="cyan") + rec_table.add_column("Zone Change", style="yellow") + rec_table.add_column("Size", justify="right", style="white") + rec_table.add_column("Reason", style="dim") + if validate: + rec_table.add_column("Safety Check", style="bold") + + # Create a mapping of node names to available space for display + node_space_map = {node.name: node.available_space_gb for node in self.analyzer.nodes} + + for rec in recommendations: + zone_change = f"{rec.from_zone} β†’ {rec.to_zone}" if rec.from_zone != rec.to_zone else rec.from_zone + target_free_space = node_space_map.get(rec.to_node, 0) + + row = [ + f"{rec.schema_name}.{rec.table_name}", + str(rec.shard_id), + rec.shard_type, + rec.from_node, + rec.to_node, + format_size(target_free_space), + zone_change, + format_size(rec.size_gb), + rec.reason, + ] + + if validate: + is_safe, safety_msg = self.analyzer.validate_move_safety( + rec, max_disk_usage_percent=self.constraints.max_disk_usage + ) + safety_status = "[green]βœ“ SAFE[/green]" if is_safe else f"[red]βœ— {safety_msg}[/red]" + row.append(safety_status) + + rec_table.add_row(*row) + + console.print(rec_table) + console.print() + + # Generate SQL commands or show dry-run analysis + if dry_run: + console.print(Panel.fit("[bold yellow]Dry Run Analysis - No Commands Generated[/bold yellow]")) + console.print("[dim]# This is a dry run - showing what would be recommended[/dim]") + console.print("[dim]# Use --execute flag to generate actual SQL commands[/dim]") + 
console.print() + + safe_moves = 0 + zone_conflicts = 0 + space_issues = 0 + + for i, rec in enumerate(recommendations, 1): + if validate: + is_safe, safety_msg = self.analyzer.validate_move_safety( + rec, max_disk_usage_percent=self.constraints.max_disk_usage + ) + if not is_safe: + if "zone conflict" in safety_msg.lower(): + zone_conflicts += 1 + console.print(f"[yellow]⚠ Move {i}: WOULD BE SKIPPED - {safety_msg}[/yellow]") + elif "space" in safety_msg.lower(): + space_issues += 1 + console.print(f"[yellow]⚠ Move {i}: WOULD BE SKIPPED - {safety_msg}[/yellow]") + else: + console.print(f"[yellow]⚠ Move {i}: WOULD BE SKIPPED - {safety_msg}[/yellow]") + continue + safe_moves += 1 + + console.print(f"[green]βœ“ Move {i}: WOULD EXECUTE - {rec.reason}[/green]") + console.print(f"[dim] Target SQL: {rec.to_sql()}[/dim]") + + console.print() + console.print("[bold]Dry Run Summary:[/bold]") + console.print(f" β€’ Safe moves that would execute: [green]{safe_moves}[/green]") + console.print(f" β€’ Zone conflicts prevented: [yellow]{zone_conflicts}[/yellow]") + console.print(f" β€’ Space-related issues: [yellow]{space_issues}[/yellow]") + if safe_moves > 0: + console.print( + f"\n[green]βœ“ Ready to execute {safe_moves} safe moves. " + f"Use --execute to generate SQL commands.[/green]" + ) + else: + console.print( + "\n[yellow]⚠ No safe moves identified. 
Review cluster balance or adjust parameters.[/yellow]" + ) + else: + console.print(Panel.fit("[bold green]Generated SQL Commands[/bold green]")) + console.print("[dim]# Copy and paste these commands to execute the moves[/dim]") + console.print("[dim]# ALWAYS test in a non-production environment first![/dim]") + console.print("[dim]# These commands only operate on healthy shards (STARTED + fully recovered)[/dim]") + console.print("[dim]# Commands use quoted identifiers for schema and table names[/dim]") + console.print() + + safe_moves = 0 + zone_conflicts = 0 + for i, rec in enumerate(recommendations, 1): + if validate: + is_safe, safety_msg = self.analyzer.validate_move_safety( + rec, max_disk_usage_percent=self.constraints.max_disk_usage + ) + if not is_safe: + if "Zone conflict" in safety_msg: + zone_conflicts += 1 + console.print(f"-- Move {i}: SKIPPED - {safety_msg}") + console.print( + "-- Tip: Try moving to a different zone or check existing shard distribution" + ) + else: + console.print(f"-- Move {i}: SKIPPED - {safety_msg}") + continue + safe_moves += 1 + + console.print(f"-- Move {i}: {rec.reason}") + console.print(f"{rec.to_sql()}") + console.print() + + # Auto-execution if requested + if auto_execute: + self._execute_recommendations_safely(recommendations, validate) + + if validate and safe_moves < len(recommendations): + if zone_conflicts > 0: + console.print(f"[yellow]Warning: {zone_conflicts} moves skipped due to zone conflicts[/yellow]") + console.print( + "[yellow]Tip: Use 'find-candidates' to see current shard distribution across zones[/yellow]" + ) + console.print( + f"[yellow]Warning: Only {safe_moves} of {len(recommendations)} moves passed safety validation[/yellow]" + ) + + def _execute_recommendations_safely(self, recommendations, validate: bool): + """Execute recommendations with extensive safety measures""" + + # Filter to only safe recommendations + safe_recommendations = [] + if validate: + for rec in recommendations: + is_safe, 
safety_msg = self.analyzer.validate_move_safety(rec, max_disk_usage_percent=95.0) + if is_safe: + safe_recommendations.append(rec) + else: + safe_recommendations = recommendations + + if not safe_recommendations: + console.print("[yellow]⚠ No safe recommendations to execute[/yellow]") + return + + console.print("\n[bold red]🚨 AUTO-EXECUTION MODE 🚨[/bold red]") + console.print(f"About to execute {len(safe_recommendations)} shard moves automatically:") + console.print() + + # Show what will be executed + for i, rec in enumerate(safe_recommendations, 1): + table_display = f"{rec.schema_name}.{rec.table_name}" if rec.schema_name != "doc" else rec.table_name + console.print( + f" {i}. {table_display} S{rec.shard_id} ({rec.size_gb:.1f}GB) {rec.from_node} β†’ {rec.to_node}" + ) + + console.print() + console.print("[bold yellow]⚠ SAFETY WARNINGS:[/bold yellow]") + console.print(" β€’ These commands will immediately start shard movements") + console.print(" β€’ Each move will temporarily impact cluster performance") + console.print(" β€’ Recovery time depends on shard size and network speed") + console.print(" β€’ You should monitor progress with: xmover monitor-recovery --watch") + console.print() + + # Double confirmation + try: + response1 = input("Type 'EXECUTE' to proceed with automatic execution: ").strip() + if response1 != "EXECUTE": + console.print("[yellow]❌ Execution cancelled[/yellow]") + return + + response2 = input(f"Confirm: Execute {len(safe_recommendations)} shard moves? 
(yes/no): ").strip().lower() + if response2 not in ["yes", "y"]: + console.print("[yellow]❌ Execution cancelled[/yellow]") + return + + except KeyboardInterrupt: + console.print("\n[yellow]❌ Execution cancelled by user[/yellow]") + return + + console.print(f"\nπŸš€ [bold green]Executing {len(safe_recommendations)} shard moves...[/bold green]") + console.print() + + successful_moves = 0 + failed_moves = 0 + + for i, rec in enumerate(safe_recommendations, 1): + table_display = f"{rec.schema_name}.{rec.table_name}" if rec.schema_name != "doc" else rec.table_name + sql_command = rec.to_sql() + + console.print( + f"[{i}/{len(safe_recommendations)}] Executing: {table_display} S{rec.shard_id} ({rec.size_gb:.1f}GB)" + ) + console.print(f" {rec.from_node} β†’ {rec.to_node}") + + try: + # Execute the SQL command + result = self.client.execute_query(sql_command) + + if result.get("rowcount", 0) >= 0: # Success indicator for ALTER statements + console.print(" [green]βœ… SUCCESS[/green] - Move initiated") + successful_moves += 1 + + # Smart delay: check active recoveries before next move + if i < len(safe_recommendations): + self._wait_for_recovery_capacity(max_concurrent_recoveries=5) + else: + console.print(f" [red]❌ FAILED[/red] - Unexpected result: {result}") + failed_moves += 1 + + except Exception as e: + console.print(f" [red]❌ FAILED[/red] - Error: {e}") + failed_moves += 1 + + # Ask whether to continue after a failure + if i < len(safe_recommendations): + try: + continue_response = ( + input(f" Continue with remaining {len(safe_recommendations) - i} moves? 
(yes/no): ") + .strip() + .lower() + ) + if continue_response not in ["yes", "y"]: + console.print("[yellow]⏹ Execution stopped by user[/yellow]") + break + except KeyboardInterrupt: + console.print("\n[yellow]⏹ Execution stopped by user[/yellow]") + break + + console.print() + + # Final summary + console.print("πŸ“Š [bold]Execution Summary:[/bold]") + console.print(f" Successful moves: [green]{successful_moves}[/green]") + console.print(f" Failed moves: [red]{failed_moves}[/red]") + console.print(f" Total attempted: {successful_moves + failed_moves}") + + if successful_moves > 0: + console.print() + console.print("[green]βœ… Shard moves initiated successfully![/green]") + console.print("[dim]πŸ’‘ Monitor progress with:[/dim]") + console.print("[dim] xmover monitor-recovery --watch[/dim]") + console.print("[dim]πŸ’‘ Check cluster status with:[/dim]") + console.print("[dim] xmover analyze[/dim]") + + if failed_moves > 0: + console.print() + console.print(f"[yellow]⚠ {failed_moves} moves failed - check cluster status and retry if needed[/yellow]") + + def _wait_for_recovery_capacity(self, max_concurrent_recoveries: int = 5): + """Wait until active recovery count is below threshold""" + + recovery_monitor = RecoveryMonitor(self.client, RecoveryOptions(include_transitioning=True)) + wait_time = 0 + + while True: + # Check active recoveries (including transitioning) + recoveries = recovery_monitor.get_cluster_recovery_status() + active_count = len([r for r in recoveries if r.overall_progress < 100.0 or r.stage != "DONE"]) + status = f"{active_count}/{max_concurrent_recoveries}" + if active_count < max_concurrent_recoveries: + if wait_time > 0: + console.print(f" [green]βœ“ Recovery capacity available ({status} active)[/green]") + break + if wait_time == 0: + console.print(f" [yellow]⏳ Waiting for recovery capacity... ({status} active)[/yellow]") + elif wait_time % 30 == 0: # Update every 30 seconds + console.print(f" [yellow]⏳ Still waiting... 
({status} active)[/yellow]") + + time.sleep(10) # Check every 10 seconds + wait_time += 10 From 2dc5b913614ae75d934f8db9b5bb331a5c26d116 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Wed, 20 Aug 2025 20:16:10 +0200 Subject: [PATCH 08/18] Admin/XMover: Refactor -- "reporter" --- cratedb_toolkit/admin/xmover/analyzer.py | 22 +- cratedb_toolkit/admin/xmover/cli.py | 323 +++------------------- cratedb_toolkit/admin/xmover/model.py | 21 +- cratedb_toolkit/admin/xmover/reporter.py | 325 +++++++++++++++++++++++ 4 files changed, 387 insertions(+), 304 deletions(-) create mode 100644 cratedb_toolkit/admin/xmover/reporter.py diff --git a/cratedb_toolkit/admin/xmover/analyzer.py b/cratedb_toolkit/admin/xmover/analyzer.py index 311b1a33..98af6a21 100644 --- a/cratedb_toolkit/admin/xmover/analyzer.py +++ b/cratedb_toolkit/admin/xmover/analyzer.py @@ -8,7 +8,7 @@ from typing import Any, Dict, List, Optional, Set, Tuple, Union from .database import CrateDBClient -from .model import DistributionStats, MoveRecommendation, NodeInfo, RecommendationConstraints, ShardInfo +from .model import DistributionStats, NodeInfo, RecommendationConstraints, ShardInfo, ShardMoveRecommendation logger = logging.getLogger(__name__) @@ -174,7 +174,9 @@ def find_nodes_with_capacity( available_nodes.sort(key=lambda n: n.available_space_gb, reverse=True) return available_nodes - def generate_rebalancing_recommendations(self, constraints: RecommendationConstraints) -> List[MoveRecommendation]: + def generate_rebalancing_recommendations( + self, constraints: RecommendationConstraints + ) -> List[ShardMoveRecommendation]: """Generate recommendations for rebalancing shards Args: @@ -183,7 +185,7 @@ def generate_rebalancing_recommendations(self, constraints: RecommendationConstr source_node: If specified, only generate recommendations for shards on this node max_disk_usage_percent: Maximum disk usage percentage for target nodes """ - recommendations: List[MoveRecommendation] = [] + recommendations: 
List[ShardMoveRecommendation] = [] # Get moveable shards (only healthy ones for actual operations) moveable_shards = self.find_moveable_shards(constraints.min_size, constraints.max_size, constraints.table_name) @@ -271,7 +273,7 @@ def generate_rebalancing_recommendations(self, constraints: RecommendationConstr safe_target_nodes = [] for candidate_node in target_nodes: # Create a temporary recommendation to test safety - temp_rec = MoveRecommendation( + temp_rec = ShardMoveRecommendation( table_name=shard.table_name, schema_name=shard.schema_name, shard_id=shard.shard_id, @@ -333,7 +335,7 @@ def generate_rebalancing_recommendations(self, constraints: RecommendationConstr if shard.zone == target_node.zone: reason = f"Node balancing within {shard.zone}" - recommendation = MoveRecommendation( + recommendation = ShardMoveRecommendation( table_name=shard.table_name, schema_name=shard.schema_name, shard_id=shard.shard_id, @@ -355,7 +357,7 @@ def generate_rebalancing_recommendations(self, constraints: RecommendationConstr return recommendations def validate_move_safety( - self, recommendation: MoveRecommendation, max_disk_usage_percent: float = 90.0 + self, recommendation: ShardMoveRecommendation, max_disk_usage_percent: float = 90.0 ) -> Tuple[bool, str]: """Validate that a move recommendation is safe to execute""" # Find target node (with caching) @@ -401,7 +403,7 @@ def _get_node_cached(self, node_name: str): self._node_lookup_cache[node_name] = target_node return target_node - def _check_zone_conflict_cached(self, recommendation: MoveRecommendation) -> Optional[str]: + def _check_zone_conflict_cached(self, recommendation: ShardMoveRecommendation) -> Optional[str]: """Check zone conflicts with caching""" # Create cache key: table, shard, target zone target_zone = self._get_node_zone(recommendation.to_node) @@ -459,7 +461,7 @@ def _find_nodes_with_capacity_cached( self._target_nodes_cache[cache_key] = result return result - def _check_zone_conflict(self, recommendation: 
MoveRecommendation) -> Optional[str]: + def _check_zone_conflict(self, recommendation: ShardMoveRecommendation) -> Optional[str]: """Check if moving this shard would create a zone conflict Performs comprehensive zone safety analysis: @@ -745,7 +747,7 @@ def plan_node_decommission(self, node_name: str, min_free_space_gb: float = 100. safe_targets = [] for target in potential_targets: # Create a temporary recommendation to test zone safety - temp_rec = MoveRecommendation( + temp_rec = ShardMoveRecommendation( table_name=shard.table_name, schema_name=shard.schema_name, shard_id=shard.shard_id, @@ -770,7 +772,7 @@ def plan_node_decommission(self, node_name: str, min_free_space_gb: float = 100. # Choose the target with most available space best_target = safe_targets[0] move_plan.append( - MoveRecommendation( + ShardMoveRecommendation( table_name=shard.table_name, schema_name=shard.schema_name, shard_id=shard.shard_id, diff --git a/cratedb_toolkit/admin/xmover/cli.py b/cratedb_toolkit/admin/xmover/cli.py index 065b89be..76143895 100644 --- a/cratedb_toolkit/admin/xmover/cli.py +++ b/cratedb_toolkit/admin/xmover/cli.py @@ -13,13 +13,18 @@ from rich.panel import Panel from rich.table import Table -from cratedb_toolkit.admin.xmover.model import MoveRecommendation, RecommendationConstraints, ShardInfo +from cratedb_toolkit.admin.xmover.model import ( + RecommendationConstraints, + ShardInfo, + ShardMoveRequest, + SizeCriteria, +) from cratedb_toolkit.admin.xmover.recommender import Recommender +from cratedb_toolkit.admin.xmover.reporter import ShardReporter from .analyzer import ShardAnalyzer from .database import CrateDBClient from .recovery import RecoveryMonitor, RecoveryOptions -from .util import format_percentage, format_size console = Console() @@ -55,185 +60,31 @@ def analyze(ctx, table: Optional[str]): """Analyze current shard distribution across nodes and zones""" client = ctx.obj["client"] analyzer = ShardAnalyzer(client) - - console.print(Panel.fit("[bold 
blue]CrateDB Cluster Analysis[/bold blue]")) - - # Get cluster overview (includes all shards for complete analysis) - overview = analyzer.get_cluster_overview() - - # Cluster summary table - summary_table = Table(title="Cluster Summary", box=box.ROUNDED) - summary_table.add_column("Metric", style="cyan") - summary_table.add_column("Value", style="magenta") - - summary_table.add_row("Nodes", str(overview["nodes"])) - summary_table.add_row("Availability Zones", str(overview["zones"])) - summary_table.add_row("Total Shards", str(overview["total_shards"])) - summary_table.add_row("Primary Shards", str(overview["primary_shards"])) - summary_table.add_row("Replica Shards", str(overview["replica_shards"])) - summary_table.add_row("Total Size", format_size(overview["total_size_gb"])) - - console.print(summary_table) - console.print() - - # Disk watermarks table - if overview.get("watermarks"): - watermarks_table = Table(title="Disk Allocation Watermarks", box=box.ROUNDED) - watermarks_table.add_column("Setting", style="cyan") - watermarks_table.add_column("Value", style="magenta") - - watermarks = overview["watermarks"] - watermarks_table.add_row("Low Watermark", str(watermarks.get("low", "Not set"))) - watermarks_table.add_row("High Watermark", str(watermarks.get("high", "Not set"))) - watermarks_table.add_row("Flood Stage", str(watermarks.get("flood_stage", "Not set"))) - watermarks_table.add_row( - "Enable for Single Node", str(watermarks.get("enable_for_single_data_node", "Not set")) - ) - - console.print(watermarks_table) - console.print() - - # Zone distribution table - zone_table = Table(title="Zone Distribution", box=box.ROUNDED) - zone_table.add_column("Zone", style="cyan") - zone_table.add_column("Shards", justify="right", style="magenta") - zone_table.add_column("Percentage", justify="right", style="green") - - total_shards = overview["total_shards"] - for zone, count in overview["zone_distribution"].items(): - percentage = (count / total_shards * 100) if 
total_shards > 0 else 0 - zone_table.add_row(zone, str(count), f"{percentage:.1f}%") - - console.print(zone_table) - console.print() - - # Node health table - node_table = Table(title="Node Health", box=box.ROUNDED) - node_table.add_column("Node", style="cyan") - node_table.add_column("Zone", style="blue") - node_table.add_column("Shards", justify="right", style="magenta") - node_table.add_column("Size", justify="right", style="green") - node_table.add_column("Disk Usage", justify="right") - node_table.add_column("Available Space", justify="right", style="green") - node_table.add_column("Until Low WM", justify="right", style="yellow") - node_table.add_column("Until High WM", justify="right", style="red") - - for node_info in overview["node_health"]: - # Format watermark remaining capacity - low_wm_remaining = ( - format_size(node_info["remaining_to_low_watermark_gb"]) - if node_info["remaining_to_low_watermark_gb"] > 0 - else "[red]Exceeded[/red]" - ) - high_wm_remaining = ( - format_size(node_info["remaining_to_high_watermark_gb"]) - if node_info["remaining_to_high_watermark_gb"] > 0 - else "[red]Exceeded[/red]" - ) - - node_table.add_row( - node_info["name"], - node_info["zone"], - str(node_info["shards"]), - format_size(node_info["size_gb"]), - format_percentage(node_info["disk_usage_percent"]), - format_size(node_info["available_space_gb"]), - low_wm_remaining, - high_wm_remaining, - ) - - console.print(node_table) - - # Table-specific analysis if requested - if table: - console.print() - console.print(Panel.fit(f"[bold blue]Analysis for table: {table}[/bold blue]")) - - stats = analyzer.analyze_distribution(table) - - table_summary = Table(title=f"Table {table} Distribution", box=box.ROUNDED) - table_summary.add_column("Metric", style="cyan") - table_summary.add_column("Value", style="magenta") - - table_summary.add_row("Total Shards", str(stats.total_shards)) - table_summary.add_row("Total Size", format_size(stats.total_size_gb)) - table_summary.add_row("Zone 
Balance Score", f"{stats.zone_balance_score:.1f}/100") - table_summary.add_row("Node Balance Score", f"{stats.node_balance_score:.1f}/100") - - console.print(table_summary) + reporter = ShardReporter(analyzer) + reporter.distribution(table=table) @main.command() -@click.option("--table", "-t", help="Find candidates for specific table only") @click.option("--min-size", default=40.0, help="Minimum shard size in GB (default: 40)") @click.option("--max-size", default=60.0, help="Maximum shard size in GB (default: 60)") @click.option("--limit", default=20, help="Maximum number of candidates to show (default: 20)") +@click.option("--table", "-t", help="Find candidates for specific table only") @click.option("--node", help="Only show candidates from this specific source node (e.g., data-hot-4)") @click.pass_context -def find_candidates(ctx, table: Optional[str], min_size: float, max_size: float, limit: int, node: Optional[str]): - """Find shard candidates for movement based on size criteria - - Results are sorted by nodes with least available space first, - then by shard size (smallest first) for easier moves. 
- """ +def find_candidates(ctx, min_size: float, max_size: float, limit: int, table: Optional[str], node: Optional[str]): + """Find shard candidates for movement based on size criteria""" client = ctx.obj["client"] analyzer = ShardAnalyzer(client) - - console.print(Panel.fit(f"[bold blue]Finding Moveable Shards ({min_size}-{max_size}GB)[/bold blue]")) - - if node: - console.print(f"[dim]Filtering: Only showing candidates from source node '{node}'[/dim]") - - # Find moveable candidates (only healthy shards suitable for operations) - candidates = analyzer.find_moveable_shards(min_size, max_size, table) - - # Filter by node if specified - if node: - candidates = [c for c in candidates if c.node_name == node] - - if not candidates: - if node: - console.print(f"[yellow]No moveable shards found on node '{node}' in the specified size range.[/yellow]") - console.print("[dim]Tip: Try different size ranges or remove --node filter to see all candidates[/dim]") - else: - console.print("[yellow]No moveable shards found in the specified size range.[/yellow]") - return - - # Show limited results - shown_candidates = candidates[:limit] - - candidates_table = Table( - title=f"Moveable Shard Candidates (showing {len(shown_candidates)} of {len(candidates)})", box=box.ROUNDED + reporter = ShardReporter(analyzer) + reporter.movement_candidates( + criteria=SizeCriteria( + min_size=min_size, + max_size=max_size, + table_name=table, + source_node=node, + ), + limit=limit, ) - candidates_table.add_column("Table", style="cyan") - candidates_table.add_column("Shard ID", justify="right", style="magenta") - candidates_table.add_column("Type", style="blue") - candidates_table.add_column("Node", style="green") - candidates_table.add_column("Zone", style="yellow") - candidates_table.add_column("Size", justify="right", style="red") - candidates_table.add_column("Node Free Space", justify="right", style="white") - candidates_table.add_column("Documents", justify="right", style="dim") - - # Create a 
mapping of node names to available space for display - node_space_map = {node.name: node.available_space_gb for node in analyzer.nodes} - - for shard in shown_candidates: - node_free_space = node_space_map.get(shard.node_name, 0) - candidates_table.add_row( - f"{shard.schema_name}.{shard.table_name}", - str(shard.shard_id), - shard.shard_type, - shard.node_name, - shard.zone, - format_size(shard.size_gb), - format_size(node_free_space), - f"{shard.num_docs:,}", - ) - - console.print(candidates_table) - - if len(candidates) > limit: - console.print(f"\n[dim]... and {len(candidates) - limit} more candidates[/dim]") @main.command() @@ -499,130 +350,16 @@ def validate_move(ctx, schema_table: str, shard_id: int, from_node: str, to_node """ client = ctx.obj["client"] analyzer = ShardAnalyzer(client) - - # Parse schema and table - if "." not in schema_table: - console.print("[red]Error: Schema and table must be in format 'schema.table'[/red]") - return - - schema_name, table_name = schema_table.split(".", 1) - - console.print(Panel.fit("[bold blue]Validating Shard Move[/bold blue]")) - console.print(f"[dim]Move: {schema_name}.{table_name}[{shard_id}] from {from_node} to {to_node}[/dim]") - console.print() - - # Find the nodes - from_node_info = None - to_node_info = None - for node in analyzer.nodes: - if node.name == from_node: - from_node_info = node - if node.name == to_node: - to_node_info = node - - if not from_node_info: - console.print(f"[red]βœ— Source node '{from_node}' not found in cluster[/red]") - return - - if not to_node_info: - console.print(f"[red]βœ— Target node '{to_node}' not found in cluster[/red]") - return - - # Find the specific shard - target_shard = None - for shard in analyzer.shards: - if ( - shard.schema_name == schema_name - and shard.table_name == table_name - and shard.shard_id == shard_id - and shard.node_name == from_node - ): - target_shard = shard - break - - if not target_shard: - console.print(f"[red]βœ— Shard {shard_id} not found on 
node {from_node}[/red]") - console.print("[dim]Use 'xmover find-candidates' to see available shards[/dim]") - return - - # Create a move recommendation for validation - recommendation = MoveRecommendation( - table_name=table_name, - schema_name=schema_name, - shard_id=shard_id, - from_node=from_node, - to_node=to_node, - from_zone=from_node_info.zone, - to_zone=to_node_info.zone, - shard_type=target_shard.shard_type, - size_gb=target_shard.size_gb, - reason="Manual validation", - ) - - # Display shard details - details_table = Table(title="Shard Details", box=box.ROUNDED) - details_table.add_column("Property", style="cyan") - details_table.add_column("Value", style="magenta") - - details_table.add_row("Table", f"{schema_name}.{table_name}") - details_table.add_row("Shard ID", str(shard_id)) - details_table.add_row("Type", target_shard.shard_type) - details_table.add_row("Size", format_size(target_shard.size_gb)) - details_table.add_row("Documents", f"{target_shard.num_docs:,}") - details_table.add_row("State", target_shard.state) - details_table.add_row("Routing State", target_shard.routing_state) - details_table.add_row("From Node", f"{from_node} ({from_node_info.zone})") - details_table.add_row("To Node", f"{to_node} ({to_node_info.zone})") - details_table.add_row("Zone Change", "Yes" if from_node_info.zone != to_node_info.zone else "No") - - console.print(details_table) - console.print() - - # Perform comprehensive validation - is_safe, safety_msg = analyzer.validate_move_safety(recommendation, max_disk_usage_percent=max_disk_usage) - - if is_safe: - console.print("[green]βœ“ VALIDATION PASSED - Move appears safe[/green]") - console.print(f"[green]βœ“ {safety_msg}[/green]") - console.print() - - # Show the SQL command - console.print(Panel.fit("[bold green]Ready to Execute[/bold green]")) - console.print("[dim]# Copy and paste this command to execute the move[/dim]") - console.print() - console.print(f"{recommendation.to_sql()}") - console.print() - 
console.print("[dim]# Monitor shard health after execution[/dim]") - console.print( - "[dim]# Check with: SELECT * FROM sys.shards WHERE table_name = '{table_name}' AND id = {shard_id};[/dim]" + reporter = ShardReporter(analyzer) + reporter.validate_move( + request=ShardMoveRequest( + schema_table=schema_table, + shard_id=shard_id, + from_node=from_node, + to_node=to_node, + max_disk_usage=max_disk_usage, ) - else: - console.print("[red]βœ— VALIDATION FAILED - Move not safe[/red]") - console.print(f"[red]βœ— {safety_msg}[/red]") - console.print() - - # Provide troubleshooting guidance - if "zone conflict" in safety_msg.lower(): - console.print("[yellow]πŸ’‘ Troubleshooting Zone Conflicts:[/yellow]") - console.print(" β€’ Check current shard distribution: xmover zone-analysis --show-shards") - console.print(" β€’ Try moving to a different zone") - console.print(" β€’ Verify cluster has proper zone-awareness configuration") - elif "node conflict" in safety_msg.lower(): - console.print("[yellow]πŸ’‘ Troubleshooting Node Conflicts:[/yellow]") - console.print(" β€’ The target node already has a copy of this shard") - console.print(" β€’ Choose a different target node") - console.print(" β€’ Check shard distribution: xmover analyze") - elif "space" in safety_msg.lower(): - console.print("[yellow]πŸ’‘ Troubleshooting Space Issues:[/yellow]") - console.print(" β€’ Free up space on the target node") - console.print(" β€’ Choose a node with more available capacity") - console.print(" β€’ Check node capacity: xmover analyze") - elif "usage" in safety_msg.lower(): - console.print("[yellow]πŸ’‘ Troubleshooting High Disk Usage:[/yellow]") - console.print(" β€’ Wait for target node disk usage to decrease") - console.print(" β€’ Choose a node with lower disk usage") - console.print(" β€’ Check cluster health: xmover analyze") - console.print(" β€’ Consider using --max-disk-usage option for urgent moves") + ) @main.command() diff --git a/cratedb_toolkit/admin/xmover/model.py 
b/cratedb_toolkit/admin/xmover/model.py index 12286597..056d834c 100644 --- a/cratedb_toolkit/admin/xmover/model.py +++ b/cratedb_toolkit/admin/xmover/model.py @@ -103,7 +103,18 @@ def translog_percentage(self) -> float: @dataclass -class MoveRecommendation: +class ShardMoveRequest: + """Request for moving a shard""" + + schema_table: str + shard_id: int + from_node: str + to_node: str + max_disk_usage: float + + +@dataclass +class ShardMoveRecommendation: """Recommendation for moving a shard""" table_name: str @@ -154,6 +165,14 @@ class DistributionStats: node_balance_score: float # 0-100, higher is better +@dataclasses.dataclass +class SizeCriteria: + min_size: float = 40.0 + max_size: float = 60.0 + table_name: Optional[str] = None + source_node: Optional[str] = None + + @dataclasses.dataclass class RecommendationConstraints: min_size: float = 40.0 diff --git a/cratedb_toolkit/admin/xmover/reporter.py b/cratedb_toolkit/admin/xmover/reporter.py new file mode 100644 index 00000000..6912bc54 --- /dev/null +++ b/cratedb_toolkit/admin/xmover/reporter.py @@ -0,0 +1,325 @@ +from typing import Any, Dict + +from rich import box +from rich.console import Console +from rich.panel import Panel +from rich.table import Table + +from cratedb_toolkit.admin.xmover.analyzer import ShardAnalyzer +from cratedb_toolkit.admin.xmover.model import ShardMoveRecommendation, ShardMoveRequest, SizeCriteria +from cratedb_toolkit.admin.xmover.util import format_percentage, format_size + +console = Console() + + +class ShardReporter: + def __init__(self, analyzer: ShardAnalyzer): + self.analyzer = analyzer + + def distribution(self, table: str = None): + """Analyze current shard distribution across nodes and zones""" + console.print(Panel.fit("[bold blue]CrateDB Cluster Analysis[/bold blue]")) + + # Get cluster overview (includes all shards for complete analysis) + overview: Dict[str, Any] = self.analyzer.get_cluster_overview() + + # Cluster summary table + summary_table = 
Table(title="Cluster Summary", box=box.ROUNDED) + summary_table.add_column("Metric", style="cyan") + summary_table.add_column("Value", style="magenta") + + summary_table.add_row("Nodes", str(overview["nodes"])) + summary_table.add_row("Availability Zones", str(overview["zones"])) + summary_table.add_row("Total Shards", str(overview["total_shards"])) + summary_table.add_row("Primary Shards", str(overview["primary_shards"])) + summary_table.add_row("Replica Shards", str(overview["replica_shards"])) + summary_table.add_row("Total Size", format_size(overview["total_size_gb"])) + + console.print(summary_table) + console.print() + + # Disk watermarks table + if overview.get("watermarks"): + watermarks_table = Table(title="Disk Allocation Watermarks", box=box.ROUNDED) + watermarks_table.add_column("Setting", style="cyan") + watermarks_table.add_column("Value", style="magenta") + + watermarks = overview["watermarks"] + watermarks_table.add_row("Low Watermark", str(watermarks.get("low", "Not set"))) + watermarks_table.add_row("High Watermark", str(watermarks.get("high", "Not set"))) + watermarks_table.add_row("Flood Stage", str(watermarks.get("flood_stage", "Not set"))) + watermarks_table.add_row( + "Enable for Single Node", str(watermarks.get("enable_for_single_data_node", "Not set")) + ) + + console.print(watermarks_table) + console.print() + + # Zone distribution table + zone_table = Table(title="Zone Distribution", box=box.ROUNDED) + zone_table.add_column("Zone", style="cyan") + zone_table.add_column("Shards", justify="right", style="magenta") + zone_table.add_column("Percentage", justify="right", style="green") + + total_shards = overview["total_shards"] + for zone, count in overview["zone_distribution"].items(): + percentage = (count / total_shards * 100) if total_shards > 0 else 0 + zone_table.add_row(zone, str(count), f"{percentage:.1f}%") + + console.print(zone_table) + console.print() + + # Node health table + node_table = Table(title="Node Health", 
box=box.ROUNDED) + node_table.add_column("Node", style="cyan") + node_table.add_column("Zone", style="blue") + node_table.add_column("Shards", justify="right", style="magenta") + node_table.add_column("Size", justify="right", style="green") + node_table.add_column("Disk Usage", justify="right") + node_table.add_column("Available Space", justify="right", style="green") + node_table.add_column("Until Low WM", justify="right", style="yellow") + node_table.add_column("Until High WM", justify="right", style="red") + + for node_info in overview["node_health"]: + # Format watermark remaining capacity + low_wm_remaining = ( + format_size(node_info["remaining_to_low_watermark_gb"]) + if node_info["remaining_to_low_watermark_gb"] > 0 + else "[red]Exceeded[/red]" + ) + high_wm_remaining = ( + format_size(node_info["remaining_to_high_watermark_gb"]) + if node_info["remaining_to_high_watermark_gb"] > 0 + else "[red]Exceeded[/red]" + ) + + node_table.add_row( + node_info["name"], + node_info["zone"], + str(node_info["shards"]), + format_size(node_info["size_gb"]), + format_percentage(node_info["disk_usage_percent"]), + format_size(node_info["available_space_gb"]), + low_wm_remaining, + high_wm_remaining, + ) + + console.print(node_table) + + # Table-specific analysis if requested + if table: + console.print() + console.print(Panel.fit(f"[bold blue]Analysis for table: {table}[/bold blue]")) + + stats = self.analyzer.analyze_distribution(table) + + table_summary = Table(title=f"Table {table} Distribution", box=box.ROUNDED) + table_summary.add_column("Metric", style="cyan") + table_summary.add_column("Value", style="magenta") + + table_summary.add_row("Total Shards", str(stats.total_shards)) + table_summary.add_row("Total Size", format_size(stats.total_size_gb)) + table_summary.add_row("Zone Balance Score", f"{stats.zone_balance_score:.1f}/100") + table_summary.add_row("Node Balance Score", f"{stats.node_balance_score:.1f}/100") + + console.print(table_summary) + + def 
movement_candidates(self, criteria: SizeCriteria, limit: int): + """ + Find shard candidates for movement based on size criteria + + Results are sorted by nodes with least available space first, + then by shard size (smallest first) for easier moves. + """ + + console.print( + Panel.fit(f"[bold blue]Finding Moveable Shards ({criteria.min_size}-{criteria.max_size}GB)[/bold blue]") + ) + + if criteria.source_node: + console.print(f"[dim]Filtering: Only showing candidates from source node '{criteria.source_node}'[/dim]") + + # Find moveable candidates (only healthy shards suitable for operations) + candidates = self.analyzer.find_moveable_shards(criteria.min_size, criteria.max_size, criteria.table_name) + + # Filter by node if specified + if criteria.source_node: + candidates = [c for c in candidates if c.node_name == criteria.source_node] + + if not candidates: + if criteria.source_node: + console.print( + f"[yellow]No moveable shards found on node '{criteria.source_node}' " + f"in the specified size range.[/yellow]" + ) + console.print("[dim]Tip: Try different size ranges or remove --node filter to see all candidates[/dim]") + else: + console.print("[yellow]No moveable shards found in the specified size range.[/yellow]") + return + + # Show limited results + shown_candidates = candidates[:limit] + + candidates_table = Table( + title=f"Moveable Shard Candidates (showing {len(shown_candidates)} of {len(candidates)})", box=box.ROUNDED + ) + candidates_table.add_column("Table", style="cyan") + candidates_table.add_column("Shard ID", justify="right", style="magenta") + candidates_table.add_column("Type", style="blue") + candidates_table.add_column("Node", style="green") + candidates_table.add_column("Zone", style="yellow") + candidates_table.add_column("Size", justify="right", style="red") + candidates_table.add_column("Node Free Space", justify="right", style="white") + candidates_table.add_column("Documents", justify="right", style="dim") + + # Create a mapping of node 
names to available space for display + node_space_map = {node.name: node.available_space_gb for node in self.analyzer.nodes} + + for shard in shown_candidates: + node_free_space = node_space_map.get(shard.node_name, 0) + candidates_table.add_row( + f"{shard.schema_name}.{shard.table_name}", + str(shard.shard_id), + shard.shard_type, + shard.node_name, + shard.zone, + format_size(shard.size_gb), + format_size(node_free_space), + f"{shard.num_docs:,}", + ) + + console.print(candidates_table) + + if len(candidates) > limit: + console.print(f"\n[dim]... and {len(candidates) - limit} more candidates[/dim]") + + def validate_move(self, request: ShardMoveRequest): + # Parse schema and table + if "." not in request.schema_table: + console.print("[red]Error: Schema and table must be in format 'schema.table'[/red]") + return + + schema_name, table_name = request.schema_table.split(".", 1) + + console.print(Panel.fit("[bold blue]Validating Shard Move[/bold blue]")) + console.print( + f"[dim]Move: {schema_name}.{table_name}[{request.shard_id}] " + f"from {request.from_node} to {request.to_node}[/dim]" + ) + console.print() + + # Find the nodes + from_node_info = None + to_node_info = None + for node in self.analyzer.nodes: + if node.name == request.from_node: + from_node_info = node + if node.name == request.to_node: + to_node_info = node + + if not from_node_info: + console.print(f"[red]βœ— Source node '{request.from_node}' not found in cluster[/red]") + return + + if not to_node_info: + console.print(f"[red]βœ— Target node '{request.to_node}' not found in cluster[/red]") + return + + # Find the specific shard + target_shard = None + for shard in self.analyzer.shards: + if ( + shard.schema_name == schema_name + and shard.table_name == table_name + and shard.shard_id == request.shard_id + and shard.node_name == request.from_node + ): + target_shard = shard + break + + if not target_shard: + console.print(f"[red]βœ— Shard {request.shard_id} not found on node 
{request.from_node}[/red]") + console.print("[dim]Use 'xmover find-candidates' to see available shards[/dim]") + return + + # Create a move recommendation for validation + recommendation = ShardMoveRecommendation( + table_name=table_name, + schema_name=schema_name, + shard_id=request.shard_id, + from_node=request.from_node, + to_node=request.to_node, + from_zone=from_node_info.zone, + to_zone=to_node_info.zone, + shard_type=target_shard.shard_type, + size_gb=target_shard.size_gb, + reason="Manual validation", + ) + + # Display shard details + details_table = Table(title="Shard Details", box=box.ROUNDED) + details_table.add_column("Property", style="cyan") + details_table.add_column("Value", style="magenta") + + details_table.add_row("Table", f"{schema_name}.{table_name}") + details_table.add_row("Shard ID", str(request.shard_id)) + details_table.add_row("Type", target_shard.shard_type) + details_table.add_row("Size", format_size(target_shard.size_gb)) + details_table.add_row("Documents", f"{target_shard.num_docs:,}") + details_table.add_row("State", target_shard.state) + details_table.add_row("Routing State", target_shard.routing_state) + details_table.add_row("From Node", f"{request.from_node} ({from_node_info.zone})") + details_table.add_row("To Node", f"{request.to_node} ({to_node_info.zone})") + details_table.add_row("Zone Change", "Yes" if from_node_info.zone != to_node_info.zone else "No") + + console.print(details_table) + console.print() + + # Perform comprehensive validation + is_safe, safety_msg = self.analyzer.validate_move_safety( + recommendation, max_disk_usage_percent=request.max_disk_usage + ) + + if is_safe: + console.print("[green]βœ“ VALIDATION PASSED - Move appears safe[/green]") + console.print(f"[green]βœ“ {safety_msg}[/green]") + console.print() + + # Show the SQL command + console.print(Panel.fit("[bold green]Ready to Execute[/bold green]")) + console.print("[dim]# Copy and paste this command to execute the move[/dim]") + console.print() + 
console.print(f"{recommendation.to_sql()}") + console.print() + console.print("[dim]# Monitor shard health after execution[/dim]") + console.print( + "[dim]# Check with: SELECT * FROM sys.shards " + "WHERE table_name = '{table_name}' AND id = {shard_id};[/dim]" + ) + else: + console.print("[red]βœ— VALIDATION FAILED - Move not safe[/red]") + console.print(f"[red]βœ— {safety_msg}[/red]") + console.print() + + # Provide troubleshooting guidance + if "zone conflict" in safety_msg.lower(): + console.print("[yellow]πŸ’‘ Troubleshooting Zone Conflicts:[/yellow]") + console.print(" β€’ Check current shard distribution: xmover zone-analysis --show-shards") + console.print(" β€’ Try moving to a different zone") + console.print(" β€’ Verify cluster has proper zone-awareness configuration") + elif "node conflict" in safety_msg.lower(): + console.print("[yellow]πŸ’‘ Troubleshooting Node Conflicts:[/yellow]") + console.print(" β€’ The target node already has a copy of this shard") + console.print(" β€’ Choose a different target node") + console.print(" β€’ Check shard distribution: xmover analyze") + elif "space" in safety_msg.lower(): + console.print("[yellow]πŸ’‘ Troubleshooting Space Issues:[/yellow]") + console.print(" β€’ Free up space on the target node") + console.print(" β€’ Choose a node with more available capacity") + console.print(" β€’ Check node capacity: xmover analyze") + elif "usage" in safety_msg.lower(): + console.print("[yellow]πŸ’‘ Troubleshooting High Disk Usage:[/yellow]") + console.print(" β€’ Wait for target node disk usage to decrease") + console.print(" β€’ Choose a node with lower disk usage") + console.print(" β€’ Check cluster health: xmover analyze") + console.print(" β€’ Consider using --max-disk-usage option for urgent moves") From 745b23a735ed8ad60e5032378aee157e2a8a9f10 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Wed, 20 Aug 2025 20:37:58 +0200 Subject: [PATCH 09/18] Admin/XMover: Refactor -- analyze -- shard vs. 
zone --- .../admin/xmover/analyze/__init__.py | 0 .../xmover/{reporter.py => analyze/report.py} | 2 +- .../xmover/{analyzer.py => analyze/shard.py} | 10 +- cratedb_toolkit/admin/xmover/analyze/zone.py | 159 ++++++++++++++++++ cratedb_toolkit/admin/xmover/cli.py | 152 +---------------- cratedb_toolkit/admin/xmover/recommender.py | 2 +- pyproject.toml | 2 +- 7 files changed, 178 insertions(+), 149 deletions(-) create mode 100644 cratedb_toolkit/admin/xmover/analyze/__init__.py rename cratedb_toolkit/admin/xmover/{reporter.py => analyze/report.py} (99%) rename cratedb_toolkit/admin/xmover/{analyzer.py => analyze/shard.py} (99%) create mode 100644 cratedb_toolkit/admin/xmover/analyze/zone.py diff --git a/cratedb_toolkit/admin/xmover/analyze/__init__.py b/cratedb_toolkit/admin/xmover/analyze/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cratedb_toolkit/admin/xmover/reporter.py b/cratedb_toolkit/admin/xmover/analyze/report.py similarity index 99% rename from cratedb_toolkit/admin/xmover/reporter.py rename to cratedb_toolkit/admin/xmover/analyze/report.py index 6912bc54..ec0e6e3b 100644 --- a/cratedb_toolkit/admin/xmover/reporter.py +++ b/cratedb_toolkit/admin/xmover/analyze/report.py @@ -5,7 +5,7 @@ from rich.panel import Panel from rich.table import Table -from cratedb_toolkit.admin.xmover.analyzer import ShardAnalyzer +from cratedb_toolkit.admin.xmover.analyze.shard import ShardAnalyzer from cratedb_toolkit.admin.xmover.model import ShardMoveRecommendation, ShardMoveRequest, SizeCriteria from cratedb_toolkit.admin.xmover.util import format_percentage, format_size diff --git a/cratedb_toolkit/admin/xmover/analyzer.py b/cratedb_toolkit/admin/xmover/analyze/shard.py similarity index 99% rename from cratedb_toolkit/admin/xmover/analyzer.py rename to cratedb_toolkit/admin/xmover/analyze/shard.py index 98af6a21..f4c179fd 100644 --- a/cratedb_toolkit/admin/xmover/analyzer.py +++ b/cratedb_toolkit/admin/xmover/analyze/shard.py @@ -7,8 +7,14 @@ from 
collections import defaultdict from typing import Any, Dict, List, Optional, Set, Tuple, Union -from .database import CrateDBClient -from .model import DistributionStats, NodeInfo, RecommendationConstraints, ShardInfo, ShardMoveRecommendation +from cratedb_toolkit.admin.xmover.database import CrateDBClient +from cratedb_toolkit.admin.xmover.model import ( + DistributionStats, + NodeInfo, + RecommendationConstraints, + ShardInfo, + ShardMoveRecommendation, +) logger = logging.getLogger(__name__) diff --git a/cratedb_toolkit/admin/xmover/analyze/zone.py b/cratedb_toolkit/admin/xmover/analyze/zone.py new file mode 100644 index 00000000..5b208c2f --- /dev/null +++ b/cratedb_toolkit/admin/xmover/analyze/zone.py @@ -0,0 +1,159 @@ +from typing import Dict, List, Optional + +from rich import box +from rich.console import Console +from rich.panel import Panel +from rich.table import Table + +from cratedb_toolkit.admin.xmover.analyze.shard import ShardAnalyzer +from cratedb_toolkit.admin.xmover.database import CrateDBClient +from cratedb_toolkit.admin.xmover.model import ShardInfo + +console = Console() + + +class ZoneReport: + def __init__(self, client: CrateDBClient): + self.client = client + self.analyzer = ShardAnalyzer(self.client) + + def shard_balance(self, tolerance: float, table: Optional[str] = None): + """Check zone balance for shards""" + console.print(Panel.fit("[bold blue]Zone Balance Check[/bold blue]")) + console.print("[dim]Note: Analyzing all shards regardless of state for complete cluster view[/dim]") + console.print() + + zone_stats = self.analyzer.check_zone_balance(table, tolerance) + + if not zone_stats: + console.print("[yellow]No shards found for analysis[/yellow]") + return + + # Calculate totals and targets + total_shards = sum(stats["TOTAL"] for stats in zone_stats.values()) + zones = list(zone_stats.keys()) + target_per_zone = total_shards // len(zones) if zones else 0 + tolerance_range = (target_per_zone * (1 - tolerance / 100), target_per_zone 
* (1 + tolerance / 100)) + + balance_table = Table(title=f"Zone Balance Analysis (Target: {target_per_zone} Β±{tolerance}%)", box=box.ROUNDED) + balance_table.add_column("Zone", style="cyan") + balance_table.add_column("Primary", justify="right", style="blue") + balance_table.add_column("Replica", justify="right", style="green") + balance_table.add_column("Total", justify="right", style="magenta") + balance_table.add_column("Status", style="bold") + + for zone, stats in zone_stats.items(): + total = stats["TOTAL"] + + if tolerance_range[0] <= total <= tolerance_range[1]: + status = "[green]βœ“ Balanced[/green]" + elif total < tolerance_range[0]: + status = f"[yellow]⚠ Under ({total - target_per_zone:+})[/yellow]" + else: + status = f"[red]⚠ Over ({total - target_per_zone:+})[/red]" + + balance_table.add_row(zone, str(stats["PRIMARY"]), str(stats["REPLICA"]), str(total), status) + + console.print(balance_table) + + def distribution_conflicts(self, shard_details: bool = False, table: Optional[str] = None): + """Detailed analysis of zone distribution and potential conflicts""" + console.print(Panel.fit("[bold blue]Detailed Zone Analysis[/bold blue]")) + console.print("[dim]Comprehensive zone distribution analysis for CrateDB cluster[/dim]") + console.print() + + # Get all shards for analysis + shards = self.client.get_shards_info(table_name=table, for_analysis=True) + + if not shards: + console.print("[yellow]No shards found for analysis[/yellow]") + return + + # Organize by table and shard + tables: Dict[str, Dict[int, List[ShardInfo]]] = {} + for shard in shards: + table_key = f"{shard.schema_name}.{shard.table_name}" + if table_key not in tables: + tables[table_key] = {} + + shard_key = shard.shard_id + if shard_key not in tables[table_key]: + tables[table_key][shard_key] = [] + + tables[table_key][shard_key].append(shard) + + # Analyze each table + zone_conflicts = 0 + under_replicated = 0 + + for table_name, table_shards in tables.items(): + 
console.print(f"\n[bold cyan]Table: {table_name}[/bold cyan]") + + # Create analysis table + analysis_table = Table(title=f"Shard Distribution for {table_name}", box=box.ROUNDED) + analysis_table.add_column("Shard ID", justify="right", style="magenta") + analysis_table.add_column("Primary Zone", style="blue") + analysis_table.add_column("Replica Zones", style="green") + analysis_table.add_column("Total Copies", justify="right", style="cyan") + analysis_table.add_column("Status", style="bold") + + for shard_id, shard_copies in sorted(table_shards.items()): + primary_zone = "Unknown" + replica_zones = set() + total_copies = len(shard_copies) + zones_with_copies = set() + + for shard_copy in shard_copies: + zones_with_copies.add(shard_copy.zone) + if shard_copy.is_primary: + primary_zone = shard_copy.zone + else: + replica_zones.add(shard_copy.zone) + + # Determine status + status_parts = [] + if len(zones_with_copies) == 1: + zone_conflicts += 1 + status_parts.append("[red]⚠ ZONE CONFLICT[/red]") + + if total_copies < 2: # Assuming we want at least 1 replica + under_replicated += 1 + status_parts.append("[yellow]⚠ Under-replicated[/yellow]") + + if not status_parts: + status_parts.append("[green]βœ“ Good[/green]") + + replica_zones_str = ", ".join(sorted(replica_zones)) if replica_zones else "None" + + analysis_table.add_row( + str(shard_id), primary_zone, replica_zones_str, str(total_copies), " ".join(status_parts) + ) + + # Show individual shard details if requested + if shard_details: + for shard_copy in shard_copies: + health_indicator = "βœ“" if shard_copy.routing_state == "STARTED" else "⚠" + console.print( + f" {health_indicator} {shard_copy.shard_type} " + f"on {shard_copy.node_name} ({shard_copy.zone}) - {shard_copy.routing_state}" + ) + + console.print(analysis_table) + + # Summary + console.print("\n[bold]Zone Analysis Summary:[/bold]") + console.print(f" β€’ Tables analyzed: [cyan]{len(tables)}[/cyan]") + console.print(f" β€’ Zone conflicts detected: 
[red]{zone_conflicts}[/red]") + console.print(f" β€’ Under-replicated shards: [yellow]{under_replicated}[/yellow]") + + if zone_conflicts > 0: + console.print(f"\n[red]⚠ Found {zone_conflicts} zone conflicts that need attention![/red]") + console.print("[dim]Zone conflicts occur when all copies of a shard are in the same zone.[/dim]") + console.print("[dim]This violates CrateDB's zone-awareness and creates availability risks.[/dim]") + + if under_replicated > 0: + console.print(f"\n[yellow]⚠ Found {under_replicated} under-replicated shards.[/yellow]") + console.print("[dim]Consider increasing replication for better availability.[/dim]") + + if zone_conflicts == 0 and under_replicated == 0: + console.print("\n[green]βœ“ No critical zone distribution issues detected![/green]") diff --git a/cratedb_toolkit/admin/xmover/cli.py b/cratedb_toolkit/admin/xmover/cli.py index 76143895..159bcd0c 100644 --- a/cratedb_toolkit/admin/xmover/cli.py +++ b/cratedb_toolkit/admin/xmover/cli.py @@ -5,24 +5,22 @@ """ import sys -from typing import Dict, List, Optional, cast +from typing import List, Optional, cast import click -from rich import box from rich.console import Console from rich.panel import Panel -from rich.table import Table +from cratedb_toolkit.admin.xmover.analyze.report import ShardReporter +from cratedb_toolkit.admin.xmover.analyze.shard import ShardAnalyzer +from cratedb_toolkit.admin.xmover.analyze.zone import ZoneReport from cratedb_toolkit.admin.xmover.model import ( RecommendationConstraints, - ShardInfo, ShardMoveRequest, SizeCriteria, ) from cratedb_toolkit.admin.xmover.recommender import Recommender -from cratedb_toolkit.admin.xmover.reporter import ShardReporter -from .analyzer import ShardAnalyzer from .database import CrateDBClient from .recovery import RecoveryMonitor, RecoveryOptions @@ -182,44 +180,8 @@ def test_connection(ctx, connection_string: Optional[str]): def check_balance(ctx, table: Optional[str], tolerance: float): """Check zone balance for 
shards""" client = ctx.obj["client"] - analyzer = ShardAnalyzer(client) - - console.print(Panel.fit("[bold blue]Zone Balance Check[/bold blue]")) - console.print("[dim]Note: Analyzing all shards regardless of state for complete cluster view[/dim]") - console.print() - - zone_stats = analyzer.check_zone_balance(table, tolerance) - - if not zone_stats: - console.print("[yellow]No shards found for analysis[/yellow]") - return - - # Calculate totals and targets - total_shards = sum(stats["TOTAL"] for stats in zone_stats.values()) - zones = list(zone_stats.keys()) - target_per_zone = total_shards // len(zones) if zones else 0 - tolerance_range = (target_per_zone * (1 - tolerance / 100), target_per_zone * (1 + tolerance / 100)) - - balance_table = Table(title=f"Zone Balance Analysis (Target: {target_per_zone} Β±{tolerance}%)", box=box.ROUNDED) - balance_table.add_column("Zone", style="cyan") - balance_table.add_column("Primary", justify="right", style="blue") - balance_table.add_column("Replica", justify="right", style="green") - balance_table.add_column("Total", justify="right", style="magenta") - balance_table.add_column("Status", style="bold") - - for zone, stats in zone_stats.items(): - total = stats["TOTAL"] - - if tolerance_range[0] <= total <= tolerance_range[1]: - status = "[green]βœ“ Balanced[/green]" - elif total < tolerance_range[0]: - status = f"[yellow]⚠ Under ({total - target_per_zone:+})[/yellow]" - else: - status = f"[red]⚠ Over ({total - target_per_zone:+})[/red]" - - balance_table.add_row(zone, str(stats["PRIMARY"]), str(stats["REPLICA"]), str(total), status) - - console.print(balance_table) + report = ZoneReport(client=client) + report.shard_balance(tolerance=tolerance, table=table) @main.command() @@ -229,106 +191,8 @@ def check_balance(ctx, table: Optional[str], tolerance: float): def zone_analysis(ctx, table: Optional[str], show_shards: bool): """Detailed analysis of zone distribution and potential conflicts""" client = ctx.obj["client"] - - 
console.print(Panel.fit("[bold blue]Detailed Zone Analysis[/bold blue]")) - console.print("[dim]Comprehensive zone distribution analysis for CrateDB cluster[/dim]") - console.print() - - # Get all shards for analysis - shards = client.get_shards_info(table_name=table, for_analysis=True) - - if not shards: - console.print("[yellow]No shards found for analysis[/yellow]") - return - - # Organize by table and shard - tables: Dict[str, Dict[str, List[ShardInfo]]] = {} - for shard in shards: - table_key = f"{shard.schema_name}.{shard.table_name}" - if table_key not in tables: - tables[table_key] = {} - - shard_key = shard.shard_id - if shard_key not in tables[table_key]: - tables[table_key][shard_key] = [] - - tables[table_key][shard_key].append(shard) - - # Analyze each table - zone_conflicts = 0 - under_replicated = 0 - - for table_name, table_shards in tables.items(): - console.print(f"\n[bold cyan]Table: {table_name}[/bold cyan]") - - # Create analysis table - analysis_table = Table(title=f"Shard Distribution for {table_name}", box=box.ROUNDED) - analysis_table.add_column("Shard ID", justify="right", style="magenta") - analysis_table.add_column("Primary Zone", style="blue") - analysis_table.add_column("Replica Zones", style="green") - analysis_table.add_column("Total Copies", justify="right", style="cyan") - analysis_table.add_column("Status", style="bold") - - for shard_id, shard_copies in sorted(table_shards.items()): - primary_zone = "Unknown" - replica_zones = set() - total_copies = len(shard_copies) - zones_with_copies = set() - - for shard_copy in shard_copies: - zones_with_copies.add(shard_copy.zone) - if shard_copy.is_primary: - primary_zone = shard_copy.zone - else: - replica_zones.add(shard_copy.zone) - - # Determine status - status_parts = [] - if len(zones_with_copies) == 1: - zone_conflicts += 1 - status_parts.append("[red]⚠ ZONE CONFLICT[/red]") - - if total_copies < 2: # Assuming we want at least 1 replica - under_replicated += 1 - 
status_parts.append("[yellow]⚠ Under-replicated[/yellow]") - - if not status_parts: - status_parts.append("[green]βœ“ Good[/green]") - - replica_zones_str = ", ".join(sorted(replica_zones)) if replica_zones else "None" - - analysis_table.add_row( - str(shard_id), primary_zone, replica_zones_str, str(total_copies), " ".join(status_parts) - ) - - # Show individual shard details if requested - if show_shards: - for shard_copy in shard_copies: - health_indicator = "βœ“" if shard_copy.routing_state == "STARTED" else "⚠" - console.print( - f" {health_indicator} {shard_copy.shard_type} " - f"on {shard_copy.node_name} ({shard_copy.zone}) - {shard_copy.routing_state}" - ) - - console.print(analysis_table) - - # Summary - console.print("\n[bold]Zone Analysis Summary:[/bold]") - console.print(f" β€’ Tables analyzed: [cyan]{len(tables)}[/cyan]") - console.print(f" β€’ Zone conflicts detected: [red]{zone_conflicts}[/red]") - console.print(f" β€’ Under-replicated shards: [yellow]{under_replicated}[/yellow]") - - if zone_conflicts > 0: - console.print(f"\n[red]⚠ Found {zone_conflicts} zone conflicts that need attention![/red]") - console.print("[dim]Zone conflicts occur when all copies of a shard are in the same zone.[/dim]") - console.print("[dim]This violates CrateDB's zone-awareness and creates availability risks.[/dim]") - - if under_replicated > 0: - console.print(f"\n[yellow]⚠ Found {under_replicated} under-replicated shards.[/yellow]") - console.print("[dim]Consider increasing replication for better availability.[/dim]") - - if zone_conflicts == 0 and under_replicated == 0: - console.print("\n[green]βœ“ No critical zone distribution issues detected![/green]") + report = ZoneReport(client=client) + report.distribution_conflicts(shard_details=show_shards, table=table) @main.command() diff --git a/cratedb_toolkit/admin/xmover/recommender.py b/cratedb_toolkit/admin/xmover/recommender.py index 7e780600..ceeba003 100644 --- a/cratedb_toolkit/admin/xmover/recommender.py +++ 
b/cratedb_toolkit/admin/xmover/recommender.py @@ -5,7 +5,7 @@ from rich.panel import Panel from rich.table import Table -from .analyzer import ShardAnalyzer +from .analyze.shard import ShardAnalyzer from .database import CrateDBClient from .model import RecommendationConstraints from .recovery import RecoveryMonitor, RecoveryOptions diff --git a/pyproject.toml b/pyproject.toml index bd3fa6be..f1f32203 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -324,7 +324,7 @@ lint.extend-ignore = [ "S108", ] -lint.per-file-ignores."cratedb_toolkit/admin/xmover/analyzer.py" = [ "T201" ] # Allow `print` +lint.per-file-ignores."cratedb_toolkit/admin/xmover/analyze/shard.py" = [ "T201" ] # Allow `print` lint.per-file-ignores."cratedb_toolkit/retention/cli.py" = [ "T201" ] # Allow `print` lint.per-file-ignores."cratedb_toolkit/sqlalchemy/__init__.py" = [ "F401" ] # Allow `moduleΒ΄ imported but unused lint.per-file-ignores."doc/conf.py" = [ "A001", "ERA001" ] From d2cff3f337a4b2cc6075b75d4ff23f88f8d928c4 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Wed, 20 Aug 2025 20:46:53 +0200 Subject: [PATCH 10/18] Admin/XMover: Refactor -- tune --- cratedb_toolkit/admin/xmover/cli.py | 4 ++-- cratedb_toolkit/admin/xmover/tune/__init__.py | 0 .../admin/xmover/{recommender.py => tune/recommend.py} | 10 +++++----- .../admin/xmover/{recovery.py => tune/recover.py} | 0 pyproject.toml | 8 ++++---- 5 files changed, 11 insertions(+), 11 deletions(-) create mode 100644 cratedb_toolkit/admin/xmover/tune/__init__.py rename cratedb_toolkit/admin/xmover/{recommender.py => tune/recommend.py} (97%) rename cratedb_toolkit/admin/xmover/{recovery.py => tune/recover.py} (100%) diff --git a/cratedb_toolkit/admin/xmover/cli.py b/cratedb_toolkit/admin/xmover/cli.py index 159bcd0c..d3a55fa8 100644 --- a/cratedb_toolkit/admin/xmover/cli.py +++ b/cratedb_toolkit/admin/xmover/cli.py @@ -19,10 +19,10 @@ ShardMoveRequest, SizeCriteria, ) -from cratedb_toolkit.admin.xmover.recommender import Recommender +from 
cratedb_toolkit.admin.xmover.tune.recommend import Recommender +from cratedb_toolkit.admin.xmover.tune.recover import RecoveryMonitor, RecoveryOptions from .database import CrateDBClient -from .recovery import RecoveryMonitor, RecoveryOptions console = Console() diff --git a/cratedb_toolkit/admin/xmover/tune/__init__.py b/cratedb_toolkit/admin/xmover/tune/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cratedb_toolkit/admin/xmover/recommender.py b/cratedb_toolkit/admin/xmover/tune/recommend.py similarity index 97% rename from cratedb_toolkit/admin/xmover/recommender.py rename to cratedb_toolkit/admin/xmover/tune/recommend.py index ceeba003..ea427bd7 100644 --- a/cratedb_toolkit/admin/xmover/recommender.py +++ b/cratedb_toolkit/admin/xmover/tune/recommend.py @@ -5,11 +5,11 @@ from rich.panel import Panel from rich.table import Table -from .analyze.shard import ShardAnalyzer -from .database import CrateDBClient -from .model import RecommendationConstraints -from .recovery import RecoveryMonitor, RecoveryOptions -from .util import format_size +from cratedb_toolkit.admin.xmover.analyze.shard import ShardAnalyzer +from cratedb_toolkit.admin.xmover.database import CrateDBClient +from cratedb_toolkit.admin.xmover.model import RecommendationConstraints +from cratedb_toolkit.admin.xmover.tune.recover import RecoveryMonitor, RecoveryOptions +from cratedb_toolkit.admin.xmover.util import format_size console = Console() diff --git a/cratedb_toolkit/admin/xmover/recovery.py b/cratedb_toolkit/admin/xmover/tune/recover.py similarity index 100% rename from cratedb_toolkit/admin/xmover/recovery.py rename to cratedb_toolkit/admin/xmover/tune/recover.py diff --git a/pyproject.toml b/pyproject.toml index f1f32203..e296dd5d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -325,11 +325,11 @@ lint.extend-ignore = [ ] lint.per-file-ignores."cratedb_toolkit/admin/xmover/analyze/shard.py" = [ "T201" ] # Allow `print` 
-lint.per-file-ignores."cratedb_toolkit/retention/cli.py" = [ "T201" ] # Allow `print` -lint.per-file-ignores."cratedb_toolkit/sqlalchemy/__init__.py" = [ "F401" ] # Allow `moduleΒ΄ imported but unused +lint.per-file-ignores."cratedb_toolkit/retention/cli.py" = [ "T201" ] # Allow `print` +lint.per-file-ignores."cratedb_toolkit/sqlalchemy/__init__.py" = [ "F401" ] # Allow `moduleΒ΄ imported but unused lint.per-file-ignores."doc/conf.py" = [ "A001", "ERA001" ] -lint.per-file-ignores."examples/*" = [ "ERA001", "F401", "T201", "T203" ] # Allow `print` and `pprint` -lint.per-file-ignores."tests/*" = [ "S101" ] # Allow use of `assert`, and `print`. +lint.per-file-ignores."examples/*" = [ "ERA001", "F401", "T201", "T203" ] # Allow `print` and `pprint` +lint.per-file-ignores."tests/*" = [ "S101" ] # Allow use of `assert`, and `print`. lint.per-file-ignores."tests/adapter/test_rockset.py" = [ "E402" ] lint.per-file-ignores."tests/info/test_http.py" = [ "E402" ] From a91c8a170fae604fc1e09a850e4bbee6bb22a52a Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Wed, 20 Aug 2025 20:52:56 +0200 Subject: [PATCH 11/18] Admin/XMover: Refactor -- util --- .../admin/xmover/analyze/report.py | 2 +- cratedb_toolkit/admin/xmover/analyze/shard.py | 2 +- cratedb_toolkit/admin/xmover/analyze/zone.py | 2 +- cratedb_toolkit/admin/xmover/cli.py | 132 +---------------- .../admin/xmover/tune/recommend.py | 4 +- cratedb_toolkit/admin/xmover/tune/recover.py | 4 +- cratedb_toolkit/admin/xmover/util/__init__.py | 0 .../admin/xmover/{ => util}/database.py | 0 cratedb_toolkit/admin/xmover/util/error.py | 133 ++++++++++++++++++ .../admin/xmover/{util.py => util/format.py} | 0 10 files changed, 144 insertions(+), 135 deletions(-) create mode 100644 cratedb_toolkit/admin/xmover/util/__init__.py rename cratedb_toolkit/admin/xmover/{ => util}/database.py (100%) create mode 100644 cratedb_toolkit/admin/xmover/util/error.py rename cratedb_toolkit/admin/xmover/{util.py => util/format.py} (100%) diff --git 
a/cratedb_toolkit/admin/xmover/analyze/report.py b/cratedb_toolkit/admin/xmover/analyze/report.py index ec0e6e3b..eff0f399 100644 --- a/cratedb_toolkit/admin/xmover/analyze/report.py +++ b/cratedb_toolkit/admin/xmover/analyze/report.py @@ -7,7 +7,7 @@ from cratedb_toolkit.admin.xmover.analyze.shard import ShardAnalyzer from cratedb_toolkit.admin.xmover.model import ShardMoveRecommendation, ShardMoveRequest, SizeCriteria -from cratedb_toolkit.admin.xmover.util import format_percentage, format_size +from cratedb_toolkit.admin.xmover.util.format import format_percentage, format_size console = Console() diff --git a/cratedb_toolkit/admin/xmover/analyze/shard.py b/cratedb_toolkit/admin/xmover/analyze/shard.py index f4c179fd..334b394e 100644 --- a/cratedb_toolkit/admin/xmover/analyze/shard.py +++ b/cratedb_toolkit/admin/xmover/analyze/shard.py @@ -7,7 +7,6 @@ from collections import defaultdict from typing import Any, Dict, List, Optional, Set, Tuple, Union -from cratedb_toolkit.admin.xmover.database import CrateDBClient from cratedb_toolkit.admin.xmover.model import ( DistributionStats, NodeInfo, @@ -15,6 +14,7 @@ ShardInfo, ShardMoveRecommendation, ) +from cratedb_toolkit.admin.xmover.util.database import CrateDBClient logger = logging.getLogger(__name__) diff --git a/cratedb_toolkit/admin/xmover/analyze/zone.py b/cratedb_toolkit/admin/xmover/analyze/zone.py index 5b208c2f..18d032eb 100644 --- a/cratedb_toolkit/admin/xmover/analyze/zone.py +++ b/cratedb_toolkit/admin/xmover/analyze/zone.py @@ -6,8 +6,8 @@ from rich.table import Table from cratedb_toolkit.admin.xmover.analyze.shard import ShardAnalyzer -from cratedb_toolkit.admin.xmover.database import CrateDBClient from cratedb_toolkit.admin.xmover.model import ShardInfo +from cratedb_toolkit.admin.xmover.util.database import CrateDBClient console = Console() diff --git a/cratedb_toolkit/admin/xmover/cli.py b/cratedb_toolkit/admin/xmover/cli.py index d3a55fa8..d9bb5627 100644 --- a/cratedb_toolkit/admin/xmover/cli.py 
+++ b/cratedb_toolkit/admin/xmover/cli.py @@ -5,11 +5,10 @@ """ import sys -from typing import List, Optional, cast +from typing import Optional import click from rich.console import Console -from rich.panel import Panel from cratedb_toolkit.admin.xmover.analyze.report import ShardReporter from cratedb_toolkit.admin.xmover.analyze.shard import ShardAnalyzer @@ -21,8 +20,8 @@ ) from cratedb_toolkit.admin.xmover.tune.recommend import Recommender from cratedb_toolkit.admin.xmover.tune.recover import RecoveryMonitor, RecoveryOptions - -from .database import CrateDBClient +from cratedb_toolkit.admin.xmover.util.database import CrateDBClient +from cratedb_toolkit.admin.xmover.util.error import explain_cratedb_error console = Console() @@ -236,130 +235,7 @@ def explain_error(ctx, error_message: Optional[str]): Example: xmover explain-error "NO(a copy of this shard is already allocated to this node)" """ - console.print(Panel.fit("[bold blue]CrateDB Error Message Decoder[/bold blue]")) - console.print("[dim]Helps decode and troubleshoot CrateDB shard allocation errors[/dim]") - console.print() - - if not error_message: - console.print("Please paste the CrateDB error message (press Enter twice when done):") - lines: List[str] = [] - while True: - try: - line = input() - if line.strip() == "" and lines: - break - lines.append(line) - except (EOFError, KeyboardInterrupt): - break - error_message = "\n".join(lines) - - if not error_message.strip(): - console.print("[yellow]No error message provided[/yellow]") - return - - console.print("[dim]Analyzing error message...[/dim]") - console.print() - - # Common CrateDB allocation error patterns and solutions - error_patterns = [ - { - "pattern": "a copy of this shard is already allocated to this node", - "title": "Node Already Has Shard Copy", - "explanation": "The target node already contains a copy (primary or replica) of this shard.", - "solutions": [ - "Choose a different target node that doesn't have this shard", - "Use 
'xmover zone-analysis --show-shards' to see current distribution", - "Verify the shard ID and table name are correct", - ], - "prevention": "Always check current shard locations before moving", - }, - { - "pattern": "there are too many copies of the shard allocated to nodes with attribute", - "title": "Zone Allocation Limit Exceeded", - "explanation": "CrateDB's zone awareness prevents too many copies in the same zone.", - "solutions": [ - "Move the shard to a different availability zone", - "Check zone balance with 'xmover check-balance'", - "Ensure target zone doesn't already have copies of this shard", - ], - "prevention": "Use 'xmover recommend' which respects zone constraints", - }, - { - "pattern": "not enough disk space", - "title": "Insufficient Disk Space", - "explanation": "The target node doesn't have enough free disk space for the shard.", - "solutions": [ - "Free up space on the target node", - "Choose a node with more available capacity", - "Check available space with 'xmover analyze'", - ], - "prevention": "Use '--min-free-space' parameter in recommendations", - }, - { - "pattern": "shard recovery limit", - "title": "Recovery Limit Exceeded", - "explanation": "Too many shards are currently being moved/recovered simultaneously.", - "solutions": [ - "Wait for current recoveries to complete", - "Check recovery status in CrateDB admin UI", - "Reduce concurrent recoveries in cluster settings", - ], - "prevention": "Move shards gradually, monitor recovery progress", - }, - { - "pattern": "allocation is disabled", - "title": "Allocation Disabled", - "explanation": "Shard allocation is temporarily disabled in the cluster.", - "solutions": [ - "Re-enable allocation: PUT /_cluster/settings " - '{"persistent":{"cluster.routing.allocation.enable":"all"}}', - "Check if allocation was disabled for maintenance", - "Verify cluster health before re-enabling", - ], - "prevention": "Check allocation status before performing moves", - }, - ] - - # Find matching patterns 
- matches = [] - error_lower = error_message.lower() - - for pattern_info in error_patterns: - if cast(str, pattern_info["pattern"]).lower() in error_lower: - matches.append(pattern_info) - - if matches: - for i, match in enumerate(matches): - if i > 0: - console.print("\n" + "─" * 60 + "\n") - - console.print(f"[bold red]🚨 {match['title']}[/bold red]") - console.print(f"[yellow]πŸ“ Explanation:[/yellow] {match['explanation']}") - console.print() - - console.print("[green]πŸ’‘ Solutions:[/green]") - for j, solution in enumerate(match["solutions"], 1): - console.print(f" {j}. {solution}") - console.print() - - console.print(f"[blue]πŸ›‘οΈ Prevention:[/blue] {match['prevention']}") - else: - console.print("[yellow]⚠ No specific pattern match found[/yellow]") - console.print() - console.print("[bold]General Troubleshooting Steps:[/bold]") - console.print("1. Check current shard distribution: [cyan]xmover analyze[/cyan]") - console.print( - "2. Validate the specific move: [cyan]xmover validate-move schema.table shard_id from_node to_node[/cyan]" - ) - console.print("3. Check zone conflicts: [cyan]xmover zone-analysis --show-shards[/cyan]") - console.print("4. Verify node capacity: [cyan]xmover analyze[/cyan]") - console.print("5. 
Review CrateDB documentation on shard allocation") - - console.print() - console.print("[dim]πŸ’‘ Tip: Use 'xmover validate-move' to check moves before execution[/dim]") - console.print( - "[dim]πŸ“š For more help: https://crate.io/docs/crate/reference/en/latest/admin/system-information.html[/dim]" - ) + explain_cratedb_error(error_message) @main.command() diff --git a/cratedb_toolkit/admin/xmover/tune/recommend.py b/cratedb_toolkit/admin/xmover/tune/recommend.py index ea427bd7..704912b1 100644 --- a/cratedb_toolkit/admin/xmover/tune/recommend.py +++ b/cratedb_toolkit/admin/xmover/tune/recommend.py @@ -6,10 +6,10 @@ from rich.table import Table from cratedb_toolkit.admin.xmover.analyze.shard import ShardAnalyzer -from cratedb_toolkit.admin.xmover.database import CrateDBClient from cratedb_toolkit.admin.xmover.model import RecommendationConstraints from cratedb_toolkit.admin.xmover.tune.recover import RecoveryMonitor, RecoveryOptions -from cratedb_toolkit.admin.xmover.util import format_size +from cratedb_toolkit.admin.xmover.util.database import CrateDBClient +from cratedb_toolkit.admin.xmover.util.format import format_size console = Console() diff --git a/cratedb_toolkit/admin/xmover/tune/recover.py b/cratedb_toolkit/admin/xmover/tune/recover.py index 958aed15..d88a295f 100644 --- a/cratedb_toolkit/admin/xmover/tune/recover.py +++ b/cratedb_toolkit/admin/xmover/tune/recover.py @@ -5,9 +5,9 @@ from rich.console import Console -from cratedb_toolkit.admin.xmover.database import CrateDBClient from cratedb_toolkit.admin.xmover.model import RecoveryInfo -from cratedb_toolkit.admin.xmover.util import format_translog_info +from cratedb_toolkit.admin.xmover.util.database import CrateDBClient +from cratedb_toolkit.admin.xmover.util.format import format_translog_info console = Console() diff --git a/cratedb_toolkit/admin/xmover/util/__init__.py b/cratedb_toolkit/admin/xmover/util/__init__.py new file mode 100644 index 00000000..e69de29b diff --git 
a/cratedb_toolkit/admin/xmover/database.py b/cratedb_toolkit/admin/xmover/util/database.py similarity index 100% rename from cratedb_toolkit/admin/xmover/database.py rename to cratedb_toolkit/admin/xmover/util/database.py diff --git a/cratedb_toolkit/admin/xmover/util/error.py b/cratedb_toolkit/admin/xmover/util/error.py new file mode 100644 index 00000000..11dd5f39 --- /dev/null +++ b/cratedb_toolkit/admin/xmover/util/error.py @@ -0,0 +1,133 @@ +from typing import List, Optional, cast + +from rich.console import Console +from rich.panel import Panel + +console = Console() + + +def explain_cratedb_error(error_message: Optional[str]): + console.print(Panel.fit("[bold blue]CrateDB Error Message Decoder[/bold blue]")) + console.print("[dim]Helps decode and troubleshoot CrateDB shard allocation errors[/dim]") + console.print() + + if not error_message: + console.print("Please paste the CrateDB error message (press Enter twice when done):") + lines: List[str] = [] + while True: + try: + line = input() + if line.strip() == "" and lines: + break + lines.append(line) + except (EOFError, KeyboardInterrupt): + break + error_message = "\n".join(lines) + + if not error_message.strip(): + console.print("[yellow]No error message provided[/yellow]") + return + + console.print("[dim]Analyzing error message...[/dim]") + console.print() + + # Common CrateDB allocation error patterns and solutions + error_patterns = [ + { + "pattern": "a copy of this shard is already allocated to this node", + "title": "Node Already Has Shard Copy", + "explanation": "The target node already contains a copy (primary or replica) of this shard.", + "solutions": [ + "Choose a different target node that doesn't have this shard", + "Use 'xmover zone-analysis --show-shards' to see current distribution", + "Verify the shard ID and table name are correct", + ], + "prevention": "Always check current shard locations before moving", + }, + { + "pattern": "there are too many copies of the shard allocated to nodes 
with attribute", + "title": "Zone Allocation Limit Exceeded", + "explanation": "CrateDB's zone awareness prevents too many copies in the same zone.", + "solutions": [ + "Move the shard to a different availability zone", + "Check zone balance with 'xmover check-balance'", + "Ensure target zone doesn't already have copies of this shard", + ], + "prevention": "Use 'xmover recommend' which respects zone constraints", + }, + { + "pattern": "not enough disk space", + "title": "Insufficient Disk Space", + "explanation": "The target node doesn't have enough free disk space for the shard.", + "solutions": [ + "Free up space on the target node", + "Choose a node with more available capacity", + "Check available space with 'xmover analyze'", + ], + "prevention": "Use '--min-free-space' parameter in recommendations", + }, + { + "pattern": "shard recovery limit", + "title": "Recovery Limit Exceeded", + "explanation": "Too many shards are currently being moved/recovered simultaneously.", + "solutions": [ + "Wait for current recoveries to complete", + "Check recovery status in CrateDB admin UI", + "Reduce concurrent recoveries in cluster settings", + ], + "prevention": "Move shards gradually, monitor recovery progress", + }, + { + "pattern": "allocation is disabled", + "title": "Allocation Disabled", + "explanation": "Shard allocation is temporarily disabled in the cluster.", + "solutions": [ + "Re-enable allocation: PUT /_cluster/settings " + '{"persistent":{"cluster.routing.allocation.enable":"all"}}', + "Check if allocation was disabled for maintenance", + "Verify cluster health before re-enabling", + ], + "prevention": "Check allocation status before performing moves", + }, + ] + + # Find matching patterns + matches = [] + error_lower = error_message.lower() + + for pattern_info in error_patterns: + if cast(str, pattern_info["pattern"]).lower() in error_lower: + matches.append(pattern_info) + + if matches: + for i, match in enumerate(matches): + if i > 0: + console.print("\n" 
+ "─" * 60 + "\n") + + console.print(f"[bold red]🚨 {match['title']}[/bold red]") + console.print(f"[yellow]πŸ“ Explanation:[/yellow] {match['explanation']}") + console.print() + + console.print("[green]πŸ’‘ Solutions:[/green]") + for j, solution in enumerate(match["solutions"], 1): + console.print(f" {j}. {solution}") + console.print() + + console.print(f"[blue]πŸ›‘οΈ Prevention:[/blue] {match['prevention']}") + else: + console.print("[yellow]⚠ No specific pattern match found[/yellow]") + console.print() + console.print("[bold]General Troubleshooting Steps:[/bold]") + console.print("1. Check current shard distribution: [cyan]xmover analyze[/cyan]") + console.print( + "2. Validate the specific move: [cyan]xmover validate-move schema.table shard_id from_node to_node[/cyan]" + ) + console.print("3. Check zone conflicts: [cyan]xmover zone-analysis --show-shards[/cyan]") + console.print("4. Verify node capacity: [cyan]xmover analyze[/cyan]") + console.print("5. Review CrateDB documentation on shard allocation") + + console.print() + console.print("[dim]πŸ’‘ Tip: Use 'xmover validate-move' to check moves before execution[/dim]") + console.print( + "[dim]πŸ“š For more help: https://crate.io/docs/crate/reference/en/latest/admin/system-information.html[/dim]" + ) diff --git a/cratedb_toolkit/admin/xmover/util.py b/cratedb_toolkit/admin/xmover/util/format.py similarity index 100% rename from cratedb_toolkit/admin/xmover/util.py rename to cratedb_toolkit/admin/xmover/util/format.py From 2fb889d26ba9ebbbfd2bfc79c2a6752f31a75861 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Thu, 21 Aug 2025 02:24:58 +0200 Subject: [PATCH 12/18] Admin/XMover: Naming things -- `analyze` vs. 
`operational` --- .../xmover/{analyze => analysis}/__init__.py | 0 .../xmover/{analyze => analysis}/report.py | 73 +--------------- .../xmover/{analyze => analysis}/shard.py | 0 .../xmover/{analyze => analysis}/zone.py | 2 +- cratedb_toolkit/admin/xmover/cli.py | 17 ++-- .../xmover/{tune => operational}/__init__.py | 0 .../admin/xmover/operational/candidates.py | 84 +++++++++++++++++++ .../xmover/{tune => operational}/recommend.py | 4 +- .../xmover/{tune => operational}/recover.py | 0 pyproject.toml | 10 +-- 10 files changed, 103 insertions(+), 87 deletions(-) rename cratedb_toolkit/admin/xmover/{analyze => analysis}/__init__.py (100%) rename cratedb_toolkit/admin/xmover/{analyze => analysis}/report.py (78%) rename cratedb_toolkit/admin/xmover/{analyze => analysis}/shard.py (100%) rename cratedb_toolkit/admin/xmover/{analyze => analysis}/zone.py (99%) rename cratedb_toolkit/admin/xmover/{tune => operational}/__init__.py (100%) create mode 100644 cratedb_toolkit/admin/xmover/operational/candidates.py rename cratedb_toolkit/admin/xmover/{tune => operational}/recommend.py (99%) rename cratedb_toolkit/admin/xmover/{tune => operational}/recover.py (100%) diff --git a/cratedb_toolkit/admin/xmover/analyze/__init__.py b/cratedb_toolkit/admin/xmover/analysis/__init__.py similarity index 100% rename from cratedb_toolkit/admin/xmover/analyze/__init__.py rename to cratedb_toolkit/admin/xmover/analysis/__init__.py diff --git a/cratedb_toolkit/admin/xmover/analyze/report.py b/cratedb_toolkit/admin/xmover/analysis/report.py similarity index 78% rename from cratedb_toolkit/admin/xmover/analyze/report.py rename to cratedb_toolkit/admin/xmover/analysis/report.py index eff0f399..f6faf879 100644 --- a/cratedb_toolkit/admin/xmover/analyze/report.py +++ b/cratedb_toolkit/admin/xmover/analysis/report.py @@ -5,8 +5,8 @@ from rich.panel import Panel from rich.table import Table -from cratedb_toolkit.admin.xmover.analyze.shard import ShardAnalyzer -from cratedb_toolkit.admin.xmover.model 
import ShardMoveRecommendation, ShardMoveRequest, SizeCriteria +from cratedb_toolkit.admin.xmover.analysis.shard import ShardAnalyzer +from cratedb_toolkit.admin.xmover.model import ShardMoveRecommendation, ShardMoveRequest from cratedb_toolkit.admin.xmover.util.format import format_percentage, format_size console = Console() @@ -124,75 +124,6 @@ def distribution(self, table: str = None): console.print(table_summary) - def movement_candidates(self, criteria: SizeCriteria, limit: int): - """ - Find shard candidates for movement based on size criteria - - Results are sorted by nodes with least available space first, - then by shard size (smallest first) for easier moves. - """ - - console.print( - Panel.fit(f"[bold blue]Finding Moveable Shards ({criteria.min_size}-{criteria.max_size}GB)[/bold blue]") - ) - - if criteria.source_node: - console.print(f"[dim]Filtering: Only showing candidates from source node '{criteria.source_node}'[/dim]") - - # Find moveable candidates (only healthy shards suitable for operations) - candidates = self.analyzer.find_moveable_shards(criteria.min_size, criteria.max_size, criteria.table_name) - - # Filter by node if specified - if criteria.source_node: - candidates = [c for c in candidates if c.node_name == criteria.source_node] - - if not candidates: - if criteria.source_node: - console.print( - f"[yellow]No moveable shards found on node '{criteria.source_node}' " - f"in the specified size range.[/yellow]" - ) - console.print("[dim]Tip: Try different size ranges or remove --node filter to see all candidates[/dim]") - else: - console.print("[yellow]No moveable shards found in the specified size range.[/yellow]") - return - - # Show limited results - shown_candidates = candidates[:limit] - - candidates_table = Table( - title=f"Moveable Shard Candidates (showing {len(shown_candidates)} of {len(candidates)})", box=box.ROUNDED - ) - candidates_table.add_column("Table", style="cyan") - candidates_table.add_column("Shard ID", justify="right", 
style="magenta") - candidates_table.add_column("Type", style="blue") - candidates_table.add_column("Node", style="green") - candidates_table.add_column("Zone", style="yellow") - candidates_table.add_column("Size", justify="right", style="red") - candidates_table.add_column("Node Free Space", justify="right", style="white") - candidates_table.add_column("Documents", justify="right", style="dim") - - # Create a mapping of node names to available space for display - node_space_map = {node.name: node.available_space_gb for node in self.analyzer.nodes} - - for shard in shown_candidates: - node_free_space = node_space_map.get(shard.node_name, 0) - candidates_table.add_row( - f"{shard.schema_name}.{shard.table_name}", - str(shard.shard_id), - shard.shard_type, - shard.node_name, - shard.zone, - format_size(shard.size_gb), - format_size(node_free_space), - f"{shard.num_docs:,}", - ) - - console.print(candidates_table) - - if len(candidates) > limit: - console.print(f"\n[dim]... and {len(candidates) - limit} more candidates[/dim]") - def validate_move(self, request: ShardMoveRequest): # Parse schema and table if "." 
not in request.schema_table: diff --git a/cratedb_toolkit/admin/xmover/analyze/shard.py b/cratedb_toolkit/admin/xmover/analysis/shard.py similarity index 100% rename from cratedb_toolkit/admin/xmover/analyze/shard.py rename to cratedb_toolkit/admin/xmover/analysis/shard.py diff --git a/cratedb_toolkit/admin/xmover/analyze/zone.py b/cratedb_toolkit/admin/xmover/analysis/zone.py similarity index 99% rename from cratedb_toolkit/admin/xmover/analyze/zone.py rename to cratedb_toolkit/admin/xmover/analysis/zone.py index 18d032eb..718d88f0 100644 --- a/cratedb_toolkit/admin/xmover/analyze/zone.py +++ b/cratedb_toolkit/admin/xmover/analysis/zone.py @@ -5,7 +5,7 @@ from rich.panel import Panel from rich.table import Table -from cratedb_toolkit.admin.xmover.analyze.shard import ShardAnalyzer +from cratedb_toolkit.admin.xmover.analysis.shard import ShardAnalyzer from cratedb_toolkit.admin.xmover.model import ShardInfo from cratedb_toolkit.admin.xmover.util.database import CrateDBClient diff --git a/cratedb_toolkit/admin/xmover/cli.py b/cratedb_toolkit/admin/xmover/cli.py index d9bb5627..53cce1f9 100644 --- a/cratedb_toolkit/admin/xmover/cli.py +++ b/cratedb_toolkit/admin/xmover/cli.py @@ -10,16 +10,17 @@ import click from rich.console import Console -from cratedb_toolkit.admin.xmover.analyze.report import ShardReporter -from cratedb_toolkit.admin.xmover.analyze.shard import ShardAnalyzer -from cratedb_toolkit.admin.xmover.analyze.zone import ZoneReport +from cratedb_toolkit.admin.xmover.analysis.report import ShardReporter +from cratedb_toolkit.admin.xmover.analysis.shard import ShardAnalyzer +from cratedb_toolkit.admin.xmover.analysis.zone import ZoneReport from cratedb_toolkit.admin.xmover.model import ( RecommendationConstraints, ShardMoveRequest, SizeCriteria, ) -from cratedb_toolkit.admin.xmover.tune.recommend import Recommender -from cratedb_toolkit.admin.xmover.tune.recover import RecoveryMonitor, RecoveryOptions +from 
cratedb_toolkit.admin.xmover.operational.candidates import CandidateFinder +from cratedb_toolkit.admin.xmover.operational.recommend import Recommender +from cratedb_toolkit.admin.xmover.operational.recover import RecoveryMonitor, RecoveryOptions from cratedb_toolkit.admin.xmover.util.database import CrateDBClient from cratedb_toolkit.admin.xmover.util.error import explain_cratedb_error @@ -72,8 +73,8 @@ def find_candidates(ctx, min_size: float, max_size: float, limit: int, table: Op """Find shard candidates for movement based on size criteria""" client = ctx.obj["client"] analyzer = ShardAnalyzer(client) - reporter = ShardReporter(analyzer) - reporter.movement_candidates( + finder = CandidateFinder(analyzer) + finder.movement_candidates( criteria=SizeCriteria( min_size=min_size, max_size=max_size, @@ -209,7 +210,7 @@ def validate_move(ctx, schema_table: str, shard_id: int, from_node: str, to_node FROM_NODE: Source node name TO_NODE: Target node name - Example: xmover validate-move CUROV.maddoxxFormfactor 4 data-hot-1 data-hot-3 + Example: xmover validate-move CUROV.maddoxxS 4 data-hot-1 data-hot-3 """ client = ctx.obj["client"] analyzer = ShardAnalyzer(client) diff --git a/cratedb_toolkit/admin/xmover/tune/__init__.py b/cratedb_toolkit/admin/xmover/operational/__init__.py similarity index 100% rename from cratedb_toolkit/admin/xmover/tune/__init__.py rename to cratedb_toolkit/admin/xmover/operational/__init__.py diff --git a/cratedb_toolkit/admin/xmover/operational/candidates.py b/cratedb_toolkit/admin/xmover/operational/candidates.py new file mode 100644 index 00000000..dd7d4930 --- /dev/null +++ b/cratedb_toolkit/admin/xmover/operational/candidates.py @@ -0,0 +1,84 @@ +from rich import box +from rich.console import Console +from rich.panel import Panel +from rich.table import Table + +from cratedb_toolkit.admin.xmover.analysis.shard import ShardAnalyzer +from cratedb_toolkit.admin.xmover.model import SizeCriteria +from cratedb_toolkit.admin.xmover.util.format 
import format_size + +console = Console() + + +class CandidateFinder: + def __init__(self, analyzer: ShardAnalyzer): + self.analyzer = analyzer + + def movement_candidates(self, criteria: SizeCriteria, limit: int): + """ + Find shard candidates for movement based on size criteria + + Results are sorted by nodes with least available space first, + then by shard size (smallest first) for easier moves. + """ + + console.print( + Panel.fit(f"[bold blue]Finding Moveable Shards ({criteria.min_size}-{criteria.max_size}GB)[/bold blue]") + ) + + if criteria.source_node: + console.print(f"[dim]Filtering: Only showing candidates from source node '{criteria.source_node}'[/dim]") + + # Find moveable candidates (only healthy shards suitable for operations) + candidates = self.analyzer.find_moveable_shards(criteria.min_size, criteria.max_size, criteria.table_name) + + # Filter by node if specified + if criteria.source_node: + candidates = [c for c in candidates if c.node_name == criteria.source_node] + + if not candidates: + if criteria.source_node: + console.print( + f"[yellow]No moveable shards found on node '{criteria.source_node}' " + f"in the specified size range.[/yellow]" + ) + console.print("[dim]Tip: Try different size ranges or remove --node filter to see all candidates[/dim]") + else: + console.print("[yellow]No moveable shards found in the specified size range.[/yellow]") + return + + # Show limited results + shown_candidates = candidates[:limit] + + candidates_table = Table( + title=f"Moveable Shard Candidates (showing {len(shown_candidates)} of {len(candidates)})", box=box.ROUNDED + ) + candidates_table.add_column("Table", style="cyan") + candidates_table.add_column("Shard ID", justify="right", style="magenta") + candidates_table.add_column("Type", style="blue") + candidates_table.add_column("Node", style="green") + candidates_table.add_column("Zone", style="yellow") + candidates_table.add_column("Size", justify="right", style="red") + 
candidates_table.add_column("Node Free Space", justify="right", style="white") + candidates_table.add_column("Documents", justify="right", style="dim") + + # Create a mapping of node names to available space for display + node_space_map = {node.name: node.available_space_gb for node in self.analyzer.nodes} + + for shard in shown_candidates: + node_free_space = node_space_map.get(shard.node_name, 0) + candidates_table.add_row( + f"{shard.schema_name}.{shard.table_name}", + str(shard.shard_id), + shard.shard_type, + shard.node_name, + shard.zone, + format_size(shard.size_gb), + format_size(node_free_space), + f"{shard.num_docs:,}", + ) + + console.print(candidates_table) + + if len(candidates) > limit: + console.print(f"\n[dim]... and {len(candidates) - limit} more candidates[/dim]") diff --git a/cratedb_toolkit/admin/xmover/tune/recommend.py b/cratedb_toolkit/admin/xmover/operational/recommend.py similarity index 99% rename from cratedb_toolkit/admin/xmover/tune/recommend.py rename to cratedb_toolkit/admin/xmover/operational/recommend.py index 704912b1..d7ff0a07 100644 --- a/cratedb_toolkit/admin/xmover/tune/recommend.py +++ b/cratedb_toolkit/admin/xmover/operational/recommend.py @@ -5,9 +5,9 @@ from rich.panel import Panel from rich.table import Table -from cratedb_toolkit.admin.xmover.analyze.shard import ShardAnalyzer +from cratedb_toolkit.admin.xmover.analysis.shard import ShardAnalyzer from cratedb_toolkit.admin.xmover.model import RecommendationConstraints -from cratedb_toolkit.admin.xmover.tune.recover import RecoveryMonitor, RecoveryOptions +from cratedb_toolkit.admin.xmover.operational.recover import RecoveryMonitor, RecoveryOptions from cratedb_toolkit.admin.xmover.util.database import CrateDBClient from cratedb_toolkit.admin.xmover.util.format import format_size diff --git a/cratedb_toolkit/admin/xmover/tune/recover.py b/cratedb_toolkit/admin/xmover/operational/recover.py similarity index 100% rename from cratedb_toolkit/admin/xmover/tune/recover.py 
rename to cratedb_toolkit/admin/xmover/operational/recover.py diff --git a/pyproject.toml b/pyproject.toml index e296dd5d..fa3309d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -324,12 +324,12 @@ lint.extend-ignore = [ "S108", ] -lint.per-file-ignores."cratedb_toolkit/admin/xmover/analyze/shard.py" = [ "T201" ] # Allow `print` -lint.per-file-ignores."cratedb_toolkit/retention/cli.py" = [ "T201" ] # Allow `print` -lint.per-file-ignores."cratedb_toolkit/sqlalchemy/__init__.py" = [ "F401" ] # Allow `moduleΒ΄ imported but unused +lint.per-file-ignores."cratedb_toolkit/admin/xmover/analysis/shard.py" = [ "T201" ] # Allow `print` +lint.per-file-ignores."cratedb_toolkit/retention/cli.py" = [ "T201" ] # Allow `print` +lint.per-file-ignores."cratedb_toolkit/sqlalchemy/__init__.py" = [ "F401" ] # Allow `moduleΒ΄ imported but unused lint.per-file-ignores."doc/conf.py" = [ "A001", "ERA001" ] -lint.per-file-ignores."examples/*" = [ "ERA001", "F401", "T201", "T203" ] # Allow `print` and `pprint` -lint.per-file-ignores."tests/*" = [ "S101" ] # Allow use of `assert`, and `print`. +lint.per-file-ignores."examples/*" = [ "ERA001", "F401", "T201", "T203" ] # Allow `print` and `pprint` +lint.per-file-ignores."tests/*" = [ "S101" ] # Allow use of `assert`, and `print`. lint.per-file-ignores."tests/adapter/test_rockset.py" = [ "E402" ] lint.per-file-ignores."tests/info/test_http.py" = [ "E402" ] From d009652bdd31a6cc8b7865ce44eeaa3ce228e237 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Thu, 21 Aug 2025 02:42:24 +0200 Subject: [PATCH 13/18] Admin/XMover: Naming things. OO. 
--- .../admin/xmover/analysis/report.py | 256 ------------------ .../admin/xmover/analysis/shard.py | 145 +++++++++- cratedb_toolkit/admin/xmover/cli.py | 27 +- cratedb_toolkit/admin/xmover/model.py | 10 +- .../admin/xmover/operational/recommend.py | 177 ++++++++++-- 5 files changed, 305 insertions(+), 310 deletions(-) delete mode 100644 cratedb_toolkit/admin/xmover/analysis/report.py diff --git a/cratedb_toolkit/admin/xmover/analysis/report.py b/cratedb_toolkit/admin/xmover/analysis/report.py deleted file mode 100644 index f6faf879..00000000 --- a/cratedb_toolkit/admin/xmover/analysis/report.py +++ /dev/null @@ -1,256 +0,0 @@ -from typing import Any, Dict - -from rich import box -from rich.console import Console -from rich.panel import Panel -from rich.table import Table - -from cratedb_toolkit.admin.xmover.analysis.shard import ShardAnalyzer -from cratedb_toolkit.admin.xmover.model import ShardMoveRecommendation, ShardMoveRequest -from cratedb_toolkit.admin.xmover.util.format import format_percentage, format_size - -console = Console() - - -class ShardReporter: - def __init__(self, analyzer: ShardAnalyzer): - self.analyzer = analyzer - - def distribution(self, table: str = None): - """Analyze current shard distribution across nodes and zones""" - console.print(Panel.fit("[bold blue]CrateDB Cluster Analysis[/bold blue]")) - - # Get cluster overview (includes all shards for complete analysis) - overview: Dict[str, Any] = self.analyzer.get_cluster_overview() - - # Cluster summary table - summary_table = Table(title="Cluster Summary", box=box.ROUNDED) - summary_table.add_column("Metric", style="cyan") - summary_table.add_column("Value", style="magenta") - - summary_table.add_row("Nodes", str(overview["nodes"])) - summary_table.add_row("Availability Zones", str(overview["zones"])) - summary_table.add_row("Total Shards", str(overview["total_shards"])) - summary_table.add_row("Primary Shards", str(overview["primary_shards"])) - summary_table.add_row("Replica Shards", 
str(overview["replica_shards"])) - summary_table.add_row("Total Size", format_size(overview["total_size_gb"])) - - console.print(summary_table) - console.print() - - # Disk watermarks table - if overview.get("watermarks"): - watermarks_table = Table(title="Disk Allocation Watermarks", box=box.ROUNDED) - watermarks_table.add_column("Setting", style="cyan") - watermarks_table.add_column("Value", style="magenta") - - watermarks = overview["watermarks"] - watermarks_table.add_row("Low Watermark", str(watermarks.get("low", "Not set"))) - watermarks_table.add_row("High Watermark", str(watermarks.get("high", "Not set"))) - watermarks_table.add_row("Flood Stage", str(watermarks.get("flood_stage", "Not set"))) - watermarks_table.add_row( - "Enable for Single Node", str(watermarks.get("enable_for_single_data_node", "Not set")) - ) - - console.print(watermarks_table) - console.print() - - # Zone distribution table - zone_table = Table(title="Zone Distribution", box=box.ROUNDED) - zone_table.add_column("Zone", style="cyan") - zone_table.add_column("Shards", justify="right", style="magenta") - zone_table.add_column("Percentage", justify="right", style="green") - - total_shards = overview["total_shards"] - for zone, count in overview["zone_distribution"].items(): - percentage = (count / total_shards * 100) if total_shards > 0 else 0 - zone_table.add_row(zone, str(count), f"{percentage:.1f}%") - - console.print(zone_table) - console.print() - - # Node health table - node_table = Table(title="Node Health", box=box.ROUNDED) - node_table.add_column("Node", style="cyan") - node_table.add_column("Zone", style="blue") - node_table.add_column("Shards", justify="right", style="magenta") - node_table.add_column("Size", justify="right", style="green") - node_table.add_column("Disk Usage", justify="right") - node_table.add_column("Available Space", justify="right", style="green") - node_table.add_column("Until Low WM", justify="right", style="yellow") - node_table.add_column("Until High 
WM", justify="right", style="red") - - for node_info in overview["node_health"]: - # Format watermark remaining capacity - low_wm_remaining = ( - format_size(node_info["remaining_to_low_watermark_gb"]) - if node_info["remaining_to_low_watermark_gb"] > 0 - else "[red]Exceeded[/red]" - ) - high_wm_remaining = ( - format_size(node_info["remaining_to_high_watermark_gb"]) - if node_info["remaining_to_high_watermark_gb"] > 0 - else "[red]Exceeded[/red]" - ) - - node_table.add_row( - node_info["name"], - node_info["zone"], - str(node_info["shards"]), - format_size(node_info["size_gb"]), - format_percentage(node_info["disk_usage_percent"]), - format_size(node_info["available_space_gb"]), - low_wm_remaining, - high_wm_remaining, - ) - - console.print(node_table) - - # Table-specific analysis if requested - if table: - console.print() - console.print(Panel.fit(f"[bold blue]Analysis for table: {table}[/bold blue]")) - - stats = self.analyzer.analyze_distribution(table) - - table_summary = Table(title=f"Table {table} Distribution", box=box.ROUNDED) - table_summary.add_column("Metric", style="cyan") - table_summary.add_column("Value", style="magenta") - - table_summary.add_row("Total Shards", str(stats.total_shards)) - table_summary.add_row("Total Size", format_size(stats.total_size_gb)) - table_summary.add_row("Zone Balance Score", f"{stats.zone_balance_score:.1f}/100") - table_summary.add_row("Node Balance Score", f"{stats.node_balance_score:.1f}/100") - - console.print(table_summary) - - def validate_move(self, request: ShardMoveRequest): - # Parse schema and table - if "." 
not in request.schema_table: - console.print("[red]Error: Schema and table must be in format 'schema.table'[/red]") - return - - schema_name, table_name = request.schema_table.split(".", 1) - - console.print(Panel.fit("[bold blue]Validating Shard Move[/bold blue]")) - console.print( - f"[dim]Move: {schema_name}.{table_name}[{request.shard_id}] " - f"from {request.from_node} to {request.to_node}[/dim]" - ) - console.print() - - # Find the nodes - from_node_info = None - to_node_info = None - for node in self.analyzer.nodes: - if node.name == request.from_node: - from_node_info = node - if node.name == request.to_node: - to_node_info = node - - if not from_node_info: - console.print(f"[red]βœ— Source node '{request.from_node}' not found in cluster[/red]") - return - - if not to_node_info: - console.print(f"[red]βœ— Target node '{request.to_node}' not found in cluster[/red]") - return - - # Find the specific shard - target_shard = None - for shard in self.analyzer.shards: - if ( - shard.schema_name == schema_name - and shard.table_name == table_name - and shard.shard_id == request.shard_id - and shard.node_name == request.from_node - ): - target_shard = shard - break - - if not target_shard: - console.print(f"[red]βœ— Shard {request.shard_id} not found on node {request.from_node}[/red]") - console.print("[dim]Use 'xmover find-candidates' to see available shards[/dim]") - return - - # Create a move recommendation for validation - recommendation = ShardMoveRecommendation( - table_name=table_name, - schema_name=schema_name, - shard_id=request.shard_id, - from_node=request.from_node, - to_node=request.to_node, - from_zone=from_node_info.zone, - to_zone=to_node_info.zone, - shard_type=target_shard.shard_type, - size_gb=target_shard.size_gb, - reason="Manual validation", - ) - - # Display shard details - details_table = Table(title="Shard Details", box=box.ROUNDED) - details_table.add_column("Property", style="cyan") - details_table.add_column("Value", style="magenta") - - 
details_table.add_row("Table", f"{schema_name}.{table_name}") - details_table.add_row("Shard ID", str(request.shard_id)) - details_table.add_row("Type", target_shard.shard_type) - details_table.add_row("Size", format_size(target_shard.size_gb)) - details_table.add_row("Documents", f"{target_shard.num_docs:,}") - details_table.add_row("State", target_shard.state) - details_table.add_row("Routing State", target_shard.routing_state) - details_table.add_row("From Node", f"{request.from_node} ({from_node_info.zone})") - details_table.add_row("To Node", f"{request.to_node} ({to_node_info.zone})") - details_table.add_row("Zone Change", "Yes" if from_node_info.zone != to_node_info.zone else "No") - - console.print(details_table) - console.print() - - # Perform comprehensive validation - is_safe, safety_msg = self.analyzer.validate_move_safety( - recommendation, max_disk_usage_percent=request.max_disk_usage - ) - - if is_safe: - console.print("[green]βœ“ VALIDATION PASSED - Move appears safe[/green]") - console.print(f"[green]βœ“ {safety_msg}[/green]") - console.print() - - # Show the SQL command - console.print(Panel.fit("[bold green]Ready to Execute[/bold green]")) - console.print("[dim]# Copy and paste this command to execute the move[/dim]") - console.print() - console.print(f"{recommendation.to_sql()}") - console.print() - console.print("[dim]# Monitor shard health after execution[/dim]") - console.print( - "[dim]# Check with: SELECT * FROM sys.shards " - "WHERE table_name = '{table_name}' AND id = {shard_id};[/dim]" - ) - else: - console.print("[red]βœ— VALIDATION FAILED - Move not safe[/red]") - console.print(f"[red]βœ— {safety_msg}[/red]") - console.print() - - # Provide troubleshooting guidance - if "zone conflict" in safety_msg.lower(): - console.print("[yellow]πŸ’‘ Troubleshooting Zone Conflicts:[/yellow]") - console.print(" β€’ Check current shard distribution: xmover zone-analysis --show-shards") - console.print(" β€’ Try moving to a different zone") - 
console.print(" β€’ Verify cluster has proper zone-awareness configuration") - elif "node conflict" in safety_msg.lower(): - console.print("[yellow]πŸ’‘ Troubleshooting Node Conflicts:[/yellow]") - console.print(" β€’ The target node already has a copy of this shard") - console.print(" β€’ Choose a different target node") - console.print(" β€’ Check shard distribution: xmover analyze") - elif "space" in safety_msg.lower(): - console.print("[yellow]πŸ’‘ Troubleshooting Space Issues:[/yellow]") - console.print(" β€’ Free up space on the target node") - console.print(" β€’ Choose a node with more available capacity") - console.print(" β€’ Check node capacity: xmover analyze") - elif "usage" in safety_msg.lower(): - console.print("[yellow]πŸ’‘ Troubleshooting High Disk Usage:[/yellow]") - console.print(" β€’ Wait for target node disk usage to decrease") - console.print(" β€’ Choose a node with lower disk usage") - console.print(" β€’ Check cluster health: xmover analyze") - console.print(" β€’ Consider using --max-disk-usage option for urgent moves") diff --git a/cratedb_toolkit/admin/xmover/analysis/shard.py b/cratedb_toolkit/admin/xmover/analysis/shard.py index 334b394e..f6f24b6b 100644 --- a/cratedb_toolkit/admin/xmover/analysis/shard.py +++ b/cratedb_toolkit/admin/xmover/analysis/shard.py @@ -7,17 +7,25 @@ from collections import defaultdict from typing import Any, Dict, List, Optional, Set, Tuple, Union +from rich import box +from rich.console import Console +from rich.panel import Panel +from rich.table import Table + from cratedb_toolkit.admin.xmover.model import ( DistributionStats, NodeInfo, - RecommendationConstraints, ShardInfo, - ShardMoveRecommendation, + ShardRelocationConstraints, + ShardRelocationResponse, ) from cratedb_toolkit.admin.xmover.util.database import CrateDBClient +from cratedb_toolkit.admin.xmover.util.format import format_percentage, format_size logger = logging.getLogger(__name__) +console = Console() + class ShardAnalyzer: """Analyzer 
for CrateDB shard distribution and rebalancing""" @@ -181,8 +189,8 @@ def find_nodes_with_capacity( return available_nodes def generate_rebalancing_recommendations( - self, constraints: RecommendationConstraints - ) -> List[ShardMoveRecommendation]: + self, constraints: ShardRelocationConstraints + ) -> List[ShardRelocationResponse]: """Generate recommendations for rebalancing shards Args: @@ -191,7 +199,7 @@ def generate_rebalancing_recommendations( source_node: If specified, only generate recommendations for shards on this node max_disk_usage_percent: Maximum disk usage percentage for target nodes """ - recommendations: List[ShardMoveRecommendation] = [] + recommendations: List[ShardRelocationResponse] = [] # Get moveable shards (only healthy ones for actual operations) moveable_shards = self.find_moveable_shards(constraints.min_size, constraints.max_size, constraints.table_name) @@ -279,7 +287,7 @@ def generate_rebalancing_recommendations( safe_target_nodes = [] for candidate_node in target_nodes: # Create a temporary recommendation to test safety - temp_rec = ShardMoveRecommendation( + temp_rec = ShardRelocationResponse( table_name=shard.table_name, schema_name=shard.schema_name, shard_id=shard.shard_id, @@ -341,7 +349,7 @@ def generate_rebalancing_recommendations( if shard.zone == target_node.zone: reason = f"Node balancing within {shard.zone}" - recommendation = ShardMoveRecommendation( + recommendation = ShardRelocationResponse( table_name=shard.table_name, schema_name=shard.schema_name, shard_id=shard.shard_id, @@ -363,7 +371,7 @@ def generate_rebalancing_recommendations( return recommendations def validate_move_safety( - self, recommendation: ShardMoveRecommendation, max_disk_usage_percent: float = 90.0 + self, recommendation: ShardRelocationResponse, max_disk_usage_percent: float = 90.0 ) -> Tuple[bool, str]: """Validate that a move recommendation is safe to execute""" # Find target node (with caching) @@ -409,7 +417,7 @@ def _get_node_cached(self, 
node_name: str): self._node_lookup_cache[node_name] = target_node return target_node - def _check_zone_conflict_cached(self, recommendation: ShardMoveRecommendation) -> Optional[str]: + def _check_zone_conflict_cached(self, recommendation: ShardRelocationResponse) -> Optional[str]: """Check zone conflicts with caching""" # Create cache key: table, shard, target zone target_zone = self._get_node_zone(recommendation.to_node) @@ -467,7 +475,7 @@ def _find_nodes_with_capacity_cached( self._target_nodes_cache[cache_key] = result return result - def _check_zone_conflict(self, recommendation: ShardMoveRecommendation) -> Optional[str]: + def _check_zone_conflict(self, recommendation: ShardRelocationResponse) -> Optional[str]: """Check if moving this shard would create a zone conflict Performs comprehensive zone safety analysis: @@ -753,7 +761,7 @@ def plan_node_decommission(self, node_name: str, min_free_space_gb: float = 100. safe_targets = [] for target in potential_targets: # Create a temporary recommendation to test zone safety - temp_rec = ShardMoveRecommendation( + temp_rec = ShardRelocationResponse( table_name=shard.table_name, schema_name=shard.schema_name, shard_id=shard.shard_id, @@ -778,7 +786,7 @@ def plan_node_decommission(self, node_name: str, min_free_space_gb: float = 100. # Choose the target with most available space best_target = safe_targets[0] move_plan.append( - ShardMoveRecommendation( + ShardRelocationResponse( table_name=shard.table_name, schema_name=shard.schema_name, shard_id=shard.shard_id, @@ -826,3 +834,116 @@ def plan_node_decommission(self, node_name: str, min_free_space_gb: float = 100. 
"estimated_time_hours": len(move_plan) * 0.1, # Rough estimate: 6 minutes per move "message": "Decommission plan generated" if feasible else "Decommission not currently feasible", } + + +class ShardReporter: + def __init__(self, analyzer: ShardAnalyzer): + self.analyzer = analyzer + + def distribution(self, table: str = None): + """Analyze current shard distribution across nodes and zones""" + console.print(Panel.fit("[bold blue]CrateDB Cluster Analysis[/bold blue]")) + + # Get cluster overview (includes all shards for complete analysis) + overview: Dict[str, Any] = self.analyzer.get_cluster_overview() + + # Cluster summary table + summary_table = Table(title="Cluster Summary", box=box.ROUNDED) + summary_table.add_column("Metric", style="cyan") + summary_table.add_column("Value", style="magenta") + + summary_table.add_row("Nodes", str(overview["nodes"])) + summary_table.add_row("Availability Zones", str(overview["zones"])) + summary_table.add_row("Total Shards", str(overview["total_shards"])) + summary_table.add_row("Primary Shards", str(overview["primary_shards"])) + summary_table.add_row("Replica Shards", str(overview["replica_shards"])) + summary_table.add_row("Total Size", format_size(overview["total_size_gb"])) + + console.print(summary_table) + console.print() + + # Disk watermarks table + if overview.get("watermarks"): + watermarks_table = Table(title="Disk Allocation Watermarks", box=box.ROUNDED) + watermarks_table.add_column("Setting", style="cyan") + watermarks_table.add_column("Value", style="magenta") + + watermarks = overview["watermarks"] + watermarks_table.add_row("Low Watermark", str(watermarks.get("low", "Not set"))) + watermarks_table.add_row("High Watermark", str(watermarks.get("high", "Not set"))) + watermarks_table.add_row("Flood Stage", str(watermarks.get("flood_stage", "Not set"))) + watermarks_table.add_row( + "Enable for Single Node", str(watermarks.get("enable_for_single_data_node", "Not set")) + ) + + console.print(watermarks_table) + 
console.print() + + # Zone distribution table + zone_table = Table(title="Zone Distribution", box=box.ROUNDED) + zone_table.add_column("Zone", style="cyan") + zone_table.add_column("Shards", justify="right", style="magenta") + zone_table.add_column("Percentage", justify="right", style="green") + + total_shards = overview["total_shards"] + for zone, count in overview["zone_distribution"].items(): + percentage = (count / total_shards * 100) if total_shards > 0 else 0 + zone_table.add_row(zone, str(count), f"{percentage:.1f}%") + + console.print(zone_table) + console.print() + + # Node health table + node_table = Table(title="Node Health", box=box.ROUNDED) + node_table.add_column("Node", style="cyan") + node_table.add_column("Zone", style="blue") + node_table.add_column("Shards", justify="right", style="magenta") + node_table.add_column("Size", justify="right", style="green") + node_table.add_column("Disk Usage", justify="right") + node_table.add_column("Available Space", justify="right", style="green") + node_table.add_column("Until Low WM", justify="right", style="yellow") + node_table.add_column("Until High WM", justify="right", style="red") + + for node_info in overview["node_health"]: + # Format watermark remaining capacity + low_wm_remaining = ( + format_size(node_info["remaining_to_low_watermark_gb"]) + if node_info["remaining_to_low_watermark_gb"] > 0 + else "[red]Exceeded[/red]" + ) + high_wm_remaining = ( + format_size(node_info["remaining_to_high_watermark_gb"]) + if node_info["remaining_to_high_watermark_gb"] > 0 + else "[red]Exceeded[/red]" + ) + + node_table.add_row( + node_info["name"], + node_info["zone"], + str(node_info["shards"]), + format_size(node_info["size_gb"]), + format_percentage(node_info["disk_usage_percent"]), + format_size(node_info["available_space_gb"]), + low_wm_remaining, + high_wm_remaining, + ) + + console.print(node_table) + + # Table-specific analysis if requested + if table: + console.print() + console.print(Panel.fit(f"[bold 
blue]Analysis for table: {table}[/bold blue]")) + + stats = self.analyzer.analyze_distribution(table) + + table_summary = Table(title=f"Table {table} Distribution", box=box.ROUNDED) + table_summary.add_column("Metric", style="cyan") + table_summary.add_column("Value", style="magenta") + + table_summary.add_row("Total Shards", str(stats.total_shards)) + table_summary.add_row("Total Size", format_size(stats.total_size_gb)) + table_summary.add_row("Zone Balance Score", f"{stats.zone_balance_score:.1f}/100") + table_summary.add_row("Node Balance Score", f"{stats.node_balance_score:.1f}/100") + + console.print(table_summary) diff --git a/cratedb_toolkit/admin/xmover/cli.py b/cratedb_toolkit/admin/xmover/cli.py index 53cce1f9..f77259b6 100644 --- a/cratedb_toolkit/admin/xmover/cli.py +++ b/cratedb_toolkit/admin/xmover/cli.py @@ -10,16 +10,15 @@ import click from rich.console import Console -from cratedb_toolkit.admin.xmover.analysis.report import ShardReporter -from cratedb_toolkit.admin.xmover.analysis.shard import ShardAnalyzer +from cratedb_toolkit.admin.xmover.analysis.shard import ShardAnalyzer, ShardReporter from cratedb_toolkit.admin.xmover.analysis.zone import ZoneReport from cratedb_toolkit.admin.xmover.model import ( - RecommendationConstraints, - ShardMoveRequest, + ShardRelocationConstraints, + ShardRelocationRequest, SizeCriteria, ) from cratedb_toolkit.admin.xmover.operational.candidates import CandidateFinder -from cratedb_toolkit.admin.xmover.operational.recommend import Recommender +from cratedb_toolkit.admin.xmover.operational.recommend import ShardRelocationRecommender from cratedb_toolkit.admin.xmover.operational.recover import RecoveryMonitor, RecoveryOptions from cratedb_toolkit.admin.xmover.util.database import CrateDBClient from cratedb_toolkit.admin.xmover.util.error import explain_cratedb_error @@ -128,9 +127,9 @@ def recommend( auto_execute: bool, ): """Generate shard movement recommendations for rebalancing""" - recommender = Recommender( - 
client=ctx.obj["client"], - constraints=RecommendationConstraints( + recommender = ShardRelocationRecommender(client=ctx.obj["client"]) + recommender.execute( + constraints=ShardRelocationConstraints( table_name=table, source_node=node, min_size=min_size, @@ -141,8 +140,10 @@ def recommend( max_disk_usage=max_disk_usage, prioritize_space=prioritize_space, ), + auto_execute=auto_execute, + validate=validate, + dry_run=dry_run, ) - recommender.start(auto_execute=auto_execute, validate=validate, dry_run=dry_run) @main.command() @@ -212,11 +213,9 @@ def validate_move(ctx, schema_table: str, shard_id: int, from_node: str, to_node Example: xmover validate-move CUROV.maddoxxS 4 data-hot-1 data-hot-3 """ - client = ctx.obj["client"] - analyzer = ShardAnalyzer(client) - reporter = ShardReporter(analyzer) - reporter.validate_move( - request=ShardMoveRequest( + recommender = ShardRelocationRecommender(client=ctx.obj["client"]) + recommender.validate( + request=ShardRelocationRequest( schema_table=schema_table, shard_id=shard_id, from_node=from_node, diff --git a/cratedb_toolkit/admin/xmover/model.py b/cratedb_toolkit/admin/xmover/model.py index 056d834c..34e43f77 100644 --- a/cratedb_toolkit/admin/xmover/model.py +++ b/cratedb_toolkit/admin/xmover/model.py @@ -103,7 +103,7 @@ def translog_percentage(self) -> float: @dataclass -class ShardMoveRequest: +class ShardRelocationRequest: """Request for moving a shard""" schema_table: str @@ -114,7 +114,7 @@ class ShardMoveRequest: @dataclass -class ShardMoveRecommendation: +class ShardRelocationResponse: """Recommendation for moving a shard""" table_name: str @@ -174,9 +174,9 @@ class SizeCriteria: @dataclasses.dataclass -class RecommendationConstraints: - min_size: float = 40.0 - max_size: float = 60.0 +class ShardRelocationConstraints: + min_size: float = SizeCriteria().min_size + max_size: float = SizeCriteria().max_size table_name: Optional[str] = None source_node: Optional[str] = None zone_tolerance: float = 10.0 diff --git 
a/cratedb_toolkit/admin/xmover/operational/recommend.py b/cratedb_toolkit/admin/xmover/operational/recommend.py index d7ff0a07..8eb37b13 100644 --- a/cratedb_toolkit/admin/xmover/operational/recommend.py +++ b/cratedb_toolkit/admin/xmover/operational/recommend.py @@ -6,7 +6,11 @@ from rich.table import Table from cratedb_toolkit.admin.xmover.analysis.shard import ShardAnalyzer -from cratedb_toolkit.admin.xmover.model import RecommendationConstraints +from cratedb_toolkit.admin.xmover.model import ( + ShardRelocationConstraints, + ShardRelocationRequest, + ShardRelocationResponse, +) from cratedb_toolkit.admin.xmover.operational.recover import RecoveryMonitor, RecoveryOptions from cratedb_toolkit.admin.xmover.util.database import CrateDBClient from cratedb_toolkit.admin.xmover.util.format import format_size @@ -14,14 +18,145 @@ console = Console() -class Recommender: - def __init__(self, client: CrateDBClient, constraints: RecommendationConstraints): +class ShardRelocationRecommender: + def __init__(self, client: CrateDBClient): self.client = client - self.constraints = constraints self.analyzer = ShardAnalyzer(self.client) - def start( + def validate(self, request: ShardRelocationRequest): + # Parse schema and table + if "." 
not in request.schema_table: + console.print("[red]Error: Schema and table must be in format 'schema.table'[/red]") + return + + schema_name, table_name = request.schema_table.split(".", 1) + + console.print(Panel.fit("[bold blue]Validating Shard Move[/bold blue]")) + console.print( + f"[dim]Move: {schema_name}.{table_name}[{request.shard_id}] " + f"from {request.from_node} to {request.to_node}[/dim]" + ) + console.print() + + # Find the nodes + from_node_info = None + to_node_info = None + for node in self.analyzer.nodes: + if node.name == request.from_node: + from_node_info = node + if node.name == request.to_node: + to_node_info = node + + if not from_node_info: + console.print(f"[red]βœ— Source node '{request.from_node}' not found in cluster[/red]") + return + + if not to_node_info: + console.print(f"[red]βœ— Target node '{request.to_node}' not found in cluster[/red]") + return + + # Find the specific shard + target_shard = None + for shard in self.analyzer.shards: + if ( + shard.schema_name == schema_name + and shard.table_name == table_name + and shard.shard_id == request.shard_id + and shard.node_name == request.from_node + ): + target_shard = shard + break + + if not target_shard: + console.print(f"[red]βœ— Shard {request.shard_id} not found on node {request.from_node}[/red]") + console.print("[dim]Use 'xmover find-candidates' to see available shards[/dim]") + return + + # Create a move recommendation for validation + recommendation = ShardRelocationResponse( + table_name=table_name, + schema_name=schema_name, + shard_id=request.shard_id, + from_node=request.from_node, + to_node=request.to_node, + from_zone=from_node_info.zone, + to_zone=to_node_info.zone, + shard_type=target_shard.shard_type, + size_gb=target_shard.size_gb, + reason="Manual validation", + ) + + # Display shard details + details_table = Table(title="Shard Details", box=box.ROUNDED) + details_table.add_column("Property", style="cyan") + details_table.add_column("Value", style="magenta") + + 
details_table.add_row("Table", f"{schema_name}.{table_name}") + details_table.add_row("Shard ID", str(request.shard_id)) + details_table.add_row("Type", target_shard.shard_type) + details_table.add_row("Size", format_size(target_shard.size_gb)) + details_table.add_row("Documents", f"{target_shard.num_docs:,}") + details_table.add_row("State", target_shard.state) + details_table.add_row("Routing State", target_shard.routing_state) + details_table.add_row("From Node", f"{request.from_node} ({from_node_info.zone})") + details_table.add_row("To Node", f"{request.to_node} ({to_node_info.zone})") + details_table.add_row("Zone Change", "Yes" if from_node_info.zone != to_node_info.zone else "No") + + console.print(details_table) + console.print() + + # Perform comprehensive validation + is_safe, safety_msg = self.analyzer.validate_move_safety( + recommendation, max_disk_usage_percent=request.max_disk_usage + ) + + if is_safe: + console.print("[green]βœ“ VALIDATION PASSED - Move appears safe[/green]") + console.print(f"[green]βœ“ {safety_msg}[/green]") + console.print() + + # Show the SQL command + console.print(Panel.fit("[bold green]Ready to Execute[/bold green]")) + console.print("[dim]# Copy and paste this command to execute the move[/dim]") + console.print() + console.print(f"{recommendation.to_sql()}") + console.print() + console.print("[dim]# Monitor shard health after execution[/dim]") + console.print( + "[dim]# Check with: SELECT * FROM sys.shards " + "WHERE table_name = '{table_name}' AND id = {shard_id};[/dim]" + ) + else: + console.print("[red]βœ— VALIDATION FAILED - Move not safe[/red]") + console.print(f"[red]βœ— {safety_msg}[/red]") + console.print() + + # Provide troubleshooting guidance + if "zone conflict" in safety_msg.lower(): + console.print("[yellow]πŸ’‘ Troubleshooting Zone Conflicts:[/yellow]") + console.print(" β€’ Check current shard distribution: xmover zone-analysis --show-shards") + console.print(" β€’ Try moving to a different zone") + 
console.print(" β€’ Verify cluster has proper zone-awareness configuration") + elif "node conflict" in safety_msg.lower(): + console.print("[yellow]πŸ’‘ Troubleshooting Node Conflicts:[/yellow]") + console.print(" β€’ The target node already has a copy of this shard") + console.print(" β€’ Choose a different target node") + console.print(" β€’ Check shard distribution: xmover analyze") + elif "space" in safety_msg.lower(): + console.print("[yellow]πŸ’‘ Troubleshooting Space Issues:[/yellow]") + console.print(" β€’ Free up space on the target node") + console.print(" β€’ Choose a node with more available capacity") + console.print(" β€’ Check node capacity: xmover analyze") + elif "usage" in safety_msg.lower(): + console.print("[yellow]πŸ’‘ Troubleshooting High Disk Usage:[/yellow]") + console.print(" β€’ Wait for target node disk usage to decrease") + console.print(" β€’ Choose a node with lower disk usage") + console.print(" β€’ Check cluster health: xmover analyze") + console.print(" β€’ Consider using --max-disk-usage option for urgent moves") + + def execute( self, + constraints: ShardRelocationConstraints, auto_execute: bool, validate: bool, dry_run: bool, @@ -41,17 +176,17 @@ def start( ) console.print("[dim]Note: Only analyzing healthy shards (STARTED + 100% recovered) for safe operations[/dim]") console.print("[dim]Zone conflict detection: Prevents moves that would violate CrateDB's zone awareness[/dim]") - if self.constraints.prioritize_space: + if constraints.prioritize_space: console.print("[dim]Mode: Prioritizing available space over zone balancing[/dim]") else: console.print("[dim]Mode: Prioritizing zone balancing over available space[/dim]") - if self.constraints.source_node: - console.print(f"[dim]Filtering: Only showing moves from source node '{self.constraints.source_node}'[/dim]") + if constraints.source_node: + console.print(f"[dim]Filtering: Only showing moves from source node '{constraints.source_node}'[/dim]") console.print( - f"[dim]Safety 
thresholds: Max disk usage {self.constraints.max_disk_usage}%, " - f"Min free space {self.constraints.min_free_space}GB[/dim]" + f"[dim]Safety thresholds: Max disk usage {constraints.max_disk_usage}%, " + f"Min free space {constraints.min_free_space}GB[/dim]" ) if dry_run: @@ -60,24 +195,20 @@ def start( console.print("[red]EXECUTION MODE - SQL commands will be generated for actual moves[/red]") console.print() - recommendations = self.analyzer.generate_rebalancing_recommendations(constraints=self.constraints) + recommendations = self.analyzer.generate_rebalancing_recommendations(constraints=constraints) if not recommendations: - if self.constraints.source_node: - console.print( - f"[yellow]No safe recommendations found for node '{self.constraints.source_node}'[/yellow]" - ) + if constraints.source_node: + console.print(f"[yellow]No safe recommendations found for node '{constraints.source_node}'[/yellow]") console.print("[dim]This could be due to:[/dim]") console.print("[dim] β€’ Zone conflicts preventing safe moves[/dim]") console.print( - f"[dim] β€’ Target nodes exceeding {self.constraints.max_disk_usage}% disk usage threshold[/dim]" - ) - console.print( - f"[dim] β€’ Insufficient free space on target nodes (need {self.constraints.min_free_space}GB)[/dim]" + f"[dim] β€’ Target nodes exceeding {constraints.max_disk_usage}% disk usage threshold[/dim]" ) console.print( - f"[dim] β€’ No shards in size range {self.constraints.min_size}-{self.constraints.max_size}GB[/dim]" + f"[dim] β€’ Insufficient free space on target nodes (need {constraints.min_free_space}GB)[/dim]" ) + console.print(f"[dim] β€’ No shards in size range {constraints.min_size}-{constraints.max_size}GB[/dim]") console.print("[dim]Suggestions:[/dim]") console.print("[dim] β€’ Try: --max-disk-usage 95 (allow higher disk usage)[/dim]") console.print("[dim] β€’ Try: --min-free-space 50 (reduce space requirements)[/dim]") @@ -121,7 +252,7 @@ def start( if validate: is_safe, safety_msg = 
self.analyzer.validate_move_safety( - rec, max_disk_usage_percent=self.constraints.max_disk_usage + rec, max_disk_usage_percent=constraints.max_disk_usage ) safety_status = "[green]βœ“ SAFE[/green]" if is_safe else f"[red]βœ— {safety_msg}[/red]" row.append(safety_status) @@ -145,7 +276,7 @@ def start( for i, rec in enumerate(recommendations, 1): if validate: is_safe, safety_msg = self.analyzer.validate_move_safety( - rec, max_disk_usage_percent=self.constraints.max_disk_usage + rec, max_disk_usage_percent=constraints.max_disk_usage ) if not is_safe: if "zone conflict" in safety_msg.lower(): @@ -189,7 +320,7 @@ def start( for i, rec in enumerate(recommendations, 1): if validate: is_safe, safety_msg = self.analyzer.validate_move_safety( - rec, max_disk_usage_percent=self.constraints.max_disk_usage + rec, max_disk_usage_percent=constraints.max_disk_usage ) if not is_safe: if "Zone conflict" in safety_msg: From eb7aa7ddfb9146caf35f9f357e7f6fe4e3ded637 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Thu, 21 Aug 2025 02:45:26 +0200 Subject: [PATCH 14/18] Admin/XMover: Naming things. 
s/recover/monitor/ --- cratedb_toolkit/admin/xmover/cli.py | 2 +- .../admin/xmover/operational/{recover.py => monitor.py} | 0 cratedb_toolkit/admin/xmover/operational/recommend.py | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename cratedb_toolkit/admin/xmover/operational/{recover.py => monitor.py} (100%) diff --git a/cratedb_toolkit/admin/xmover/cli.py b/cratedb_toolkit/admin/xmover/cli.py index f77259b6..339f9e7f 100644 --- a/cratedb_toolkit/admin/xmover/cli.py +++ b/cratedb_toolkit/admin/xmover/cli.py @@ -18,8 +18,8 @@ SizeCriteria, ) from cratedb_toolkit.admin.xmover.operational.candidates import CandidateFinder +from cratedb_toolkit.admin.xmover.operational.monitor import RecoveryMonitor, RecoveryOptions from cratedb_toolkit.admin.xmover.operational.recommend import ShardRelocationRecommender -from cratedb_toolkit.admin.xmover.operational.recover import RecoveryMonitor, RecoveryOptions from cratedb_toolkit.admin.xmover.util.database import CrateDBClient from cratedb_toolkit.admin.xmover.util.error import explain_cratedb_error diff --git a/cratedb_toolkit/admin/xmover/operational/recover.py b/cratedb_toolkit/admin/xmover/operational/monitor.py similarity index 100% rename from cratedb_toolkit/admin/xmover/operational/recover.py rename to cratedb_toolkit/admin/xmover/operational/monitor.py diff --git a/cratedb_toolkit/admin/xmover/operational/recommend.py b/cratedb_toolkit/admin/xmover/operational/recommend.py index 8eb37b13..ab5156e6 100644 --- a/cratedb_toolkit/admin/xmover/operational/recommend.py +++ b/cratedb_toolkit/admin/xmover/operational/recommend.py @@ -11,7 +11,7 @@ ShardRelocationRequest, ShardRelocationResponse, ) -from cratedb_toolkit.admin.xmover.operational.recover import RecoveryMonitor, RecoveryOptions +from cratedb_toolkit.admin.xmover.operational.monitor import RecoveryMonitor, RecoveryOptions from cratedb_toolkit.admin.xmover.util.database import CrateDBClient from cratedb_toolkit.admin.xmover.util.format import format_size From 
46e9c00e7fe92c515e2516a1e9ae282ea38f065d Mon Sep 17 00:00:00 2001 From: Walter Behmann Date: Thu, 21 Aug 2025 13:55:25 +0200 Subject: [PATCH 15/18] Admin/XMover: Suppress SSL warnings when SSL verification is disabled --- cratedb_toolkit/admin/xmover/util/database.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cratedb_toolkit/admin/xmover/util/database.py b/cratedb_toolkit/admin/xmover/util/database.py index 1cb16bb1..21950ab0 100644 --- a/cratedb_toolkit/admin/xmover/util/database.py +++ b/cratedb_toolkit/admin/xmover/util/database.py @@ -7,6 +7,7 @@ from typing import Any, Dict, List, Optional, Union import requests +import urllib3 from dotenv import load_dotenv from cratedb_toolkit.admin.xmover.model import NodeInfo, RecoveryInfo, ShardInfo @@ -30,6 +31,10 @@ def __init__(self, connection_string: Optional[str] = None): self.password = os.getenv("CRATE_PASSWORD") self.ssl_verify = os.getenv("CRATE_SSL_VERIFY", "true").lower() == "true" + # Suppress SSL warnings when SSL verification is disabled + if not self.ssl_verify: + urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + # Ensure connection string ends with _sql endpoint if not self.connection_string.endswith("/_sql"): self.connection_string = self.connection_string.rstrip("/") + "/_sql" From 3704c96d5263f4033a50ce1275ceaa28b1fe08c8 Mon Sep 17 00:00:00 2001 From: Walter Behmann Date: Thu, 21 Aug 2025 14:02:46 +0200 Subject: [PATCH 16/18] Admin/XMover: Add shard distribution analysis for (large) tables --- .../admin/xmover/analysis/table.py | 783 ++++++++++++++++++ cratedb_toolkit/admin/xmover/cli.py | 64 ++ doc/admin/xmover/handbook.md | 13 + doc/admin/xmover/index.md | 1 + doc/admin/xmover/queries.md | 31 + tests/admin/test_cli.py | 1 + 6 files changed, 893 insertions(+) create mode 100644 cratedb_toolkit/admin/xmover/analysis/table.py diff --git a/cratedb_toolkit/admin/xmover/analysis/table.py b/cratedb_toolkit/admin/xmover/analysis/table.py new file mode 100644 index 
00000000..b8f1a7ce --- /dev/null +++ b/cratedb_toolkit/admin/xmover/analysis/table.py @@ -0,0 +1,783 @@ +""" +Shard Distribution Analysis for CrateDB Clusters + +This module analyzes shard distribution across nodes to detect imbalances +and provide recommendations for optimization. +""" + +import statistics +from dataclasses import dataclass +from typing import Any, Dict, List, Optional, Tuple + +from rich import print as rprint +from rich.console import Console +from rich.table import Table + +from cratedb_toolkit.admin.xmover.util.database import CrateDBClient + + +def format_storage_size(size_gb: float) -> str: + """Format storage size with appropriate units and spacing""" + if size_gb < 0.001: + return "0 B" + elif size_gb < 1.0: + size_mb = size_gb * 1024 + return f"{size_mb:.0f} MB" + elif size_gb < 1024: + return f"{size_gb:.1f} GB" + else: + size_tb = size_gb / 1024 + return f"{size_tb:.2f} TB" + + +@dataclass +class TableDistribution: + """Represents shard distribution for a single table""" + + schema_name: str + table_name: str + total_primary_size_gb: float + node_distributions: Dict[str, Dict[str, Any]] # node_name -> metrics + + @property + def full_table_name(self) -> str: + return f"{self.schema_name}.{self.table_name}" if self.schema_name != "doc" else self.table_name + + +@dataclass +class DistributionAnomaly: + """Represents a detected distribution anomaly""" + + table: TableDistribution + anomaly_type: str + severity_score: float + impact_score: float + combined_score: float + description: str + details: Dict[str, Any] + recommendations: List[str] + + +class DistributionAnalyzer: + """Analyzes shard distribution across cluster nodes""" + + def __init__(self, client: CrateDBClient): + self.client = client + self.console = Console() + + def find_table_by_name(self, table_name: str) -> Optional[str]: + """Find table by name and resolve schema ambiguity""" + + query = """ + SELECT DISTINCT schema_name, table_name + FROM sys.shards + WHERE table_name 
= ? + AND schema_name NOT IN ('sys', 'information_schema', 'pg_catalog') + AND routing_state = 'STARTED' + ORDER BY schema_name \ + """ + + result = self.client.execute_query(query, [table_name]) + rows = result.get("rows", []) + + if not rows: + return None + elif len(rows) == 1: + schema, table = rows[0] + return f"{schema}.{table}" if schema != "doc" else table + else: + # Multiple schemas have this table - ask user + rprint(f"[yellow]Multiple schemas contain table '{table_name}':[/yellow]") + for i, (schema, table) in enumerate(rows, 1): + full_name = f"{schema}.{table}" if schema != "doc" else table + rprint(f" {i}. {full_name}") + + try: + choice = input("\nSelect table (enter number): ").strip() + idx = int(choice) - 1 + if 0 <= idx < len(rows): + schema, table = rows[idx] + return f"{schema}.{table}" if schema != "doc" else table + else: + rprint("[red]Invalid selection[/red]") + return None + except (ValueError, KeyboardInterrupt): + rprint("\n[yellow]Selection cancelled[/yellow]") + return None + + def get_table_distribution_detailed(self, table_identifier: str) -> Optional[TableDistribution]: + """Get detailed distribution data for a specific table""" + + # Parse schema and table name + if "." 
in table_identifier: + schema_name, table_name = table_identifier.split(".", 1) + else: + schema_name = "doc" + table_name = table_identifier + + query = """ + SELECT s.schema_name, \ + s.table_name, \ + s.node['name'] as node_name, \ + COUNT(CASE WHEN s."primary" = true THEN 1 END) as primary_shards, \ + COUNT(CASE WHEN s."primary" = false THEN 1 END) as replica_shards, \ + COUNT(*) as total_shards, \ + ROUND(SUM(s.size) / 1024.0 / 1024.0 / 1024.0, 2) as total_size_gb, \ + ROUND(SUM(CASE WHEN s."primary" = true THEN s.size ELSE 0 END) / 1024.0 / 1024.0 / 1024.0, \ + 2) as primary_size_gb, \ + ROUND(SUM(CASE WHEN s."primary" = false THEN s.size ELSE 0 END) / 1024.0 / 1024.0 / 1024.0, \ + 2) as replica_size_gb, \ + SUM(s.num_docs) as total_documents + FROM sys.shards s + WHERE s.schema_name = ? \ + AND s.table_name = ? + AND s.routing_state = 'STARTED' + GROUP BY s.schema_name, s.table_name, s.node['name'] + ORDER BY s.node['name'] \ + """ + + result = self.client.execute_query(query, [schema_name, table_name]) + rows = result.get("rows", []) + + if not rows: + return None + + # Build node distributions + node_distributions = {} + for row in rows: + node_distributions[row[2]] = { + "primary_shards": row[3], + "replica_shards": row[4], + "total_shards": row[5], + "total_size_gb": row[6], + "primary_size_gb": row[7], + "replica_size_gb": row[8], + "total_documents": row[9], + } + + # Calculate total primary size + total_primary_size = sum(node["primary_size_gb"] for node in node_distributions.values()) + + return TableDistribution( + schema_name=rows[0][0], + table_name=rows[0][1], + total_primary_size_gb=total_primary_size, + node_distributions=node_distributions, + ) + + def format_table_health_report(self, table_dist: TableDistribution) -> None: + """Format and display comprehensive table health report""" + + rprint(f"\n[bold blue]πŸ“‹ Table Health Report: {table_dist.full_table_name}[/bold blue]") + rprint("=" * 80) + + # Calculate overview stats + all_nodes_info 
= self.client.get_nodes_info() + cluster_nodes = {node.name for node in all_nodes_info if node.name} + table_nodes = set(table_dist.node_distributions.keys()) + missing_nodes = cluster_nodes - table_nodes + + total_shards = sum(node["total_shards"] for node in table_dist.node_distributions.values()) + total_primary_shards = sum(node["primary_shards"] for node in table_dist.node_distributions.values()) + total_replica_shards = sum(node["replica_shards"] for node in table_dist.node_distributions.values()) + total_size_gb = sum(node["total_size_gb"] for node in table_dist.node_distributions.values()) + total_documents = sum(node["total_documents"] for node in table_dist.node_distributions.values()) + + # Table Overview + rprint("\n[bold]🎯 Overview[/bold]") + rprint(f"β€’ Primary Data Size: {format_storage_size(table_dist.total_primary_size_gb)}") + rprint(f"β€’ Total Size (with replicas): {format_storage_size(total_size_gb)}") + rprint(f"β€’ Total Shards: {total_shards} ({total_primary_shards} primary + {total_replica_shards} replica)") + rprint(f"β€’ Total Documents: {total_documents:,}") + rprint( + f"β€’ Node Coverage: {len(table_nodes)}/{len(cluster_nodes)} nodes ({len(table_nodes) / len(cluster_nodes) * 100:.0f}%)" + ) + + if missing_nodes: + rprint(f"β€’ [yellow]Missing from nodes: {', '.join(sorted(missing_nodes))}[/yellow]") + + # Shard Distribution Table + rprint("\n[bold]πŸ“Š Shard Distribution by Node[/bold]") + + shard_table = Table(show_header=True) + shard_table.add_column("Node", width=15) + shard_table.add_column("Primary", width=8, justify="right") + shard_table.add_column("Replica", width=8, justify="right") + shard_table.add_column("Total", width=8, justify="right") + shard_table.add_column("Primary Size", width=12, justify="right") + shard_table.add_column("Replica Size", width=12, justify="right") + shard_table.add_column("Total Size", width=12, justify="right") + shard_table.add_column("Documents", width=12, justify="right") + + for node_name in 
sorted(table_dist.node_distributions.keys()): + node_data = table_dist.node_distributions[node_name] + + # Color coding based on shard count compared to average + avg_total_shards = total_shards / len(table_dist.node_distributions) + if node_data["total_shards"] > avg_total_shards * 1.5: + node_color = "red" + elif node_data["total_shards"] < avg_total_shards * 0.5: + node_color = "yellow" + else: + node_color = "white" + + shard_table.add_row( + f"[{node_color}]{node_name}[/{node_color}]", + str(node_data["primary_shards"]), + str(node_data["replica_shards"]), + f"[{node_color}]{node_data['total_shards']}[/{node_color}]", + format_storage_size(node_data["primary_size_gb"]), + format_storage_size(node_data["replica_size_gb"]), + f"[{node_color}]{format_storage_size(node_data['total_size_gb'])}[/{node_color}]", + f"{node_data['total_documents']:,}", + ) + + self.console.print(shard_table) + + # Distribution Analysis + rprint("\n[bold]πŸ” Distribution Analysis[/bold]") + + # Calculate statistics + shard_counts = [node["total_shards"] for node in table_dist.node_distributions.values()] + storage_sizes = [node["total_size_gb"] for node in table_dist.node_distributions.values()] + doc_counts = [node["total_documents"] for node in table_dist.node_distributions.values()] + + shard_cv = self.calculate_coefficient_of_variation(shard_counts) + storage_cv = self.calculate_coefficient_of_variation(storage_sizes) + doc_cv = self.calculate_coefficient_of_variation(doc_counts) + + min_shards, max_shards = min(shard_counts), max(shard_counts) + min_storage, max_storage = min(storage_sizes), max(storage_sizes) + min_docs, max_docs = min(doc_counts), max(doc_counts) + + # Shard distribution analysis + if shard_cv > 0.3: + rprint( + f"β€’ [red]⚠ Shard Imbalance:[/red] Range {min_shards}-{max_shards} shards per node (CV: {shard_cv:.2f})" + ) + else: + rprint(f"β€’ [green]βœ“ Shard Balance:[/green] Well distributed (CV: {shard_cv:.2f})") + + # Storage distribution analysis + if 
storage_cv > 0.4: + rprint( + f"β€’ [red]⚠ Storage Imbalance:[/red] Range {format_storage_size(min_storage)}-{format_storage_size(max_storage)} per node (CV: {storage_cv:.2f})" + ) + else: + rprint(f"β€’ [green]βœ“ Storage Balance:[/green] Well distributed (CV: {storage_cv:.2f})") + + # Document distribution analysis + if doc_cv > 0.5: + rprint(f"β€’ [red]⚠ Document Skew:[/red] Range {min_docs:,}-{max_docs:,} docs per node (CV: {doc_cv:.2f})") + else: + rprint(f"β€’ [green]βœ“ Document Distribution:[/green] Well balanced (CV: {doc_cv:.2f})") + + # Node coverage analysis + coverage_ratio = len(table_nodes) / len(cluster_nodes) + if coverage_ratio < 0.7: + missing_list = ", ".join(sorted(missing_nodes)[:5]) # Show up to 5 nodes + if len(missing_nodes) > 5: + missing_list += f", +{len(missing_nodes) - 5} more" + rprint(f"β€’ [red]⚠ Limited Coverage:[/red] {coverage_ratio:.0%} cluster coverage, missing: {missing_list}") + else: + rprint(f"β€’ [green]βœ“ Good Coverage:[/green] {coverage_ratio:.0%} of cluster nodes have this table") + + # Zone analysis if available + try: + zone_distribution = {} + for node_name, node_data in table_dist.node_distributions.items(): + # Try to get zone info for each node + node_info = next((n for n in all_nodes_info if n.name == node_name), None) + if ( + node_info + and hasattr(node_info, "attributes") + and node_info.attributes + and "zone" in node_info.attributes + ): + zone = node_info.attributes["zone"] + if zone not in zone_distribution: + zone_distribution[zone] = {"nodes": 0, "shards": 0, "size": 0} + zone_distribution[zone]["nodes"] += 1 + zone_distribution[zone]["shards"] += node_data["total_shards"] + zone_distribution[zone]["size"] += node_data["total_size_gb"] + + if zone_distribution: + rprint("\n[bold]🌍 Zone Distribution[/bold]") + for zone in sorted(zone_distribution.keys()): + zone_data = zone_distribution[zone] + rprint( + f"β€’ {zone}: {zone_data['nodes']} nodes, {zone_data['shards']} shards, 
{format_storage_size(zone_data['size'])}" + ) + + except Exception: + pass # Zone info not available + + # Health Summary + rprint("\n[bold]πŸ’Š Health Summary[/bold]") + issues = [] + recommendations = [] + + if shard_cv > 0.3: + issues.append("Shard imbalance") + recommendations.append("Consider moving shards between nodes for better distribution") + + if storage_cv > 0.4: + issues.append("Storage imbalance") + recommendations.append("Rebalance shards to distribute storage more evenly") + + if doc_cv > 0.5: + issues.append("Document skew") + recommendations.append("Review routing configuration - data may not be evenly distributed") + + if coverage_ratio < 0.7: + issues.append("Limited node coverage") + recommendations.append("Consider adding replicas to improve availability and distribution") + + if not issues: + rprint("β€’ [green]βœ… Table appears healthy with good distribution[/green]") + else: + rprint(f"β€’ [yellow]⚠ Issues found: {', '.join(issues)}[/yellow]") + rprint("\n[bold]πŸ’‘ Recommendations:[/bold]") + for rec in recommendations: + rprint(f" β€’ {rec}") + + rprint() + + def get_largest_tables_distribution(self, top_n: int = 10) -> List[TableDistribution]: + """Get distribution data for the largest tables using BIGDUDES query""" + + query = """ + WITH largest_tables AS (SELECT schema_name, \ + table_name, \ + SUM(CASE WHEN "primary" = true THEN size ELSE 0 END) as total_primary_size \ + FROM sys.shards \ + WHERE schema_name NOT IN ('sys', 'information_schema', 'pg_catalog') \ + AND routing_state = 'STARTED' \ + GROUP BY schema_name, table_name \ + ORDER BY total_primary_size DESC + LIMIT ? 
+ ) + SELECT s.schema_name, \ + s.table_name, \ + s.node['name'] as node_name, \ + COUNT(CASE WHEN s."primary" = true THEN 1 END) as primary_shards, \ + COUNT(CASE WHEN s."primary" = false THEN 1 END) as replica_shards, \ + COUNT(*) as total_shards, \ + ROUND(SUM(s.size) / 1024.0 / 1024.0 / 1024.0, 2) as total_size_gb, \ + ROUND(SUM(CASE WHEN s."primary" = true THEN s.size ELSE 0 END) / 1024.0 / 1024.0 / 1024.0, \ + 2) as primary_size_gb, \ + ROUND(SUM(CASE WHEN s."primary" = false THEN s.size ELSE 0 END) / 1024.0 / 1024.0 / 1024.0, \ + 2) as replica_size_gb, \ + SUM(s.num_docs) as total_documents + FROM sys.shards s + INNER JOIN largest_tables lt \ + ON (s.schema_name = lt.schema_name AND s.table_name = lt.table_name) + WHERE s.routing_state = 'STARTED' + GROUP BY s.schema_name, s.table_name, s.node['name'] + ORDER BY s.schema_name, s.table_name, s.node['name'] \ + """ + + result = self.client.execute_query(query, [top_n]) + + # Extract rows from the result dictionary + rows = result.get("rows", []) + + if not rows: + return [] + + # Group results by table + tables_data = {} + for row in rows: + # Ensure we have enough columns + if len(row) < 10: + continue + + table_key = f"{row[0]}.{row[1]}" + if table_key not in tables_data: + tables_data[table_key] = {"schema_name": row[0], "table_name": row[1], "nodes": {}} + + tables_data[table_key]["nodes"][row[2]] = { + "primary_shards": row[3], + "replica_shards": row[4], + "total_shards": row[5], + "total_size_gb": row[6], + "primary_size_gb": row[7], + "replica_size_gb": row[8], + "total_documents": row[9], + } + + # Calculate total primary sizes and create TableDistribution objects + distributions = [] + for table_data in tables_data.values(): + total_primary_size = sum(node["primary_size_gb"] for node in table_data["nodes"].values()) + + distribution = TableDistribution( + schema_name=table_data["schema_name"], + table_name=table_data["table_name"], + total_primary_size_gb=total_primary_size, + 
node_distributions=table_data["nodes"], + ) + distributions.append(distribution) + + # Sort by primary size (descending) + return sorted(distributions, key=lambda x: x.total_primary_size_gb, reverse=True) + + def calculate_coefficient_of_variation(self, values: List[float]) -> float: + """Calculate coefficient of variation (std dev / mean)""" + if not values or len(values) < 2: + return 0.0 + + mean_val = statistics.mean(values) + if mean_val == 0: + return 0.0 + + try: + std_dev = statistics.stdev(values) + return std_dev / mean_val + except statistics.StatisticsError: + return 0.0 + + def detect_shard_count_imbalance(self, table: TableDistribution) -> Optional[DistributionAnomaly]: + """Detect imbalances in shard count distribution""" + if not table.node_distributions: + return None + + # Get shard counts per node + total_shards = [node["total_shards"] for node in table.node_distributions.values()] + primary_shards = [node["primary_shards"] for node in table.node_distributions.values()] + replica_shards = [node["replica_shards"] for node in table.node_distributions.values()] + + # Calculate coefficient of variation + total_cv = self.calculate_coefficient_of_variation(total_shards) + primary_cv = self.calculate_coefficient_of_variation(primary_shards) + replica_cv = self.calculate_coefficient_of_variation(replica_shards) + + # Severity based on highest CV (higher CV = more imbalanced) + max_cv = max(total_cv, primary_cv, replica_cv) + + # Consider it an anomaly if CV > 0.3 (30% variation) + if max_cv < 0.3: + return None + + # Impact based on table size + impact_score = min(table.total_primary_size_gb / 100.0, 10.0) # Cap at 10 + severity_score = min(max_cv * 10, 10.0) # Scale to 0-10 + combined_score = impact_score * severity_score + + # Generate recommendations + recommendations = [] + min_shards = min(total_shards) + max_shards = max(total_shards) + + if max_shards - min_shards > 1: + overloaded_nodes = [ + node for node, data in 
table.node_distributions.items() if data["total_shards"] == max_shards + ] + underloaded_nodes = [ + node for node, data in table.node_distributions.items() if data["total_shards"] == min_shards + ] + + if overloaded_nodes and underloaded_nodes: + recommendations.append(f"Move shards from {overloaded_nodes[0]} to {underloaded_nodes[0]}") + + return DistributionAnomaly( + table=table, + anomaly_type="Shard Count Imbalance", + severity_score=severity_score, + impact_score=impact_score, + combined_score=combined_score, + description=f"Uneven shard distribution (CV: {max_cv:.2f})", + details={ + "total_cv": total_cv, + "primary_cv": primary_cv, + "replica_cv": replica_cv, + "shard_counts": {node: data["total_shards"] for node, data in table.node_distributions.items()}, + }, + recommendations=recommendations, + ) + + def detect_storage_imbalance(self, table: TableDistribution) -> Optional[DistributionAnomaly]: + """Detect imbalances in storage distribution""" + if not table.node_distributions: + return None + + storage_sizes = [node["total_size_gb"] for node in table.node_distributions.values()] + + # Skip if all sizes are very small (< 1GB total) + if sum(storage_sizes) < 1.0: + return None + + cv = self.calculate_coefficient_of_variation(storage_sizes) + + # Consider it an anomaly if CV > 0.4 (40% variation) for storage + if cv < 0.4: + return None + + impact_score = min(table.total_primary_size_gb / 50.0, 10.0) + severity_score = min(cv * 8, 10.0) + combined_score = impact_score * severity_score + + # Generate recommendations + recommendations = [] + min_size = min(storage_sizes) + max_size = max(storage_sizes) + + if max_size > min_size * 2: # If difference is > 2x + overloaded_node = None + underloaded_node = None + + for node, data in table.node_distributions.items(): + if data["total_size_gb"] == max_size: + overloaded_node = node + elif data["total_size_gb"] == min_size: + underloaded_node = node + + if overloaded_node and underloaded_node: + 
recommendations.append( + f"Rebalance storage from {overloaded_node} ({format_storage_size(max_size)}) to {underloaded_node} ({format_storage_size(min_size)})" + ) + + return DistributionAnomaly( + table=table, + anomaly_type="Storage Imbalance", + severity_score=severity_score, + impact_score=impact_score, + combined_score=combined_score, + description=f"Uneven storage distribution (CV: {cv:.2f})", + details={ + "storage_cv": cv, + "storage_sizes": {node: data["total_size_gb"] for node, data in table.node_distributions.items()}, + }, + recommendations=recommendations, + ) + + def detect_node_coverage_issues(self, table: TableDistribution) -> Optional[DistributionAnomaly]: + """Detect nodes with missing shard coverage""" + if not table.node_distributions: + return None + + # Get all cluster nodes + all_nodes = set() + try: + nodes_info = self.client.get_nodes_info() + all_nodes = {node.name for node in nodes_info if node.name} + except Exception: + # If we can't get node info, use nodes that have shards + all_nodes = set(table.node_distributions.keys()) + + nodes_with_shards = set(table.node_distributions.keys()) + nodes_without_shards = all_nodes - nodes_with_shards + + # Only flag as anomaly if we have missing nodes and the table is significant + if not nodes_without_shards or table.total_primary_size_gb < 10.0: + return None + + coverage_ratio = len(nodes_with_shards) / len(all_nodes) + + # Consider it an anomaly if coverage < 70% + if coverage_ratio >= 0.7: + return None + + impact_score = min(table.total_primary_size_gb / 100.0, 10.0) + severity_score = (1 - coverage_ratio) * 10 # Higher severity for lower coverage + combined_score = impact_score * severity_score + + recommendations = [f"Consider adding replicas to nodes: {', '.join(sorted(nodes_without_shards))}"] + + return DistributionAnomaly( + table=table, + anomaly_type="Node Coverage Issue", + severity_score=severity_score, + impact_score=impact_score, + combined_score=combined_score, + 
description=f"Limited node coverage ({len(nodes_with_shards)}/{len(all_nodes)} nodes)", + details={ + "coverage_ratio": coverage_ratio, + "nodes_with_shards": sorted(nodes_with_shards), + "nodes_without_shards": sorted(nodes_without_shards), + }, + recommendations=recommendations, + ) + + def detect_document_imbalance(self, table: TableDistribution) -> Optional[DistributionAnomaly]: + """Detect imbalances in document distribution""" + if not table.node_distributions: + return None + + document_counts = [node["total_documents"] for node in table.node_distributions.values()] + + # Skip if total documents is very low + if sum(document_counts) < 10000: + return None + + cv = self.calculate_coefficient_of_variation(document_counts) + + # Consider it an anomaly if CV > 0.5 (50% variation) for documents + if cv < 0.5: + return None + + impact_score = min(table.total_primary_size_gb / 100.0, 10.0) + severity_score = min(cv * 6, 10.0) + combined_score = impact_score * severity_score + + # Generate recommendations + recommendations = ["Document imbalance may indicate data skew - consider reviewing shard routing"] + + min_docs = min(document_counts) + max_docs = max(document_counts) + + if max_docs > min_docs * 3: # If difference is > 3x + recommendations.append(f"Significant document skew detected ({min_docs:,} to {max_docs:,} docs per node)") + + return DistributionAnomaly( + table=table, + anomaly_type="Document Imbalance", + severity_score=severity_score, + impact_score=impact_score, + combined_score=combined_score, + description=f"Uneven document distribution (CV: {cv:.2f})", + details={ + "document_cv": cv, + "document_counts": {node: data["total_documents"] for node, data in table.node_distributions.items()}, + }, + recommendations=recommendations, + ) + + def analyze_distribution(self, top_tables: int = 10) -> List[DistributionAnomaly]: + """Analyze shard distribution and return ranked anomalies""" + + # Get table distributions + distributions = 
self.get_largest_tables_distribution(top_tables) + + # Detect all anomalies + anomalies = [] + + for table_dist in distributions: + # Check each type of anomaly + for detector in [ + self.detect_shard_count_imbalance, + self.detect_storage_imbalance, + self.detect_node_coverage_issues, + self.detect_document_imbalance, + ]: + anomaly = detector(table_dist) + if anomaly: + anomalies.append(anomaly) + + # Sort by combined score (highest first) + return sorted(anomalies, key=lambda x: x.combined_score, reverse=True), len(distributions) + + def format_distribution_report(self, anomalies: List[DistributionAnomaly], tables_analyzed: int) -> None: + """Format and display the distribution analysis report""" + + if not anomalies: + rprint( + f"[green]βœ“ No significant shard distribution anomalies detected in top {tables_analyzed} tables![/green]" + ) + return + + # Show analysis scope + unique_tables = set(anomaly.table.full_table_name for anomaly in anomalies) + rprint( + f"[blue]πŸ“‹ Analyzed {tables_analyzed} largest tables, found issues in {len(unique_tables)} tables[/blue]" + ) + rprint() + + # Summary table + table = Table(title="🎯 Shard Distribution Anomalies", show_header=True) + table.add_column("Rank", width=4) + table.add_column("Table", min_width=20) + table.add_column("Issue Type", min_width=15) + table.add_column("Score", width=8) + table.add_column("Primary Size", width=12) + table.add_column("Description", min_width=25) + + for i, anomaly in enumerate(anomalies[:10], 1): # Top 10 + # Color coding by severity + if anomaly.combined_score >= 50: + rank_color = "red" + elif anomaly.combined_score >= 25: + rank_color = "yellow" + else: + rank_color = "blue" + + table.add_row( + f"[{rank_color}]{i}[/{rank_color}]", + anomaly.table.full_table_name, + anomaly.anomaly_type, + f"[{rank_color}]{anomaly.combined_score:.1f}[/{rank_color}]", + format_storage_size(anomaly.table.total_primary_size_gb), + anomaly.description, + ) + + self.console.print(table) + + # Detailed 
recommendations for top issues + if anomalies: + rprint("\n[bold]πŸ”§ Top Recommendations:[/bold]") + + for i, anomaly in enumerate(anomalies[:5], 1): # Top 5 recommendations + rprint(f"\n[bold]{i}. {anomaly.table.full_table_name}[/bold] - {anomaly.anomaly_type}") + + # Show the problem analysis first + rprint(f" [yellow]πŸ” Problem:[/yellow] {anomaly.description}") + + # Add specific details about what's wrong + if anomaly.anomaly_type == "Shard Count Imbalance": + if "shard_counts" in anomaly.details: + counts = anomaly.details["shard_counts"] + min_count = min(counts.values()) + max_count = max(counts.values()) + overloaded = [node for node, count in counts.items() if count == max_count] + underloaded = [node for node, count in counts.items() if count == min_count] + rprint( + f" [red]⚠ Issue:[/red] {overloaded[0]} has {max_count} shards while {underloaded[0]} has only {min_count} shards" + ) + + elif anomaly.anomaly_type == "Storage Imbalance": + if "storage_sizes" in anomaly.details: + sizes = anomaly.details["storage_sizes"] + min_size = min(sizes.values()) + max_size = max(sizes.values()) + overloaded = [node for node, size in sizes.items() if size == max_size][0] + underloaded = [node for node, size in sizes.items() if size == min_size][0] + rprint( + f" [red]⚠ Issue:[/red] Storage ranges from {format_storage_size(min_size)} ({underloaded}) to {format_storage_size(max_size)} ({overloaded}) - {max_size / min_size:.1f}x difference" + ) + + elif anomaly.anomaly_type == "Node Coverage Issue": + if "nodes_without_shards" in anomaly.details: + missing_nodes = anomaly.details["nodes_without_shards"] + coverage_ratio = anomaly.details["coverage_ratio"] + rprint( + f" [red]⚠ Issue:[/red] Table missing from {len(missing_nodes)} nodes ({coverage_ratio:.0%} cluster coverage)" + ) + rprint( + f" [dim] Missing from: {', '.join(missing_nodes[:3])}{'...' 
if len(missing_nodes) > 3 else ''}[/dim]" + ) + + elif anomaly.anomaly_type == "Document Imbalance": + if "document_counts" in anomaly.details: + doc_counts = anomaly.details["document_counts"] + min_docs = min(doc_counts.values()) + max_docs = max(doc_counts.values()) + ratio = max_docs / min_docs if min_docs > 0 else float("inf") + rprint( + f" [red]⚠ Issue:[/red] Document counts range from {min_docs:,} to {max_docs:,} ({ratio:.1f}x difference)" + ) + + # Show recommendations + rprint(" [green]πŸ’‘ Solutions:[/green]") + for rec in anomaly.recommendations: + rprint(f" β€’ {rec}") + + # Summary statistics + unique_tables = set(anomaly.table.full_table_name for anomaly in anomalies) + rprint("\n[dim]πŸ“Š Analysis Summary:[/dim]") + rprint(f"[dim]β€’ Tables analyzed: {tables_analyzed}[/dim]") + rprint(f"[dim]β€’ Tables with issues: {len(unique_tables)}[/dim]") + rprint(f"[dim]β€’ Total anomalies found: {len(anomalies)}[/dim]") + rprint(f"[dim]β€’ Critical issues (score >50): {len([a for a in anomalies if a.combined_score >= 50])}[/dim]") + rprint( + f"[dim]β€’ Warning issues (score 25-50): {len([a for a in anomalies if 25 <= a.combined_score < 50])}[/dim]" + ) diff --git a/cratedb_toolkit/admin/xmover/cli.py b/cratedb_toolkit/admin/xmover/cli.py index 339f9e7f..e5e6e834 100644 --- a/cratedb_toolkit/admin/xmover/cli.py +++ b/cratedb_toolkit/admin/xmover/cli.py @@ -11,6 +11,7 @@ from rich.console import Console from cratedb_toolkit.admin.xmover.analysis.shard import ShardAnalyzer, ShardReporter +from cratedb_toolkit.admin.xmover.analysis.table import DistributionAnalyzer from cratedb_toolkit.admin.xmover.analysis.zone import ZoneReport from cratedb_toolkit.admin.xmover.model import ( ShardRelocationConstraints, @@ -185,6 +186,69 @@ def check_balance(ctx, table: Optional[str], tolerance: float): report.shard_balance(tolerance=tolerance, table=table) +@main.command() +@click.option("--top-tables", default=10, help="Number of largest tables to analyze (default: 10)") 
+@click.option("--table", help='Analyze specific table only (e.g., "my_table" or "schema.table")') +@click.pass_context +def shard_distribution(ctx, top_tables: int, table: Optional[str]): + """Analyze shard distribution anomalies across cluster nodes + + This command analyzes the largest tables in your cluster to detect: + β€’ Uneven shard count distribution between nodes + β€’ Storage imbalances across nodes + β€’ Missing node coverage for tables + β€’ Document count imbalances indicating data skew + + Results are ranked by impact and severity to help prioritize fixes. + + Examples: + xmover shard-distribution # Analyze top 10 tables + xmover shard-distribution --top-tables 20 # Analyze top 20 tables + xmover shard-distribution --table my_table # Detailed report for specific table + """ + try: + client = ctx.obj["client"] + analyzer = DistributionAnalyzer(client) + + if table: + # Focused table analysis mode + console.print(f"[blue]πŸ” Analyzing table: {table}...[/blue]") + + # Find table (handles schema auto-detection) + table_identifier = analyzer.find_table_by_name(table) + if not table_identifier: + console.print(f"[red]❌ Table '{table}' not found[/red]") + return + + # Get detailed distribution + table_dist = analyzer.get_table_distribution_detailed(table_identifier) + if not table_dist: + console.print(f"[red]❌ No shard data found for table '{table_identifier}'[/red]") + return + + # Display comprehensive health report + analyzer.format_table_health_report(table_dist) + + else: + # General anomaly detection mode + console.print(f"[blue]πŸ” Analyzing shard distribution for top {top_tables} tables...[/blue]") + console.print() + + # Perform analysis + anomalies, tables_analyzed = analyzer.analyze_distribution(top_tables) + + # Display results + analyzer.format_distribution_report(anomalies, tables_analyzed) + + except KeyboardInterrupt: + console.print("\n[yellow]Analysis interrupted by user[/yellow]") + except Exception as e: + console.print(f"[red]Error 
during distribution analysis: {e}[/red]") + import traceback + + console.print(f"[dim]{traceback.format_exc()}[/dim]") + + @main.command() @click.option("--table", "-t", help="Analyze zones for specific table only") @click.option("--show-shards/--no-show-shards", default=False, help="Show individual shard details (default: False)") diff --git a/doc/admin/xmover/handbook.md b/doc/admin/xmover/handbook.md index cf9b4abe..05a3c57a 100644 --- a/doc/admin/xmover/handbook.md +++ b/doc/admin/xmover/handbook.md @@ -56,6 +56,19 @@ xmover recommend --execute xmover recommend --prioritize-space ``` +### Shard Distribution Analysis +This view dedicates a specific focus to large tables. +```bash +# Analyze distribution anomalies for top 10 largest tables +xmover shard-distribution + +# Analyze more tables +xmover shard-distribution --top-tables 20 + +# Detailed health report for specific table +xmover shard-distribution --table my_table +``` + ### Zone Analysis ```bash # Check zone balance diff --git a/doc/admin/xmover/index.md b/doc/admin/xmover/index.md index affa4825..99fd4404 100644 --- a/doc/admin/xmover/index.md +++ b/doc/admin/xmover/index.md @@ -11,6 +11,7 @@ SQL commands for shard rebalancing and node decommissioning. 
## Features - **Cluster Analysis**: Complete overview of shard distribution across nodes and zones +- **Shard Distribution Analysis**: Detect and rank distribution anomalies across largest tables - **Shard Movement Recommendations**: Intelligent suggestions for rebalancing with safety validation - **Recovery Monitoring**: Track ongoing shard recovery operations with progress details - **Zone Conflict Detection**: Prevents moves that would violate CrateDB's zone awareness diff --git a/doc/admin/xmover/queries.md b/doc/admin/xmover/queries.md index 27bd89e6..9844d8f6 100644 --- a/doc/admin/xmover/queries.md +++ b/doc/admin/xmover/queries.md @@ -216,3 +216,34 @@ SELECT WHERE current_state != 'STARTED' and table_name = 'dispatchio' and shard_id = 19 ORDER BY current_state, table_name, shard_id; ``` + +## "BIGDUDES": focuses on your **biggest storage consumers** and shows how their shards are distributed across nodes. + +```sql +WITH largest_tables AS ( + SELECT + schema_name, + table_name, + SUM(CASE WHEN "primary" = true THEN size ELSE 0 END) as total_primary_size + FROM sys.shards + WHERE schema_name NOT IN ('sys', 'information_schema', 'pg_catalog') + GROUP BY schema_name, table_name + ORDER BY total_primary_size DESC + LIMIT 10 + ) + SELECT + s.schema_name, + s.table_name, + s.node['name'] as node_name, + COUNT(CASE WHEN s."primary" = true THEN 1 END) as primary_shards, + COUNT(CASE WHEN s."primary" = false THEN 1 END) as replica_shards, + COUNT(*) as total_shards, + ROUND(SUM(s.size) / 1024.0 / 1024.0 / 1024.0, 2) as total_size_gb, + ROUND(SUM(CASE WHEN s."primary" = true THEN s.size ELSE 0 END) / 1024.0 / 1024.0 / 1024.0, 2) as primary_size_gb, + ROUND(SUM(CASE WHEN s."primary" = false THEN s.size ELSE 0 END) / 1024.0 / 1024.0 / 1024.0, 2) as replica_size_gb, + SUM(s.num_docs) as total_documents + FROM sys.shards s + INNER JOIN largest_tables lt ON (s.schema_name = lt.schema_name AND s.table_name = lt.table_name) + GROUP BY s.schema_name, s.table_name, 
s.node['name'] + ORDER BY s.schema_name, s.table_name, s.node['name']; +``` diff --git a/tests/admin/test_cli.py b/tests/admin/test_cli.py index 60e8d810..de3e4624 100644 --- a/tests/admin/test_cli.py +++ b/tests/admin/test_cli.py @@ -15,6 +15,7 @@ "recommend", "test-connection", "zone-analysis", + "shard-distribution", ], ) def test_xmover_all(cratedb, subcommand): From 5068671cd16755c0e5bb87657959f3f5ff5b61ab Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Thu, 21 Aug 2025 14:13:11 +0200 Subject: [PATCH 17/18] Admin/XMover: Code formatting. Linting. Type checking. - More or less just line-length fixes. - Only a single type adjustment was needed on the return value of the `analyze_distribution` method. - Ruff recommended to use set comprehensions, so here we go. - At a single spot where an exception has been `pass`ed, we added error output. Is it bad? --- .../admin/xmover/analysis/table.py | 48 ++++++++++++------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/cratedb_toolkit/admin/xmover/analysis/table.py b/cratedb_toolkit/admin/xmover/analysis/table.py index b8f1a7ce..ef6dbdf3 100644 --- a/cratedb_toolkit/admin/xmover/analysis/table.py +++ b/cratedb_toolkit/admin/xmover/analysis/table.py @@ -5,6 +5,7 @@ and provide recommendations for optimization. 
""" +import logging import statistics from dataclasses import dataclass from typing import Any, Dict, List, Optional, Tuple @@ -15,6 +16,8 @@ from cratedb_toolkit.admin.xmover.util.database import CrateDBClient +logger = logging.getLogger(__name__) + def format_storage_size(size_gb: float) -> str: """Format storage size with appropriate units and spacing""" @@ -134,7 +137,7 @@ def get_table_distribution_detailed(self, table_identifier: str) -> Optional[Tab AND s.routing_state = 'STARTED' GROUP BY s.schema_name, s.table_name, s.node['name'] ORDER BY s.node['name'] \ - """ + """ # noqa: E501 result = self.client.execute_query(query, [schema_name, table_name]) rows = result.get("rows", []) @@ -190,7 +193,8 @@ def format_table_health_report(self, table_dist: TableDistribution) -> None: rprint(f"β€’ Total Shards: {total_shards} ({total_primary_shards} primary + {total_replica_shards} replica)") rprint(f"β€’ Total Documents: {total_documents:,}") rprint( - f"β€’ Node Coverage: {len(table_nodes)}/{len(cluster_nodes)} nodes ({len(table_nodes) / len(cluster_nodes) * 100:.0f}%)" + f"β€’ Node Coverage: {len(table_nodes)}/{len(cluster_nodes)} nodes " + f"({len(table_nodes) / len(cluster_nodes) * 100:.0f}%)" ) if missing_nodes: @@ -261,7 +265,8 @@ def format_table_health_report(self, table_dist: TableDistribution) -> None: # Storage distribution analysis if storage_cv > 0.4: rprint( - f"β€’ [red]⚠ Storage Imbalance:[/red] Range {format_storage_size(min_storage)}-{format_storage_size(max_storage)} per node (CV: {storage_cv:.2f})" + f"β€’ [red]⚠ Storage Imbalance:[/red] Range " + f"{format_storage_size(min_storage)}-{format_storage_size(max_storage)} per node (CV: {storage_cv:.2f})" ) else: rprint(f"β€’ [green]βœ“ Storage Balance:[/green] Well distributed (CV: {storage_cv:.2f})") @@ -306,11 +311,13 @@ def format_table_health_report(self, table_dist: TableDistribution) -> None: for zone in sorted(zone_distribution.keys()): zone_data = zone_distribution[zone] rprint( - f"β€’ 
{zone}: {zone_data['nodes']} nodes, {zone_data['shards']} shards, {format_storage_size(zone_data['size'])}" + f"β€’ {zone}: {zone_data['nodes']} nodes, " + f"{zone_data['shards']} shards, {format_storage_size(zone_data['size'])}" ) except Exception: - pass # Zone info not available + # Zone info not available + logger.exception("Zone info not available") # Health Summary rprint("\n[bold]πŸ’Š Health Summary[/bold]") @@ -375,7 +382,7 @@ def get_largest_tables_distribution(self, top_n: int = 10) -> List[TableDistribu WHERE s.routing_state = 'STARTED' GROUP BY s.schema_name, s.table_name, s.node['name'] ORDER BY s.schema_name, s.table_name, s.node['name'] \ - """ + """ # noqa: E501 result = self.client.execute_query(query, [top_n]) @@ -534,7 +541,8 @@ def detect_storage_imbalance(self, table: TableDistribution) -> Optional[Distrib if overloaded_node and underloaded_node: recommendations.append( - f"Rebalance storage from {overloaded_node} ({format_storage_size(max_size)}) to {underloaded_node} ({format_storage_size(min_size)})" + f"Rebalance storage from {overloaded_node} ({format_storage_size(max_size)}) " + f"to {underloaded_node} ({format_storage_size(min_size)})" ) return DistributionAnomaly( @@ -643,7 +651,7 @@ def detect_document_imbalance(self, table: TableDistribution) -> Optional[Distri recommendations=recommendations, ) - def analyze_distribution(self, top_tables: int = 10) -> List[DistributionAnomaly]: + def analyze_distribution(self, top_tables: int = 10) -> Tuple[List[DistributionAnomaly], int]: """Analyze shard distribution and return ranked anomalies""" # Get table distributions @@ -672,12 +680,13 @@ def format_distribution_report(self, anomalies: List[DistributionAnomaly], table if not anomalies: rprint( - f"[green]βœ“ No significant shard distribution anomalies detected in top {tables_analyzed} tables![/green]" + f"[green]βœ“ No significant shard distribution anomalies " + f"detected in top {tables_analyzed} tables![/green]" ) return # Show analysis 
scope - unique_tables = set(anomaly.table.full_table_name for anomaly in anomalies) + unique_tables = {anomaly.table.full_table_name for anomaly in anomalies} rprint( f"[blue]πŸ“‹ Analyzed {tables_analyzed} largest tables, found issues in {len(unique_tables)} tables[/blue]" ) @@ -731,7 +740,8 @@ def format_distribution_report(self, anomalies: List[DistributionAnomaly], table overloaded = [node for node, count in counts.items() if count == max_count] underloaded = [node for node, count in counts.items() if count == min_count] rprint( - f" [red]⚠ Issue:[/red] {overloaded[0]} has {max_count} shards while {underloaded[0]} has only {min_count} shards" + f" [red]⚠ Issue:[/red] {overloaded[0]} has {max_count} shards " + f"while {underloaded[0]} has only {min_count} shards" ) elif anomaly.anomaly_type == "Storage Imbalance": @@ -742,7 +752,8 @@ def format_distribution_report(self, anomalies: List[DistributionAnomaly], table overloaded = [node for node, size in sizes.items() if size == max_size][0] underloaded = [node for node, size in sizes.items() if size == min_size][0] rprint( - f" [red]⚠ Issue:[/red] Storage ranges from {format_storage_size(min_size)} ({underloaded}) to {format_storage_size(max_size)} ({overloaded}) - {max_size / min_size:.1f}x difference" + f" [red]⚠ Issue:[/red] Storage ranges from {format_storage_size(min_size)} ({underloaded}) " # noqa: E501 + f"to {format_storage_size(max_size)} ({overloaded}) - {max_size / min_size:.1f}x difference" ) elif anomaly.anomaly_type == "Node Coverage Issue": @@ -750,11 +761,11 @@ def format_distribution_report(self, anomalies: List[DistributionAnomaly], table missing_nodes = anomaly.details["nodes_without_shards"] coverage_ratio = anomaly.details["coverage_ratio"] rprint( - f" [red]⚠ Issue:[/red] Table missing from {len(missing_nodes)} nodes ({coverage_ratio:.0%} cluster coverage)" - ) - rprint( - f" [dim] Missing from: {', '.join(missing_nodes[:3])}{'...' 
if len(missing_nodes) > 3 else ''}[/dim]" + f" [red]⚠ Issue:[/red] Table missing from {len(missing_nodes)} nodes " + f"({coverage_ratio:.0%} cluster coverage)" ) + ellipsis = "..." if len(missing_nodes) > 3 else "" + rprint(f" [dim] Missing from: {', '.join(missing_nodes[:3])}{ellipsis}[/dim]") elif anomaly.anomaly_type == "Document Imbalance": if "document_counts" in anomaly.details: @@ -763,7 +774,8 @@ def format_distribution_report(self, anomalies: List[DistributionAnomaly], table max_docs = max(doc_counts.values()) ratio = max_docs / min_docs if min_docs > 0 else float("inf") rprint( - f" [red]⚠ Issue:[/red] Document counts range from {min_docs:,} to {max_docs:,} ({ratio:.1f}x difference)" + f" [red]⚠ Issue:[/red] Document counts range " + f"from {min_docs:,} to {max_docs:,} ({ratio:.1f}x difference)" ) # Show recommendations @@ -772,7 +784,7 @@ def format_distribution_report(self, anomalies: List[DistributionAnomaly], table rprint(f" β€’ {rec}") # Summary statistics - unique_tables = set(anomaly.table.full_table_name for anomaly in anomalies) + unique_tables = {anomaly.table.full_table_name for anomaly in anomalies} rprint("\n[dim]πŸ“Š Analysis Summary:[/dim]") rprint(f"[dim]β€’ Tables analyzed: {tables_analyzed}[/dim]") rprint(f"[dim]β€’ Tables with issues: {len(unique_tables)}[/dim]") From 256ff3d27d434ceeefb4647e4a68a995fdaf9c7c Mon Sep 17 00:00:00 2001 From: Walter Behmann Date: Fri, 5 Sep 2025 11:32:34 +0200 Subject: [PATCH 18/18] Admin/XMover: Add module for active shard monitoring --- .../admin/xmover/analysis/shard.py | 180 +++++++ cratedb_toolkit/admin/xmover/cli.py | 197 +++++++- cratedb_toolkit/admin/xmover/model.py | 64 +++ cratedb_toolkit/admin/xmover/util/database.py | 59 ++- doc/admin/xmover/handbook.md | 126 +++++ pyproject.toml | 1 + tests/admin/test_active_shard_monitor.py | 472 ++++++++++++++++++ tests/admin/test_distribution_analyzer.py | 294 +++++++++++ tests/admin/test_recovery_monitor.py | 296 +++++++++++ 9 files changed, 1687 
insertions(+), 2 deletions(-) create mode 100644 tests/admin/test_active_shard_monitor.py create mode 100644 tests/admin/test_distribution_analyzer.py create mode 100644 tests/admin/test_recovery_monitor.py diff --git a/cratedb_toolkit/admin/xmover/analysis/shard.py b/cratedb_toolkit/admin/xmover/analysis/shard.py index f6f24b6b..a1869019 100644 --- a/cratedb_toolkit/admin/xmover/analysis/shard.py +++ b/cratedb_toolkit/admin/xmover/analysis/shard.py @@ -13,6 +13,8 @@ from rich.table import Table from cratedb_toolkit.admin.xmover.model import ( + ActiveShardActivity, + ActiveShardSnapshot, DistributionStats, NodeInfo, ShardInfo, @@ -947,3 +949,181 @@ def distribution(self, table: str = None): table_summary.add_row("Node Balance Score", f"{stats.node_balance_score:.1f}/100") console.print(table_summary) + + +class ActiveShardMonitor: + """Monitor active shard checkpoint progression over time""" + + def __init__(self, client: CrateDBClient): + self.client = client + + def compare_snapshots( + self, + snapshot1: List[ActiveShardSnapshot], + snapshot2: List[ActiveShardSnapshot], + min_activity_threshold: int = 0, + ) -> List["ActiveShardActivity"]: + """Compare two snapshots and return activity data for shards present in both + + Args: + snapshot1: First snapshot (baseline) + snapshot2: Second snapshot (comparison) + min_activity_threshold: Minimum checkpoint delta to consider active (default: 0) + """ + + # Create lookup dict for snapshot1 + snapshot1_dict = {snap.shard_identifier: snap for snap in snapshot1} + + activities = [] + + for snap2 in snapshot2: + snap1 = snapshot1_dict.get(snap2.shard_identifier) + if snap1: + # Calculate local checkpoint delta + local_checkpoint_delta = snap2.local_checkpoint - snap1.local_checkpoint + time_diff = snap2.timestamp - snap1.timestamp + + # Filter based on actual activity between snapshots + if local_checkpoint_delta >= min_activity_threshold: + activity = ActiveShardActivity( + schema_name=snap2.schema_name, + 
table_name=snap2.table_name, + shard_id=snap2.shard_id, + node_name=snap2.node_name, + is_primary=snap2.is_primary, + partition_ident=snap2.partition_ident, + local_checkpoint_delta=local_checkpoint_delta, + snapshot1=snap1, + snapshot2=snap2, + time_diff_seconds=time_diff, + ) + activities.append(activity) + + # Sort by activity (highest checkpoint delta first) + activities.sort(key=lambda x: x.local_checkpoint_delta, reverse=True) + + return activities + + def format_activity_display( + self, activities: List["ActiveShardActivity"], show_count: int = 10, watch_mode: bool = False + ) -> str: + """Format activity data for console display""" + if not activities: + return "βœ… No active shards with significant checkpoint progression found" + + # Limit to requested count + activities = activities[:show_count] + + # Calculate observation period for context + if activities: + observation_period = activities[0].time_diff_seconds + output = [ + f"\nπŸ”₯ Most Active Shards ({len(activities)} shown, {observation_period:.0f}s observation period)" + ] + else: + output = [f"\nπŸ”₯ Most Active Shards ({len(activities)} shown, sorted by checkpoint activity)"] + + output.append("") + + # Add activity rate context + if activities: + total_activity = sum(a.local_checkpoint_delta for a in activities) + avg_rate = sum(a.activity_rate for a in activities) / len(activities) + output.append( + f"[dim]Total checkpoint activity: {total_activity:,} changes, Average rate: {avg_rate:.1f}/sec[/dim]" + ) + output.append("") + + # Create table headers + headers = ["Rank", "Schema.Table", "Shard", "Partition", "Node", "Type", "Checkpoint Ξ”", "Rate/sec", "Trend"] + + # Calculate column widths + col_widths = [len(h) for h in headers] + + # Prepare rows + rows = [] + for i, activity in enumerate(activities, 1): + # Format values + rank = str(i) + table_id = activity.table_identifier + shard_id = str(activity.shard_id) + partition = ( + activity.partition_ident[:14] + "..." 
+ if len(activity.partition_ident) > 14 + else activity.partition_ident or "-" + ) + node = activity.node_name + shard_type = "P" if activity.is_primary else "R" + checkpoint_delta = f"{activity.local_checkpoint_delta:,}" + rate = f"{activity.activity_rate:.1f}" if activity.activity_rate >= 0.1 else "<0.1" + + # Calculate activity trend indicator + if activity.activity_rate >= 100: + trend = "πŸ”₯ HOT" + elif activity.activity_rate >= 50: + trend = "πŸ“ˆ HIGH" + elif activity.activity_rate >= 10: + trend = "πŸ“Š MED" + else: + trend = "πŸ“‰ LOW" + + row = [rank, table_id, shard_id, partition, node, shard_type, checkpoint_delta, rate, trend] + rows.append(row) + + # Update column widths + for j, cell in enumerate(row): + col_widths[j] = max(col_widths[j], len(cell)) + + # Format table + header_row = " " + " | ".join(h.ljust(w) for h, w in zip(headers, col_widths)) + output.append(header_row) + output.append(" " + "-" * (len(header_row) - 3)) + + # Data rows + for row in rows: + data_row = " " + " | ".join(cell.ljust(w) for cell, w in zip(row, col_widths)) + output.append(data_row) + + # Only show legend and insights in non-watch mode + if not watch_mode: + output.append("") + output.append("Legend:") + output.append(" β€’ Checkpoint Ξ”: Write operations during observation period") + output.append(" β€’ Rate/sec: Checkpoint changes per second") + output.append(" β€’ Partition: partition_ident (truncated if >14 chars, '-' if none)") + output.append(" β€’ Type: P=Primary, R=Replica") + output.append(" β€’ Trend: πŸ”₯ HOT (β‰₯100/s), πŸ“ˆ HIGH (β‰₯50/s), πŸ“Š MED (β‰₯10/s), πŸ“‰ LOW (<10/s)") + + # Add insights about activity patterns + if activities: + output.append("") + output.append("Insights:") + + # Count by trend + hot_count = len([a for a in activities if a.activity_rate >= 100]) + high_count = len([a for a in activities if 50 <= a.activity_rate < 100]) + med_count = len([a for a in activities if 10 <= a.activity_rate < 50]) + low_count = len([a for a in 
activities if a.activity_rate < 10]) + + if hot_count > 0: + output.append(f" β€’ {hot_count} HOT shards (β‰₯100 changes/sec) - consider load balancing") + if high_count > 0: + output.append(f" β€’ {high_count} HIGH activity shards - monitor capacity") + if med_count > 0: + output.append(f" β€’ {med_count} MEDIUM activity shards - normal operation") + if low_count > 0: + output.append(f" β€’ {low_count} LOW activity shards - occasional writes") + + # Identify patterns + primary_activities = [a for a in activities if a.is_primary] + if len(primary_activities) == len(activities): + output.append(" β€’ All active shards are PRIMARY - normal write pattern") + elif len(primary_activities) < len(activities) * 0.5: + output.append(" β€’ Many REPLICA shards active - possible recovery/replication activity") + + # Node concentration + nodes = {a.node_name for a in activities} + if len(nodes) <= 2: + output.append(f" β€’ Activity concentrated on {len(nodes)} node(s) - consider redistribution") + + return "\n".join(output) diff --git a/cratedb_toolkit/admin/xmover/cli.py b/cratedb_toolkit/admin/xmover/cli.py index e5e6e834..010f9aeb 100644 --- a/cratedb_toolkit/admin/xmover/cli.py +++ b/cratedb_toolkit/admin/xmover/cli.py @@ -5,12 +5,14 @@ """ import sys +import time from typing import Optional import click from rich.console import Console +from rich.panel import Panel -from cratedb_toolkit.admin.xmover.analysis.shard import ShardAnalyzer, ShardReporter +from cratedb_toolkit.admin.xmover.analysis.shard import ActiveShardMonitor, ShardAnalyzer, ShardReporter from cratedb_toolkit.admin.xmover.analysis.table import DistributionAnalyzer from cratedb_toolkit.admin.xmover.analysis.zone import ZoneReport from cratedb_toolkit.admin.xmover.model import ( @@ -249,6 +251,199 @@ def shard_distribution(ctx, top_tables: int, table: Optional[str]): console.print(f"[dim]{traceback.format_exc()}[/dim]") +@main.command() +@click.option("--count", default=10, help="Number of most active shards 
to show (default: 10)") +@click.option("--interval", default=30, help="Observation interval in seconds (default: 30)") +@click.option( + "--min-checkpoint-delta", + default=1000, + help="Minimum checkpoint progression between snapshots to show shard (default: 1000)", +) +@click.option("--table", "-t", help="Monitor specific table only") +@click.option("--node", "-n", help="Monitor specific node only") +@click.option("--watch", "-w", is_flag=True, help="Continuously monitor (refresh every interval)") +@click.option("--exclude-system", is_flag=True, help="Exclude system tables (gc.*, information_schema.*)") +@click.option("--min-rate", type=float, help="Minimum activity rate (changes/sec) to show") +@click.option("--show-replicas/--hide-replicas", default=True, help="Show replica shards (default: True)") +@click.pass_context +def active_shards( + ctx, + count: int, + interval: int, + min_checkpoint_delta: int, + table: Optional[str], + node: Optional[str], + watch: bool, + exclude_system: bool, + min_rate: Optional[float], + show_replicas: bool, +): + """Monitor most active shards by checkpoint progression + + This command takes two snapshots of ALL started shards separated by the + observation interval, then shows the shards with the highest checkpoint + progression (activity) between the snapshots. + + Unlike other commands, this tracks ALL shards and filters based on actual + activity between snapshots, not current state. This captures shards that + become active during the observation period. + + Useful for identifying which shards are receiving the most write activity + in your cluster and understanding write patterns. 
+ + Examples: + xmover active-shards --count 20 --interval 60 # Top 20 over 60 seconds + xmover active-shards --watch --interval 30 # Continuous monitoring + xmover active-shards --table my_table --watch # Monitor specific table + xmover active-shards --node data-hot-1 --count 5 # Top 5 on specific node + xmover active-shards --min-checkpoint-delta 500 # Lower activity threshold + xmover active-shards --exclude-system --min-rate 50 # Skip system tables, min 50/sec + xmover active-shards --hide-replicas --count 20 # Only primary shards + """ + client = ctx.obj["client"] + monitor = ActiveShardMonitor(client) + + def get_filtered_snapshot(): + """Get snapshot with optional filtering""" + snapshots = client.get_active_shards_snapshot(min_checkpoint_delta=min_checkpoint_delta) + + # Apply table filter if specified + if table: + snapshots = [s for s in snapshots if s.table_name == table or f"{s.schema_name}.{s.table_name}" == table] + + # Apply node filter if specified + if node: + snapshots = [s for s in snapshots if s.node_name == node] + + # Exclude system tables if requested + if exclude_system: + snapshots = [ + s + for s in snapshots + if not ( + s.schema_name.startswith("gc.") + or s.schema_name == "information_schema" + or s.schema_name == "sys" + or s.table_name.endswith("_events") + or s.table_name.endswith("_log") + ) + ] + + return snapshots + + def run_single_analysis(): + """Run a single analysis cycle""" + if not watch: + console.print(Panel.fit("[bold blue]Active Shards Monitor[/bold blue]")) + + # Show configuration - simplified for watch mode + if watch: + config_parts = [f"{interval}s interval", f"threshold: {min_checkpoint_delta:,}", f"top {count}"] + if table: + config_parts.append(f"table: {table}") + if node: + config_parts.append(f"node: {node}") + console.print(f"[dim]{' | '.join(config_parts)}[/dim]") + else: + config_info = [ + f"Observation interval: {interval}s", + f"Min checkpoint delta: {min_checkpoint_delta:,}", + f"Show count: {count}", 
+ ] + if table: + config_info.append(f"Table filter: {table}") + if node: + config_info.append(f"Node filter: {node}") + if exclude_system: + config_info.append("Excluding system tables") + if min_rate: + config_info.append(f"Min rate: {min_rate}/sec") + if not show_replicas: + config_info.append("Primary shards only") + + console.print("[dim]" + " | ".join(config_info) + "[/dim]") + console.print() + + # Take first snapshot + if not watch: + console.print("πŸ“· Taking first snapshot...") + snapshot1 = get_filtered_snapshot() + + if not snapshot1: + console.print("[yellow]No started shards found matching criteria[/yellow]") + return + + if not watch: + console.print(f" Tracking {len(snapshot1)} started shards for activity") + console.print(f"⏱️ Waiting {interval} seconds for activity...") + + # Wait for observation interval + if watch: + # Simplified countdown for watch mode + for remaining in range(interval, 0, -1): + if remaining % 5 == 0 or remaining <= 3: # Show fewer updates + console.print(f"[dim]⏱️ {remaining}s...[/dim]", end="\r") + time.sleep(1) + console.print(" " * 15, end="\r") # Clear countdown + else: + time.sleep(interval) + + # Take second snapshot + if not watch: + console.print("πŸ“· Taking second snapshot...") + snapshot2 = get_filtered_snapshot() + + if not snapshot2: + console.print("[yellow]No started shards found in second snapshot[/yellow]") + return + + if not watch: + console.print(f" Tracking {len(snapshot2)} started shards for activity") + + # Compare snapshots and show results + activities = monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=min_checkpoint_delta) + + # Apply additional filters + if not show_replicas: + activities = [a for a in activities if a.is_primary] + + if min_rate: + activities = [a for a in activities if a.activity_rate >= min_rate] + + if not activities: + console.print( + f"[green]βœ… No shards exceeded activity threshold ({min_checkpoint_delta:,} checkpoint changes)[/green]" + ) + if 
min_rate: + console.print(f"[dim]Also filtered by minimum rate: {min_rate}/sec[/dim]") + else: + if not watch: + overlap_count = len({s.shard_identifier for s in snapshot1} & {s.shard_identifier for s in snapshot2}) + console.print(f"[dim]Analyzed {overlap_count} shards present in both snapshots[/dim]") + console.print(monitor.format_activity_display(activities, show_count=count, watch_mode=watch)) + + try: + if watch: + console.print("[dim]Press Ctrl+C to stop monitoring[/dim]") + console.print() + + while True: + run_single_analysis() + if watch: + console.print(f"\n[dim]━━━ Next update in {interval}s ━━━[/dim]\n") + time.sleep(interval) + else: + run_single_analysis() + + except KeyboardInterrupt: + console.print("\n[yellow]Monitoring stopped by user[/yellow]") + except Exception as e: + console.print(f"[red]Error during active shards monitoring: {e}[/red]") + import traceback + + console.print(f"[dim]{traceback.format_exc()}[/dim]") + + @main.command() @click.option("--table", "-t", help="Analyze zones for specific table only") @click.option("--show-shards/--no-show-shards", default=False, help="Show individual shard details (default: False)") diff --git a/cratedb_toolkit/admin/xmover/model.py b/cratedb_toolkit/admin/xmover/model.py index 34e43f77..d8511b31 100644 --- a/cratedb_toolkit/admin/xmover/model.py +++ b/cratedb_toolkit/admin/xmover/model.py @@ -184,3 +184,67 @@ class ShardRelocationConstraints: max_recommendations: int = 10 max_disk_usage: float = 90.0 prioritize_space: bool = False + + +@dataclass +class ActiveShardSnapshot: + """Snapshot of active shard checkpoint data for tracking activity""" + + schema_name: str + table_name: str + shard_id: int + node_name: str + is_primary: bool + partition_ident: str + local_checkpoint: int + global_checkpoint: int + translog_uncommitted_bytes: int + timestamp: float # Unix timestamp when snapshot was taken + + @property + def checkpoint_delta(self) -> int: + """Current checkpoint delta (local - global)""" + 
return self.local_checkpoint - self.global_checkpoint + + @property + def translog_uncommitted_mb(self) -> float: + """Translog uncommitted size in MB""" + return self.translog_uncommitted_bytes / (1024 * 1024) + + @property + def shard_identifier(self) -> str: + """Unique identifier for this shard including partition""" + shard_type = "P" if self.is_primary else "R" + partition = f":{self.partition_ident}" if self.partition_ident else "" + return f"{self.schema_name}.{self.table_name}:{self.shard_id}:{self.node_name}:{shard_type}{partition}" + + +@dataclass +class ActiveShardActivity: + """Activity comparison between two snapshots of the same shard""" + + schema_name: str + table_name: str + shard_id: int + node_name: str + is_primary: bool + partition_ident: str + local_checkpoint_delta: int # Change in local checkpoint between snapshots + snapshot1: ActiveShardSnapshot + snapshot2: ActiveShardSnapshot + time_diff_seconds: float + + @property + def activity_rate(self) -> float: + """Activity rate as checkpoint changes per second""" + if self.time_diff_seconds > 0: + return self.local_checkpoint_delta / self.time_diff_seconds + return 0.0 + + @property + def shard_type(self) -> str: + return "PRIMARY" if self.is_primary else "REPLICA" + + @property + def table_identifier(self) -> str: + return f"{self.schema_name}.{self.table_name}" diff --git a/cratedb_toolkit/admin/xmover/util/database.py b/cratedb_toolkit/admin/xmover/util/database.py index 21950ab0..5c9011bd 100644 --- a/cratedb_toolkit/admin/xmover/util/database.py +++ b/cratedb_toolkit/admin/xmover/util/database.py @@ -10,7 +10,7 @@ import urllib3 from dotenv import load_dotenv -from cratedb_toolkit.admin.xmover.model import NodeInfo, RecoveryInfo, ShardInfo +from cratedb_toolkit.admin.xmover.model import ActiveShardSnapshot, NodeInfo, RecoveryInfo, ShardInfo logger = logging.getLogger(__name__) @@ -496,3 +496,60 @@ def _is_recovery_completed(self, recovery_info: RecoveryInfo) -> bool: and 
recovery_info.files_percent >= 100.0 and recovery_info.bytes_percent >= 100.0 ) + + def get_active_shards_snapshot(self, min_checkpoint_delta: int = 1000) -> List[ActiveShardSnapshot]: + """Get a snapshot of all started shards for activity monitoring + + Note: This captures ALL started shards regardless of current activity level. + The min_checkpoint_delta parameter is kept for backwards compatibility but + filtering is now done during snapshot comparison to catch shards that + become active between observations. + + Args: + min_checkpoint_delta: Kept for compatibility - filtering now done in comparison + + Returns: + List of ActiveShardSnapshot objects for all started shards + """ + import time + + query = """ + SELECT sh.schema_name, \ + sh.table_name, \ + sh.id AS shard_id, \ + sh."primary", \ + node['name'] as node_name, \ + sh.partition_ident, \ + sh.translog_stats['uncommitted_size'] AS translog_uncommitted_bytes, \ + sh.seq_no_stats['local_checkpoint'] AS local_checkpoint, \ + sh.seq_no_stats['global_checkpoint'] AS global_checkpoint + FROM sys.shards AS sh + WHERE sh.state = 'STARTED' + ORDER BY sh.schema_name, sh.table_name, sh.id, sh.node['name'] \ + """ + + try: + result = self.execute_query(query) + snapshots = [] + current_time = time.time() + + for row in result.get("rows", []): + snapshot = ActiveShardSnapshot( + schema_name=row[0], + table_name=row[1], + shard_id=row[2], + is_primary=row[3], + node_name=row[4], + partition_ident=row[5] or "", + translog_uncommitted_bytes=row[6] or 0, + local_checkpoint=row[7] or 0, + global_checkpoint=row[8] or 0, + timestamp=current_time, + ) + snapshots.append(snapshot) + + return snapshots + + except Exception as e: + logger.error(f"Error getting active shards snapshot: {e}") + return [] diff --git a/doc/admin/xmover/handbook.md b/doc/admin/xmover/handbook.md index 05a3c57a..f9aee2e0 100644 --- a/doc/admin/xmover/handbook.md +++ b/doc/admin/xmover/handbook.md @@ -244,6 +244,132 @@ xmover monitor-recovery --watch 
--include-transitioning - **PEER**: Copying shard data from another node (replication/relocation) - **DISK**: Rebuilding shard from local data (after restart/disk issues) + +### `active-shards` +Monitor the most active shards by tracking checkpoint progression over time. +This command helps identify which shards are receiving the most write activity +by measuring local checkpoint progression between two snapshots. + +**Options:** +- `--count`: Number of most active shards to show (default: 10) +- `--interval`: Observation interval in seconds (default: 30) +- `--min-checkpoint-delta`: Minimum checkpoint progression between snapshots to show shard (default: 1000) +- `--table, -t`: Monitor specific table only +- `--node, -n`: Monitor specific node only +- `--watch, -w`: Continuously monitor (refresh every interval) +- `--exclude-system`: Exclude system tables (gc.*, information_schema.*, *_events, *_log) +- `--min-rate`: Minimum activity rate (changes/sec) to show +- `--show-replicas/--hide-replicas`: Show replica shards (default: True) + +**How it works:** +1. **Takes snapshot of ALL started shards** (not just currently active ones) +2. **Waits for observation interval** (configurable, default: 30 seconds) +3. **Takes second snapshot** of all started shards +4. **Compares snapshots** to find shards with checkpoint progression β‰₯ threshold +5. 
**Shows ranked results** with activity trends and insights
+
+**Enhanced output features:**
+- **Checkpoint visibility**: Shows actual `local_checkpoint` values (CP Start → CP End → Delta)
+- **Partition awareness**: Separate tracking for partitioned tables (different partition_ident values)
+- **Activity trends**: 🔥 HOT (≥100/s), 📈 HIGH (≥50/s), 📊 MED (≥10/s), 📉 LOW (<10/s)
+- **Smart insights**: Identifies concentration patterns and load distribution (non-watch mode)
+- **Flexible filtering**: Exclude system tables, set minimum rates, hide replicas
+- **Context information**: Total activity, average rates, observation period
+- **Clean watch mode**: Streamlined output without legend/insights for continuous monitoring
+
+This approach captures shards that become active during the observation period, providing a complete view of cluster write patterns and identifying hot spots. The enhanced filtering helps focus on business-critical activity patterns.
+
+**Sample output (single run):**
+```
+🔥 Most Active Shards (3 shown, 30s observation period)
+Total checkpoint activity: 190,314 changes, Average rate: 2,109.0/sec
+ Rank | Schema.Table | Shard | Partition | Node | Type | Checkpoint Δ | Rate/sec | Trend
+ -----------------------------------------------------------------------------------------------------------
+ 1 | gc.scheduled_jobs_log | 0 | - | data-hot-8 | P | 113,744 | 3,791.5 | 🔥 HOT
+ 2 | TURVO.events | 0 | 04732dpl6osj8d | data-hot-0 | P | 45,837 | 1,527.9 | 🔥 HOT
+ 3 | doc.user_actions | 1 | 04732dpk70rj6d | data-hot-2 | P | 30,733 | 1,024.4 | 🔥 HOT
+Legend:
+ • Checkpoint Δ: Write operations during observation period
+ • Partition: partition_ident (truncated if >14 chars, '-' if none)
+Insights:
+ • 3 HOT shards (≥100 changes/sec) - consider load balancing
+ • All active shards are PRIMARY - normal write pattern
+```
+
+**Sample output (watch mode - cleaner):**
+```
+30s interval | threshold: 1,000 | top 5
+🔥 Most Active Shards (3 shown, 30s observation period)
+Total checkpoint activity: 190,314 changes, Average rate: 2,109.0/sec
+ Rank | Schema.Table | Shard | Partition | Node | Type | Checkpoint Δ | Rate/sec | Trend
+ -----------------------------------------------------------------------------------------------------------
+ 1 | gc.scheduled_jobs_log | 0 | - | data-hot-8 | P | 113,744 | 3,791.5 | 🔥 HOT
+ 2 | TURVO.events | 0 | 04732dpl6osj8d | data-hot-0 | P | 45,837 | 1,527.9 | 🔥 HOT
+ 3 | doc.user_actions | 1 | 04732dpk70rj6d | data-hot-2 | P | 30,733 | 1,024.4 | 🔥 HOT
+━━━ Next update in 30s ━━━
+```
+
+#### Examples
+```bash
+# Show top 10 most active shards over 30 seconds
+xmover active-shards
+
+# Top 20 shards with 60-second observation period
+xmover active-shards --count 20 --interval 60
+
+# Continuous monitoring with 30-second intervals
+xmover active-shards --watch --interval 30
+
+# Monitor specific table activity
+xmover active-shards --table my_table --watch
+
+# Monitor specific node with custom threshold
+xmover active-shards --node data-hot-1 --min-checkpoint-delta 500
+
+# Exclude system tables and event logs for business data focus
+xmover active-shards --exclude-system --count 20
+
+# Only show high-activity shards (≥50 changes/sec)
+xmover active-shards --min-rate 50 --count 15
+
+# Focus on primary shards only
+xmover active-shards --hide-replicas --count 20
+```
+
+#### Monitoring Active Shards and Write Patterns
+
+Identify which shards are receiving the most write activity:
+
+1. Quick snapshot of most active shards:
+```bash
+# Show top 10 most active shards over 30 seconds
+xmover active-shards
+
+# Longer observation period for more accurate results
+xmover active-shards --count 15 --interval 60
+```
+
+2. 
Continuous monitoring for real-time insights: +```bash +# Continuous monitoring with 30-second intervals +xmover active-shards --watch --interval 30 + +# Monitor specific table for focused analysis +xmover active-shards --table critical_table --watch +``` + +3. Integration with rebalancing workflow: +```bash +# Identify hot shards first +xmover active-shards --count 20 --interval 60 + +# Move hot shards away from overloaded nodes +xmover recommend --table hot_table --prioritize-space --execute + +# Monitor the impact +xmover active-shards --table hot_table --watch +``` + ### `test-connection` Tests the connection to CrateDB and displays basic cluster information. diff --git a/pyproject.toml b/pyproject.toml index fa3309d5..762d4fdf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -325,6 +325,7 @@ lint.extend-ignore = [ ] lint.per-file-ignores."cratedb_toolkit/admin/xmover/analysis/shard.py" = [ "T201" ] # Allow `print` +lint.per-file-ignores."tests/admin/*" = [ "T201" ] # Allow use of `print`. 
lint.per-file-ignores."cratedb_toolkit/retention/cli.py" = [ "T201" ] # Allow `print` lint.per-file-ignores."cratedb_toolkit/sqlalchemy/__init__.py" = [ "F401" ] # Allow `moduleΒ΄ imported but unused lint.per-file-ignores."doc/conf.py" = [ "A001", "ERA001" ] diff --git a/tests/admin/test_active_shard_monitor.py b/tests/admin/test_active_shard_monitor.py new file mode 100644 index 00000000..55268b15 --- /dev/null +++ b/tests/admin/test_active_shard_monitor.py @@ -0,0 +1,472 @@ +""" +Tests for ActiveShardMonitor functionality +""" + +import time +from unittest.mock import Mock, patch + +from cratedb_toolkit.admin.xmover.analysis.shard import ActiveShardMonitor +from cratedb_toolkit.admin.xmover.model import ActiveShardActivity, ActiveShardSnapshot +from cratedb_toolkit.admin.xmover.util.database import CrateDBClient + + +class TestActiveShardSnapshot: + """Test ActiveShardSnapshot dataclass""" + + def test_checkpoint_delta(self): + """Test checkpoint delta calculation""" + snapshot = ActiveShardSnapshot( + schema_name="test_schema", + table_name="test_table", + shard_id=1, + node_name="node1", + is_primary=True, + partition_ident="", + local_checkpoint=1500, + global_checkpoint=500, + translog_uncommitted_bytes=10485760, # 10MB + timestamp=time.time(), + ) + + assert snapshot.checkpoint_delta == 1000 + assert snapshot.translog_uncommitted_mb == 10.0 + assert snapshot.shard_identifier == "test_schema.test_table:1:node1:P" + + +class TestActiveShardActivity: + """Test ActiveShardActivity dataclass""" + + def test_activity_calculations(self): + """Test activity rate and property calculations""" + snapshot1 = ActiveShardSnapshot( + schema_name="test_schema", + table_name="test_table", + shard_id=1, + node_name="node1", + is_primary=True, + partition_ident="", + local_checkpoint=1000, + global_checkpoint=500, + translog_uncommitted_bytes=5242880, # 5MB + timestamp=100.0, + ) + + snapshot2 = ActiveShardSnapshot( + schema_name="test_schema", + table_name="test_table", + 
shard_id=1, + node_name="node1", + is_primary=True, + partition_ident="", + local_checkpoint=1500, + global_checkpoint=500, + translog_uncommitted_bytes=10485760, # 10MB + timestamp=130.0, # 30 seconds later + ) + + activity = ActiveShardActivity( + schema_name="test_schema", + table_name="test_table", + shard_id=1, + node_name="node1", + is_primary=True, + partition_ident="", + local_checkpoint_delta=500, + snapshot1=snapshot1, + snapshot2=snapshot2, + time_diff_seconds=30.0, + ) + + assert activity.activity_rate == 500 / 30.0 # ~16.67 changes/sec + assert activity.shard_type == "PRIMARY" + assert activity.table_identifier == "test_schema.test_table" + + +class TestCrateDBClientActiveShards: + """Test CrateDB client active shards functionality""" + + @patch.object(CrateDBClient, "execute_query") + def test_get_active_shards_snapshot_success(self, mock_execute): + """Test successful snapshot retrieval""" + mock_execute.return_value = { + "rows": [ + ["schema1", "table1", 1, True, "node1", "", 10485760, 1500, 500], + ["schema1", "table2", 2, False, "node2", "part1", 20971520, 2000, 800], + ] + } + + client = CrateDBClient("http://test") + snapshots = client.get_active_shards_snapshot(min_checkpoint_delta=1000) + + assert len(snapshots) == 2 + + # Check first snapshot + snap1 = snapshots[0] + assert snap1.schema_name == "schema1" + assert snap1.table_name == "table1" + assert snap1.shard_id == 1 + assert snap1.is_primary is True + assert snap1.node_name == "node1" + assert snap1.local_checkpoint == 1500 + assert snap1.global_checkpoint == 500 + assert snap1.checkpoint_delta == 1000 + assert snap1.translog_uncommitted_mb == 10.0 + + # Check second snapshot + snap2 = snapshots[1] + assert snap2.schema_name == "schema1" + assert snap2.table_name == "table2" + assert snap2.shard_id == 2 + assert snap2.is_primary is False + assert snap2.node_name == "node2" + assert snap2.partition_ident == "part1" + assert snap2.checkpoint_delta == 1200 + assert 
snap2.translog_uncommitted_mb == 20.0 + + # Verify query was called without checkpoint delta filter (new behavior) + mock_execute.assert_called_once() + args = mock_execute.call_args[0] + # No longer passes min_checkpoint_delta parameter + assert len(args) == 1 # Only the query, no parameters + + @patch.object(CrateDBClient, "execute_query") + def test_get_active_shards_snapshot_empty(self, mock_execute): + """Test snapshot retrieval with no results""" + mock_execute.return_value = {"rows": []} + + client = CrateDBClient("http://test") + snapshots = client.get_active_shards_snapshot(min_checkpoint_delta=1000) + + assert snapshots == [] + + @patch.object(CrateDBClient, "execute_query") + def test_get_active_shards_snapshot_error(self, mock_execute): + """Test snapshot retrieval with database error""" + mock_execute.side_effect = Exception("Database connection failed") + + client = CrateDBClient("http://test") + snapshots = client.get_active_shards_snapshot(min_checkpoint_delta=1000) + + assert snapshots == [] + + +class TestActiveShardMonitor: + """Test ActiveShardMonitor class""" + + def setup_method(self): + """Set up test fixtures""" + self.mock_client = Mock(spec=CrateDBClient) + self.monitor = ActiveShardMonitor(self.mock_client) + + def create_test_snapshot( + self, + schema: str, + table: str, + shard_id: int, + node: str, + is_primary: bool, + local_checkpoint: int, + timestamp: float, + ): + """Helper to create test snapshots""" + return ActiveShardSnapshot( + schema_name=schema, + table_name=table, + shard_id=shard_id, + node_name=node, + is_primary=is_primary, + partition_ident="", + local_checkpoint=local_checkpoint, + global_checkpoint=500, # Fixed for simplicity + translog_uncommitted_bytes=10485760, # 10MB + timestamp=timestamp, + ) + + def test_compare_snapshots_with_activity(self): + """Test comparing snapshots with active shards""" + # Create first snapshot + snapshot1 = [ + self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1000, 
100.0), + self.create_test_snapshot("schema1", "table2", 1, "node2", False, 2000, 100.0), + self.create_test_snapshot("schema1", "table3", 1, "node1", True, 3000, 100.0), + ] + + # Create second snapshot (30 seconds later with activity) + snapshot2 = [ + self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1500, 130.0), # +500 + self.create_test_snapshot("schema1", "table2", 1, "node2", False, 2200, 130.0), # +200 + self.create_test_snapshot("schema1", "table3", 1, "node1", True, 3000, 130.0), # No change + self.create_test_snapshot("schema1", "table4", 1, "node3", True, 1000, 130.0), # New shard + ] + + activities = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=1) + + # Should have 2 activities (table3 had no change, table4 is new) + assert len(activities) == 2 + + # Check activities are sorted by checkpoint delta (highest first) + assert activities[0].local_checkpoint_delta == 500 # table1 + assert activities[0].schema_name == "schema1" + assert activities[0].table_name == "table1" + + assert activities[1].local_checkpoint_delta == 200 # table2 + assert activities[1].schema_name == "schema1" + assert activities[1].table_name == "table2" + + # Check activity rate calculation + assert activities[0].activity_rate == 500 / 30.0 # ~16.67/sec + assert activities[1].activity_rate == 200 / 30.0 # ~6.67/sec + + def test_compare_snapshots_no_activity(self): + """Test comparing snapshots with no activity""" + # Create identical snapshots + snapshot1 = [ + self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1000, 100.0), + ] + + snapshot2 = [ + self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1000, 130.0), # No change + ] + + activities = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=1) + + assert activities == [] + + def test_compare_snapshots_no_overlap(self): + """Test comparing snapshots with no overlapping shards""" + snapshot1 = [ + self.create_test_snapshot("schema1", 
"table1", 1, "node1", True, 1000, 100.0), + ] + + snapshot2 = [ + self.create_test_snapshot("schema1", "table2", 1, "node2", True, 1500, 130.0), # Different shard + ] + + activities = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=1) + + assert activities == [] + + def test_format_activity_display_with_activities(self): + """Test formatting activity display with data""" + # Create test activities + snapshot1 = self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1000, 100.0) + snapshot2 = self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1500, 130.0) + + activity = ActiveShardActivity( + schema_name="schema1", + table_name="table1", + shard_id=1, + node_name="node1", + is_primary=True, + partition_ident="", + local_checkpoint_delta=500, + snapshot1=snapshot1, + snapshot2=snapshot2, + time_diff_seconds=30.0, + ) + + display = self.monitor.format_activity_display([activity], show_count=10, watch_mode=False) + + # Check that output contains expected elements + assert "Most Active Shards" in display + assert "schema1.table1" in display + assert "500" in display # checkpoint delta + assert "16.7" in display # activity rate + assert "P" in display # primary indicator + assert "Legend:" in display + assert "Trend:" in display # new trend column explanation + assert "Partition:" in display # new partition column explanation + + def test_format_activity_display_empty(self): + """Test formatting activity display with no data""" + display = self.monitor.format_activity_display([], show_count=10, watch_mode=False) + + assert "No active shards with significant checkpoint progression found" in display + + def test_format_activity_display_count_limit(self): + """Test that display respects show_count limit""" + # Create multiple activities + activities = [] + for i in range(15): + snapshot1 = self.create_test_snapshot("schema1", f"table{i}", 1, "node1", True, 1000, 100.0) + snapshot2 = self.create_test_snapshot("schema1", 
f"table{i}", 1, "node1", True, 1000 + (i + 1) * 100, 130.0) + + activity = ActiveShardActivity( + schema_name="schema1", + table_name=f"table{i}", + shard_id=1, + node_name="node1", + is_primary=True, + partition_ident="", + local_checkpoint_delta=(i + 1) * 100, + snapshot1=snapshot1, + snapshot2=snapshot2, + time_diff_seconds=30.0, + ) + activities.append(activity) + + # Sort activities by checkpoint delta (highest first) - same as compare_snapshots does + activities.sort(key=lambda x: x.local_checkpoint_delta, reverse=True) + + # Should only show top 5 + display = self.monitor.format_activity_display(activities, show_count=5, watch_mode=False) + + # Count number of table entries in display + table_count = display.count("schema1.table") + assert table_count == 5 # Should only show 5 entries + + # Should show highest activity first (table14 has highest checkpoint delta) + assert "schema1.table14" in display + + def test_compare_snapshots_with_activity_threshold(self): + """Test filtering activities by minimum threshold""" + # Create snapshots with various activity levels + snapshot1 = [ + self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1000, 100.0), # Will have +2000 delta + self.create_test_snapshot("schema1", "table2", 1, "node2", False, 2000, 100.0), # Will have +500 delta + self.create_test_snapshot("schema1", "table3", 1, "node1", True, 3000, 100.0), # Will have +100 delta + ] + + snapshot2 = [ + self.create_test_snapshot("schema1", "table1", 1, "node1", True, 3000, 130.0), # +2000 delta + self.create_test_snapshot("schema1", "table2", 1, "node2", False, 2500, 130.0), # +500 delta + self.create_test_snapshot("schema1", "table3", 1, "node1", True, 3100, 130.0), # +100 delta + ] + + # Test with threshold of 1000 - should only show table1 (2000 delta) + activities_high_threshold = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=1000) + assert len(activities_high_threshold) == 1 + assert 
activities_high_threshold[0].table_name == "table1" + assert activities_high_threshold[0].local_checkpoint_delta == 2000 + + # Test with threshold of 200 - should show table1 and table2 + activities_medium_threshold = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=200) + assert len(activities_medium_threshold) == 2 + assert activities_medium_threshold[0].local_checkpoint_delta == 2000 # table1 first (highest) + assert activities_medium_threshold[1].local_checkpoint_delta == 500 # table2 second + + # Test with threshold of 0 - should show all three + activities_low_threshold = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=0) + assert len(activities_low_threshold) == 3 + assert activities_low_threshold[0].local_checkpoint_delta == 2000 # Sorted by activity + assert activities_low_threshold[1].local_checkpoint_delta == 500 + assert activities_low_threshold[2].local_checkpoint_delta == 100 + + def test_primary_replica_separation(self): + """Test that primary and replica shards are tracked separately""" + # Create snapshots with same table/shard but different primary/replica + snapshot1 = [ + # Primary shard + self.create_test_snapshot("gc", "scheduled_jobs_log", 0, "data-hot-8", True, 15876, 100.0), + # Replica shard (same table/shard/node but different type) + self.create_test_snapshot("gc", "scheduled_jobs_log", 0, "data-hot-8", False, 129434, 100.0), + ] + + snapshot2 = [ + # Primary shard progresses normally + self.create_test_snapshot("gc", "scheduled_jobs_log", 0, "data-hot-8", True, 16000, 130.0), # +124 delta + # Replica shard progresses normally + self.create_test_snapshot("gc", "scheduled_jobs_log", 0, "data-hot-8", False, 129500, 130.0), # +66 delta + ] + + activities = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=1) + + # Should have 2 separate activities (primary and replica tracked separately) + assert len(activities) == 2 + + # Find primary and replica activities + 
primary_activity = next(a for a in activities if a.is_primary) + replica_activity = next(a for a in activities if not a.is_primary) + + # Verify deltas are calculated correctly for each type + assert primary_activity.local_checkpoint_delta == 124 # 16000 - 15876 + assert replica_activity.local_checkpoint_delta == 66 # 129500 - 129434 + + # Verify they have different shard identifiers + assert primary_activity.snapshot1.shard_identifier != replica_activity.snapshot1.shard_identifier + assert "data-hot-8:P" in primary_activity.snapshot1.shard_identifier + assert "data-hot-8:R" in replica_activity.snapshot1.shard_identifier + + # This test prevents the bug where we mixed primary CP End with replica CP Start + # which created fake deltas like 129434 - 15876 = 113558 + + def test_partition_separation(self): + """Test that partitions within the same table/shard are tracked separately""" + # Create snapshots with same table/shard but different partitions + snapshot1 = [ + # Partition 1 + self.create_test_snapshot("TURVO", "appointmentFormFieldData_events", 0, "data-hot-8", True, 32684, 100.0), + # Partition 2 (same table/shard/node/type but different partition) + self.create_test_snapshot("TURVO", "appointmentFormFieldData_events", 0, "data-hot-8", True, 54289, 100.0), + ] + + # Modify partition_ident for the snapshots to simulate different partitions + snapshot1[0].partition_ident = "04732dpl6osj8d1g60o30c1g" + snapshot1[1].partition_ident = "04732dpl6os3adpm60o30c1g" + + snapshot2 = [ + # Partition 1 progresses + self.create_test_snapshot("TURVO", "appointmentFormFieldData_events", 0, "data-hot-8", True, 32800, 130.0), + # +116 delta + # Partition 2 progresses + self.create_test_snapshot("TURVO", "appointmentFormFieldData_events", 0, "data-hot-8", True, 54400, 130.0), + # +111 delta + ] + + # Set partition_ident for second snapshot + snapshot2[0].partition_ident = "04732dpl6osj8d1g60o30c1g" + snapshot2[1].partition_ident = "04732dpl6os3adpm60o30c1g" + + activities = 
self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=1) + + # Should have 2 separate activities (partitions tracked separately) + assert len(activities) == 2 + + # Verify deltas are calculated correctly for each partition + partition1_activity = next(a for a in activities if "04732dpl6osj8d1g60o30c1g" in a.snapshot1.shard_identifier) + partition2_activity = next(a for a in activities if "04732dpl6os3adpm60o30c1g" in a.snapshot1.shard_identifier) + + assert partition1_activity.local_checkpoint_delta == 116 # 32800 - 32684 + assert partition2_activity.local_checkpoint_delta == 111 # 54400 - 54289 + + # Verify they have different shard identifiers due to partition + assert partition1_activity.snapshot1.shard_identifier != partition2_activity.snapshot1.shard_identifier + assert ":04732dpl6osj8d1g60o30c1g" in partition1_activity.snapshot1.shard_identifier + assert ":04732dpl6os3adpm60o30c1g" in partition2_activity.snapshot1.shard_identifier + + # This test prevents mixing partitions which would create fake activity measurements + + def test_format_activity_display_watch_mode(self): + """Test that watch mode excludes legend and insights""" + snapshot1 = self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1000, 100.0) + snapshot2 = self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1500, 130.0) + + activity = ActiveShardActivity( + schema_name="schema1", + table_name="table1", + shard_id=1, + node_name="node1", + is_primary=True, + partition_ident="", + local_checkpoint_delta=500, + snapshot1=snapshot1, + snapshot2=snapshot2, + time_diff_seconds=30.0, + ) + + # Test non-watch mode (should include legend and insights) + normal_display = self.monitor.format_activity_display([activity], show_count=10, watch_mode=False) + assert "Legend:" in normal_display + assert "Insights:" in normal_display + assert "Checkpoint Ξ”:" in normal_display + + # Test watch mode (should exclude legend and insights) + watch_display = 
self.monitor.format_activity_display([activity], show_count=10, watch_mode=True) + assert "Legend:" not in watch_display + assert "Insights:" not in watch_display + assert "Checkpoint Ξ”" in watch_display # Core data should still be present + + # But should still contain the core data + assert "Most Active Shards" in watch_display + assert "schema1.table1" in watch_display + assert "500" in watch_display # checkpoint delta diff --git a/tests/admin/test_distribution_analyzer.py b/tests/admin/test_distribution_analyzer.py new file mode 100644 index 00000000..000fd0f9 --- /dev/null +++ b/tests/admin/test_distribution_analyzer.py @@ -0,0 +1,294 @@ +""" +Tests for distribution analyzer functionality +""" + +from unittest.mock import Mock, patch + +from cratedb_toolkit.admin.xmover.analysis.table import DistributionAnalyzer, DistributionAnomaly, TableDistribution +from cratedb_toolkit.admin.xmover.util.database import CrateDBClient + + +class TestDistributionAnalyzer: + def setup_method(self): + """Set up test fixtures""" + self.mock_client = Mock(spec=CrateDBClient) + self.analyzer = DistributionAnalyzer(self.mock_client) + + def test_coefficient_of_variation_calculation(self): + """Test CV calculation with different scenarios""" + + # Normal case + values = [10, 12, 8, 14, 6] + cv = self.analyzer.calculate_coefficient_of_variation(values) + assert cv > 0 + + # All equal values (should return 0) + equal_values = [10, 10, 10, 10] + cv_equal = self.analyzer.calculate_coefficient_of_variation(equal_values) + assert cv_equal == 0.0 + + # Empty list + empty_values = [] + cv_empty = self.analyzer.calculate_coefficient_of_variation(empty_values) + assert cv_empty == 0.0 + + # Single value + single_value = [10] + cv_single = self.analyzer.calculate_coefficient_of_variation(single_value) + assert cv_single == 0.0 + + def test_get_largest_tables_distribution(self): + """Test fetching table distribution data""" + + # Mock query results + mock_results = [ + # schema, table, node, 
primary_shards, replica_shards, total_shards, total_size, primary_size, replica_size, docs # noqa: E501, ERA001 + ["doc", "large_table", "node1", 5, 2, 7, 100.5, 80.2, 20.3, 1000000], + ["doc", "large_table", "node2", 4, 3, 7, 95.1, 75.8, 19.3, 950000], + ["doc", "large_table", "node3", 6, 1, 7, 110.2, 85.9, 24.3, 1100000], + ["custom", "another_table", "node1", 3, 2, 5, 50.1, 40.2, 9.9, 500000], + ["custom", "another_table", "node2", 2, 3, 5, 45.8, 35.1, 10.7, 480000], + ] + + self.mock_client.execute_query.return_value = mock_results + + distributions = self.analyzer.get_largest_tables_distribution(top_n=10) + + # Verify query was called with correct parameters + self.mock_client.execute_query.assert_called_once() + call_args = self.mock_client.execute_query.call_args + assert call_args[0][1] == [10] # top_n parameter + + # Verify we got the expected number of tables + assert len(distributions) == 2 + + # Verify table data structure + large_table = next(d for d in distributions if d.table_name == "large_table") + assert large_table.schema_name == "doc" + assert large_table.full_table_name == "large_table" # Should omit 'doc' schema + assert len(large_table.node_distributions) == 3 + + another_table = next(d for d in distributions if d.table_name == "another_table") + assert another_table.schema_name == "custom" + assert another_table.full_table_name == "custom.another_table" + assert len(another_table.node_distributions) == 2 + + # Verify sorting by primary size (descending) + assert distributions[0].total_primary_size_gb >= distributions[1].total_primary_size_gb + + def test_detect_shard_count_imbalance(self): + """Test shard count imbalance detection""" + + # Create test table with imbalanced shard distribution + imbalanced_table = TableDistribution( + schema_name="doc", + table_name="imbalanced_table", + total_primary_size_gb=500.0, + node_distributions={ + "node1": {"total_shards": 10, "primary_shards": 5, "replica_shards": 5}, + "node2": {"total_shards": 15, 
"primary_shards": 8, "replica_shards": 7}, + "node3": {"total_shards": 5, "primary_shards": 2, "replica_shards": 3}, + }, + ) + + anomaly = self.analyzer.detect_shard_count_imbalance(imbalanced_table) + + assert anomaly is not None + assert anomaly.anomaly_type == "Shard Count Imbalance" + assert anomaly.combined_score > 0 + assert len(anomaly.recommendations) > 0 + + # Create balanced table (should not detect anomaly) + balanced_table = TableDistribution( + schema_name="doc", + table_name="balanced_table", + total_primary_size_gb=100.0, + node_distributions={ + "node1": {"total_shards": 8, "primary_shards": 4, "replica_shards": 4}, + "node2": {"total_shards": 8, "primary_shards": 4, "replica_shards": 4}, + "node3": {"total_shards": 8, "primary_shards": 4, "replica_shards": 4}, + }, + ) + + no_anomaly = self.analyzer.detect_shard_count_imbalance(balanced_table) + assert no_anomaly is None + + def test_detect_storage_imbalance(self): + """Test storage imbalance detection""" + + # Create test table with storage imbalance + storage_imbalanced_table = TableDistribution( + schema_name="doc", + table_name="storage_imbalanced", + total_primary_size_gb=300.0, + node_distributions={ + "node1": {"total_size_gb": 150.0, "primary_size_gb": 100.0, "replica_size_gb": 50.0}, + "node2": {"total_size_gb": 50.0, "primary_size_gb": 30.0, "replica_size_gb": 20.0}, + "node3": {"total_size_gb": 100.0, "primary_size_gb": 70.0, "replica_size_gb": 30.0}, + }, + ) + + anomaly = self.analyzer.detect_storage_imbalance(storage_imbalanced_table) + + assert anomaly is not None + assert anomaly.anomaly_type == "Storage Imbalance" + assert anomaly.combined_score > 0 + + # Small table (should be ignored) + small_table = TableDistribution( + schema_name="doc", + table_name="small_table", + total_primary_size_gb=0.1, + node_distributions={ + "node1": {"total_size_gb": 0.5, "primary_size_gb": 0.05, "replica_size_gb": 0.05}, + "node2": {"total_size_gb": 0.1, "primary_size_gb": 0.03, "replica_size_gb": 
0.02}, + }, + ) + + no_anomaly = self.analyzer.detect_storage_imbalance(small_table) + assert no_anomaly is None + + def test_detect_node_coverage_issues(self): + """Test node coverage issue detection""" + + # Mock nodes_info to simulate cluster with 4 nodes + mock_nodes = [Mock(name="node1"), Mock(name="node2"), Mock(name="node3"), Mock(name="node4")] + self.mock_client.get_nodes_info.return_value = mock_nodes + + # Table with limited coverage (only on 2 out of 4 nodes) + limited_coverage_table = TableDistribution( + schema_name="doc", + table_name="limited_coverage", + total_primary_size_gb=100.0, # Significant size + node_distributions={ + "node1": {"total_shards": 10, "primary_shards": 5, "replica_shards": 5}, + "node2": {"total_shards": 10, "primary_shards": 5, "replica_shards": 5}, + # node3 and node4 missing + }, + ) + + anomaly = self.analyzer.detect_node_coverage_issues(limited_coverage_table) + + assert anomaly is not None + assert anomaly.anomaly_type == "Node Coverage Issue" + assert "node3" in anomaly.details["nodes_without_shards"] + assert "node4" in anomaly.details["nodes_without_shards"] + assert len(anomaly.recommendations) > 0 + + def test_detect_document_imbalance(self): + """Test document imbalance detection""" + + # Table with document imbalance + doc_imbalanced_table = TableDistribution( + schema_name="doc", + table_name="doc_imbalanced", + total_primary_size_gb=200.0, + node_distributions={ + "node1": {"total_documents": 1000000}, # 1M docs + "node2": {"total_documents": 500000}, # 500K docs + "node3": {"total_documents": 100000}, # 100K docs (5x imbalance) + }, + ) + + anomaly = self.analyzer.detect_document_imbalance(doc_imbalanced_table) + + assert anomaly is not None + assert anomaly.anomaly_type == "Document Imbalance" + assert "data skew" in anomaly.recommendations[0].lower() + + # Table with very few documents (should be ignored) + low_doc_table = TableDistribution( + schema_name="doc", + table_name="low_docs", + 
total_primary_size_gb=100.0, + node_distributions={ + "node1": {"total_documents": 1000}, + "node2": {"total_documents": 500}, + }, + ) + + no_anomaly = self.analyzer.detect_document_imbalance(low_doc_table) + assert no_anomaly is None + + def test_analyze_distribution_integration(self): + """Test the full analysis workflow""" + + # Mock the get_largest_tables_distribution method + mock_table = TableDistribution( + schema_name="doc", + table_name="test_table", + total_primary_size_gb=500.0, + node_distributions={ + "node1": { + "total_shards": 15, + "primary_shards": 8, + "replica_shards": 7, + "total_size_gb": 200.0, + "primary_size_gb": 120.0, + "replica_size_gb": 80.0, + "total_documents": 2000000, + }, + "node2": { + "total_shards": 8, + "primary_shards": 4, + "replica_shards": 4, + "total_size_gb": 100.0, + "primary_size_gb": 60.0, + "replica_size_gb": 40.0, + "total_documents": 1000000, + }, + "node3": { + "total_shards": 5, + "primary_shards": 3, + "replica_shards": 2, + "total_size_gb": 50.0, + "primary_size_gb": 30.0, + "replica_size_gb": 20.0, + "total_documents": 500000, + }, + }, + ) + + with patch.object(self.analyzer, "get_largest_tables_distribution", return_value=[mock_table]): + anomalies, tables_analyzed = self.analyzer.analyze_distribution(top_tables=10) + + # Should detect multiple types of anomalies + assert len(anomalies) > 0 + assert tables_analyzed == 1 # We provided 1 mock table + + # Anomalies should be sorted by combined score (descending) + if len(anomalies) > 1: + for i in range(len(anomalies) - 1): + assert anomalies[i].combined_score >= anomalies[i + 1].combined_score + + # Each anomaly should have required fields + for anomaly in anomalies: + assert anomaly.table is not None + assert anomaly.anomaly_type is not None + assert anomaly.combined_score >= 0 + assert isinstance(anomaly.recommendations, list) + + def test_format_distribution_report_no_anomalies(self): + """Test report formatting when no anomalies found""" + + # This should 
not raise an exception + with patch("builtins.print"): # Mock print to avoid console output during tests + self.analyzer.format_distribution_report([], 5) + + def test_format_distribution_report_with_anomalies(self): + """Test report formatting with anomalies""" + + mock_anomaly = DistributionAnomaly( + table=TableDistribution("doc", "test_table", 100.0, {}), + anomaly_type="Test Anomaly", + severity_score=7.5, + impact_score=8.0, + combined_score=60.0, + description="Test description", + details={}, + recommendations=["Test recommendation"], + ) + + # This should not raise an exception + with patch("builtins.print"): # Mock print to avoid console output during tests + self.analyzer.format_distribution_report([mock_anomaly], 3) diff --git a/tests/admin/test_recovery_monitor.py b/tests/admin/test_recovery_monitor.py new file mode 100644 index 00000000..c6d8a178 --- /dev/null +++ b/tests/admin/test_recovery_monitor.py @@ -0,0 +1,296 @@ +""" +Test script for XMover recovery monitoring functionality + +This script tests the recovery monitoring features by creating mock recovery scenarios +and verifying the output formatting and data parsing. 
+""" + +import os +import sys +from typing import Any, Dict +from unittest.mock import Mock + +from cratedb_toolkit.admin.xmover.model import RecoveryInfo +from cratedb_toolkit.admin.xmover.operational.monitor import RecoveryMonitor +from cratedb_toolkit.admin.xmover.util.database import CrateDBClient + +# Add the src directory to the path +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "src")) + + +def create_mock_allocation( + schema_name: str, table_name: str, shard_id: int, current_state: str, node_id: str +) -> Dict[str, Any]: + """Create a mock allocation response""" + return { + "schema_name": schema_name, + "table_name": table_name, + "shard_id": shard_id, + "current_state": current_state, + "node_id": node_id, + "explanation": None, + } + + +def create_mock_shard_detail( + schema_name: str, + table_name: str, + shard_id: int, + node_name: str, + node_id: str, + recovery_type: str, + stage: str, + files_percent: float, + bytes_percent: float, + total_time: int, + size: int, + is_primary: bool, +) -> Dict[str, Any]: + """Create a mock shard detail response""" + return { + "schema_name": schema_name, + "table_name": table_name, + "shard_id": shard_id, + "node_name": node_name, + "node_id": node_id, + "routing_state": "RELOCATING", + "state": "RECOVERING", + "recovery": { + "type": recovery_type, + "stage": stage, + "files": {"percent": files_percent}, + "size": {"percent": bytes_percent}, + "total_time": total_time, + }, + "size": size, + "primary": is_primary, + } + + +def test_recovery_info_parsing(): + """Test RecoveryInfo dataclass and its properties""" + print("Testing RecoveryInfo parsing...") + + recovery = RecoveryInfo( + schema_name="CURVO", + table_name="PartioffD", + shard_id=19, + node_name="data-hot-1", + node_id="ZH6fBanGSjanGqeSh-sw0A", + recovery_type="PEER", + stage="DONE", + files_percent=100.0, + bytes_percent=100.0, + total_time_ms=1555907, + routing_state="RELOCATING", + current_state="RELOCATING", + is_primary=False, + 
size_bytes=56565284209, + ) + + # Test properties + assert recovery.overall_progress == 100.0, f"Expected 100.0, got {recovery.overall_progress}" + assert abs(recovery.size_gb - 52.681) < 0.01, f"Expected ~52.681, got {recovery.size_gb:.3f}" + assert recovery.shard_type == "REPLICA", f"Expected REPLICA, got {recovery.shard_type}" + assert recovery.total_time_seconds == 1555.907, f"Expected 1555.907, got {recovery.total_time_seconds}" + + print("βœ… RecoveryInfo parsing tests passed") + + +def test_database_client_parsing(): + """Test database client recovery parsing logic""" + print("Testing database client recovery parsing...") + + # Create a real client instance to test the parsing method + client = CrateDBClient.__new__(CrateDBClient) # Create without calling __init__ + + # Create test data + allocation = create_mock_allocation("CURVO", "PartioffD", 19, "RELOCATING", "node1") + shard_detail = create_mock_shard_detail( + "CURVO", "PartioffD", 19, "data-hot-1", "node1", "PEER", "DONE", 100.0, 100.0, 1555907, 56565284209, False + ) + + # Test the parsing method directly + recovery_info = client._parse_recovery_info(allocation, shard_detail) + + assert recovery_info.recovery_type == "PEER" + assert recovery_info.stage == "DONE" + assert recovery_info.overall_progress == 100.0 + + print("βœ… Database client parsing tests passed") + + +def test_recovery_monitor_formatting(): + """Test recovery monitor display formatting""" + print("Testing recovery monitor formatting...") + + # Create mock client + mock_client = Mock(spec=CrateDBClient) + monitor = RecoveryMonitor(mock_client) + + # Create test recovery data + recoveries = [ + RecoveryInfo( + schema_name="CURVO", + table_name="PartioffD", + shard_id=19, + node_name="data-hot-1", + node_id="node1", + recovery_type="PEER", + stage="DONE", + files_percent=100.0, + bytes_percent=100.0, + total_time_ms=1555907, + routing_state="RELOCATING", + current_state="RELOCATING", + is_primary=False, + size_bytes=56565284209, + ), + 
RecoveryInfo( + schema_name="CURVO", + table_name="orderTracking", + shard_id=7, + node_name="data-hot-2", + node_id="node2", + recovery_type="DISK", + stage="INDEX", + files_percent=75.5, + bytes_percent=67.8, + total_time_ms=890234, + routing_state="INITIALIZING", + current_state="INITIALIZING", + is_primary=True, + size_bytes=25120456789, + ), + ] + + # Test summary generation + summary = monitor.get_recovery_summary(recoveries) + + assert summary["total_recoveries"] == 2 + assert "PEER" in summary["by_type"] + assert "DISK" in summary["by_type"] + assert summary["by_type"]["PEER"]["count"] == 1 + assert summary["by_type"]["DISK"]["count"] == 1 + + # Test display formatting + display_output = monitor.format_recovery_display(recoveries) + + assert "Active Shard Recoveries (2 total)" in display_output + assert "PEER Recoveries (1)" in display_output + assert "DISK Recoveries (1)" in display_output + assert "PartioffD" in display_output + assert "orderTracking" in display_output + + print("βœ… Recovery monitor formatting tests passed") + + +def test_empty_recovery_handling(): + """Test handling of no active recoveries""" + print("Testing empty recovery handling...") + + mock_client = Mock(spec=CrateDBClient) + monitor = RecoveryMonitor(mock_client) + + # Test empty list + empty_recoveries = [] + + summary = monitor.get_recovery_summary(empty_recoveries) + assert summary["total_recoveries"] == 0 + assert summary["by_type"] == {} + + display_output = monitor.format_recovery_display(empty_recoveries) + assert "No active shard recoveries found" in display_output + + print("βœ… Empty recovery handling tests passed") + + +def test_recovery_type_filtering(): + """Test filtering by recovery type""" + print("Testing recovery type filtering...") + + mock_client = Mock(spec=CrateDBClient) + + # Mock the get_all_recovering_shards method + mock_recoveries = [ + RecoveryInfo( + schema_name="test", + table_name="table1", + shard_id=1, + node_name="node1", + node_id="n1", + 
recovery_type="PEER", + stage="DONE", + files_percent=100.0, + bytes_percent=100.0, + total_time_ms=1000, + routing_state="RELOCATING", + current_state="RELOCATING", + is_primary=True, + size_bytes=1000000, + ), + RecoveryInfo( + schema_name="test", + table_name="table2", + shard_id=2, + node_name="node2", + node_id="n2", + recovery_type="DISK", + stage="INDEX", + files_percent=50.0, + bytes_percent=45.0, + total_time_ms=2000, + routing_state="INITIALIZING", + current_state="INITIALIZING", + is_primary=False, + size_bytes=2000000, + ), + ] + + mock_client.get_all_recovering_shards.return_value = mock_recoveries + + monitor = RecoveryMonitor(mock_client) + + # Test filtering + peer_only = monitor.get_cluster_recovery_status(recovery_type_filter="PEER") + assert len(peer_only) == 1 + assert peer_only[0].recovery_type == "PEER" + + disk_only = monitor.get_cluster_recovery_status(recovery_type_filter="DISK") + assert len(disk_only) == 1 + assert disk_only[0].recovery_type == "DISK" + + all_recoveries = monitor.get_cluster_recovery_status(recovery_type_filter="all") + assert len(all_recoveries) == 2 + + print("βœ… Recovery type filtering tests passed") + + +def main(): + """Run all tests""" + print("πŸ§ͺ Running XMover Recovery Monitor Tests") + print("=" * 50) + + try: + test_recovery_info_parsing() + test_database_client_parsing() + test_recovery_monitor_formatting() + test_empty_recovery_handling() + test_recovery_type_filtering() + + print("\nπŸŽ‰ All tests passed successfully!") + print("\nπŸ“‹ Test Summary:") + print(" βœ… RecoveryInfo data class and properties") + print(" βœ… Database client parsing logic") + print(" βœ… Recovery monitor display formatting") + print(" βœ… Empty recovery state handling") + print(" βœ… Recovery type filtering") + + print("\nπŸš€ Recovery monitoring feature is ready for use!") + + except Exception as e: + print(f"\n❌ Test failed: {e}") + import traceback + + traceback.print_exc() + sys.exit(1)