
Commit 7fa986b

New buffer implementation.
1 parent 0b97fc7 commit 7fa986b

21 files changed: +835 −434 lines
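Most of the diff is a mechanical migration at the algorithm call sites: instead of looping over an episode and calling Buffer.append once per transition, the algorithms now hand the whole episode to Buffer.store_episode. Below is a minimal sketch of the new call pattern; the import path follows the file layout in this commit, and the tensor shapes and dict layout of each transition are placeholders, not part of the diff.

import torch as t
from machin.frame.buffers.buffer import Buffer

# Placeholder transition data; real algorithms fill these with environment
# observations and network outputs.
episode = [
    {
        "state": {"state": t.zeros(1, 4)},
        "action": {"action": t.zeros(1, 2)},
        "next_state": {"state": t.zeros(1, 4)},
        "reward": 0.0,
        "terminal": True,
    }
]

replay_buffer = Buffer(buffer_size=1000000, buffer_device="cpu")

# Before this commit: one append call per transition (see the removed lines
# in the hunks below). After: a single call for the whole episode.
replay_buffer.store_episode(
    episode,
    required_attrs=("state", "action", "next_state", "reward", "terminal"),
)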

machin/frame/algorithms/a2c.py

+12 −13

@@ -312,19 +312,18 @@ def store_episode(self, episode: List[Union[Transition, Dict]]):
                 + gae_delta
             )
 
-        for trans in episode:
-            self.replay_buffer.append(
-                trans,
-                required_attrs=(
-                    "state",
-                    "action",
-                    "next_state",
-                    "reward",
-                    "value",
-                    "gae",
-                    "terminal",
-                ),
-            )
+        self.replay_buffer.store_episode(
+            episode,
+            required_attrs=(
+                "state",
+                "action",
+                "next_state",
+                "reward",
+                "value",
+                "gae",
+                "terminal",
+            ),
+        )
 
     def update(
         self, update_value=True, update_policy=True, concatenate_samples=True, **__

machin/frame/algorithms/ddpg.py

+4 −5

@@ -370,11 +370,10 @@ def store_episode(self, episode: List[Union[Transition, Dict]]):
         """
         Add a full episode of transition samples to the replay buffer.
         """
-        for trans in episode:
-            self.replay_buffer.append(
-                trans,
-                required_attrs=("state", "action", "reward", "next_state", "terminal"),
-            )
+        self.replay_buffer.store_episode(
+            episode,
+            required_attrs=("state", "action", "reward", "next_state", "terminal"),
+        )
 
     def update(
         self,

machin/frame/algorithms/ddpg_per.py

+1 −1

@@ -82,7 +82,7 @@ def __init__(
             )
         else:
             # A loss defined in ``torch.nn.modules.loss``
-            if self.criterion.reduction != "none":
+            if getattr(self.criterion, "reduction") != "none":
                 default_logger.warning(
                     "The reduction property of criterion is not 'none', "
                     "automatically corrected."

machin/frame/algorithms/dqn.py

+4 −5

@@ -325,11 +325,10 @@ def store_episode(self, episode: List[Union[Transition, Dict]]):
         """
         Add a full episode of transition samples to the replay buffer.
         """
-        for trans in episode:
-            self.replay_buffer.append(
-                trans,
-                required_attrs=("state", "action", "reward", "next_state", "terminal"),
-            )
+        self.replay_buffer.store_episode(
+            episode,
+            required_attrs=("state", "action", "reward", "next_state", "terminal"),
+        )
 
     def update(
         self, update_value=True, update_target=True, concatenate_samples=True, **__

machin/frame/algorithms/dqn_per.py

+1 −1

@@ -79,7 +79,7 @@ def __init__(
             )
         else:
             # A loss defined in ``torch.nn.modules.loss``
-            if self.criterion.reduction != "none":
+            if getattr(self.criterion, "reduction") != "none":
                 default_logger.warning(
                     "The reduction property of criterion is not 'none', "
                     "automatically corrected."

machin/frame/algorithms/gail.py

+6 −4

@@ -237,10 +237,12 @@ def store_expert_episode(self, episode: List[Union[ExpertTransition, Dict]]):
 
         Only states and actions are required.
         """
-        for trans in episode:
-            if isinstance(trans, dict):
-                trans = ExpertTransition(**trans)
-            self.expert_replay_buffer.append(trans, required_attrs=("state", "action"))
+        episode = [
+            ExpertTransition(**trans) for trans in episode if isinstance(trans, dict)
+        ]
+        self.expert_replay_buffer.store_episode(
+            episode, required_attrs=("state", "action")
+        )
 
     def update(
         self,

machin/frame/algorithms/impala.py

+1 −10

@@ -40,7 +40,7 @@ def sample_batch(
         *_,
         **__,
     ) -> Any:
-        super().sample_batch(
+        return super().sample_batch(
            batch_size=batch_size,
            concatenate=concatenate,
            device=device,
@@ -310,15 +310,6 @@ def update(self, update_value=True, update_policy=True, **__):
                " an unknown error has occurred."
            )
 
-        for major_attr in (state, action, next_state):
-            for k, v in major_attr.items():
-                major_attr[k] = t.cat(v, dim=0)
-                assert major_attr[k].shape[0] == sum_length
-
-        terminal = t.cat(terminal, dim=0).view(sum_length, 1)
-        reward = t.cat(reward, dim=0).view(sum_length, 1)
-        action_log_prob = t.cat(action_log_prob, dim=0).view(sum_length, 1)
-
         # Below are the v-trace process
 
         # Calculate c and rho first, because there is no dependency

machin/frame/algorithms/maddpg.py

+17 −19

@@ -7,14 +7,19 @@
 from machin.utils.logging import default_logger
 from machin.model.nets.base import static_module_wrapper
 from machin.parallel.pool import P2PPool, ThreadPool
+from machin.frame.transition import Scalar
 
 # pylint: disable=wildcard-import, unused-wildcard-import
 from .ddpg import *
 
 
 class SHMBuffer(Buffer):
-    @staticmethod
-    def make_tensor_from_batch(batch, device, concatenate):
+    def make_tensor_from_batch(
+        self,
+        batch: List[Union[Scalar, t.Tensor]],
+        device: Union[str, t.device],
+        concatenate: bool,
+    ):
         # this function is used in post processing, and we will
         # move all cpu tensors to shared memory.
         if concatenate and len(batch) != 0:
@@ -307,11 +312,11 @@ def optimizers(self):
     def optimizers(self, optimizers):
         counter = 0
         for ac in self.actor_optims:
-            for id, _acc in enumerate(ac):
-                ac[id] = optimizers[counter]
+            for i in range(len(ac)):
+                ac[i] = optimizers[counter]
                 counter += 1
-        for id in range(len(self.critic_optims)):
-            self.critic_optims[id] = optimizers[counter]
+        for i in range(len(self.critic_optims)):
+            self.critic_optims[i] = optimizers[counter]
             counter += 1
 
     @property
@@ -506,18 +511,11 @@ def store_episodes(self, episodes: List[List[Union[Transition, Dict]]]):
         assert len(episodes) == len(self.replay_buffers)
         all_length = [len(ep) for ep in episodes]
         assert len(set(all_length)) == 1, "All episodes must have the same length!"
-        for buff, ep in zip(self.replay_buffers, episodes):
-            for trans in ep:
-                buff.append(
-                    trans,
-                    required_attrs=(
-                        "state",
-                        "action",
-                        "next_state",
-                        "reward",
-                        "terminal",
-                    ),
-                )
+        for buffer, episode in zip(self.replay_buffers, episodes):
+            buffer.store_episode(
+                episode,
+                required_attrs=("state", "action", "next_state", "reward", "terminal",),
+            )
 
     def update(
         self,
@@ -961,7 +959,7 @@ def _check_parameters_device(models):
     def _create_sample_method(indexes):
         def sample_method(buffer, _len):
             nonlocal indexes
-            batch = [buffer[i] for i in indexes if i < len(buffer)]
+            batch = [buffer.storage[i] for i in indexes if i < buffer.size()]
             return len(batch), batch
 
         return sample_method

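The last maddpg hunk also adapts the index-based sample method to the new buffer internals: transitions are read through buffer.storage and the current fill level through buffer.size(), instead of indexing and calling len() on the buffer itself. Below is a minimal sketch of a custom sample method written against those two members only; the "take the newest k transitions" policy is invented for illustration and follows the same (buffer, _len) -> (count, batch) convention used by _create_sample_method above.

def make_newest_sample_method(k):
    # Returns a sample method compatible with the convention used by
    # _create_sample_method in the hunk above.
    def sample_method(buffer, _len):
        size = buffer.size()
        start = max(0, size - k)
        # buffer.storage supports integer indexing, as in the hunk above.
        batch = [buffer.storage[i] for i in range(start, size)]
        return len(batch), batch

    return sample_method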
machin/frame/algorithms/rainbow.py

+11 −12

@@ -188,18 +188,17 @@ def store_episode(self, episode: List[Union[Transition, Dict]]):
                 value_sum = value_sum * self.discount + episode[i + j]["reward"]
             episode[i]["value"] = value_sum
 
-        for trans in episode:
-            self.replay_buffer.append(
-                trans,
-                required_attrs=(
-                    "state",
-                    "action",
-                    "next_state",
-                    "reward",
-                    "value",
-                    "terminal",
-                ),
-            )
+        self.replay_buffer.store_episode(
+            episode,
+            required_attrs=(
+                "state",
+                "action",
+                "next_state",
+                "reward",
+                "value",
+                "terminal",
+            ),
+        )
 
     def update(
         self, update_value=True, update_target=True, concatenate_samples=True, **__

machin/frame/algorithms/sac.py

+5 −6

@@ -271,11 +271,10 @@ def store_episode(self, episode: List[Union[Transition, Dict]]):
         """
         Add a full episode of transition samples to the replay buffer.
         """
-        for trans in episode:
-            self.replay_buffer.append(
-                trans,
-                required_attrs=("state", "action", "next_state", "reward", "terminal"),
-            )
+        self.replay_buffer.store_episode(
+            episode,
+            required_attrs=("state", "action", "next_state", "reward", "terminal"),
+        )
 
     def update(
         self,
@@ -395,7 +394,7 @@ def update(
         self.critic.eval()
         self.critic2.eval()
         # use .item() to prevent memory leakage
-        return (-act_policy_loss.item(), (value_loss.item() + value_loss2.item()) / 2)
+        return -act_policy_loss.item(), (value_loss.item() + value_loss2.item()) / 2
 
     def update_lr_scheduler(self):
         """

machin/frame/algorithms/td3.py

+1 −1

@@ -257,7 +257,7 @@ def update(
         self.critic.eval()
         self.critic2.eval()
         # use .item() to prevent memory leakage
-        return (-act_policy_loss.item(), (value_loss.item() + value_loss2.item()) / 2)
+        return -act_policy_loss.item(), (value_loss.item() + value_loss2.item()) / 2
 
     @staticmethod
     def policy_noise_function(actions, *_):

machin/frame/buffers/buffer.py

+3 −4

@@ -15,7 +15,6 @@ def __init__(
         buffer_size: int = 1000000,
         buffer_device: Union[str, t.device] = "cpu",
         storage: TransitionStorageBase = None,
-        *_,
         **__,
     ):
         """
@@ -79,15 +78,15 @@ def store_episode(
             elif isinstance(transition, TransitionBase):
                 pass
             else: # pragma: no cover
-                raise RuntimeError(
+                raise ValueError(
                     "Transition object must be a dict or an instance"
-                    " of the Transition class"
+                    " of the Transition class."
                 )
             if not transition.has_keys(required_attrs):
                 missing_keys = set(required_attrs) - set(transition.keys())
                 raise ValueError(
                     f"Transition object missing attributes: {missing_keys}, "
-                    f"object is {transition}"
+                    f"object is {transition}."
                 )
             episode[idx] = transition

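As the second buffer.py hunk shows, store_episode now raises ValueError both for unsupported transition types and for missing required attributes. A small sketch of the missing-attribute case follows; it assumes, as the surrounding code suggests, that dict transitions are converted to Transition objects before the has_keys check, and the tensor shapes and dict layout are placeholders.

import torch as t
from machin.frame.buffers.buffer import Buffer

buffer = Buffer(buffer_size=10)
# A transition with the base attributes but without the extra "value"
# attribute that algorithms such as Rainbow list in required_attrs.
episode = [
    {
        "state": {"state": t.zeros(1, 4)},
        "action": {"action": t.zeros(1, 2)},
        "next_state": {"state": t.zeros(1, 4)},
        "reward": 0.0,
        "terminal": True,
    }
]
try:
    buffer.store_episode(
        episode,
        required_attrs=("state", "action", "next_state", "reward", "value", "terminal"),
    )
except ValueError as e:
    # e.g. "Transition object missing attributes: {'value'}, object is ..."
    print(e)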
machin/frame/buffers/buffer_d.py

+4 −3

@@ -21,8 +21,7 @@ def __init__(
         group: RpcGroup,
         buffer_size: int = 1000000,
         storage: TransitionStorageBase = None,
-        *_,
-        **__,
+        **kwargs,
     ):
         """
         Create a distributed replay buffer instance.
@@ -58,7 +57,9 @@ def __init__(
             storage: Custom storage, not compatible with `buffer_size` and
                 `buffer_device`.
         """
-        super().__init__(buffer_size=buffer_size, buffer_device="cpu", storage=storage)
+        super().__init__(
+            buffer_size=buffer_size, buffer_device="cpu", storage=storage, **kwargs
+        )
         self.buffer_name = buffer_name
         self.group = group
