NVIDIA · ZhiyuLi-Nvidia · May 7, 2025 · SahilJain314 · May 8, 2025 · ZhiyuLi-Nvidia
diff --git a/examples/configs/dpo.yaml b/examples/configs/dpo.yaml
@@ -84,6 +84,7 @@ policy:
 data:
   dataset_name: "HelpSteer3"
   max_input_seq_length: ${policy.max_total_sequence_length}
+  shuffle: true  # Shuffle the training dataloader
 logger:
   log_dir: "logs"  # Base directory for all logs
   wandb_enabled: false # Make sure you do a ``wandb login [Your API key]'' before running

diff --git a/examples/configs/grpo_math_1B.yaml b/examples/configs/grpo_math_1B.yaml
@@ -96,6 +96,7 @@ data:
   prompt_file: "examples/prompts/cot.txt"
   system_prompt_file: null
   dataset_name: "OpenMathInstruct-2"
+  shuffle: true  # Shuffle the prompt dataloader
 
 env:
   math:

diff --git a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp1-quick.yaml b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp1-quick.yaml
@@ -68,6 +68,7 @@ policy:
 data:
   dataset_name: "HelpSteer3"
   max_input_seq_length: ${policy.max_total_sequence_length}
+  shuffle: true
 
 logger:
   log_dir: "logs"

diff --git a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1.yaml b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1.yaml
@@ -68,6 +68,7 @@ policy:
 data:
   dataset_name: "HelpSteer3"
   max_input_seq_length: ${policy.max_total_sequence_length}
+  shuffle: true
 
 logger:
   log_dir: "logs"

diff --git a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.yaml b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.yaml
@@ -68,6 +68,7 @@ policy:
 data:
   dataset_name: "HelpSteer3"
   max_input_seq_length: ${policy.max_total_sequence_length}
+  shuffle: true
 
 logger:
   log_dir: "logs"

diff --git a/examples/configs/recipes/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.yaml b/examples/configs/recipes/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.yaml
@@ -69,6 +69,7 @@ policy:
 data:
   dataset_name: "HelpSteer3"
   max_input_seq_length: ${policy.max_total_sequence_length}
+  shuffle: true
 logger:
   log_dir: "logs"
   wandb_enabled: true

diff --git a/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v2.yaml b/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v2.yaml
@@ -84,6 +84,7 @@ data:
   prompt_file: examples/prompts/cot.txt
   system_prompt_file: null
   dataset_name: OpenMathInstruct-2
+  shuffle: true
 env:
   math:
     num_workers: 8

diff --git a/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.yaml b/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.yaml
@@ -84,6 +84,8 @@ data:
   prompt_file: examples/prompts/cot.txt
   system_prompt_file: null
   dataset_name: OpenMathInstruct-2
+  shuffle: true
+
 env:
   math:
     num_workers: 8

diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.v2.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.v2.yaml
@@ -84,6 +84,7 @@ data:
   prompt_file: examples/prompts/cot.txt
   system_prompt_file: null
   dataset_name: OpenMathInstruct-2
+  shuffle: true
 env:
   math:
     num_workers: 8

diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.v2.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.v2.yaml
@@ -84,6 +84,7 @@ data:
   prompt_file: examples/prompts/cot.txt
   system_prompt_file: null
   dataset_name: OpenMathInstruct-2
+  shuffle: true
 env:
   math:
     num_workers: 8

diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v2.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v2.yaml
@@ -84,6 +84,7 @@ data:
   prompt_file: examples/prompts/cot.txt
   system_prompt_file: null
   dataset_name: OpenMathInstruct-2
+  shuffle: true
 env:
   math:
     num_workers: 8

diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v2.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v2.yaml
@@ -84,6 +84,7 @@ data:
   prompt_file: examples/prompts/cot.txt
   system_prompt_file: null
   dataset_name: OpenMathInstruct-2
+  shuffle: true
 env:
   math:
     num_workers: 8

diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v2.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v2.yaml
@@ -84,6 +84,7 @@ data:
   prompt_file: examples/prompts/cot.txt
   system_prompt_file: null
   dataset_name: OpenMathInstruct-2
+  shuffle: true
 env:
   math:
     num_workers: 8

diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp1.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp1.yaml
@@ -49,6 +49,7 @@ data:
   dataset_name: squad
   add_bos: true
   add_eos: true
+  shuffle: true
 logger:
   log_dir: logs/sft-llama3.1-8b-instruct-1n8g-fsdp1
   wandb_enabled: true

diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.yaml
@@ -49,6 +49,7 @@ data:
   dataset_name: squad
   add_bos: true
   add_eos: true
+  shuffle: true
 logger:
   log_dir: logs/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long
   wandb_enabled: true

diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.yaml
@@ -49,6 +49,7 @@ data:
   dataset_name: squad
   add_bos: true
   add_eos: true
+  shuffle: true
 logger:
   log_dir: logs/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp
   wandb_enabled: true

diff --git a/examples/configs/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.yaml b/examples/configs/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.yaml
@@ -49,6 +49,7 @@ data:
   dataset_name: squad
   add_bos: true
   add_eos: true
+  shuffle: true
 logger:
   log_dir: logs/sft-llama3.2-1b-1n8g-fsdp2tp1
   wandb_enabled: true

diff --git a/examples/configs/recipes/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.yaml b/examples/configs/recipes/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.yaml
@@ -49,6 +49,7 @@ data:
   dataset_name: squad
   add_bos: true
   add_eos: true
+  shuffle: true
 logger:
   log_dir: logs/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt
   wandb_enabled: true

diff --git a/examples/configs/sft.yaml b/examples/configs/sft.yaml
@@ -61,6 +61,7 @@ data:
   dataset_name: "squad"
   add_bos: true
   add_eos: true
+  shuffle: true  # Shuffle the training dataloader
 
 logger:
   log_dir: "logs"  # Base directory for all logs

diff --git a/nemo_rl/algorithms/dpo.py b/nemo_rl/algorithms/dpo.py
@@ -153,7 +153,7 @@ def setup(
     train_dataloader = StatefulDataLoader(
         train_dataset,
         batch_size=policy_config["train_global_batch_size"],
-        shuffle=True,
+        shuffle=data_config.get("shuffle", True),  # old hard-coded default if unset
         collate_fn=partial(
             dpo_collate_fn,
             tokenizer=tokenizer,

diff --git a/nemo_rl/algorithms/grpo.py b/nemo_rl/algorithms/grpo.py
@@ -171,7 +171,7 @@ def setup(
     dataloader = StatefulDataLoader(
         dataset,
         batch_size=grpo_config["num_prompts_per_step"],
-        shuffle=False,
+        shuffle=data_config.get("shuffle", False),  # old hard-coded default if unset
         collate_fn=rl_collate_fn,
     )
     if last_checkpoint_path is not None:

diff --git a/nemo_rl/algorithms/sft.py b/nemo_rl/algorithms/sft.py
@@ -145,7 +145,7 @@ def setup(
     train_dataloader = StatefulDataLoader(
         train_dataset,
         batch_size=policy_config["train_global_batch_size"],
-        shuffle=True,
+        shuffle=data_config.get("shuffle", True),  # old hard-coded default if unset
         collate_fn=rl_collate_fn,
         drop_last=True,
     )

diff --git a/nemo_rl/data/__init__.py b/nemo_rl/data/__init__.py
@@ -23,6 +23,7 @@ class DataConfig(TypedDict):
     val_dataset_name: Optional[str]
     add_bos: Optional[bool]
     add_eos: Optional[bool]
+    shuffle: Optional[bool]  # passed as `shuffle=` to the StatefulDataLoader in setup()
 
 
 class MathDataConfig(DataConfig):