ml-gde
diff --git a/‎README.md
Lines changed: 33 additions & 0 deletions b/‎README.md
Lines changed: 33 additions & 0 deletions
diff --git a/‎examples/llama_inference_example.py
Lines changed: 30 additions & 0 deletions b/‎examples/llama_inference_example.py
Lines changed: 30 additions & 0 deletions
diff --git a/‎jaxgarden/__init__.py
Lines changed: 19 additions & 0 deletions b/‎jaxgarden/__init__.py
Lines changed: 19 additions & 0 deletions
diff --git a/‎jaxgarden/models/__init__.py
Lines changed: 16 additions & 0 deletions b/‎jaxgarden/models/__init__.py
Lines changed: 16 additions & 0 deletions
diff --git a/‎jaxgarden/models/base.py
Lines changed: 96 additions & 9 deletions b/‎jaxgarden/models/base.py
Lines changed: 96 additions & 9 deletions
diff --git a/‎jaxgarden/models/generation_utils.py
Lines changed: 1 addition & 0 deletions b/‎jaxgarden/models/generation_utils.py
Lines changed: 1 addition & 0 deletions
@@ -37,6 +37,39 @@ pip install -e .
 
 ## Usage
 
+### LLaMA inference
+
+```python
+from jaxgarden import LlamaConfig, LlamaForCausalLM, Tokenizer
+from flax import nnx
+
+
+# HF repo id of the LLaMA variant that you want to use
+model_id = "meta-llama/Llama-3.2-1B"
+
+# initialize the LLaMA architecture
+config = LlamaConfig()
+model = LlamaForCausalLM(config, rngs=nnx.Rngs(0))
+
+# This is a one-liner to download HF checkpoint from HuggingFace Hub,
+# convert it to jaxgarden format,
+# save it in an Orbax checkpoint,
+# and then remove the HF checkpoint.
+model.from_hf(model_id)
+
+# this works just like `transformers.AutoTokenizer`,
+# but without the dependency of the whole `transformers` library.
+# Instead, we simply extend the `tokenizers` package and add some convenience code for JAX.
+tokenizer = Tokenizer.from_pretrained(model_id)
+    
+text = "The meaning of life is"
+model_inputs = tokenizer.encode(text)
+output = model.generate(**model_inputs, max_length=20, do_sample=True)
+output_text = tokenizer.decode(output)
+print(output_text)
+```
+
+
 ### MultiHeadAttention Module (Flax NNX)
 
 ```python
 
@@ -0,0 +1,30 @@
+from flax import nnx
+
+from jaxgarden import LlamaConfig, LlamaForCausalLM, Tokenizer
+
+if __name__ == "__main__":
+    # initialize a config object (with defaults for the 1B variant)
+    # other variants to be added.
+    config = LlamaConfig()
+    model = LlamaForCausalLM(config, rngs=nnx.Rngs(0))
+    model_id = "meta-llama/Llama-3.2-1B"
+
+    # this will download HF checkpoint from HuggingFace Hub,
+    # convert it to jaxgarden format,
+    # save it in an Orbax checkpoint,
+    # and then remove the HF checkpoint.
+    # If you didn't set your HF token globally,
+    # you may need to pass your token as an argument to this method.
+    model.from_hf(model_id, force_download=True)
+
+    # this works just like `transformers.AutoTokenizer`,
+    # but without the dependency of the whole `transformers` library.
+    # Instead, we simply extend the `tokenizers` package and add some convenience code for JAX.
+    tokenizer = Tokenizer.from_pretrained(model_id)
+
+    text = "The meaning of life is"
+    model_inputs = tokenizer.encode(text)
+    output = model.generate(**model_inputs, max_length=20, do_sample=True)
+    output_text = tokenizer.decode(output)
+    print(output, output.shape)
+    print(output_text)
@@ -4,6 +4,15 @@
 from jaxgarden.functional.attention import dot_product_attention
 from jaxgarden.models.base import BaseConfig, BaseModel
 from jaxgarden.models.generation_utils import GenerationMixin
+from jaxgarden.models.llama import (
+    LlamaAttention,
+    LlamaConfig,
+    LlamaForCausalLM,
+    LlamaMLP,
+    LlamaRMSNorm,
+    LlamaRotaryEmbedding,
+    LlamaTransformerBlock,
+)
 from jaxgarden.models.modernbert import (
     ModernBertAttention,
     ModernBertEmbeddings,
@@ -12,6 +21,7 @@
     ModernBertLayer,
     ModernBertMLP,
 )
+from jaxgarden.tokenization import Tokenizer
 
 __all__ = [
     # Base classes
@@ -20,6 +30,13 @@
     # Mixins
     "GenerationMixin",
     # Models
+    "LlamaAttention",
+    "LlamaConfig",
+    "LlamaForCausalLM",
+    "LlamaMLP",
+    "LlamaRMSNorm",
+    "LlamaRotaryEmbedding",
+    "LlamaTransformerBlock",
     "ModernBERTEncoder",
     "ModernBERTForMaskedLM",
     "ModernBertAttention",
@@ -28,6 +45,8 @@
     "ModernBertMLP",
     # Attention modules
     "MultiHeadAttention",
+    # tokenization
+    "Tokenizer",
     # Functional interfaces
     "dot_product_attention",
 ]
 
@@ -1,5 +1,14 @@
 from jaxgarden.models.base import BaseConfig, BaseModel
 from jaxgarden.models.generation_utils import GenerationMixin
+from jaxgarden.models.llama import (
+    LlamaAttention,
+    LlamaConfig,
+    LlamaForCausalLM,
+    LlamaMLP,
+    LlamaRMSNorm,
+    LlamaRotaryEmbedding,
+    LlamaTransformerBlock,
+)
 from jaxgarden.models.modernbert import (
     ModernBertAttention,
     ModernBertEmbeddings,
@@ -13,6 +22,13 @@
     "BaseConfig",
     "BaseModel",
     "GenerationMixin",
+    "LlamaAttention",
+    "LlamaConfig",
+    "LlamaForCausalLM",
+    "LlamaMLP",
+    "LlamaRMSNorm",
+    "LlamaRotaryEmbedding",
+    "LlamaTransformerBlock",
     "ModernBERTEncoder",
     "ModernBERTForMaskedLM",
     "ModernBertAttention",
 
@@ -1,4 +1,6 @@
+import logging
 import os
+import shutil
 from collections.abc import Iterator
 from dataclasses import dataclass, field
 from pathlib import Path
@@ -11,6 +13,9 @@
 from huggingface_hub import snapshot_download
 from safetensors import safe_open
 
+# Set up logging
+logger = logging.getLogger(__name__)
+
 DEFAULT_PARAMS_FILE = "jaxgarden_state"
 
 
@@ -64,11 +69,16 @@ def __init__(
         self.rngs = rngs
 
     @property
-    def state(self) -> dict[str, jnp.ndarray]:
-        """Splits state from the graph and returns it.
+    def state(self) -> nnx.State:
+        """Splits state from the graph and returns it"""
+        return nnx.split(self, nnx.Param, ...)[1]
+
+    @property
+    def state_dict(self) -> dict[str, jnp.ndarray]:
+        """Splits state from the graph and returns it as a dictionary.
 
         It can be used for serialization with orbax."""
-        state = nnx.split(self, nnx.Param, ...)[1]
+        state = self.state
         pure_dict_state = nnx.to_pure_dict(state)
         return pure_dict_state
 
@@ -78,7 +88,7 @@ def save(self, path: str) -> None:
         Args:
             path: The directory path to save the model state to.
         """
-        state = self.state
+        state = self.state_dict
         checkpointer = ocp.StandardCheckpointer()
         checkpointer.save(os.path.join(path, DEFAULT_PARAMS_FILE), state)
         checkpointer.wait_until_finished()
@@ -97,20 +107,30 @@ def load(self, path: str) -> nnx.Module:
         return nnx.merge(graphdef, abstract_state)
 
     @staticmethod
-    def download_from_hf(repo_id: str, local_dir: str) -> None:
+    def download_from_hf(
+        repo_id: str, local_dir: str, token: str | None = None, force_download: bool = False
+    ) -> None:
         """Downloads the model from the Hugging Face Hub.
 
         Args:
             repo_id: The repository ID of the model to download.
             local_dir: The local directory to save the model to.
         """
-        snapshot_download(repo_id, local_dir=local_dir)
+        logger.info(f"Attempting to download {repo_id} from Hugging Face Hub to {local_dir}.")
+        try:
+            snapshot_download(
+                repo_id, local_dir=local_dir, token=token, force_download=force_download
+            )
+            logger.info(f"Successfully downloaded {repo_id} to {local_dir}.")
+        except Exception as e:
+            logger.error(f"Failed to download {repo_id}: {e}")
+            raise
 
     @staticmethod
-    def load_safetensors(path_to_model_weights: str) -> Iterator[tuple[Any, Any]]:
+    def iter_safetensors(path_to_model_weights: str) -> Iterator[tuple[Any, Any]]:
         """Helper function to lazily load params from safetensors file.
 
-        Use this static method to load weights for conversion tasks.
+        Use this static method to iterate over weights for conversion tasks.
 
         Args:
             model_path_to_params: Path to directory containing .safetensors files."""
@@ -121,5 +141,72 @@ def load_safetensors(path_to_model_weights: str) -> Iterator[tuple[Any, Any]]:
 
         for file in safetensors_files:
             with safe_open(file, framework="jax", device="cpu") as f:
-                for key in f:
+                for key in f.keys():  # noqa: SIM118
                     yield (key, f.get_tensor(key))
+
+    def from_hf(
+        self,
+        model_repo_or_id: str,
+        token: str | None = None,
+        force_download: bool = False,
+        save_in_orbax: bool = True,
+        remove_hf_after_conversion: bool = True,
+    ) -> None:
+        """Downloads a Hugging Face checkpoint and loads its weights into this model in place.
+
+        The checkpoint is downloaded to ``~/.jaxgarden/hf_models/<repo id>``, converted with
+        ``convert_weights_from_hf`` and applied to ``self``; nothing is returned.
+        It can also save the converted weights in an Orbax checkpoint
+        (``~/.jaxgarden/models/<repo id>``) and remove the original HF checkpoint
+        after conversion.
+
+        Args:
+            model_repo_or_id: The repository ID or name of the model to download.
+            token: The token to use for authentication with the Hugging Face Hub.
+            force_download: Whether to force a fresh download. When True, an already
+                existing converted checkpoint directory is deleted instead of raising.
+            save_in_orbax: Whether to save the converted weights in an Orbax checkpoint.
+            remove_hf_after_conversion: Whether to remove the downloaded HuggingFace checkpoint
+                after conversion.
+
+        Raises:
+            RuntimeError: If the converted checkpoint directory already exists
+                and ``force_download`` is False.
+        """
+        logger.info(f"Starting from_hf process for model: {model_repo_or_id}")
+        # Build both directories from one root instead of string-replacing "hf_models"
+        # in the download path: str.replace would also rewrite a home directory or
+        # repo id that happens to contain that substring.
+        cache_root = os.path.join(os.path.expanduser("~"), ".jaxgarden")
+        repo_parts = model_repo_or_id.split("/")
+        local_dir = os.path.join(cache_root, "hf_models", *repo_parts)
+        save_dir = os.path.join(cache_root, "models", *repo_parts)
+        if os.path.exists(save_dir):
+            if not force_download:
+                raise RuntimeError(
+                    f"Path {save_dir} already exists."
+                    + " Set force_download to True to run conversion again."
+                )
+            logger.warning(f"Removing {save_dir} because force_download is set to True")
+            shutil.rmtree(save_dir)
+
+        logger.debug(f"Local Hugging Face model directory set to: {local_dir}")
+
+        BaseModel.download_from_hf(
+            model_repo_or_id, local_dir, token=token, force_download=force_download
+        )
+        logger.info(f"Initiating weight iteration from safetensors in {local_dir}")
+        # iter_safetensors is lazy: the files are only read while the conversion below
+        # consumes the iterator, so the HF directory must not be removed before that.
+        weights = BaseModel.iter_safetensors(local_dir)
+        state = self.state
+        logger.info("Running weight conversion...")
+        self.convert_weights_from_hf(state, weights)
+        logger.info("Weight conversion finished. Updating model state...")
+        nnx.update(self, state)
+        logger.info("Model state successfully updated with converted weights.")
+
+        if remove_hf_after_conversion:
+            # Destructive step, hence logged at warning level.
+            logger.warning(f"Removing HuggingFace checkpoint from {local_dir}...")
+            shutil.rmtree(local_dir)
+
+        if save_in_orbax:
+            logger.info(f"Saving Orbax checkpoint in {save_dir}.")
+            self.save(save_dir)
+
+        logger.info(f"from_hf process completed for {model_repo_or_id}.")
+
+    def convert_weights_from_hf(self, state: nnx.State, weights: Iterator[tuple[Any, Any]]) -> None:
+        """Convert weights from Hugging Face Hub to the model's state.
+
+        This method should be implemented in downstream classes
+        to support conversion from HuggingFace format.
+
+        Args:
+            state: The model state to fill. ``from_hf`` passes ``self.state`` and applies it
+                with ``nnx.update`` afterwards, so implementations are expected to write
+                the converted weights into it in place.
+            weights: Iterator of ``(key, tensor)`` pairs, as yielded by ``iter_safetensors``.
+
+        Raises:
+            NotImplementedError: Always, in this base implementation.
+        """
+        raise NotImplementedError("This model does not support conversion from HuggingFace yet.")
@@ -363,6 +363,7 @@ def scan_step(carry: dict, _: Any) -> tuple[dict, None]:
     def generate(
         self: "GenerationMixin",
         input_ids: jnp.ndarray,
+        attention_mask: jnp.ndarray | None = None,
         max_length: int = 20,
         temperature: float = 1.0,
         top_k: int | None = None,