bhashini-ai
diff --git a/‎Tools/PyTorch/TimeSeriesPredictionPlatform/.dockerignore
+4 b/‎Tools/PyTorch/TimeSeriesPredictionPlatform/.dockerignore
+4
diff --git a/‎Tools/PyTorch/TimeSeriesPredictionPlatform/.gitignore
+2 b/‎Tools/PyTorch/TimeSeriesPredictionPlatform/.gitignore
+2
diff --git a/‎Tools/PyTorch/TimeSeriesPredictionPlatform/Dockerfile
+26-7 b/‎Tools/PyTorch/TimeSeriesPredictionPlatform/Dockerfile
+26-7
diff --git a/‎Tools/PyTorch/TimeSeriesPredictionPlatform/LICENSE
+1-1 b/‎Tools/PyTorch/TimeSeriesPredictionPlatform/LICENSE
+1-1
diff --git a/‎Tools/PyTorch/TimeSeriesPredictionPlatform/NOTICE
+1-1 b/‎Tools/PyTorch/TimeSeriesPredictionPlatform/NOTICE
+1-1
diff --git a/‎Tools/PyTorch/TimeSeriesPredictionPlatform/README.md
+429-108 b/‎Tools/PyTorch/TimeSeriesPredictionPlatform/README.md
+429-108
diff --git a/‎Tools/PyTorch/TimeSeriesPredictionPlatform/callbacks/callbacks.py
+2-1 b/‎Tools/PyTorch/TimeSeriesPredictionPlatform/callbacks/callbacks.py
+2-1
diff --git a/‎Tools/PyTorch/TimeSeriesPredictionPlatform/callbacks/ctl_callbacks.py
+2-1 b/‎Tools/PyTorch/TimeSeriesPredictionPlatform/callbacks/ctl_callbacks.py
+2-1
diff --git a/‎Tools/PyTorch/TimeSeriesPredictionPlatform/callbacks/hydra_callbacks.py
+16-3 b/‎Tools/PyTorch/TimeSeriesPredictionPlatform/callbacks/hydra_callbacks.py
+16-3
diff --git a/‎Tools/PyTorch/TimeSeriesPredictionPlatform/conf/conf_utils.py
+7-3 b/‎Tools/PyTorch/TimeSeriesPredictionPlatform/conf/conf_utils.py
+7-3
diff --git a/‎Tools/PyTorch/TimeSeriesPredictionPlatform/conf/converter_config.yaml
+2-15 b/‎Tools/PyTorch/TimeSeriesPredictionPlatform/conf/converter_config.yaml
+2-15
diff --git a/‎Tools/PyTorch/TimeSeriesPredictionPlatform/conf/dataset/M5.yaml
+92 b/‎Tools/PyTorch/TimeSeriesPredictionPlatform/conf/dataset/M5.yaml
+92
@@ -6,3 +6,7 @@
 .gitignore
 Dockerfile
 .dockerignore
+/outputs/
+/datasets/
+/multirun/
+/notebooks/
@@ -3,3 +3,5 @@ __pycache__
 /outputs/
 *.zip
 /datasets/*/
+/datasets/
+/notebooks/
@@ -1,5 +1,19 @@
+# Copyright 2021-2024 NVIDIA CORPORATION
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 #SPDX-License-Identifier: Apache-2.0
-ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:22.04-py3
+ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:22.12-py3
 
 FROM ${FROM_IMAGE_NAME}
 
@@ -30,26 +44,31 @@ RUN apt-get update && \
     rm -rf /var/lib/apt/lists/*
 
 
-# Install perf_client required library
 RUN apt-get update && \
     apt-get install -y libb64-dev libb64-0d curl && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
-# Set workdir and python path
 WORKDIR /workspace
 ENV PYTHONPATH /workspace
 
+RUN rm -f /usr/lib/libxgboost.so
+
 ADD requirements.txt /workspace/requirements.txt
 ADD triton/requirements.txt /workspace/triton/requirements.txt
 RUN pip install -r /workspace/requirements.txt
 RUN pip install -r /workspace/triton/requirements.txt
 RUN pip install nvidia-pyindex
 RUN pip install git+https://github.com/NVIDIA/dllogger#egg=dllogger
-RUN pip install --no-cache-dir -r requirements.txt -f https://data.dgl.ai/wheels/repo.html
+RUN pip install --no-cache-dir -r requirements.txt
+RUN pip install dgl==1.0.1 -f https://data.dgl.ai/wheels/cu117/repo.html
 
-# Add model files to workspace
-ADD . /workspace
+ADD ./hydra_plugins /workspace/hydra_plugins
+RUN pip install /workspace/hydra_plugins/hydra_optuna_sweeper/
+RUN pip install /workspace/hydra_plugins/hydra_joblib_launcher/
+RUN pip install /workspace/hydra_plugins/hydra_multiprocessing_launcher/
+RUN pip install /workspace/hydra_plugins/hydra_torchrun_launcher/
+RUN cp /workspace/hydra_plugins/optuna_sweeper.py "$(python -c 'import hydra.plugins.sweeper as m; print(m.__file__)')"
 
-RUN pip install -e distributed_launcher
+ADD . /workspace
 RUN rm -rf examples docker-examples tutorials
@@ -186,7 +186,7 @@
       same "printed page" as the copyright notice for easier
       identification within third-party archives.
 
-   Copyright 2021-2022 NVIDIA Corporation
+   Copyright [yyyy] [name of copyright owner]
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
 
@@ -190,7 +190,7 @@ This repository contains code from https://github.com/rwightman/pytorch-image-mo
       same "printed page" as the copyright notice for easier
       identification within third-party archives.
 
-   Copyright 2021-2022 NVIDIA Corporation
+   Copyright [yyyy] [name of copyright owner]
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
 
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# SPDX-License-Identifier: Apache-2.0
 class Callback(object):
     """
     Base class for building new callbacks.
 
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# SPDX-License-Identifier: Apache-2.0
 import time
 
 import dllogger
 
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,26 +13,27 @@
 # limitations under the License.
 
 import os
+import mlflow
 import pandas as pd
 
 from omegaconf import OmegaConf
 from hydra.experimental.callback import Callback
 
 from loggers.log_helper import jsonlog_2_df
+from mlflow.entities import Metric, Param
 
 class MergeLogs(Callback):
     def on_multirun_end(self, config, **kwargs):
         OmegaConf.resolve(config)
 
-        ALLOWED_KEYS=['timestamp', 'elapsed_time', 'step', 'loss', 'val_loss', 'MAE', 'MSE', 'RMSE', 'P50', 'P90']
+        ALLOWED_KEYS=['timestamp', 'elapsed_time', 'step', 'loss', 'val_loss', 'MAE', 'MSE', 'RMSE', 'P50', 'P90', 'SMAPE', 'TDI']
 
         dfs = []
         for p, sub_dirs, files in os.walk(config.hydra.sweep.dir):
             if 'log.json' in files:
                 path = os.path.join(p, 'log.json')
                 df = jsonlog_2_df(path, ALLOWED_KEYS)
                 dfs.append(df)
-
         # Transpose dataframes
         plots = {}
         for c in dfs[0].columns:
@@ -49,3 +50,15 @@ def on_multirun_end(self, config, **kwargs):
         timestamps = (timestamps * 1000).astype(int)
         if not timestamps.is_monotonic:
             raise ValueError('Timestamps are not monotonic')
+
+        metrics = [Metric('_'.join((k,name)), v, timestamp, step)
+                for k, df in plots.items()
+                for timestamp, (step, series) in zip(timestamps, df.iterrows())
+                for name, v in series.items()
+                ]
+        client = mlflow.tracking.MlflowClient(tracking_uri=config.trainer.config.mlflow_store)
+        exp = client.get_experiment_by_name(config.trainer.config.get('experiment_name', ''))
+        run = client.create_run(exp.experiment_id if exp else '0')
+        for i in range(0, len(metrics), 1000):
+            client.log_batch(run.info.run_id, metrics=metrics[i:i+1000])
+        client.set_terminated(run.info.run_id)
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,8 +14,10 @@
 
 from omegaconf import OmegaConf
 from data.data_utils import InputTypes, DataTypes, FeatureSpec
+import functools
+from hydra.utils import get_method
 
-OmegaConf.register_new_resolver("and", lambda x, y: x and y, use_cache=True)
+OmegaConf.register_new_resolver("and", lambda x, y: bool(x and y), use_cache=True)
 OmegaConf.register_new_resolver("feature.selector",
         lambda x,feat_type,embed_type:
             OmegaConf.create([elem for elem in x if elem.feature_type == feat_type and elem.feature_embed_type == embed_type])
@@ -27,10 +29,12 @@
 OmegaConf.register_new_resolver("cmp", lambda x, y: x == y)
 OmegaConf.register_new_resolver("cont.lower", lambda x, y: y.lower() in x.lower())
 
-# XXX I don't know whether it is the best idea to allow user to sum over nested structure without checks
 def sum_nested(*args):
     if len(args) == 1 and isinstance(args[0], (int, float)):
         return args[0]
     return sum(arg if isinstance(arg, (int, float)) else sum_nested(*arg) for arg in args)
 
 OmegaConf.register_new_resolver("sum", sum_nested)
+
+def partial(func, *args, **kwargs):  # Return a functools.partial with *args/**kwargs pre-bound to the callable looked up from `func`.
+    return functools.partial(get_method(func), *args, **kwargs)  # `func` is first passed through hydra.utils.get_method (presumably a dotted import path -- TODO confirm against callers)
@@ -1,18 +1,5 @@
-# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#           http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
+# SPDX-License-Identifier: Apache-2.0
 defaults:
   - deployment: convert
 
-checkpoint: ???
+checkpoint: ???
@@ -0,0 +1,92 @@
+# SPDX-License-Identifier: MIT
+_target_: data.datasets.create_datasets
+config:
+    source_path: /workspace/datasets/M5/M5.csv
+    dest_path: /workspace/datasets/M5/
+    iterable: False
+    encoder_length: 28
+    input_length: 28
+    example_length: 56
+    valid_boundary: '2016-04-25'
+    train_samples: 1000000
+    time_series_count: 30490
+    drop_unseen: True
+    MultiID: False
+    features:
+      - name: 'id'
+        feature_type: 'ID'
+        feature_embed_type: 'CATEGORICAL'
+        cardinality: 30490
+      - name: "date"
+        feature_type: 'TIME'
+        feature_embed_type: 'DATE'
+      - name: "weight"
+        feature_type: 'WEIGHT'
+        feature_embed_type: 'CONTINUOUS'
+      - name: "item_id"
+        feature_type: 'STATIC'
+        feature_embed_type: 'CATEGORICAL'
+        cardinality: 3050
+      - name: "dept_id"
+        feature_type: 'STATIC'
+        feature_embed_type: 'CATEGORICAL'
+        cardinality: 8
+      - name: "cat_id"
+        feature_type: 'STATIC'
+        feature_embed_type: 'CATEGORICAL'
+        cardinality: 4
+      - name: "store_id"
+        feature_type: 'STATIC'
+        feature_embed_type: 'CATEGORICAL'
+        cardinality: 11
+      - name: "state_id"
+        feature_type: 'STATIC'
+        feature_embed_type: 'CATEGORICAL'
+        cardinality: 4
+      - name: "items_sold"
+        feature_type: 'TARGET'
+        feature_embed_type: 'CONTINUOUS'
+        scaler:
+            _target_: data.data_utils.Log1pScaler
+      - name: "wday"
+        feature_type: 'KNOWN'
+        feature_embed_type: 'CATEGORICAL'
+        cardinality: 8
+      - name: "month"
+        feature_type: 'KNOWN'
+        feature_embed_type: 'CATEGORICAL'
+        cardinality: 13
+      - name: "event_name_1"
+        feature_type: 'KNOWN'
+        feature_embed_type: 'CATEGORICAL'
+        cardinality: 31
+      - name: "event_type_1"
+        feature_type: 'KNOWN'
+        feature_embed_type: 'CATEGORICAL'
+        cardinality: 5
+      - name: "event_type_2"
+        feature_type: 'KNOWN'
+        feature_embed_type: 'CATEGORICAL'
+        cardinality: 3
+      - name: "event_name_2"
+        feature_type: 'KNOWN'
+        feature_embed_type: 'CATEGORICAL'
+        cardinality: 5
+      - name: "snap_CA"
+        feature_type: 'KNOWN'
+        feature_embed_type: 'CATEGORICAL'
+        cardinality: 3
+      - name: "snap_TX"
+        feature_type: 'KNOWN'
+        feature_embed_type: 'CATEGORICAL'
+        cardinality: 3
+      - name: "snap_WI"
+        feature_type: 'KNOWN'
+        feature_embed_type: 'CATEGORICAL'
+        cardinality: 3
+      - name: "sell_price"
+        feature_type: 'KNOWN'
+        feature_embed_type: 'CONTINUOUS'
+        scaler:
+            _target_: sklearn.preprocessing.StandardScaler
+    binarized: True