hpcaitech · juncongmoo · Mar 7, 2023 · ver217 · Mar 20, 2023 · juncongmoo
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -0,0 +1,4 @@
+include version.txt
+include requirements.txt
+
+
diff --git a/README.md b/README.md
@@ -1,6 +1,7 @@
-<!-- <div  align="center">    
+<div  align="center">
     <img src="https://user-images.githubusercontent.com/12018307/170214566-b611b131-fff1-41c0-9447-786a8a6f0bac.png" width = "600" height = "148" alt="Architecture" align=center />
-</div> -->
+</div>
+
 # Energon-AI
 
 ![](https://img.shields.io/badge/Made%20with-ColossalAI-blueviolet?style=flat)
@@ -17,23 +18,34 @@ For models trained by [Colossal-AI](https://github.com/hpcaitech/ColossalAI), th
 For single-device models, they require manual coding work to introduce tensor parallelism and pipeline parallelism.
 
 
-### Installation
-**Install from source**
+## Installation
+
+There are three ways to install energonai.
+
+- **Install from PyPI**
+
+``` bash
+pip install energonai
+```
+
+
+- **Install from source**
 ``` bash
 $ git clone git@github.com:hpcaitech/EnergonAI.git
 $ pip install -r requirements.txt
 $ pip install .
 ```
-**Use docker**
+
+- **Use docker**
 ``` bash
 $ docker pull hpcaitech/energon-ai:latest
 ```
 
 
-### Build an online OPT service in 5 minutes
+## Build an online OPT service in 5 minutes
 
 1. **Download OPT model:**
-  To launch the distributed inference service quickly, you can download the checkpoint of OPT-125M [here](https://huggingface.co/patrickvonplaten/opt_metaseq_125m/blob/main/model/restored.pt). You can get details for loading other sizes of models [here](https://github.com/hpcaitech/EnergonAI/tree/main/examples/opt/script).
+To launch the distributed inference service quickly, you can download the checkpoint of OPT-125M [here](https://huggingface.co/patrickvonplaten/opt_metaseq_125m/blob/main/model/restored.pt). You can get details for loading other sizes of models [here](https://github.com/hpcaitech/EnergonAI/tree/main/examples/opt/script).
 
 2. **Launch an HTTP service:**
 To launch a service, we need to provide Python scripts that describe the model type and related configurations, and then start an HTTP service.
@@ -55,7 +67,7 @@ For example, set the model class as opt_125M and set the correct checkpoint path
     Then open ***https://[ip]:[port]/docs*** in your browser and try it out!
 
 
-### Publication
+## Publication
 You can find technical details in our blog and manuscript:
 
 [Build an online OPT service using Colossal-AI in 5 minutes](https://www.colossalai.org/docs/advanced_tutorials/opt_service/)
@@ -73,8 +85,8 @@ You can find technical details in our blog and manuscript:
 }
 ```
 
-### Contributing
+## Contributing
 
 If you are interested in contributing to the project, please refer to [Contributing](./CONTRIBUTING.md) for guidance.
 
-Thanks so much!
+Thanks so much!
diff --git a/energonai/__init__.py b/energonai/__init__.py
@@ -4,3 +4,4 @@
 
 
 __all__ = ['BatchManager', 'launch_engine', 'SubmitEntry', 'TaskEntry', 'QueueFullError']
+__version__='0.0.2'
diff --git a/examples/bloom/run.sh b/examples/bloom/run.sh
diff --git a/requirements.txt b/requirements.txt
@@ -1,9 +1,10 @@
-numpy
+colossalai
 tqdm
 psutil
 packaging
-fastapi~=0.75.1
-uvicorn==0.14
+numpy~=1.23.5
+fastapi~=0.92.0
+uvicorn~=0.20.0
 typer
 redis
 scipy
@@ -12,11 +13,10 @@ requests
 click
 transformers
 readerwriterlock
---extra-index-url https://download.pytorch.org/whl/cu113
 torch
 torchvision
-torchaudio 
-colossalai
+torchaudio
 omegaconf
+hiq-python
 prometheus-fastapi-instrumentator
 
diff --git a/setup.py b/setup.py
@@ -1,21 +1,23 @@
 import os
 import subprocess
 import sys
-
+import hiq
 import torch
 from setuptools import setup, find_packages
 from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CUDA_HOME
 
 # ninja build does not work unless include_dirs are abs path
 this_dir = os.path.dirname(os.path.abspath(__file__))
-build_cuda_ext = True
+build_cuda_ext = torch.cuda.is_available()
 
 if '--no_cuda_ext' in sys.argv:
     sys.argv.remove('--no_cuda_ext')
     build_cuda_ext = False
 
 
 def get_cuda_bare_metal_version(cuda_dir):
+    if cuda_dir is None or not os.path.exists(cuda_dir + "/bin/nvcc"):
+      return [None]*3
     raw_output = subprocess.check_output([cuda_dir + "/bin/nvcc", "-V"], universal_newlines=True)
     output = raw_output.split()
     release_idx = output.index("release") + 1
@@ -35,12 +37,11 @@ def check_cuda_torch_binary_vs_bare_metal(cuda_dir):
     print(raw_output + "from " + cuda_dir + "/bin\n")
 
     if (bare_metal_major != torch_binary_major) or (bare_metal_minor != torch_binary_minor):
-        raise RuntimeError("Cuda extensions are being compiled with a version of Cuda that does " +
-                           "not match the version used to compile Pytorch binaries.  " +
-                           "Pytorch binaries were compiled with Cuda {}.\n".format(torch.version.cuda) +
-                           "In some cases, a minor-version mismatch will not cause later errors:  " +
-                           "https://github.com/NVIDIA/apex/pull/323#discussion_r287021798.  "
-                           "You can try commenting out this check (at your own risk).")
+      print("*"*40 + "!!!Warning!!!" + "*"*40)
+      print(f"CUDA(nvcc) version({bare_metal_major}.{bare_metal_minor}) does not match the version({torch.version.cuda}) used to compile Pytorch binaries.")
+      print(f"We strongly recommend you reinstall Pytorch compiled with CUDA version {bare_metal_major}.{bare_metal_minor}.")
+      print("In some cases, even a minor-version mismatch will cause subtle error. Pleas refer to: https://github.com/NVIDIA/apex/pull/323#discussion_r287021798.")
+      print("*"*90)
 
 
 def append_nvcc_threads(nvcc_extra_args):
@@ -50,11 +51,6 @@ def append_nvcc_threads(nvcc_extra_args):
     return nvcc_extra_args
 
 
-def fetch_requirements(path):
-    with open(path, 'r') as fd:
-        return [r.strip() for r in fd.readlines()]
-
-
 if not torch.cuda.is_available():
     # https://github.com/NVIDIA/apex/issues/486
     # Extension builds after https://github.com/pytorch/pytorch/pull/23408 attempt to
@@ -69,14 +65,14 @@ def fetch_requirements(path):
         'and, if the CUDA version is >= 11.0, Ampere (compute capability 8.0).\n'
         'If you wish to cross-compile for a single specific architecture,\n'
         'export TORCH_CUDA_ARCH_LIST="compute capability" before running setup.py.\n')
-    if os.environ.get("TORCH_CUDA_ARCH_LIST", None) is None:
+    if CUDA_HOME is not None and os.environ.get("TORCH_CUDA_ARCH_LIST", None) is None:
         _, bare_metal_major, _ = get_cuda_bare_metal_version(CUDA_HOME)
         if int(bare_metal_major) == 11:
             os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5;8.0"
         else:
             os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5"
 
-print("\n\ntorch.__version__  = {}\n\n".format(torch.__version__))
+print("torch.__version__  = {}".format(torch.__version__))
 TORCH_MAJOR = int(torch.__version__.split('.')[0])
 TORCH_MINOR = int(torch.__version__.split('.')[1])
 
@@ -160,10 +156,25 @@ def get_version():
             version += f'+torch{torch_version}cu{cuda_version}'
         return version
 
+def package_files(ds):
+    paths = []
+    for d in ds:
+        for (path, directories, filenames) in os.walk(d):
+            for filename in filenames:
+                if '__pycache__' not in str(filename):
+                    paths.append(str(os.path.join(path, filename))[len('energonai/'):])
+    return paths
 
+extra_files = package_files(['energonai/'])
+
+#print("ext_modules:", ext_modules)
+#print("extra_files:", extra_files)
 setup(
     name='energonai',
-    version=get_version(),
+    maintainer='Juncong Moo;Open Source Community;HPCAiTech',
+    url='https://github.com/hpcaitech/EnergonAI',
+    maintainer_email='juncongmoo@gmail.com',
+    version=hiq.read_file('version.txt')[0],
     packages=find_packages(
         exclude=(
             'benchmark',
@@ -173,17 +184,32 @@ def get_version():
             'examples',
             'tests',
             'scripts',
-            'requirements',
             '*.egg-info',
             'dist',
             'build',
         )),
-    description='Large-scale Model Inference',
+    description='EnergonAI: An Inference System for Large Transformer Models',
+    long_description=hiq.read_file('README.md', by_line=False),
+    long_description_content_type="text/markdown",
     license='Apache Software License 2.0',
     ext_modules=ext_modules,
     cmdclass={'build_ext': BuildExtension} if ext_modules else {},
-    #   install_requires=fetch_requirements('requirements.txt'),
+    install_requires=hiq.read_file('requirements.txt'),
     entry_points={
         'console_scripts': ['energonai=energonai.cli:typer_click_object', ],
     },
+    package_data={"energonai": extra_files, "": ['requirements.txt']},
+    classifiers=[
+        'Operating System :: POSIX :: Linux',
+        'Programming Language :: Python :: 3',
+        'Programming Language :: Python :: 3.7',
+        'Programming Language :: Python :: 3.8',
+        'Programming Language :: Python :: 3.9',
+        'Programming Language :: Python :: 3.10',
+        'Programming Language :: Python :: 3.11',
+        'Programming Language :: Python :: Implementation :: CPython',
+        'Programming Language :: Python :: Implementation :: PyPy',
+        'Topic :: Scientific/Engineering :: Artificial Intelligence',
+    ],
 )
+
diff --git a/version.txt b/version.txt
@@ -1 +1 @@
-0.0.1
+0.0.2
Original file line number	Diff line number	Diff line change
Expand Up		@@ -4,3 +4,4 @@


		__all__ = ['BatchManager', 'launch_engine', 'SubmitEntry', 'TaskEntry', 'QueueFullError']
		__version__='0.0.2'