diff --git a/docs/README.md b/docs/README.md
index 9fe094f9..5e36411c 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -14,7 +14,10 @@ And then, change directory to docs folder to build the docs.
 ```sh
 cd docs/
-sphinx-build -M html . build
+# Build the docs for the current branch
+sphinx-build -M html . build/html
+# [Optional] Build the docs for all supported branches
+sphinx-multiversion . build/html
 ```
 
 ## Preview the docs locally
@@ -22,4 +25,4 @@ sphinx-build -M html . build
 cd build/html
 python -m http.server
 ```
-You can visit the page with your web browser with url `http://localhost:8080`.
\ No newline at end of file
+You can view the page in your web browser at `http://localhost:8000`.
diff --git a/docs/_templates/versions.html b/docs/_templates/versions.html
new file mode 100644
index 00000000..e3aa8a1e
--- /dev/null
+++ b/docs/_templates/versions.html
@@ -0,0 +1,13 @@
+<div class="rst-versions" data-toggle="rst-versions" role="note" aria-label="versions">
+  <span class="rst-current-version" data-toggle="rst-current-version">
+    Version: Main
+    <span class="fa fa-caret-down"></span>
+  </span>
+  <div class="rst-other-versions">
+    <dl>
+      <dt>Versions</dt>
+      <dd><a href="main">main</a></dd>
+      <dd><a href="release/v1.18">release/v1.18</a></dd>
+    </dl>
+  </div>
+</div>
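The template above hardcodes its two entries, but sphinx-multiversion can also discover refs on its own, driven from conf.py. As a sketch only (the option names are from the sphinx-multiversion documentation; the regex values are illustrative assumptions for this repo's `main` and `release/v1.18` branches and are not part of this patch), such configuration would look like:

```python
# Illustrative sphinx-multiversion settings (assumed values, not in this patch)
smv_branch_whitelist = r"^(main|release/v1\.18)$"  # branches to build
smv_tag_whitelist = r"^$"                          # build no tags
smv_remote_whitelist = r"^origin$"                 # also match branches on the origin remote
smv_outputdir_format = "{ref.name}"                # one output subdir per ref, e.g. build/html/main
```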
diff --git a/docs/conf.py b/docs/conf.py
index 55881fb4..3f254e26 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -37,7 +37,7 @@
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
-extensions = ["myst_parser", "sphinx.ext.todo", "sphinx.ext.viewcode", "sphinx.ext.autodoc"]
+extensions = ["myst_parser", "sphinx.ext.todo", "sphinx.ext.viewcode", "sphinx.ext.autodoc", "sphinx_multiversion"]
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ["_templates"]
diff --git a/docs/index.md b/docs/index.md
index dd4b7ecc..965bccff 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -28,6 +28,12 @@ source/validate
 source/installation
 ```
 
+```{toctree}
+:caption: 'Upgrade Efficient-Transformers'
+:maxdepth: 2
+
+source/upgrade
+```
 
 ```{toctree}
 :caption: 'Quick start'
diff --git a/docs/source/blogs.md b/docs/source/blogs.md
index 6e20dd7f..efe6e158 100644
--- a/docs/source/blogs.md
+++ b/docs/source/blogs.md
@@ -11,5 +11,5 @@
 [click here](https://www.qualcomm.com/developer/blog/2024/01/qualcomm-cloud-ai-100-accelerates-large-language-model-inference-2x-using-microscaling-mx)
 
 # Qualcomm Cloud AI Introduces Efficient Transformers: One API, Infinite Possibilities
-[click here](https://www.qualcomm.com/developer/blog/2024/05/qualcomm-cloud-ai-introduces-efficient-transformers--one-api--in)
+[click here](https://www.qualcomm.com/developer/blog/2024/05/qualcomm-cloud-ai-introduces-efficient-transformers-one-api)
diff --git a/docs/source/installation.md b/docs/source/installation.md
index 1a08928f..ae9742ce 100644
--- a/docs/source/installation.md
+++ b/docs/source/installation.md
@@ -1,43 +1,42 @@
 # Pre-requisites
 System Requirements:
 1. [Supported Linux OS](https://quic.github.io/cloud-ai-sdk-pages/latest/Getting-Started/Installation/#operating-systems) - Ubuntu, RHEL and AWS Linux
-2. [Cloud AI 100 Platform and Apps SDK installed](https://quic.github.io/cloud-ai-sdk-pages/latest/Getting-Started/Installation/Cloud-AI-SDK/Cloud-AI-SDK/)
+2. [Cloud AI 100 Platform SDK installed](https://quic.github.io/cloud-ai-sdk-pages/latest/Getting-Started/Installation/Cloud-AI-SDK/Cloud-AI-SDK/#platform-sdk)
 3. [SDK Pre-requisites](https://quic.github.io/cloud-ai-sdk-pages/latest/Getting-Started/Installation/Pre-requisites/pre-requisites/)
 4. [Multi-device support enabled for model sharding](https://github.com/quic/cloud-ai-sdk/tree/1.12/utils/multi-device)
 
-# Linux Installation
-There are two different way to install efficient-transformers.
+# Installation
 
-## Using SDK
+### 1. Download Apps SDK
+ * [Cloud AI 100 Apps SDK install](https://quic.github.io/cloud-ai-sdk-pages/latest/Getting-Started/Installation/Cloud-AI-SDK/Cloud-AI-SDK/)
 
-* Download Apps SDK: [Cloud AI 100 Platform and Apps SDK install](https://quic.github.io/cloud-ai-sdk-pages/latest/Getting-Started/Installation/Cloud-AI-SDK/Cloud-AI-SDK/)
-
-
-```bash
-# Install using Apps SDK
-
-bash install.sh --enable-qeff
+### 2. Install Efficient-Transformers
+Uninstall the existing Apps SDK:
+```
+sudo ./uninstall.sh
+```
+Run the install.sh script with sudo or as root, since installation requires root permissions:
+```
+sudo ./install.sh --enable-qeff
 source /opt/qti-aic/dev/python/qeff/bin/activate
-
 ```
-
-## Using GitHub Repository
-
-```bash
-
-# Create Python virtual env and activate it. (Required Python 3.8)
-
-python3.8 -m venv qeff_env
-source qeff_env/bin/activate
-pip install -U pip
-
-# Clone and Install the QEfficient Repo.
-pip install git+https://github.com/quic/efficient-transformers
-
-```
+On successful installation, the contents are placed under the /opt/qti-aic path in the following directories:
+```
+dev  exec  integrations  scripts
+```
+Check the Apps SDK version with the following command:
+```
+sudo /opt/qti-aic/tools/qaic-version-util --apps
+```
+Make the installed tools executable:
+```
+sudo chmod a+x /opt/qti-aic/dev/hexagon_tools/bin/*
+sudo chmod a+x /opt/qti-aic/exec/*
+```
 
 # Sanity Check
-After any of the above installation methods, you can check if ``QEfficient`` is installed correctly by using
+After completing the installation, you can check that ``QEfficient`` is installed correctly by running:
 ```bash
 python -c "import QEfficient; print(QEfficient.__version__)"
 ```
diff --git a/docs/source/reference.md b/docs/source/reference.md
index 37d1ddad..9a1c5260 100644
--- a/docs/source/reference.md
+++ b/docs/source/reference.md
@@ -3,4 +3,4 @@
 # [Qualcomm Cloud AI SDK download](https://www.qualcomm.com/products/technology/processors/cloud-artificial-intelligence/cloud-ai-100#Software)
 # [Qualcomm Cloud AI API reference](https://quic.github.io/cloud-ai-sdk-pages/latest/API/)
 # [User Guide](https://quic.github.io/cloud-ai-sdk-pages/)
-# [OCP Microscaling Formats (MX) Specification](https://www.qualcomm.com/developer/blog/2024/05/6.%09https:/www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf)
\ No newline at end of file
+# [OCP Microscaling Formats (MX) Specification](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf)
\ No newline at end of file
diff --git a/docs/source/upgrade.md b/docs/source/upgrade.md
new file mode 100644
index 00000000..16e04b62
--- /dev/null
+++ b/docs/source/upgrade.md
@@ -0,0 +1,16 @@
+
+## Using GitHub Repository
+
+``Warning: Efficient Transformers is validated against a matching, compatible SDK version. Upgrading it may leave certain models incompatible.``
+
+```bash
+# Create a Python virtual env and activate it. (Requires Python 3.8)
+
+python3.8 -m venv qeff_env
+source qeff_env/bin/activate
+pip install -U pip
+
+# Clone and install the QEfficient repo.
+pip install git+https://github.com/quic/efficient-transformers
+
+```
\ No newline at end of file
diff --git a/docs/source/validate.md b/docs/source/validate.md
index c7384231..392a748b 100644
--- a/docs/source/validate.md
+++ b/docs/source/validate.md
@@ -1,41 +1,39 @@
 (validated_models)=
 # Validated Models
+``Note: All validated models support continuous batching (CB) functionality.``
+
+| Model Name | Model Support |
+| --- | --- |
+| [GPT2](https://huggingface.co/openai-community/gpt2) | ✔️ |
+| [Llama-3-8b](https://huggingface.co/meta-llama/Meta-Llama-3-8B) | ✔️ |
+| [Llama-3-70b](https://huggingface.co/meta-llama/Meta-Llama-3-70B) | ✔️ |
+| [Llama-2-70b](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) | ✔️ |
+| [Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) | ✔️ |
+| [Llama-2-13b-chat-hf](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf) | ✔️ |
+| [CodeLlama-7b-hf](https://huggingface.co/codellama/CodeLlama-7b-hf) | ✔️ |
+| [CodeLlama-13b-hf](https://huggingface.co/codellama/CodeLlama-13b-hf) | ✔️ |
+| [CodeLlama-34b-hf](https://huggingface.co/codellama/CodeLlama-34b-hf) | ✔️ |
+| [Salesforce/codegen25-7b-mono_P](https://huggingface.co/Salesforce/codegen25-7b-mono_P) | ✔️ |
+| [Salesforce/xgen-7b-8k-base](https://huggingface.co/Salesforce/xgen-7b-8k-base) | ✔️ |
+| [MPT-7b](https://huggingface.co/mosaicml/mpt-7b) | ✔️ |
+| [Mistral-7B-Instruct-v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) | ✔️ |
+| [Mixtral-8x7B](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1) | ✔️ |
+| [Vicuna-v0](https://huggingface.co/lmsys/vicuna-13b-delta-v0) | ✔️ |
+| [Vicuna-v1.3](https://huggingface.co/lmsys/vicuna-13b-v1.3) | ✔️ |
+| [Vicuna-v1.5](https://huggingface.co/lmsys/vicuna-13b-v1.5) | ✔️ |
+| [Qwen2-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2-1.5B-Instruct) | ✔️ |
+| [StarCoder2-15B](https://huggingface.co/bigcode/starcoder2-15b) | ✔️ |
+| [Phi3-Mini-4K-Instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) | ✔️ |
+| [Codestral-22B-v0.1](https://huggingface.co/mistralai/Codestral-22B-v0.1) | ✔️ |
+| [Falcon-40b](https://huggingface.co/tiiuae/falcon-40b) | ✔️ |
+| [GPT-J-6B](https://huggingface.co/EleutherAI/gpt-j-6b) | ✔️ |
+| [Jais-adapted-70b](https://huggingface.co/inceptionai/jais-adapted-70b) | ✔️ |
+| [Jais-adapted-13b-chat](https://huggingface.co/inceptionai/jais-adapted-13b-chat) | ✔️ |
+| [Jais-adapted-7b](https://huggingface.co/inceptionai/jais-adapted-7b) | ✔️ |
 
-| Model Name | Model Support | Continuous Batching Support |
-| --- | --- | --- |
-| [GPT2](https://huggingface.co/openai-community/gpt2) | ✔️ | ✔️ |
-| [Llama-3-8b](https://huggingface.co/meta-llama/Meta-Llama-3-8B) | ✔️ | ✔️ |
-| [Llama-3-70b](https://huggingface.co/meta-llama/Meta-Llama-3-70B) | ✔️ | ✔️ |
-| [Llama-2-70b](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) | ✔️ | ✔️ |
-| [Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) | ✔️ | ✔️ |
-| [Llama-2-13b-chat-hf](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf) | ✔️ | ✔️ |
-| [CodeLlama-7b-hf](https://huggingface.co/codellama/CodeLlama-7b-hf) | ✔️ | ✔️ |
-| [CodeLlama-13b-hf](https://huggingface.co/codellama/CodeLlama-13b-hf) | ✔️ | ✔️ |
-| [CodeLlama-34b-hf](https://huggingface.co/codellama/CodeLlama-34b-hf) | ✔️ | ✔️ |
-| [Salesforce/codegen25-7b-mono_P](https://huggingface.co/Salesforce/codegen25-7b-mono_P) | ✔️ | ✔️ |
-| [Salesforce/xgen-7b-8k-base](https://huggingface.co/Salesforce/xgen-7b-8k-base) | ✔️ | ✔️ |
-| [MPT-7b](https://huggingface.co/mosaicml/mpt-7b) | ✔️ | ✔️ |
-| [Mistral-7B-Instruct-v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) | ✔️ | ✔️ |
-| [Mixtral-8x7B](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1) | ✔️ | ✔️ |
-| [Vicuna-v0](https://huggingface.co/lmsys/vicuna-13b-delta-v0) | ✔️ | ✔️ |
-| [Vicuna-v1.3](https://huggingface.co/lmsys/vicuna-13b-v1.3) | ✔️ | ✔️ |
-| [Vicuna-v1.5](https://huggingface.co/lmsys/vicuna-13b-v1.5) | ✔️ | ✔️ |
-| [Qwen2-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2-1.5B-Instruct) | ✔️ | ✔️ |
-| [StarCoder2-15B](https://huggingface.co/bigcode/starcoder2-15b) | ✔️ | ✔️ |
-| [Phi3-Mini-4K-Instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) | ✔️ | ✔️ |
-| [Codestral-22B-v0.1](https://huggingface.co/mistralai/Codestral-22B-v0.1) | ✔️ | ✔️ |
-| [Falcon-40b](https://huggingface.co/tiiuae/falcon-40b) | ✔️ | ✔️ |
-| [GPT-J-6B](https://huggingface.co/EleutherAI/gpt-j-6b) | ✔️ | ✔️ |
-| [Jais-adapted-70b](https://huggingface.co/inceptionai/jais-adapted-70b) | ✔️ | ✔️ |
-| [Jais-adapted-13b-chat](https://huggingface.co/inceptionai/jais-adapted-13b-chat) | ✔️ | ✔️ |
-| [Jais-adapted-7b](https://huggingface.co/inceptionai/jais-adapted-7b) | ✔️ | ✔️ |
-| [granite-20b-code-base](https://huggingface.co/ibm-granite/granite-20b-code-base-8k) | ✔️ | ✔️ |
-| [granite-20b-code-instruct-8k](https://huggingface.co/ibm-granite/granite-20b-code-instruct-8k) | ✔️ | ✔️ |
-| [Starcoder1-15B](https://huggingface.co/bigcode/starcoder) | ✔️ | ✔️ |
 
 (coming_soon_models)=
 # Models Coming Soon
-
-* [Chatglm2-6b](https://huggingface.co/THUDM/chatglm2-6b)
-* [Baichuan2-7B-Base](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base)
+
 * [CohereForAI/c4ai-command-r-v01](https://huggingface.co/CohereForAI/c4ai-command-r-v01)
-* [databricks/dbrx-base](https://huggingface.co/databricks/dbrx-base)
\ No newline at end of file
+* [databricks/dbrx-base](https://huggingface.co/databricks/dbrx-base)
+* [Chatglm2-6b](https://huggingface.co/THUDM/chatglm2-6b)
+* [Baichuan2-7B-Base](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base)
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 06078473..61f72a64 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,7 +37,7 @@ dependencies = [
 
 [project.optional-dependencies]
 test = ["pytest","pytest-mock"]
-docs = ["Sphinx==7.1.2","sphinx-rtd-theme==2.0.0","myst-parser==3.0.1"]
+docs = ["Sphinx==7.1.2","sphinx-rtd-theme==2.0.0","myst-parser==3.0.1","sphinx-multiversion"]
 quality = ["black", "ruff", "hf_doc_builder@git+https://github.com/huggingface/doc-builder.git"]
 
 [build-system]
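Putting the pieces together (the README commands, the `sphinx_multiversion` extension, and the new `docs` extra), a contributor build might look like the following sketch; the clone location and preview step are assumptions rather than anything this patch prescribes:

```bash
# Illustrative end-to-end docs build (paths are assumptions)
git clone https://github.com/quic/efficient-transformers
cd efficient-transformers
pip install ".[docs]"                    # pulls Sphinx, myst-parser, sphinx-multiversion
cd docs/
sphinx-multiversion . build/html         # one HTML tree per configured branch
cd build/html && python -m http.server   # preview at http://localhost:8000
```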