diff --git a/torchbenchmark/canary_models/hf_Yi/__init__.py b/torchbenchmark/canary_models/hf_Yi/__init__.py
new file mode 100644
index 0000000000..5aedde8855
--- /dev/null
+++ b/torchbenchmark/canary_models/hf_Yi/__init__.py
@@ -0,0 +1,17 @@
+from torchbenchmark.tasks import NLP
+from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel
+
+class Model(HuggingFaceModel):
+    task = NLP.LANGUAGE_MODELING
+    # DEFAULT_TRAIN_BSIZE not specified since we're not implementing a train test
+    # DEFAULT_TRAIN_BSIZE = 1
+    DEFAULT_EVAL_BSIZE = 1
+
+    def __init__(self, test, device, batch_size=None, extra_args=[]):
+        super().__init__(name="hf_Yi", test=test, device=device, batch_size=batch_size, extra_args=extra_args)
+
+    def train(self):
+        raise NotImplementedError("Not implemented")
+
+    def eval(self):
+        super().eval()
\ No newline at end of file
diff --git a/torchbenchmark/canary_models/hf_Yi/install.py b/torchbenchmark/canary_models/hf_Yi/install.py
new file mode 100644
index 0000000000..64e5b1127e
--- /dev/null
+++ b/torchbenchmark/canary_models/hf_Yi/install.py
@@ -0,0 +1,13 @@
+import subprocess
+import sys
+import os
+from torchbenchmark.util.framework.huggingface.patch_hf import patch_transformers, cache_model
+
+def pip_install_requirements():
+    subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', '-r', 'requirements.txt'])
+
+if __name__ == '__main__':
+    pip_install_requirements()
+    patch_transformers()
+    model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
+    cache_model(model_name, trust_remote_code=True)
\ No newline at end of file
diff --git a/torchbenchmark/canary_models/hf_Yi/metadata.yaml b/torchbenchmark/canary_models/hf_Yi/metadata.yaml
new file mode 100644
index 0000000000..19877db021
--- /dev/null
+++ b/torchbenchmark/canary_models/hf_Yi/metadata.yaml
@@ -0,0 +1,11 @@
+devices:
+  NVIDIA A100-SXM4-40GB:
+    eval_batch_size: 1
+eval_benchmark: false
+eval_deterministic: false
+eval_nograd: true
+train_benchmark: false
+train_deterministic: false
+not_implemented:
+  - device: NVIDIA A10G
+  # - device: cpu
\ No newline at end of file
diff --git a/torchbenchmark/canary_models/hf_Yi/requirements.txt b/torchbenchmark/canary_models/hf_Yi/requirements.txt
new file mode 100644
index 0000000000..fd0728f16f
--- /dev/null
+++ b/torchbenchmark/canary_models/hf_Yi/requirements.txt
@@ -0,0 +1 @@
+numba
\ No newline at end of file
diff --git a/torchbenchmark/util/framework/huggingface/model_factory.py b/torchbenchmark/util/framework/huggingface/model_factory.py
index c0975e6494..995d5bec91 100644
--- a/torchbenchmark/util/framework/huggingface/model_factory.py
+++ b/torchbenchmark/util/framework/huggingface/model_factory.py
@@ -34,7 +34,8 @@
     'llama_v2_7b' : (512,512, 'AutoConfig.from_pretrained("meta-llama/Llama-2-7b-hf")', 'AutoModelForCausalLM'),
     'llama_v2_13b' : (512,512, 'AutoConfig.from_pretrained("meta-llama/Llama-2-13b-hf")', 'AutoModelForCausalLM'),
     'llama_v2_70b' : (512, 512, 'AutoConfig.from_pretrained("meta-llama/Llama-2-70b-hf")', 'AutoModelForMaskedLM'),
-    'phi_1_5' : (512, 512, 'AutoConfig.from_pretrained("microsoft/phi-1_5", trust_remote_code=True)', 'AutoModelForCausalLM')
+    'phi_1_5' : (512, 512, 'AutoConfig.from_pretrained("microsoft/phi-1_5", trust_remote_code=True)', 'AutoModelForCausalLM'),
+    'hf_Yi' : (512, 512, 'AutoConfig.from_pretrained("01-ai/Yi-6B", trust_remote_code=True)', 'AutoModelForCausalLM'),
 }
 
 cpu_input_slice = {
@@ -86,7 +87,8 @@ def __init__(self, name, test, device, batch_size=None, extra_args=[]):
             config.num_buckets = 128
         class_ctor = getattr(transformers, class_models[name][3])
         kwargs = {}
-        if name == "hf_Falcon_7b" or name == "hf_MPT_7b_instruct" or name == "phi_1_5":
+        remote_code_required = ['hf_Falcon_7b', 'hf_MPT_7b_instruct', 'phi_1_5', 'hf_Yi']
+        if name in remote_code_required:
             kwargs["trust_remote_code"] = True
         self.model = class_ctor.from_config(config, **kwargs).to(device)
         self.optimizer = optim.Adam(
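
For a quick sanity check of the new entry, the benchmark class added above can be exercised directly. This is a minimal sketch, not part of the patch: it assumes the patch is applied, torchbenchmark is importable, a CUDA device is available, and the 01-ai/Yi-6B config can be fetched with trust_remote_code=True; the object name "benchmark" is illustrative only.

# Hypothetical smoke test for the hf_Yi canary model (a sketch, not part of this diff).
# Assumes torchbenchmark with this patch is installed and the Yi-6B config is reachable.
from torchbenchmark.canary_models.hf_Yi import Model

benchmark = Model(test="eval", device="cuda", batch_size=1)
benchmark.eval()  # one inference pass through the causal LM built from the Yi-6B config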