From 313c60bbf55aeec0bd57074d9ab14deddd1a3fec Mon Sep 17 00:00:00 2001 From: SumanthRH Date: Mon, 22 Jan 2024 13:02:37 -0800 Subject: [PATCH 1/5] update requirements and add float16 support --- requirements.txt | 22 +++++++++++----------- setup.py | 10 +++++----- src/ecco/__init__.py | 5 +++-- src/ecco/lm.py | 12 +++++------- src/ecco/model-config.yaml | 23 +++++++++++++++++++++++ 5 files changed, 47 insertions(+), 25 deletions(-) diff --git a/requirements.txt b/requirements.txt index 239b132..b812e2e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,11 @@ -matplotlib~=3.3.1 -numpy~=1.19.1 -ipython~=7.16.1 -scikit-learn~=0.24.2 -seaborn~=0.11.0 -transformers~=4.6.1 -pytest~=6.1.2 -setuptools~=49.6.0 -torch~=1.9.0 -PyYAML==5.4.1 -captum==0.4.1 +matplotlib==3.8.2 +numpy==1.26.2 +ipython==8.18.1 +scikit-learn==1.3.2 +seaborn==0.13.0 +transformers==4.36.2 +pytest==7.4.3 +setuptools==68.2.2 +torch==2.1.1 +PyYAML==6.0.1 +captum==0.6.0 diff --git a/setup.py b/setup.py index 96cc39d..e8bc591 100644 --- a/setup.py +++ b/setup.py @@ -65,11 +65,11 @@ def read(*names, **kwargs): ], python_requires='!=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*', install_requires=[ - "transformers ~= 4.2", - "seaborn ~= 0.11", - "scikit-learn~=0.23", - "PyYAML~=5.4", - "captum ~= 0.4" + "transformers ~= 4.36", + "seaborn ~= 0.13", + "scikit-learn ~= 1.3", + "PyYAML ~= 6.0", + "captum ~= 0.6" ], extras_require={ "dev": [ diff --git a/src/ecco/__init__.py b/src/ecco/__init__.py index 91c0ecc..d0d96e8 100644 --- a/src/ecco/__init__.py +++ b/src/ecco/__init__.py @@ -26,7 +26,8 @@ def from_pretrained(hf_model_id: str, hidden_states: Optional[bool] = True, activations_layer_nums: Optional[List[int]] = None, verbose: Optional[bool] = True, - gpu: Optional[bool] = True + gpu: Optional[bool] = True, + **model_kwargs: Dict[str, Any] ): """ Constructs a [LM][ecco.lm.LM] object based on a string identifier from HuggingFace Transformers. This is @@ -80,7 +81,7 @@ def from_pretrained(hf_model_id: str, else: model_cls = AutoModel - model = model_cls.from_pretrained(hf_model_id, output_hidden_states=hidden_states, output_attentions=attention) + model = model_cls.from_pretrained(hf_model_id, output_hidden_states=hidden_states, output_attentions=attention, **model_kwargs) lm_kwargs = { 'model_name': hf_model_id, diff --git a/src/ecco/lm.py b/src/ecco/lm.py index 97d3330..26517e5 100644 --- a/src/ecco/lm.py +++ b/src/ecco/lm.py @@ -67,10 +67,6 @@ def __init__(self, if torch.cuda.is_available() and gpu: self.model = model.to('cuda') - self.device = 'cuda' if torch.cuda.is_available() \ - and self.model.device.type == 'cuda' \ - else 'cpu' - self.tokenizer = tokenizer self.verbose = verbose self._path = os.path.dirname(ecco.__file__) @@ -104,6 +100,10 @@ def __init__(self, # we're running it before every d.HTML cell # d.display(d.HTML(filename=os.path.join(self._path, "html", "setup.html"))) + @property + def device(self): + return self.model.device + def _reset(self): self._all_activations_dict = defaultdict(dict) self.activations = defaultdict(dict) @@ -114,9 +114,7 @@ def _reset(self): self._hooks = {} def to(self, tensor: Union[torch.Tensor, BatchEncoding]): - if self.device == 'cuda': - return tensor.to('cuda') - return tensor + return tensor.to(self.device).to(self.model.dtype) def _analyze_token(self, encoder_input_embeds: torch.Tensor, diff --git a/src/ecco/model-config.yaml b/src/ecco/model-config.yaml index 6dffb2d..cdf9603 100644 --- a/src/ecco/model-config.yaml +++ b/src/ecco/model-config.yaml @@ -342,3 +342,26 @@ EleutherAI/gpt-neo-2.7B: - 'mlp\.c_proj' token_prefix: ' ' partial_token_prefix: '' + +# Llama +openlm-research/open_llama_3b: + embedding: "model.embed_tokens" + type: 'causal' + activations: + - 'mlp\.up_proj' #This is a regex + token_prefix: '▁' + partial_token_prefix: '' +meta-llama/Llama-2-7b: + embedding: "model.embed_tokens" + type: 'causal' + activations: + - 'mlp\.up_proj' #This is a regex + token_prefix: '▁' + partial_token_prefix: '' +meta-llama/Llama-2-13b: + embedding: "model.embed_tokens" + type: 'causal' + activations: + - 'mlp\.up_proj' #This is a regex + token_prefix: '▁' + partial_token_prefix: '' \ No newline at end of file From 361a231b354f5434088464b2a2431830fe3a38ef Mon Sep 17 00:00:00 2001 From: SumanthRH Date: Mon, 22 Jan 2024 13:13:42 -0800 Subject: [PATCH 2/5] minor edit --- src/ecco/lm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ecco/lm.py b/src/ecco/lm.py index 26517e5..208170e 100644 --- a/src/ecco/lm.py +++ b/src/ecco/lm.py @@ -114,7 +114,7 @@ def _reset(self): self._hooks = {} def to(self, tensor: Union[torch.Tensor, BatchEncoding]): - return tensor.to(self.device).to(self.model.dtype) + return tensor.to(self.device) def _analyze_token(self, encoder_input_embeds: torch.Tensor, @@ -519,7 +519,7 @@ def _get_embeddings(self, input_ids) -> Tuple[torch.FloatTensor, torch.FloatTens vocab_size = embedding_matrix.shape[0] - one_hot_tensor = self.to(_one_hot_batched(input_ids, vocab_size)) + one_hot_tensor = self.to(_one_hot_batched(input_ids, vocab_size)).to(self.model.dtype) token_ids_tensor_one_hot = one_hot_tensor.clone().requires_grad_(True) inputs_embeds = torch.matmul(token_ids_tensor_one_hot, embedding_matrix) From 2c95e5248436fed656c903b9aac85efbda0a2459 Mon Sep 17 00:00:00 2001 From: SumanthRH Date: Mon, 22 Jan 2024 13:17:34 -0800 Subject: [PATCH 3/5] minor edit --- src/ecco/lm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ecco/lm.py b/src/ecco/lm.py index 208170e..3755876 100644 --- a/src/ecco/lm.py +++ b/src/ecco/lm.py @@ -141,7 +141,7 @@ def _analyze_token(self, 'decoder_inputs_embeds': decoder_input_embeds }, prediction_id=prediction_id - ).cpu().detach().numpy() + ).float().cpu().detach().numpy() # cast to float32 before numpy conversion ) def generate(self, input_str: str, From dd8a7d7ffd05a7dfa772db19d12a2707df016b22 Mon Sep 17 00:00:00 2001 From: SumanthRH Date: Mon, 22 Jan 2024 13:20:28 -0800 Subject: [PATCH 4/5] minor edit --- src/ecco/lm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ecco/lm.py b/src/ecco/lm.py index 3755876..0190da6 100644 --- a/src/ecco/lm.py +++ b/src/ecco/lm.py @@ -591,7 +591,7 @@ def _get_activations_hook(self, name: str, input_): # overwrite the previous step activations. This collects all activations in the last step # Assuming all input tokens are presented as input, no "past" # The inputs to c_proj already pass through the gelu activation function - self._all_activations_dict[layer_type][layer_number] = input_[0].detach().cpu().numpy() + self._all_activations_dict[layer_type][layer_number] = input_[0].detach().float().cpu().numpy() def _inhibit_neurons_hook(self, name: str, input_tensor): """ From cbe80fe75c8ddda39698cb0f7da07d0c389d82ff Mon Sep 17 00:00:00 2001 From: SumanthRH Date: Mon, 22 Jan 2024 13:29:31 -0800 Subject: [PATCH 5/5] update output casting --- src/ecco/output.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/ecco/output.py b/src/ecco/output.py index b42b9ad..a7a8c89 100644 --- a/src/ecco/output.py +++ b/src/ecco/output.py @@ -112,9 +112,7 @@ def __str__(self): return "".format(self.output_text, len(self._get_hidden_states()[1][-1])) def to(self, tensor: torch.Tensor): - if self.device == 'cuda': - return tensor.to('cuda') - return tensor + return tensor.to(self.device) def explorable(self, printJson: Optional[bool] = False): @@ -394,7 +392,7 @@ def layer_predictions(self, position: int = 1, topk: Optional[int] = 10, layer: layer_top_tokens = [self.tokenizer.decode(t) for t in sorted_softmax[-k:]][::-1] top_tokens.append(layer_top_tokens) - layer_probs = softmax[sorted_softmax[-k:]].cpu().detach().numpy()[::-1] + layer_probs = softmax[sorted_softmax[-k:]].float().cpu().detach().numpy()[::-1] probs.append(layer_probs.tolist()) # Package in output format