From 0dea35c4e247265f9efd1472fde60867c81c028f Mon Sep 17 00:00:00 2001 From: AlexDuvalinho Date: Thu, 18 Apr 2024 04:32:38 -0400 Subject: [PATCH 01/27] fix cluster issue gpus-per-task --- mila/sbatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mila/sbatch.py b/mila/sbatch.py index f2cc71538..ed8fa878d 100644 --- a/mila/sbatch.py +++ b/mila/sbatch.py @@ -41,7 +41,7 @@ conda activate {env} fi {wandb_offline} -srun --gpus-per-task=1 --output={output} {python_command} +srun --output={output} {python_command} """ From 904fea19b457440d405e13321f3af68497dd9fe9 Mon Sep 17 00:00:00 2001 From: AlexDuvalinho Date: Thu, 18 Apr 2024 08:35:34 -0400 Subject: [PATCH 02/27] new yaml configs --- configs/exps/deup/datasets/new-mc-faenet.yaml | 28 ++++++++++++ configs/exps/deup/gnn/depfaenet.yaml | 0 configs/exps/deup/gnn/faenet-training.yaml | 43 +++++++++++++++++++ configs/exps/deup/uncertainty/v0.yaml | 1 - configs/exps/deup/uncertainty/v1.yaml | 33 ++++++++++++++ 5 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 configs/exps/deup/datasets/new-mc-faenet.yaml create mode 100644 configs/exps/deup/gnn/depfaenet.yaml create mode 100644 configs/exps/deup/gnn/faenet-training.yaml create mode 100644 configs/exps/deup/uncertainty/v1.yaml diff --git a/configs/exps/deup/datasets/new-mc-faenet.yaml b/configs/exps/deup/datasets/new-mc-faenet.yaml new file mode 100644 index 000000000..95aab7a29 --- /dev/null +++ b/configs/exps/deup/datasets/new-mc-faenet.yaml @@ -0,0 +1,28 @@ +job: + mem: 32GB + cpus: 4 + gres: gpu:1 + partition: long + +default: + config: faenet-is2re-all + wandb_project: ocp-deup + wandb_tags: base-model, MC-D, 2935198 + test_ri: True + mode: train + checkpoint: /network/scratch/a/alexandre.duval/ocp/runs/2935198/checkpoints/best_checkpoint.pt + restart_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/2935198 + model: + dropout_lowest_layer: output + first_trainable_layer: dropout + dropout_lin: 0.7 + cp_data_to_tmpdir: true + inference_time_loops: 1 + deup_dataset: + create: after # "before" -> created before training (for deup) "after" -> created after training (for is2re) "" - not created + dataset_strs: ["train", "val_id", "val_ood_cat", "val_ood_ads"] + n_samples: 7 + +runs: + - optim: + max_epochs: 12 diff --git a/configs/exps/deup/gnn/depfaenet.yaml b/configs/exps/deup/gnn/depfaenet.yaml new file mode 100644 index 000000000..e69de29bb diff --git a/configs/exps/deup/gnn/faenet-training.yaml b/configs/exps/deup/gnn/faenet-training.yaml new file mode 100644 index 000000000..5e5575263 --- /dev/null +++ b/configs/exps/deup/gnn/faenet-training.yaml @@ -0,0 +1,43 @@ +job: + mem: 32GB + cpus: 4 + gres: gpu:1 + partition: long + time: 18:00:00 + +default: + test_ri: True + mode: train + graph_rewiring: remove-tag-0 + wandb_tags: "top-model" + wandb_project: ocp-deup + optim: + batch_size: 256 + eval_batch_size: 256 + cp_data_to_tmpdir: True + +runs: + - config: faenet-is2re-all + note: "top-runs" + frame_averaging: 2D + fa_method: se3-random + model: + mp_type: updownscale_base + phys_embeds: True + tag_hidden_channels: 32 + pg_hidden_channels: 96 + energy_head: weighted-av-final-embeds + complex_mp: True + graph_norm: True + hidden_channels: 384 + num_filters: 480 + num_gaussians: 104 + num_interactions: 5 + second_layer_MLP: False + skip_co: concat + cutoff: 6.0 + optim: + lr_initial: 0.002 + scheduler: LinearWarmupCosineAnnealingLR + max_epochs: 12 + eval_every: 0.25 \ No newline at end of file diff --git a/configs/exps/deup/uncertainty/v0.yaml 
b/configs/exps/deup/uncertainty/v0.yaml index 94597ddaf..4cdd6d802 100644 --- a/configs/exps/deup/uncertainty/v0.yaml +++ b/configs/exps/deup/uncertainty/v0.yaml @@ -3,7 +3,6 @@ job: cpus: 4 gres: gpu:rtx8000:1 partition: long - code_loc: /home/mila/s/schmidtv/ocp-project/run-repos/ocp-3 default: config: deup_faenet-deup_is2re-all diff --git a/configs/exps/deup/uncertainty/v1.yaml b/configs/exps/deup/uncertainty/v1.yaml new file mode 100644 index 000000000..4f69d7828 --- /dev/null +++ b/configs/exps/deup/uncertainty/v1.yaml @@ -0,0 +1,33 @@ +job: + mem: 32GB + cpus: 4 + gres: gpu:1 + partition: long + +default: + config: deup_faenet-deup_is2re-all + + wandb_project: ocp-deup + wandb_tags: base-model, MC-D, 3264530 + test_ri: True + mode: train + model: + dropout_lowest_layer: null + first_trainable_layer: output + dropout_lin: 0.7 + cp_data_to_tmpdir: false + inference_time_loops: 1 + restart_from_dir: /network/scratch/s/schmidtv/ocp/runs/3264530 + checkpoint: /network/scratch/s/schmidtv/ocp/runs/3264530 + dataset: # mandatory if restart_from_dir is set + default_val: deup-val_ood_cat-val_ood_ads + deup-train-val_id: + src: /network/scratch/s/schmidtv/ocp/runs/3264530/deup_dataset + deup-val_ood_cat-val_ood_ads: + src: /network/scratch/s/schmidtv/ocp/runs/3264530/deup_dataset + deup_dataset: + create: False + +runs: + - optim: + max_epochs: 12 From 7c481391ac6cbe5a77ee068518296e053dcf8930 Mon Sep 17 00:00:00 2001 From: AlexDuvalinho Date: Thu, 18 Apr 2024 09:30:24 -0400 Subject: [PATCH 03/27] update path trained gnn model --- configs/exps/deup/datasets/new-mc-faenet.yaml | 6 +++--- ocpmodels/datasets/deup_dataset_creator.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/configs/exps/deup/datasets/new-mc-faenet.yaml b/configs/exps/deup/datasets/new-mc-faenet.yaml index 95aab7a29..56ea29868 100644 --- a/configs/exps/deup/datasets/new-mc-faenet.yaml +++ b/configs/exps/deup/datasets/new-mc-faenet.yaml @@ -7,11 +7,11 @@ job: default: config: faenet-is2re-all wandb_project: ocp-deup - wandb_tags: base-model, MC-D, 2935198 + wandb_tags: base-model, MC-D, 4615191 test_ri: True mode: train - checkpoint: /network/scratch/a/alexandre.duval/ocp/runs/2935198/checkpoints/best_checkpoint.pt - restart_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/2935198 + checkpoint: /network/scratch/a/alexandre.duval/scratch/ocp/runs/4615191/checkpoints/best_checkpoint.pt + restart_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4615191/ model: dropout_lowest_layer: output first_trainable_layer: dropout diff --git a/ocpmodels/datasets/deup_dataset_creator.py b/ocpmodels/datasets/deup_dataset_creator.py index 64d67fd16..4bc6a8bc0 100644 --- a/ocpmodels/datasets/deup_dataset_creator.py +++ b/ocpmodels/datasets/deup_dataset_creator.py @@ -431,7 +431,7 @@ def write_lmdb(self, samples, path, total_size=-1, max_samples=-1): from ocpmodels.datasets.lmdb_dataset import DeupDataset from ocpmodels.common.utils import JOB_ID, RUNS_DIR, make_config_from_conf_str - base_trainer_path = "/network/scratch/s/schmidtv/ocp/runs/3298908" + base_trainer_path = "/network/scratch/a/alexandre.duval/ocp/runs/4615191" # what models to load for inference trainers_conf = { From 72ae772108c95dea3dc5b0501bb46bc813261fef Mon Sep 17 00:00:00 2001 From: AlexDuvalinho Date: Thu, 18 Apr 2024 09:30:41 -0400 Subject: [PATCH 04/27] fa_frames => fa_method --- configs/models/deup_faenet.yaml | 2 +- ocpmodels/datasets/deup_dataset_creator.py | 2 +- ocpmodels/datasets/lmdb_dataset.py | 6 +++--- 
scripts/train_density_estimator.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/configs/models/deup_faenet.yaml b/configs/models/deup_faenet.yaml index 2284687e2..efa779c80 100644 --- a/configs/models/deup_faenet.yaml +++ b/configs/models/deup_faenet.yaml @@ -57,7 +57,7 @@ default: energy_coefficient: 1 frame_averaging: False # 2D, 3D, da, False - fa_frames: False # can be {None, full, random, det, e3, e3-random, e3-det} + fa_method: False # can be {None, full, random, det, e3, e3-random, e3-det} # ------------------- # ----- IS2RE ----- diff --git a/ocpmodels/datasets/deup_dataset_creator.py b/ocpmodels/datasets/deup_dataset_creator.py index 4bc6a8bc0..1af5cdf8f 100644 --- a/ocpmodels/datasets/deup_dataset_creator.py +++ b/ocpmodels/datasets/deup_dataset_creator.py @@ -167,7 +167,7 @@ def load_trainers(self, overrides={}): shared_config = {} shared_config["graph_rewiring"] = self.trainers[0].config["graph_rewiring"] - shared_config["fa_frames"] = self.trainers[0].config["fa_frames"] + shared_config["fa_method"] = self.trainers[0].config["fa_method"] shared_config["frame_averaging"] = self.trainers[0].config["frame_averaging"] # Done! diff --git a/ocpmodels/datasets/lmdb_dataset.py b/ocpmodels/datasets/lmdb_dataset.py index e4ea6bd7b..8f8fb2444 100644 --- a/ocpmodels/datasets/lmdb_dataset.py +++ b/ocpmodels/datasets/lmdb_dataset.py @@ -37,7 +37,7 @@ class LmdbDataset(Dataset): config (dict): Dataset configuration transform (callable, optional): Data transform function. (default: :obj:`None`) - fa_frames (str, optional): type of frame averaging method applied, if any. + fa_method (str, optional): type of frame averaging method applied, if any. adsorbates (str, optional): comma-separated list of adsorbates to filter. If None or "all", no filtering is applied. 
(default: None) @@ -49,7 +49,7 @@ def __init__( self, config, transform=None, - fa_frames=None, + fa_method=None, lmdb_glob=None, adsorbates=None, adsorbates_ref_dir=None, @@ -96,7 +96,7 @@ def __init__( self.filter_per_adsorbates() self.transform = transform - self.fa_method = fa_frames + self.fa_method = fa_method def filter_per_adsorbates(self): """Filter the dataset to only include structures with a specific diff --git a/scripts/train_density_estimator.py b/scripts/train_density_estimator.py index a7a45b327..b5f5bc491 100644 --- a/scripts/train_density_estimator.py +++ b/scripts/train_density_estimator.py @@ -303,7 +303,7 @@ def validate(epoch, model, loader): "num_workers": 0, }, "frame_averaging": None, - "fa_frames": None, + "fa_method": None, "silent": False, "graph_rewiring": "remove-tag-0", "de": { From 040b475d479cd3a3ed7f48f1b357456fa4e8cf94 Mon Sep 17 00:00:00 2001 From: AlexDuvalinho Date: Thu, 18 Apr 2024 09:54:07 -0400 Subject: [PATCH 05/27] skip_co = concat is not possible --- .../exps/deup/gnn/{depfaenet.yaml => depfaenet-training.yaml} | 0 configs/exps/deup/gnn/faenet-training.yaml | 4 ++-- ocpmodels/datasets/deup_dataset_creator.py | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) rename configs/exps/deup/gnn/{depfaenet.yaml => depfaenet-training.yaml} (100%) diff --git a/configs/exps/deup/gnn/depfaenet.yaml b/configs/exps/deup/gnn/depfaenet-training.yaml similarity index 100% rename from configs/exps/deup/gnn/depfaenet.yaml rename to configs/exps/deup/gnn/depfaenet-training.yaml diff --git a/configs/exps/deup/gnn/faenet-training.yaml b/configs/exps/deup/gnn/faenet-training.yaml index 5e5575263..8bf38ec5f 100644 --- a/configs/exps/deup/gnn/faenet-training.yaml +++ b/configs/exps/deup/gnn/faenet-training.yaml @@ -18,7 +18,7 @@ default: runs: - config: faenet-is2re-all - note: "top-runs" + note: "top run no concat" frame_averaging: 2D fa_method: se3-random model: @@ -34,7 +34,7 @@ runs: num_gaussians: 104 num_interactions: 5 second_layer_MLP: False - skip_co: concat + skip_co: False cutoff: 6.0 optim: lr_initial: 0.002 diff --git a/ocpmodels/datasets/deup_dataset_creator.py b/ocpmodels/datasets/deup_dataset_creator.py index 1af5cdf8f..b57522422 100644 --- a/ocpmodels/datasets/deup_dataset_creator.py +++ b/ocpmodels/datasets/deup_dataset_creator.py @@ -306,6 +306,7 @@ def create_deup_dataset( stats = {d: {} for d in dataset_strs} + # Loop on train, val_id, val_ood_cat, val_ood_ads for dataset_name in dataset_strs: deup_samples = [] deup_ds_size = 0 From 83659c643d744a1f156b6c6c931930a58615a967 Mon Sep 17 00:00:00 2001 From: AlexDuvalinho Date: Fri, 19 Apr 2024 05:55:16 -0400 Subject: [PATCH 06/27] Merge only relevant changed from disconnected_gnn branch, to run depfaenet --- configs/exps/catalyst/gflownet.yaml | 143 ++++++++++ configs/exps/catalyst/reproduce-configs.yaml | 75 +++++ configs/models/depfaenet.yaml | 271 +++++++++++++++++++ configs/models/painn.yaml | 3 + mila/sbatch.py | 11 +- ocpmodels/common/flags.py | 18 +- ocpmodels/datasets/data_transforms.py | 30 ++ ocpmodels/models/__init__.py | 1 + ocpmodels/models/base_model.py | 26 +- ocpmodels/models/depfaenet.py | 97 +++++++ ocpmodels/preprocessing/graph_rewiring.py | 5 + ocpmodels/trainers/base_trainer.py | 32 ++- ocpmodels/trainers/single_trainer.py | 8 +- scripts/debug_faenet.py | 222 +++++++++++++++ 14 files changed, 929 insertions(+), 13 deletions(-) create mode 100644 configs/exps/catalyst/gflownet.yaml create mode 100644 configs/exps/catalyst/reproduce-configs.yaml create mode 100644 
configs/models/depfaenet.yaml create mode 100644 ocpmodels/models/depfaenet.py create mode 100644 scripts/debug_faenet.py diff --git a/configs/exps/catalyst/gflownet.yaml b/configs/exps/catalyst/gflownet.yaml new file mode 100644 index 000000000..2432f4733 --- /dev/null +++ b/configs/exps/catalyst/gflownet.yaml @@ -0,0 +1,143 @@ +job: + mem: 32GB + cpus: 4 + gres: gpu:rtx8000:1 + partition: long + time: 15:00:00 + +default: + # wandb_name: alvaro-carbonero-math + wandb_project: ocp-alvaro + wandb_tags: "gflownet-model" + test_ri: True + mode: train + # graph_rewiring: remove-tag-0 + graph_rewiring: "" + frame_averaging: 2D + fa_method: se3-random + cp_data_to_tmpdir: True + is_disconnected: true + model: + edge_embed_type: all_rij + mp_type: updownscale_base + phys_embeds: True + tag_hidden_channels: 0 + pg_hidden_channels: 96 + energy_head: weighted-av-final-embeds + complex_mp: True + graph_norm: True + hidden_channels: 352 + num_filters: 288 + num_gaussians: 68 + num_interactions: 5 + second_layer_MLP: False + skip_co: concat + cutoff: 4.0 + optim: + batch_size: 256 + eval_batch_size: 256 + lr_initial: 0.002 + scheduler: LinearWarmupCosineAnnealingLR + max_epochs: 9 + eval_every: 0.4 + +runs: + + # - config: faenet-is2re-all + # note: baseline faenet + + # - config: depfaenet-is2re-all + # note: depfaenet baseline + + # - config: depfaenet-is2re-all + # note: depfaenet per-adsorbate + # adsorbates: {'*O', '*OH', '*OH2', '*H'} + + # - config: depfaenet-is2re-all + # note: depfaenet per-adsorbate long string + # adsorbates: '*O, *OH, *OH2, *H' + + # - config: depfaenet-is2re-all + # note: depfaenet per-adsorbate string of a list + # adsorbates: "*O, *OH, *OH2, *H" + + # - config: depfaenet-is2re-all + # note: Trained on selected adsorbate more epochs + # adsorbates: "*O, *OH, *OH2, *H" + # optim: + # max_epochs: 10 + + # - config: depfaenet-is2re-all + # note: depfaenet full data + + # - config: depfaenet-is2re-all + # note: To be used for continue from dir + + # - config: depfaenet-is2re-all + # note: Fine-tune on per-ads-dataset 4 epoch + # continue_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4023244 + # adsorbates: "*O, *OH, *OH2, *H" + # optim: + # max_epochs: 4 + # lr_initial: 0.00015 + + # - config: depfaenet-is2re-all + # note: Fine-tune on per-ads-dataset 10 epoch + # continue_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4023244 + # adsorbates: "*O, *OH, *OH2, *H" + # optim: + # max_epochs: 10 + # lr_initial: 0.00015 + + - config: depfaenet-is2re-all + note: Fine-tune on per-ads-dataset 10 epoch + continue_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4023244 + adsorbates: "*O, *OH, *OH2, *H" + optim: + max_epochs: 20 + lr_initial: 0.0001 + + - config: depfaenet-is2re-all + note: Fine-tune on per-ads-dataset 20 epoch + continue_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4023244 + adsorbates: "*O, *OH, *OH2, *H" + optim: + max_epochs: 20 + lr_initial: 0.00015 + + - config: depfaenet-is2re-all + note: Fine-tune on per-ads-dataset 15 epoch + continue_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4023244 + adsorbates: "*O, *OH, *OH2, *H" + optim: + max_epochs: 15 + lr_initial: 0.0002 + + - config: depfaenet-is2re-all + note: Fine-tune on per-ads-dataset 10 epoch + continue_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4023244 + adsorbates: "*O, *OH, *OH2, *H" + optim: + max_epochs: 10 + lr_initial: 0.0001 + + - config: depfaenet-is2re-all + note: Fine-tune on per-ads-dataset starting from fine-tuned model + continue_from_dir: 
/network/scratch/a/alexandre.duval/ocp/runs/4071859 + adsorbates: "*O, *OH, *OH2, *H" + optim: + max_epochs: 10 + lr_initial: 0.0001 + + - config: depfaenet-is2re-all + note: Trained on selected adsorbate + adsorbates: "*O, *OH, *OH2, *H" + optim: + max_epochs: 25 + lr_initial: 0.0001 + + - config: depfaenet-is2re-all + note: Trained on selected adsorbate + adsorbates: "*O, *OH, *OH2, *H" + optim: + max_epochs: 25 diff --git a/configs/exps/catalyst/reproduce-configs.yaml b/configs/exps/catalyst/reproduce-configs.yaml new file mode 100644 index 000000000..c4c834585 --- /dev/null +++ b/configs/exps/catalyst/reproduce-configs.yaml @@ -0,0 +1,75 @@ +job: + mem: 32GB + cpus: 4 + gres: gpu:rtx8000:1 + partition: long + time: 15:00:00 + +default: + # wandb_name: alvaro-carbonero-math + wandb_project: ocp-alvaro + wandb_tags: "reproduce-best-config" + test_ri: True + mode: train + graph_rewiring: remove-tag-0 + note: "repoduce-top-run" + frame_averaging: 2D + fa_method: se3-random + cp_data_to_tmpdir: True + is_disconnected: true + model: + edge_embed_type: all_rij + mp_type: updownscale_base + phys_embeds: True + tag_hidden_channels: 32 + pg_hidden_channels: 96 + energy_head: weighted-av-final-embeds + complex_mp: True + graph_norm: True + hidden_channels: 352 + num_filters: 288 + num_gaussians: 68 + num_interactions: 5 + second_layer_MLP: False + skip_co: concat + cutoff: 4.0 + optim: + batch_size: 256 + eval_batch_size: 256 + lr_initial: 0.002 + scheduler: LinearWarmupCosineAnnealingLR + max_epochs: 9 + eval_every: 0.4 + +runs: + + - config: faenet-is2re-all + note: baseline faenet + + - config: indfaenet-is2re-all + note: baseline with top configs + + - config: indfaenet-is2re-all + note: baseline with runs' configs + model: + tag_hidden_channels: 32 + pg_hidden_channels: 96 + energy_head: weighted-av-final-embeds + complex_mp: True + graph_norm: True + hidden_channels: 528 + num_filters: 672 + num_gaussians: 148 + num_interactions: 5 + second_layer_MLP: False + skip_co: concat + + - config: depfaenet-is2re-all + note: baseline with top configs + + - config: indfaenet-is2re-all + note: so that ads get old dimensions + model: + hidden_channels: 704 + num_gaussians: 200 + num_filters: 896 \ No newline at end of file diff --git a/configs/models/depfaenet.yaml b/configs/models/depfaenet.yaml new file mode 100644 index 000000000..852ebc3bf --- /dev/null +++ b/configs/models/depfaenet.yaml @@ -0,0 +1,271 @@ +default: + model: + name: depfaenet + act: swish + hidden_channels: 128 + num_filters: 100 + num_interactions: 3 + num_gaussians: 100 + cutoff: 6.0 + use_pbc: True + regress_forces: False + # drlab attributes: + tag_hidden_channels: 0 # 32 + pg_hidden_channels: 0 # 32 -> period & group embedding hidden channels + phys_embeds: False # True + phys_hidden_channels: 0 + energy_head: False # can be {False, weighted-av-initial-embeds, weighted-av-final-embeds, pooling, graclus, random} + # faenet new features + skip_co: False # output skip connections {False, "add", "concat"} + second_layer_MLP: False # in EmbeddingBlock + complex_mp: False + edge_embed_type: rij # {'rij','all_rij','sh', 'all'}) + mp_type: base # {'base', 'simple', 'updownscale', 'att', 'base_with_att', 'local_env'} + graph_norm: False # bool + att_heads: 1 # int + force_decoder_type: "mlp" # can be {"" or "simple"} | only used if regress_forces is True + force_decoder_model_config: + simple: + hidden_channels: 128 + norm: batch1d # batch1d, layer or null + mlp: + hidden_channels: 256 + norm: batch1d # batch1d, layer or null + res: + 
hidden_channels: 128 + norm: batch1d # batch1d, layer or null + res_updown: + hidden_channels: 128 + norm: batch1d # batch1d, layer or null + optim: + batch_size: 64 + eval_batch_size: 64 + num_workers: 4 + lr_gamma: 0.1 + lr_initial: 0.001 + warmup_factor: 0.2 + max_epochs: 20 + energy_grad_coefficient: 10 + force_coefficient: 30 + energy_coefficient: 1 + + frame_averaging: False # 2D, 3D, da, False + fa_frames: False # can be {None, full, random, det, e3, e3-random, e3-det} + +# ------------------- +# ----- IS2RE ----- +# ------------------- + +is2re: + # *** Important note *** + # The total number of gpus used for this run was 1. + # If the global batch size (num_gpus * batch_size) is modified + # the lr_milestones and warmup_steps need to be adjusted accordingly. + 10k: + optim: + lr_initial: 0.005 + lr_milestones: # epochs at which lr_initial <- lr_initial * lr_gamma + - 1562 + - 2343 + - 3125 + warmup_steps: 468 + max_epochs: 20 + + 100k: + model: + hidden_channels: 256 + optim: + lr_initial: 0.005 + lr_milestones: # epochs at which lr_initial <- lr_initial * lr_gamma + - 1562 + - 2343 + - 3125 + warmup_steps: 468 + max_epochs: 20 + + all: + model: + hidden_channels: 384 + num_interactions: 4 + optim: + batch_size: 256 + eval_batch_size: 256 + lr_initial: 0.001 + lr_gamma: 0.1 + lr_milestones: # steps at which lr_initial <- lr_initial * lr_gamma + - 18000 + - 27000 + - 37000 + warmup_steps: 6000 + max_epochs: 20 + +# ------------------ +# ----- S2EF ----- +# ------------------ + +# For 2 GPUs + +s2ef: + default: + model: + num_interactions: 4 + hidden_channels: 750 + num_gaussians: 200 + num_filters: 256 + regress_forces: "direct" + force_coefficient: 30 + energy_grad_coefficient: 10 + optim: + batch_size: 96 + eval_batch_size: 96 + warmup_factor: 0.2 + lr_gamma: 0.1 + lr_initial: 0.0001 + max_epochs: 15 + warmup_steps: 30000 + lr_milestones: + - 55000 + - 75000 + - 10000 + + 200k: {} + + # 1 gpus + 2M: + model: + num_interactions: 5 + hidden_channels: 1024 + num_gaussians: 200 + num_filters: 256 + optim: + batch_size: 192 + eval_batch_size: 192 + + 20M: {} + + all: {} + +qm9: + default: + model: + act: swish + att_heads: 1 + complex_mp: true + cutoff: 6.0 + edge_embed_type: all_rij + energy_head: '' + graph_norm: true + graph_rewiring: null + hidden_channels: 400 + max_num_neighbors: 30 + mp_type: updownscale_base + num_filters: 480 + num_gaussians: 100 + num_interactions: 5 + otf_graph: false + pg_hidden_channels: 32 + phys_embeds: false + phys_hidden_channels: 0 + regress_forces: '' + second_layer_MLP: true + skip_co: true + tag_hidden_channels: 0 + use_pbc: false + + optim: + batch_size: 64 + es_min_abs_change: 1.0e-06 + es_patience: 20 + es_warmup_epochs: 600 + eval_batch_size: 64 + factor: 0.9 + lr_initial: 0.0003 + loss_energy: mse + lr_gamma: 0.1 + lr_initial: 0.001 + max_epochs: 1500 + min_lr: 1.0e-06 + mode: min + optimizer: AdamW + patience: 15 + scheduler: ReduceLROnPlateau + threshold: 0.0001 + threshold_mode: abs + verbose: true + warmup_factor: 0.2 + warmup_steps: 3000 + + 10k: {} + all: {} + +qm7x: + default: + model: # SOTA settings + act: swish + att_heads: 1 + complex_mp: true + cutoff: 5.0 + edge_embed_type: all_rij + energy_head: false + force_decoder_model_config: + mlp: + hidden_channels: 256 + norm: batch1d + res: + hidden_channels: 128 + norm: batch1d + res_updown: + hidden_channels: 128 + norm: layer + simple: + hidden_channels: 128 + norm: batch1d + force_decoder_type: res_updown + graph_norm: false + hidden_channels: 500 + max_num_neighbors: 40 + mp_type: 
updownscale_base + num_filters: 400 + num_gaussians: 50 + num_interactions: 5 + otf_graph: false + pg_hidden_channels: 32 + phys_embeds: true + phys_hidden_channels: 0 + regress_forces: direct_with_gradient_target + second_layer_MLP: true + skip_co: false + tag_hidden_channels: 0 + use_pbc: false + + optim: + batch_size: 100 + energy_grad_coefficient: 5 + eval_batch_size: 100 + eval_every: 0.34 + factor: 0.75 + force_coefficient: 75 + loss_energy: mae + loss_force: mse + lr_gamma: 0.1 + lr_initial: 0.000193 + max_steps: 4000000 + min_lr: 1.0e-06 + mode: min + optimizer: AdamW + scheduler: ReduceLROnPlateau + threshold: 0.001 + threshold_mode: abs + verbose: true + warmup_factor: 0.2 + warmup_steps: 3000 + + all: {} + 1k: {} + +qm9: + default: + model: + use_pbc: False + all: {} + 10k: {} diff --git a/configs/models/painn.yaml b/configs/models/painn.yaml index 2c0abac11..c138652a8 100644 --- a/configs/models/painn.yaml +++ b/configs/models/painn.yaml @@ -2,6 +2,9 @@ default: model: name: painn use_pbc: True + optim: + num_workers: 4 + eval_batch_size: 64 # ------------------- # ----- IS2RE ----- # ------------------- diff --git a/mila/sbatch.py b/mila/sbatch.py index ed8fa878d..b6417adf1 100644 --- a/mila/sbatch.py +++ b/mila/sbatch.py @@ -1,12 +1,13 @@ -from minydra import resolved_args, MinyDict -from pathlib import Path -from datetime import datetime import os +import re import subprocess -from shutil import copyfile import sys -import re +from datetime import datetime +from pathlib import Path +from shutil import copyfile + import yaml +from minydra import MinyDict, resolved_args IS_DRAC = ( "narval.calcul.quebec" in os.environ.get("HOSTNAME", "") diff --git a/ocpmodels/common/flags.py b/ocpmodels/common/flags.py index a6fbf20d0..761e61dac 100644 --- a/ocpmodels/common/flags.py +++ b/ocpmodels/common/flags.py @@ -87,12 +87,14 @@ def add_core_args(self): "--checkpoint", type=str, help="Model checkpoint to load" ) self.parser.add_argument( - "--continue_from_dir", type=str, help="Run to continue, loading its config" + "--continue_from_dir", + type=str, + help="Continue an existing run, loading its config and overwriting desired arguments", ) self.parser.add_argument( "--restart_from_dir", type=str, - help="Run to restart, loading its config and overwriting " + help="Restart training from an existing run, loading its config and overwriting args" + " from the command-line", ) self.parser.add_argument( @@ -293,6 +295,18 @@ def add_core_args(self): help="Number of validation loops to run in order to collect inference" + " timing stats", ) + self.parser.add_argument( + "--is_disconnected", + type=bool, + default=False, + help="Eliminates edges between catalyst and adsorbate.", + ) + self.parser.add_argument( + "--lowest_energy_only", + type=bool, + default=False, + help="Makes trainer use the lowest energy data point for every (catalyst, adsorbate, cell) tuple. 
ONLY USE WITH ALL DATASET", + ) flags = Flags() diff --git a/ocpmodels/datasets/data_transforms.py b/ocpmodels/datasets/data_transforms.py index 6c26d2a9a..17a63dfa5 100644 --- a/ocpmodels/datasets/data_transforms.py +++ b/ocpmodels/datasets/data_transforms.py @@ -127,6 +127,35 @@ def __call__(self, data): return self.rewiring_func(data) +class Disconnected(Transform): + def __init__(self, is_disconnected=False) -> None: + self.inactive = not is_disconnected + + def edge_classifier(self, edge_index, tags): + edges_with_tags = tags[ + edge_index.type(torch.long) + ] # Tensor with shape=edge_index.shape where every entry is a tag + filt1 = edges_with_tags[0] == edges_with_tags[1] + filt2 = (edges_with_tags[0] != 2) * (edges_with_tags[1] != 2) + + # Edge is removed if tags are different (R1), and at least one end has tag 2 (R2). We want ~(R1*R2) = ~R1+~R2. + # filt1 = ~R1. Let L1 be that head has tag 2, and L2 is that tail has tag 2. Then R2 = L1+L2, so ~R2 = ~L1*~L2 = filt2. + + return filt1 + filt2 + + def __call__(self, data): + if self.inactive: + return data + + values = self.edge_classifier(data.edge_index, data.tags) + + data.edge_index = data.edge_index[:, values] + data.cell_offsets = data.cell_offsets[values, :] + data.distances = data.distances[values] + + return data + + class Compose: # https://pytorch.org/vision/stable/_modules/torchvision/transforms/transforms.html#Compose def __init__(self, transforms): @@ -167,5 +196,6 @@ def get_transforms(trainer_config): AddAttributes(), GraphRewiring(trainer_config.get("graph_rewiring")), FrameAveraging(trainer_config["frame_averaging"], trainer_config["fa_method"]), + Disconnected(trainer_config["is_disconnected"]), ] return Compose(transforms) diff --git a/ocpmodels/models/__init__.py b/ocpmodels/models/__init__.py index a722f7817..c15c217b0 100644 --- a/ocpmodels/models/__init__.py +++ b/ocpmodels/models/__init__.py @@ -7,6 +7,7 @@ from .cgcnn import CGCNN # noqa: F401 from .dimenet import DimeNet # noqa: F401 from .faenet import FAENet # noqa: F401 +from .depfaenet import depFAENet # noqa: F401 from .gemnet.gemnet import GemNetT # noqa: F401 from .dimenet_plus_plus import DimeNetPlusPlus # noqa: F401 from .forcenet import ForceNet # noqa: F401 diff --git a/ocpmodels/models/base_model.py b/ocpmodels/models/base_model.py index 4a5c84a20..e2df0e737 100644 --- a/ocpmodels/models/base_model.py +++ b/ocpmodels/models/base_model.py @@ -4,10 +4,12 @@ This source code is licensed under the MIT license found in the LICENSE file in the root directory of this source tree. """ + import logging import torch import torch.nn as nn +from torch_geometric.data import HeteroData from torch_geometric.nn import radius_graph from ocpmodels.common.utils import ( @@ -74,7 +76,14 @@ def forward(self, data, mode="train", regress_forces=None, q=None): # energy gradient w.r.t. 
positions will be computed if mode == "train" or self.regress_forces == "from_energy": - data.pos.requires_grad_(True) + if type(data) is list: + data[0].pos.requires_grad_(True) + data[1].pos.requires_grad_(True) + elif type(data[0]) is HeteroData: + data["adsorbate"].pos.requires_grad_(True) + data["catalyst"].pos.requires_grad_(True) + else: + data.pos.requires_grad_(True) # predict energy preds = self.energy_forward(data, q=q) @@ -85,7 +94,20 @@ def forward(self, data, mode="train", regress_forces=None, q=None): forces = self.forces_forward(preds) if mode == "train" or self.regress_forces == "from_energy": - grad_forces = self.forces_as_energy_grad(data.pos, preds["energy"]) + if ( + "gemnet" in self.__class__.__name__.lower() + and self.regress_forces == "from_energy" + ): + # gemnet forces are already computed + grad_forces = forces + else: + # compute forces from energy gradient (fall back to adsorbate positions for hetero inputs) + try: + grad_forces = self.forces_as_energy_grad( + data.pos, preds["energy"] + ) + except Exception: + grad_forces = self.forces_as_energy_grad( + data["adsorbate"].pos, preds["energy"] + ) if self.regress_forces == "from_energy": # predicted forces are the energy gradient diff --git a/ocpmodels/models/depfaenet.py b/ocpmodels/models/depfaenet.py new file mode 100644 index 000000000..25f6a0968 --- /dev/null +++ b/ocpmodels/models/depfaenet.py @@ -0,0 +1,97 @@ +import torch +from torch.nn import Linear +from torch import nn +from torch_scatter import scatter + +from ocpmodels.models.faenet import FAENet +from ocpmodels.models.faenet import OutputBlock as conOutputBlock +from ocpmodels.common.registry import registry +from ocpmodels.common.utils import conditional_grad +from ocpmodels.models.utils.activations import swish + +from torch_geometric.data import Batch + + +class discOutputBlock(conOutputBlock): + def __init__(self, energy_head, hidden_channels, act, disconnected_mlp=False): + super(discOutputBlock, self).__init__(energy_head, hidden_channels, act) + + # We modify the last output linear function to make the output a vector + self.lin2 = Linear(hidden_channels // 2, hidden_channels // 2) + + self.disconnected_mlp = disconnected_mlp + if self.disconnected_mlp: + self.ads_lin = Linear(hidden_channels // 2, hidden_channels // 2) + self.cat_lin = Linear(hidden_channels // 2, hidden_channels // 2) + + # Combines the hidden representation of each to a scalar. + self.combination = nn.Sequential( + Linear(hidden_channels // 2 * 2, hidden_channels // 2), + swish, + Linear(hidden_channels // 2, 1), + ) + + def tags_saver(self, tags): + self.current_tags = tags + + def forward(self, h, edge_index, edge_weight, batch, alpha): + if ( + self.energy_head == "weighted-av-final-embeds" + ): # Right now, this is the only available option. + alpha = self.w_lin(h) + + elif self.energy_head == "graclus": + h, batch = self.graclus(h, edge_index, edge_weight, batch) + + elif self.energy_head in {"pooling", "random"}: + h, batch, pooling_loss = self.hierarchical_pooling( + h, edge_index, edge_weight, batch + ) + + # MLP + h = self.lin1(h) + h = self.lin2(self.act(h)) + + if self.energy_head in { + "weighted-av-initial-embeds", + "weighted-av-final-embeds", + }: + h = h * alpha + + # We pool separately and then we concatenate. 
+ ads = self.current_tags == 2 + cat = ~ads + + ads_out = scatter(h, batch * ads, dim=0, reduce="add") + cat_out = scatter(h, batch * cat, dim=0, reduce="add") + + if self.disconnected_mlp: + ads_out = self.ads_lin(ads_out) + cat_out = self.cat_lin(cat_out) + + system = torch.cat([ads_out, cat_out], dim=1) + + # Finally, we predict a number. + energy = self.combination(system) + + return energy + + +@registry.register_model("depfaenet") +class depFAENet(FAENet): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + # We replace the old output block by the new output block + self.disconnected_mlp = kwargs.get("disconnected_mlp", False) + self.output_block = discOutputBlock( + self.energy_head, kwargs["hidden_channels"], self.act, self.disconnected_mlp + ) + + @conditional_grad(torch.enable_grad()) + def energy_forward(self, data): + # We need to save the tags so this step is necessary. + self.output_block.tags_saver(data.tags) + pred = super().energy_forward(data) + + return pred diff --git a/ocpmodels/preprocessing/graph_rewiring.py b/ocpmodels/preprocessing/graph_rewiring.py index 2f3b103a6..b9115e907 100644 --- a/ocpmodels/preprocessing/graph_rewiring.py +++ b/ocpmodels/preprocessing/graph_rewiring.py @@ -36,6 +36,11 @@ def remove_tag0_nodes(data): data.tags = data.tags[non_sub] if hasattr(data, "pos_relaxed"): data.pos_relaxed = data.pos_relaxed[non_sub, :] + if hasattr(data, "query"): + data.h = data.h[non_sub, :] + data.query = data.query[non_sub, :] + data.key = data.key[non_sub, :] + data.value = data.value[non_sub, :] # per-edge tensors data.edge_index = data.edge_index[:, neither_is_sub] diff --git a/ocpmodels/trainers/base_trainer.py b/ocpmodels/trainers/base_trainer.py index ea1537737..e871027ef 100644 --- a/ocpmodels/trainers/base_trainer.py +++ b/ocpmodels/trainers/base_trainer.py @@ -8,6 +8,7 @@ import errno import logging import os +import pickle import random import time from abc import ABC, abstractmethod @@ -24,7 +25,7 @@ from rich.console import Console from rich.table import Table from torch.nn.parallel.distributed import DistributedDataParallel -from torch.utils.data import DataLoader +from torch.utils.data import DataLoader, Subset from torch_geometric.data import Batch from tqdm import tqdm @@ -57,6 +58,7 @@ class BaseTrainer(ABC): def __init__(self, load=True, **kwargs): run_dir = kwargs["run_dir"] + model_name = kwargs["model"].pop( "name", kwargs.get("model_name", "Unknown - base_trainer issue") ) @@ -173,9 +175,21 @@ def __init__(self, load=True, **kwargs): ) (run_dir / f"config-{JOB_ID}.yaml").write_text(yaml.dump(self.config)) - if load: - self.load() + # Here's the models whose edges are removed as a transform + transform_models = [ + "depfaenet", + ] + if self.config["is_disconnected"]: + print("\n\nHeads up: cat-ads edges being removed!") + if self.config["model_name"] in transform_models: + if not self.config["is_disconnected"]: + print( + f"\n\nWhen using {self.config['model_name']},", + "the flag 'is_disconnected' should be used! 
The flag has been turned on.\n", + ) + self.config["is_disconnected"] = True + self.load() self.evaluator = Evaluator( task=self.task_name, model_regresses_forces=self.config["model"].get("regress_forces", ""), @@ -244,6 +258,7 @@ def get_dataloader(self, dataset, sampler): pin_memory=True, batch_sampler=sampler, ) + return loader def load_datasets(self): @@ -281,6 +296,16 @@ def load_datasets(self): silent=self.silent, ) + if self.config["lowest_energy_only"]: + with open( + "/network/scratch/a/alvaro.carbonero/lowest_energy.pkl", "rb" + ) as fp: + good_indices = pickle.load(fp) + good_indices = list(good_indices) + + self.real_dataset = self.datasets["train"] + self.datasets["train"] = Subset(self.datasets["train"], good_indices) + shuffle = False if "train" in split: shuffle = True @@ -402,6 +427,7 @@ def load_model(self): "task_name": self.task_name, }, **self.config["model"], + "model_name": self.config["model_name"], } self.model = registry.get_model_class(self.config["model_name"])( diff --git a/ocpmodels/trainers/single_trainer.py b/ocpmodels/trainers/single_trainer.py index c8850fe1a..25f82ec9a 100644 --- a/ocpmodels/trainers/single_trainer.py +++ b/ocpmodels/trainers/single_trainer.py @@ -227,6 +227,8 @@ def train( # Calculate start_epoch from step instead of loading the epoch number # to prevent inconsistencies due to different batch size in checkpoint. + if self.config["continue_from_dir"] is not None and self.config["adsorbates"] not in {None, "all"}: + self.step = 0 start_epoch = self.step // n_train max_epochs = self.config["optim"]["max_epochs"] timer = Times() @@ -498,7 +500,11 @@ def end_of_training( # Close datasets if debug_batches < 0: for ds in self.datasets.values(): - ds.close_db() + try: + ds.close_db() + except: + assert self.config["lowest_energy_only"] == True + self.real_dataset.close_db() def model_forward(self, batch_list, mode="train", q=None): """Perform a forward pass of the model when frame averaging is applied. diff --git a/scripts/debug_faenet.py b/scripts/debug_faenet.py new file mode 100644 index 000000000..56d79c3d6 --- /dev/null +++ b/scripts/debug_faenet.py @@ -0,0 +1,222 @@ +""" +Copyright (c) Facebook, Inc. and its affiliates. + +This source code is licensed under the MIT license found in the +LICENSE file in the root directory of this source tree. +""" + +import logging +import os +import time +import traceback +import sys +import torch +from yaml import dump + +from ocpmodels.common import dist_utils +from ocpmodels.common.flags import flags +from ocpmodels.common.registry import registry +from ocpmodels.common.utils import ( + JOB_ID, + auto_note, + build_config, + merge_dicts, + move_lmdb_data_to_slurm_tmpdir, + resolve, + setup_imports, + setup_logging, + update_from_sbatch_py_vars, + set_min_hidden_channels, +) +from ocpmodels.common.orion_utils import ( + continue_orion_exp, + load_orion_exp, + sample_orion_hparams, +) +from ocpmodels.trainers import BaseTrainer + +# os.environ["CUDA_LAUNCH_BLOCKING"] = "1" +torch.multiprocessing.set_sharing_strategy("file_system") + + +def print_warnings(): + warnings = [ + "`max_num_neighbors` is set to 40. 
This should be tuned per model.", + "`tag_specific_weights` is not handled for " + + "`regress_forces: direct_with_gradient_target` in compute_loss()", + ] + print("\n" + "-" * 80 + "\n") + print("šŸ›‘ OCP-DR-Lab Warnings (nota benes):") + for warning in warnings: + print(f" ā€¢ {warning}") + print("Remove warnings when they are fixed in the code/configs.") + print("\n" + "-" * 80 + "\n") + + +def wrap_up(args, start_time, error=None, signal=None, trainer=None): + total_time = time.time() - start_time + logging.info(f"Total time taken: {total_time}") + if trainer and trainer.logger is not None: + trainer.logger.log({"Total time": total_time}) + + if args.distributed: + print( + "\nWaiting for all processes to finish with dist_utils.cleanup()...", + end="", + ) + dist_utils.cleanup() + print("Done!") + + if "interactive" not in os.popen(f"squeue -hj {JOB_ID}").read(): + print("\nSelf-canceling SLURM job in 32s", JOB_ID) + os.popen(f"sleep 32 && scancel {JOB_ID}") + + if trainer and trainer.logger: + trainer.logger.finish(error or signal) + + +if __name__ == "__main__": + error = signal = orion_exp = orion_trial = trainer = None + orion_race_condition = False + hparams = {} + + setup_logging() + + parser = flags.get_parser() + args, override_args = parser.parse_known_args() + args = update_from_sbatch_py_vars(args) + if args.logdir: + args.logdir = resolve(args.logdir) + + # -- Build config + + args.wandb_name = "alvaro-carbonero-math" + args.wandb_project = "ocp-alvaro" + args.test_ri = True + args.mode = "train" + args.graph_rewiring = "remove-tag-0" + args.cp_data_to_tmpdir = True + args.config = "indfaenet-is2re-10k" + args.frame_averaging = "2D" + args.fa_frames = "se3-random" + + trainer_config = build_config(args, override_args) + + if dist_utils.is_master(): + trainer_config = move_lmdb_data_to_slurm_tmpdir(trainer_config) + dist_utils.synchronize() + + trainer_config["dataset"] = dist_utils.broadcast_from_master( + trainer_config["dataset"] + ) + + trainer_config["model"]["edge_embed_type"] = "all_rij" + trainer_config["model"]["mp_type"] = "updownscale" + trainer_config["model"]["phys_embeds"] = True + trainer_config["model"]["tag_hidden_channels"] = 32 + trainer_config["model"]["pg_hidden_channels"] = 64 + trainer_config["model"]["energy_head"] = "weighted-av-final-embeds" + trainer_config["model"]["complex_mp"] = False + trainer_config["model"]["graph_norm"] = True + trainer_config["model"]["hidden_channels"] = 352 + trainer_config["model"]["num_filters"] = 448 + trainer_config["model"]["num_gaussians"] = 99 + trainer_config["model"]["num_interactions"] = 6 + trainer_config["model"]["second_layer_MLP"] = True + trainer_config["model"]["skip_co"] = "concat" + # trainer_config["model"]["transformer_out"] = False + trainer_config["model"]["afaenet_gat_mode"] = "v1" + # trainer_config["model"]["disconnected_mlp"] = True + + # trainer_config["optim"]["batch_sizes"] = 256 + # trainer_config["optim"]["eval_batch_sizes"] = 256 + trainer_config["optim"]["lr_initial"] = 0.0019 + trainer_config["optim"]["scheduler"] = "LinearWarmupCosineAnnealingLR" + trainer_config["optim"]["max_epochs"] = 20 + trainer_config["optim"]["eval_every"] = 0.4 + + # -- Initial setup + + setup_imports() + print("\nšŸš© All things imported.\n") + start_time = time.time() + + try: + # -- Orion + + if args.orion_exp_config_path and dist_utils.is_master(): + orion_exp = load_orion_exp(args) + hparams, orion_trial = sample_orion_hparams(orion_exp, trainer_config) + + if hparams.get("orion_race_condition"): + 
logging.warning("\n\n ā›”ļø Orion race condition. Stopping here.\n\n") + wrap_up(args, start_time, error, signal) + sys.exit() + + hparams = dist_utils.broadcast_from_master(hparams) + if hparams: + print("\nšŸ’Ž Received hyper-parameters from Orion:") + print(dump(hparams), end="\n") + trainer_config = merge_dicts(trainer_config, hparams) + + # -- Setup trainer + trainer_config = continue_orion_exp(trainer_config) + trainer_config = auto_note(trainer_config) + trainer_config = set_min_hidden_channels(trainer_config) + + try: + cls = registry.get_trainer_class(trainer_config["trainer"]) + trainer: BaseTrainer = cls(**trainer_config) + except Exception as e: + traceback.print_exc() + logging.warning(f"\nšŸ’€ Error in trainer initialization: {e}\n") + signal = "trainer_init_error" + + if signal is None: + task = registry.get_task_class(trainer_config["mode"])(trainer_config) + task.setup(trainer) + print_warnings() + + # -- Start Training + + signal = task.run() + + # -- End of training + + # handle job preemption / time limit + if signal == "SIGTERM": + print("\nJob was preempted. Wrapping up...\n") + if trainer: + trainer.close_datasets() + + dist_utils.synchronize() + + objective = dist_utils.broadcast_from_master( + trainer.objective if trainer else None + ) + + if orion_exp is not None: + if objective is None: + if signal == "loss_is_nan": + objective = 1e12 + print("Received NaN objective from worker. Setting to 1e12.") + if signal == "trainer_init_error": + objective = 1e12 + print( + "Received trainer_init_error from worker.", + "Setting objective to 1e12.", + ) + if objective is not None: + orion_exp.observe( + orion_trial, + [{"type": "objective", "name": "energy_mae", "value": objective}], + ) + else: + print("Received None objective from worker. 
Skipping observation.") + + except Exception: + error = True + print(traceback.format_exc()) + + finally: + wrap_up(args, start_time, error, signal, trainer=trainer) From 442ca59bcd788167e7c0533dde0a8b7c0e9a1770 Mon Sep 17 00:00:00 2001 From: AlexDuvalinho Date: Fri, 19 Apr 2024 06:54:22 -0400 Subject: [PATCH 07/27] remove edge_embed_type --- configs/exps/catalyst/gflownet.yaml | 76 ++++++++++++------------ configs/exps/is2re/top-configs.yaml | 2 - configs/exps/orion/faenet-is2re-all.yaml | 2 - configs/exps/orion/faenet-qm9.yaml | 2 - configs/models/depfaenet.yaml | 3 - configs/models/deup_faenet.yaml | 3 - scripts/debug_faenet.py | 1 - scripts/test_all.py | 6 +- 8 files changed, 40 insertions(+), 55 deletions(-) diff --git a/configs/exps/catalyst/gflownet.yaml b/configs/exps/catalyst/gflownet.yaml index 2432f4733..4499b6e2a 100644 --- a/configs/exps/catalyst/gflownet.yaml +++ b/configs/exps/catalyst/gflownet.yaml @@ -6,9 +6,8 @@ job: time: 15:00:00 default: - # wandb_name: alvaro-carbonero-math - wandb_project: ocp-alvaro - wandb_tags: "gflownet-model" + wandb_project: ocp-deup # ocp-alvaro + wandb_tags: gflownet-model, depfaenet test_ri: True mode: train # graph_rewiring: remove-tag-0 @@ -18,7 +17,6 @@ default: cp_data_to_tmpdir: True is_disconnected: true model: - edge_embed_type: all_rij mp_type: updownscale_base phys_embeds: True tag_hidden_channels: 0 @@ -89,55 +87,55 @@ runs: # max_epochs: 10 # lr_initial: 0.00015 - - config: depfaenet-is2re-all - note: Fine-tune on per-ads-dataset 10 epoch - continue_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4023244 - adsorbates: "*O, *OH, *OH2, *H" - optim: - max_epochs: 20 - lr_initial: 0.0001 + # - config: depfaenet-is2re-all + # note: Fine-tune on per-ads-dataset 10 epoch + # continue_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4023244 + # adsorbates: "*O, *OH, *OH2, *H" + # optim: + # max_epochs: 20 + # lr_initial: 0.0001 - - config: depfaenet-is2re-all - note: Fine-tune on per-ads-dataset 20 epoch - continue_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4023244 - adsorbates: "*O, *OH, *OH2, *H" - optim: - max_epochs: 20 - lr_initial: 0.00015 + # - config: depfaenet-is2re-all + # note: Fine-tune on per-ads-dataset 20 epoch + # continue_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4023244 + # adsorbates: "*O, *OH, *OH2, *H" + # optim: + # max_epochs: 20 + # lr_initial: 0.00015 - config: depfaenet-is2re-all - note: Fine-tune on per-ads-dataset 15 epoch + note: Depfaenet per-ads-dataset continue_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4023244 adsorbates: "*O, *OH, *OH2, *H" optim: - max_epochs: 15 + max_epochs: 12 lr_initial: 0.0002 - config: depfaenet-is2re-all - note: Fine-tune on per-ads-dataset 10 epoch + note: Depfaenet per-ads-dataset continue_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4023244 adsorbates: "*O, *OH, *OH2, *H" optim: max_epochs: 10 lr_initial: 0.0001 - - config: depfaenet-is2re-all - note: Fine-tune on per-ads-dataset starting from fine-tuned model - continue_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4071859 - adsorbates: "*O, *OH, *OH2, *H" - optim: - max_epochs: 10 - lr_initial: 0.0001 + # - config: depfaenet-is2re-all + # note: Fine-tune on per-ads-dataset starting from fine-tuned model + # continue_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4071859 + # adsorbates: "*O, *OH, *OH2, *H" + # optim: + # max_epochs: 10 + # lr_initial: 0.0001 - - config: depfaenet-is2re-all - note: Trained on selected adsorbate - adsorbates: "*O, *OH, *OH2, 
*H" - optim: - max_epochs: 25 - lr_initial: 0.0001 + # - config: depfaenet-is2re-all + # note: Trained on selected adsorbate + # adsorbates: "*O, *OH, *OH2, *H" + # optim: + # max_epochs: 25 + # lr_initial: 0.0001 - - config: depfaenet-is2re-all - note: Trained on selected adsorbate - adsorbates: "*O, *OH, *OH2, *H" - optim: - max_epochs: 25 + # - config: depfaenet-is2re-all + # note: Trained on selected adsorbate + # adsorbates: "*O, *OH, *OH2, *H" + # optim: + # max_epochs: 25 diff --git a/configs/exps/is2re/top-configs.yaml b/configs/exps/is2re/top-configs.yaml index cf4e79fe4..6fa882648 100644 --- a/configs/exps/is2re/top-configs.yaml +++ b/configs/exps/is2re/top-configs.yaml @@ -9,8 +9,6 @@ default: test_ri: True mode: train graph_rewiring: remove-tag-0 - model: - edge_embed_type: all_rij wandb_tags: "best-config" optim: batch_size: 256 diff --git a/configs/exps/orion/faenet-is2re-all.yaml b/configs/exps/orion/faenet-is2re-all.yaml index b3a1ccbca..baecd59d9 100644 --- a/configs/exps/orion/faenet-is2re-all.yaml +++ b/configs/exps/orion/faenet-is2re-all.yaml @@ -14,8 +14,6 @@ default: wandb_tags: is2re-all, orion cp_data_to_tmpdir: true graph_rewiring: remove-tag-0 - model: - edge_embed_type: all_rij frame_averaging: 2D fa_method: random optim: diff --git a/configs/exps/orion/faenet-qm9.yaml b/configs/exps/orion/faenet-qm9.yaml index 2d26414fd..722ed4472 100644 --- a/configs/exps/orion/faenet-qm9.yaml +++ b/configs/exps/orion/faenet-qm9.yaml @@ -39,8 +39,6 @@ default: targets: hidden_channels, num_filters, pg_hidden_channels, phys_hidden_channels, batch_size frame_averaging: 3D fa_method: random - model: - edge_embed_type: all_rij orion: # Remember to change the experiment name if you change anything in the search space diff --git a/configs/models/depfaenet.yaml b/configs/models/depfaenet.yaml index 852ebc3bf..da19d6e35 100644 --- a/configs/models/depfaenet.yaml +++ b/configs/models/depfaenet.yaml @@ -19,7 +19,6 @@ default: skip_co: False # output skip connections {False, "add", "concat"} second_layer_MLP: False # in EmbeddingBlock complex_mp: False - edge_embed_type: rij # {'rij','all_rij','sh', 'all'}) mp_type: base # {'base', 'simple', 'updownscale', 'att', 'base_with_att', 'local_env'} graph_norm: False # bool att_heads: 1 # int @@ -152,7 +151,6 @@ qm9: att_heads: 1 complex_mp: true cutoff: 6.0 - edge_embed_type: all_rij energy_head: '' graph_norm: true graph_rewiring: null @@ -205,7 +203,6 @@ qm7x: att_heads: 1 complex_mp: true cutoff: 5.0 - edge_embed_type: all_rij energy_head: false force_decoder_model_config: mlp: diff --git a/configs/models/deup_faenet.yaml b/configs/models/deup_faenet.yaml index efa779c80..bdc723bb5 100644 --- a/configs/models/deup_faenet.yaml +++ b/configs/models/deup_faenet.yaml @@ -25,7 +25,6 @@ default: skip_co: False # output skip connections {False, "add", "concat"} second_layer_MLP: False # in EmbeddingBlock complex_mp: False - edge_embed_type: rij # {'rij','all_rij','sh', 'all'}) mp_type: base # {'base', 'simple', 'updownscale', 'att', 'base_with_att', 'local_env'} graph_norm: False # bool att_heads: 1 # int @@ -153,7 +152,6 @@ qm9: att_heads: 1 complex_mp: true cutoff: 6.0 - edge_embed_type: all_rij energy_head: '' graph_norm: true graph_rewiring: null @@ -205,7 +203,6 @@ qm7x: att_heads: 1 complex_mp: true cutoff: 5.0 - edge_embed_type: all_rij energy_head: false force_decoder_model_config: mlp: diff --git a/scripts/debug_faenet.py b/scripts/debug_faenet.py index 56d79c3d6..6e55aef82 100644 --- a/scripts/debug_faenet.py +++ b/scripts/debug_faenet.py 
@@ -110,7 +110,6 @@ def wrap_up(args, start_time, error=None, signal=None, trainer=None): trainer_config["dataset"] ) - trainer_config["model"]["edge_embed_type"] = "all_rij" trainer_config["model"]["mp_type"] = "updownscale" trainer_config["model"]["phys_embeds"] = True trainer_config["model"]["tag_hidden_channels"] = 32 diff --git a/scripts/test_all.py b/scripts/test_all.py index 783f6f302..39d69b4a2 100644 --- a/scripts/test_all.py +++ b/scripts/test_all.py @@ -180,9 +180,9 @@ def isin(key, args): "--config=sfarinet-qm7x-1k --regress_forces=direct", "--config=sfarinet-qm7x-1k --regress_forces=direct_with_gradient_target", "--config=sfarinet-qm7x-1k --regress_forces=from_energy", - "--config=faenet-is2re-10k --model.edge_embed_type=rij --model.mp_type=base", - "--config=faenet-is2re-10k --model.edge_embed_type=all --model.mp_type=simple", - "--config=faenet-is2re-10k --model.edge_embed_type=sh --model.mp_type=updownscale", + "--config=faenet-is2re-10k --model.mp_type=base", + "--config=faenet-is2re-10k --model.mp_type=simple", + "--config=faenet-is2re-10k --model.mp_type=updownscale", # "--config=faenet-is2re-10k --model.edge_embed_type=all_rij --model.mp_type=local_env", # "--config=faenet-is2re-10k --model.mp_type=att", # "--config=faenet-is2re-10k --model.mp_type=base_with_att", From 0d70e8e9488b32de955d014f7ad89118030f1c83 Mon Sep 17 00:00:00 2001 From: AlexDuvalinho Date: Fri, 19 Apr 2024 07:23:36 -0400 Subject: [PATCH 08/27] create deup-depfaenet, add dropout_lin, modif class names --- configs/exps/catalyst/gflownet.yaml | 2 +- ocpmodels/models/__init__.py | 2 +- ocpmodels/models/depfaenet.py | 30 ++++---- ocpmodels/models/deup_depfaenet.py | 105 ++++++++++++++++++++++++++++ 4 files changed, 122 insertions(+), 17 deletions(-) create mode 100644 ocpmodels/models/deup_depfaenet.py diff --git a/configs/exps/catalyst/gflownet.yaml b/configs/exps/catalyst/gflownet.yaml index 4499b6e2a..8dc46c189 100644 --- a/configs/exps/catalyst/gflownet.yaml +++ b/configs/exps/catalyst/gflownet.yaml @@ -1,7 +1,7 @@ job: mem: 32GB cpus: 4 - gres: gpu:rtx8000:1 + gres: gpu:1 partition: long time: 15:00:00 diff --git a/ocpmodels/models/__init__.py b/ocpmodels/models/__init__.py index c15c217b0..9241e161f 100644 --- a/ocpmodels/models/__init__.py +++ b/ocpmodels/models/__init__.py @@ -7,7 +7,7 @@ from .cgcnn import CGCNN # noqa: F401 from .dimenet import DimeNet # noqa: F401 from .faenet import FAENet # noqa: F401 -from .depfaenet import depFAENet # noqa: F401 +from .depfaenet import DepFAENet # noqa: F401 from .gemnet.gemnet import GemNetT # noqa: F401 from .dimenet_plus_plus import DimeNetPlusPlus # noqa: F401 from .forcenet import ForceNet # noqa: F401 diff --git a/ocpmodels/models/depfaenet.py b/ocpmodels/models/depfaenet.py index 25f6a0968..97d197916 100644 --- a/ocpmodels/models/depfaenet.py +++ b/ocpmodels/models/depfaenet.py @@ -2,6 +2,7 @@ from torch.nn import Linear from torch import nn from torch_scatter import scatter +import torch.nn.functional as F from ocpmodels.models.faenet import FAENet from ocpmodels.models.faenet import OutputBlock as conOutputBlock @@ -12,9 +13,9 @@ from torch_geometric.data import Batch -class discOutputBlock(conOutputBlock): - def __init__(self, energy_head, hidden_channels, act, disconnected_mlp=False): - super(discOutputBlock, self).__init__(energy_head, hidden_channels, act) +class DiscOutputBlock(conOutputBlock): + def __init__(self, energy_head, hidden_channels, act, dropout_lin, disconnected_mlp=False): + super(DiscOutputBlock, self).__init__(energy_head, 
hidden_channels, act, dropout_lin) # We modify the last output linear function to make the output a vector self.lin2 = Linear(hidden_channels // 2, hidden_channels // 2) @@ -40,17 +41,16 @@ def forward(self, h, edge_index, edge_weight, batch, alpha): ): # Right now, this is the only available option. alpha = self.w_lin(h) - elif self.energy_head == "graclus": - h, batch = self.graclus(h, edge_index, edge_weight, batch) - - elif self.energy_head in {"pooling", "random"}: - h, batch, pooling_loss = self.hierarchical_pooling( - h, edge_index, edge_weight, batch - ) - # MLP + h = F.dropout( + h, p=self.dropout_lin, training=self.training or self.deup_inference + ) h = self.lin1(h) - h = self.lin2(self.act(h)) + h = self.act(h) + h = F.dropout( + h, p=self.dropout_lin, training=self.training or self.deup_inference + ) + h = self.lin2(h) if self.energy_head in { "weighted-av-initial-embeds", @@ -78,14 +78,14 @@ def forward(self, h, edge_index, edge_weight, batch, alpha): @registry.register_model("depfaenet") -class depFAENet(FAENet): +class DepFAENet(FAENet): def __init__(self, **kwargs): super().__init__(**kwargs) # We replace the old output block by the new output block self.disconnected_mlp = kwargs.get("disconnected_mlp", False) - self.output_block = discOutputBlock( - self.energy_head, kwargs["hidden_channels"], self.act, self.disconnected_mlp + self.output_block = DiscOutputBlock( + self.energy_head, kwargs["hidden_channels"], self.act, self.disconnected_mlp, self.dropout_lin, ) @conditional_grad(torch.enable_grad()) diff --git a/ocpmodels/models/deup_depfaenet.py b/ocpmodels/models/deup_depfaenet.py new file mode 100644 index 000000000..8457acf45 --- /dev/null +++ b/ocpmodels/models/deup_depfaenet.py @@ -0,0 +1,105 @@ +import torch +from torch import nn +from torch.nn import Linear +from torch_scatter import scatter +from ocpmodels.common.registry import registry +from ocpmodels.models.depfaenet import DepFAENet, DiscOutputBlock + + +class DeupDepOutputBlock(DiscOutputBlock): + def __init__( + self, energy_head, hidden_channels, act, dropout_lin, deup_features={} + ): + super().__init__(energy_head, hidden_channels, act, dropout_lin) + + self.deup_features = deup_features + self.deup_data_keys = [f"deup_{k}" for k in deup_features] + self.deup_extra_dim = 0 + self._set_q_dim = False + + if "s" in deup_features: + self.deup_extra_dim += 1 + if "energy_pred_std" in deup_features: + self.deup_extra_dim += 1 + if "q" in deup_features: + self._set_q_dim = True + + if self.deup_extra_dim > 0: + self.deup_lin = Linear( + self.lin1.out_features + self.deup_extra_dim, self.lin1.out_features + ) + + def forward(self, h, edge_index, edge_weight, batch, alpha, data=None): + if self._set_q_dim: + assert data is not None + assert "deup_q" in data.to_dict().keys() + self.deup_extra_dim += data.deup_q.shape[-1] + self.deup_lin = Linear( + self.lin1.out_features + self.deup_extra_dim, self.lin1.out_features + ) + print("\nLazy loading deup extra dim from q. 
New dim:", self.deup_extra_dim) + print("āš ļø OutputBlock will be reinitialized.\n") + self.reset_parameters() + self._set_q_dim = False + + if self.energy_head == "weighted-av-final-embeds": + alpha = self.w_lin(h) + + # OutputBlock to get final atom rep + # No dropout in deup-(dep)faenet + h = self.lin1(h) + h = self.act(h) + if self.deup_extra_dim <= 0: + h = self.lin2(h) + + if self.energy_head in { + "weighted-av-initial-embeds", + "weighted-av-final-embeds", + }: + h = h * alpha + + # Global pooling -- get final graph rep + out = scatter( + h, + batch, + dim=0, + reduce="mean" if self.deup_extra_dim > 0 else "add", + ) + + # Concat graph representation with deup features (s, kde(q), std) + # and apply MLPs + if self.deup_extra_dim > 0: + assert data is not None + data_keys = set(data.to_dict().keys()) + assert all(dk in data_keys for dk in self.deup_data_keys), ( + f"Some deup data keys ({self.deup_data_keys}) are missing" + + f" from the data dict ({data_keys})" + ) + out = torch.cat( + [out] + + [data[f"deup_{k}"][:, None].float() for k in self.deup_features], + dim=-1, + ) + out = self.deup_lin(out) + out = self.act(out) + out = self.lin2(out) + + return out + +@registry.register_model("deup_depfaenet") +class DeupFAENet(DepFAENet): + def __init__(self, *args, **kwargs): + kwargs["dropout_edge"] = 0 + super().__init__(*args, **kwargs) + self.output_block = DeupDepOutputBlock( + self.energy_head, + kwargs["hidden_channels"], + self.act, + self.dropout_lin, + kwargs.get("deup_features", {}), + ) + assert ( + self.energy_head != "weighted-av-initial-embeds" + ), "Unsupported head weighted-av-initial-embeds" + assert self.skip_co != "concat", "Unsupported skip connection concat" + assert self.skip_co != "add", "Unsupported skip connection add" \ No newline at end of file From fd9d1d1524a6d661ca2a1f882f3c315f3125640d Mon Sep 17 00:00:00 2001 From: AlexDuvalinho Date: Fri, 19 Apr 2024 07:31:42 -0400 Subject: [PATCH 09/27] add q --- ocpmodels/models/depfaenet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocpmodels/models/depfaenet.py b/ocpmodels/models/depfaenet.py index 97d197916..af3da682d 100644 --- a/ocpmodels/models/depfaenet.py +++ b/ocpmodels/models/depfaenet.py @@ -89,7 +89,7 @@ def __init__(self, **kwargs): ) @conditional_grad(torch.enable_grad()) - def energy_forward(self, data): + def energy_forward(self, data, q=None): # We need to save the tags so this step is necessary. self.output_block.tags_saver(data.tags) pred = super().energy_forward(data) From 2ab5c335b22903e958efb97f825a6f009d65ef28 Mon Sep 17 00:00:00 2001 From: AlexDuvalinho Date: Fri, 19 Apr 2024 08:00:59 -0400 Subject: [PATCH 10/27] fix forward of output block depfaenet --- ocpmodels/models/depfaenet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ocpmodels/models/depfaenet.py b/ocpmodels/models/depfaenet.py index af3da682d..87f76e08c 100644 --- a/ocpmodels/models/depfaenet.py +++ b/ocpmodels/models/depfaenet.py @@ -35,7 +35,7 @@ def __init__(self, energy_head, hidden_channels, act, dropout_lin, disconnected_ def tags_saver(self, tags): self.current_tags = tags - def forward(self, h, edge_index, edge_weight, batch, alpha): + def forward(self, h, edge_index, edge_weight, batch, alpha, data): if ( self.energy_head == "weighted-av-final-embeds" ): # Right now, this is the only available option. 
@@ -85,7 +85,7 @@ def __init__(self, **kwargs): # We replace the old output block by the new output block self.disconnected_mlp = kwargs.get("disconnected_mlp", False) self.output_block = DiscOutputBlock( - self.energy_head, kwargs["hidden_channels"], self.act, self.disconnected_mlp, self.dropout_lin, + self.energy_head, kwargs["hidden_channels"], self.act, self.dropout_lin, self.disconnected_mlp, ) @conditional_grad(torch.enable_grad()) From 9f18bfd2244e06a7a4e1c9eb3927223c2ed3df6e Mon Sep 17 00:00:00 2001 From: AlexDuvalinho Date: Tue, 23 Apr 2024 07:41:59 -0400 Subject: [PATCH 11/27] new model checkpoints to create deup-dataset --- ...c-faenet.yaml => data-with-depfaenet.yaml} | 6 ++-- configs/exps/deup/datasets/mc-faenet.yaml | 28 +++++++++++++++++++ ...c-faenet.yaml => old-train-mc-faenet.yaml} | 0 3 files changed, 31 insertions(+), 3 deletions(-) rename configs/exps/deup/datasets/{new-mc-faenet.yaml => data-with-depfaenet.yaml} (80%) create mode 100644 configs/exps/deup/datasets/mc-faenet.yaml rename configs/exps/deup/datasets/{train-mc-faenet.yaml => old-train-mc-faenet.yaml} (100%) diff --git a/configs/exps/deup/datasets/new-mc-faenet.yaml b/configs/exps/deup/datasets/data-with-depfaenet.yaml similarity index 80% rename from configs/exps/deup/datasets/new-mc-faenet.yaml rename to configs/exps/deup/datasets/data-with-depfaenet.yaml index 56ea29868..8c7d4a00e 100644 --- a/configs/exps/deup/datasets/new-mc-faenet.yaml +++ b/configs/exps/deup/datasets/data-with-depfaenet.yaml @@ -7,11 +7,11 @@ job: default: config: faenet-is2re-all wandb_project: ocp-deup - wandb_tags: base-model, MC-D, 4615191 + wandb_tags: depfaenet, MC-D,4621042 test_ri: True mode: train - checkpoint: /network/scratch/a/alexandre.duval/scratch/ocp/runs/4615191/checkpoints/best_checkpoint.pt - restart_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4615191/ + checkpoint: /network/scratch/a/alexandre.duval/ocp/runs/4621042/checkpoints/best_checkpoint.pt + restart_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4621042/ model: dropout_lowest_layer: output first_trainable_layer: dropout diff --git a/configs/exps/deup/datasets/mc-faenet.yaml b/configs/exps/deup/datasets/mc-faenet.yaml new file mode 100644 index 000000000..8069e3573 --- /dev/null +++ b/configs/exps/deup/datasets/mc-faenet.yaml @@ -0,0 +1,28 @@ +job: + mem: 32GB + cpus: 4 + gres: gpu:1 + partition: long + +default: + config: faenet-is2re-all + wandb_project: ocp-deup + wandb_tags: base-model, MC-D, 4616500 + test_ri: True + mode: train + checkpoint: /network/scratch/a/alexandre.duval/ocp/runs/4616500/checkpoints/best_checkpoint.pt + restart_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4616500/ + model: + dropout_lowest_layer: output + first_trainable_layer: dropout + dropout_lin: 0.7 + cp_data_to_tmpdir: true + inference_time_loops: 1 + deup_dataset: + create: after # "before" -> created before training (for deup) "after" -> created after training (for is2re) "" - not created + dataset_strs: ["train", "val_id", "val_ood_cat", "val_ood_ads"] + n_samples: 7 + +runs: + - optim: + max_epochs: 12 diff --git a/configs/exps/deup/datasets/train-mc-faenet.yaml b/configs/exps/deup/datasets/old-train-mc-faenet.yaml similarity index 100% rename from configs/exps/deup/datasets/train-mc-faenet.yaml rename to configs/exps/deup/datasets/old-train-mc-faenet.yaml From e0fb6f7738c746e205921ba9916166ea4f18a519 Mon Sep 17 00:00:00 2001 From: AlexDuvalinho Date: Tue, 23 Apr 2024 08:34:27 -0400 Subject: [PATCH 12/27] argparse deup_dataset + comments --- 
ocpmodels/datasets/deup_dataset_creator.py | 29 ++++++++++++++++++---- scripts/deup_dataset.sh | 11 ++++++++ 2 files changed, 35 insertions(+), 5 deletions(-) create mode 100644 scripts/deup_dataset.sh diff --git a/ocpmodels/datasets/deup_dataset_creator.py b/ocpmodels/datasets/deup_dataset_creator.py index b57522422..f3a4e3adc 100644 --- a/ocpmodels/datasets/deup_dataset_creator.py +++ b/ocpmodels/datasets/deup_dataset_creator.py @@ -228,6 +228,7 @@ def _structure(preds): if self.mc_dropout: if n_samples <= 0: raise ValueError("n_samples must be > 0 for MC-Dropout ensembles.") + # Speed up computation by re-using latent representation q for all models preds += [ self.trainers[0].model_forward(batch_list, mode="deup", q=q) for _ in range(n_samples - len(preds)) @@ -320,12 +321,14 @@ def create_deup_dataset( preds = self.forward( batch_list, n_samples=n_samples, shared_encoder=True ) - + # Compute mean and standard deviation of GNN predictions pred_mean = preds["energies"].mean(dim=1) # Batch pred_std = preds["energies"].std(dim=1) # Batch + # Compute residual between mean predicted energy and ground truth loss = self.trainers[0].loss_fn["energy"]( pred_mean, batch.y_relaxed.to(pred_mean.device) ) + # Store deup samples deup_samples += [ { "energy_target": batch.y_relaxed.clone(), @@ -431,13 +434,29 @@ def write_lmdb(self, samples, path, total_size=-1, max_samples=-1): from ocpmodels.datasets.deup_dataset_creator import DeupDatasetCreator from ocpmodels.datasets.lmdb_dataset import DeupDataset from ocpmodels.common.utils import JOB_ID, RUNS_DIR, make_config_from_conf_str + import argparse + + def parse_args(): + parser = argparse.ArgumentParser(description="Deup Dataset Creator") + parser.add_argument( + "--checkpoints", + nargs="+", + default="/network/scratch/a/alexandre.duval/ocp/runs/4616500/", + help="Paths to the checkpoints", + ) + parser.add_argument( + "--dropout", + type=float, + default=0.2, + help="Dropout value", + ) + return parser.parse_args() - base_trainer_path = "/network/scratch/a/alexandre.duval/ocp/runs/4615191" + args = parse_args() - # what models to load for inference trainers_conf = { - "checkpoints": [base_trainer_path], - "dropout": 0.7, + "checkpoints": args.checkpoints, + "dropout": args.dropout, } # setting first_trainable_layer to output means that the latent space # q will be defined as input to the output layer, even though the model diff --git a/scripts/deup_dataset.sh b/scripts/deup_dataset.sh new file mode 100644 index 000000000..d42384a05 --- /dev/null +++ b/scripts/deup_dataset.sh @@ -0,0 +1,11 @@ +#!/bin/bash +#SBATCH --job-name=deup-dataset +#SBATCH --ntasks=1 +#SBATCH --mem=32GB +#SBATCH --gres=gpu:1 +#SBATCH --output="/network/scratch/a/alexandre.duval/ocp/runs/output-%j.txt" # replace: location where you want to store the output of the job + +module load anaconda/3 # replace: load anaconda module +conda activate ocp # replace: conda env name +cd /home/mila/a/alexandre.duval/ocp/ocp # replace: location of the code +python -m ocpmodels.datasets.deup_dataset_creator \ No newline at end of file From 5b9c76f1973df00ae333333e71eadfc9f3af2053 Mon Sep 17 00:00:00 2001 From: AlexDuvalinho Date: Wed, 24 Apr 2024 05:18:45 -0400 Subject: [PATCH 13/27] fix chkpt_path + edge case error + new configs --- .../deup/datasets/data-with-depfaenet.yaml | 4 +- configs/exps/deup/gnn/depfaenet-training.yaml | 57 ++++++++++++++ configs/exps/deup/gnn/faenet-training.yaml | 7 +- configs/exps/deup/gnn/pretrain-depfaenet.yaml | 78 +++++++++++++++++++ ocpmodels/tasks/task.py | 
8 +- ocpmodels/trainers/single_trainer.py | 34 +++++--- 6 files changed, 168 insertions(+), 20 deletions(-) create mode 100644 configs/exps/deup/gnn/pretrain-depfaenet.yaml diff --git a/configs/exps/deup/datasets/data-with-depfaenet.yaml b/configs/exps/deup/datasets/data-with-depfaenet.yaml index 8c7d4a00e..e329beff8 100644 --- a/configs/exps/deup/datasets/data-with-depfaenet.yaml +++ b/configs/exps/deup/datasets/data-with-depfaenet.yaml @@ -5,7 +5,7 @@ job: partition: long default: - config: faenet-is2re-all + config: depfaenet-is2re-all wandb_project: ocp-deup wandb_tags: depfaenet, MC-D,4621042 test_ri: True @@ -15,7 +15,7 @@ default: model: dropout_lowest_layer: output first_trainable_layer: dropout - dropout_lin: 0.7 + dropout_lin: 0.3 cp_data_to_tmpdir: true inference_time_loops: 1 deup_dataset: diff --git a/configs/exps/deup/gnn/depfaenet-training.yaml b/configs/exps/deup/gnn/depfaenet-training.yaml index e69de29bb..d81ac5d38 100644 --- a/configs/exps/deup/gnn/depfaenet-training.yaml +++ b/configs/exps/deup/gnn/depfaenet-training.yaml @@ -0,0 +1,57 @@ +job: + mem: 32GB + cpus: 4 + gres: gpu:1 + partition: long + time: 15:00:00 + +default: + wandb_project: ocp-deup + wandb_tags: depfaenet, no-concat, with-tag0, dropout + test_ri: True + mode: train + graph_rewiring: "" + frame_averaging: 2D + fa_method: se3-random + cp_data_to_tmpdir: True + is_disconnected: true + model: + mp_type: updownscale_base + phys_embeds: True + tag_hidden_channels: 0 + pg_hidden_channels: 96 + energy_head: weighted-av-final-embeds + complex_mp: True + graph_norm: True + hidden_channels: 352 + num_filters: 288 + num_gaussians: 68 + num_interactions: 5 + second_layer_MLP: False + skip_co: False + cutoff: 4.0 + dropout_lin: 0.3 + optim: + batch_size: 256 + eval_batch_size: 256 + lr_initial: 0.002 + scheduler: LinearWarmupCosineAnnealingLR + eval_every: 0.4 + +runs: + + - config: depfaenet-is2re-all + note: Depfaenet per-ads-dataset + continue_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4023244 + adsorbates: "*O, *OH, *OH2, *H" + optim: + max_epochs: 10 + lr_initial: 0.0002 + + - config: depfaenet-is2re-all + note: Depfaenet per-ads-dataset + continue_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4023244 + adsorbates: "*O, *OH, *OH2, *H" + optim: + max_epochs: 12 + lr_initial: 0.0001 diff --git a/configs/exps/deup/gnn/faenet-training.yaml b/configs/exps/deup/gnn/faenet-training.yaml index 8bf38ec5f..0d6aa34d5 100644 --- a/configs/exps/deup/gnn/faenet-training.yaml +++ b/configs/exps/deup/gnn/faenet-training.yaml @@ -8,8 +8,7 @@ job: default: test_ri: True mode: train - graph_rewiring: remove-tag-0 - wandb_tags: "top-model" + wandb_tags: faenet, no-concat, with-tag0, dropout wandb_project: ocp-deup optim: batch_size: 256 @@ -36,8 +35,10 @@ runs: second_layer_MLP: False skip_co: False cutoff: 6.0 + dropout_lin: 0.3 + dropout_lowest_layer: output optim: lr_initial: 0.002 scheduler: LinearWarmupCosineAnnealingLR - max_epochs: 12 + max_epochs: 14 eval_every: 0.25 \ No newline at end of file diff --git a/configs/exps/deup/gnn/pretrain-depfaenet.yaml b/configs/exps/deup/gnn/pretrain-depfaenet.yaml new file mode 100644 index 000000000..83029997d --- /dev/null +++ b/configs/exps/deup/gnn/pretrain-depfaenet.yaml @@ -0,0 +1,78 @@ +job: + mem: 32GB + cpus: 4 + gres: gpu:1 + partition: long + time: 15:00:00 + +default: + wandb_project: ocp-deup + wandb_tags: gflownet-model, depfaenet + test_ri: True + mode: train + graph_rewiring: "" + frame_averaging: 2D + fa_method: se3-random + cp_data_to_tmpdir: True + 
is_disconnected: true + model: + mp_type: updownscale_base + phys_embeds: True + tag_hidden_channels: 0 + pg_hidden_channels: 96 + energy_head: weighted-av-final-embeds + complex_mp: True + graph_norm: True + hidden_channels: 352 + num_filters: 288 + num_gaussians: 68 + num_interactions: 5 + second_layer_MLP: False + skip_co: False + cutoff: 4.0 + dropout_lin: 0.3 + optim: + batch_size: 256 + eval_batch_size: 256 + lr_initial: 0.002 + scheduler: LinearWarmupCosineAnnealingLR + eval_every: 0.4 + +runs: + + - config: depfaenet-is2re-all + note: Depfaenet pre-train + dropout + optim: + max_epochs: 12 + lr_initial: 0.0002 + + - config: depfaenet-is2re-all + note: Depfaenet pre-train + dropout + optim: + max_epochs: 10 + lr_initial: 0.0001 + + - config: depfaenet-is2re-all + note: depfaenet with top configs + dropout + model: + mp_type: updownscale_base + phys_embeds: True + tag_hidden_channels: 32 + pg_hidden_channels: 96 + energy_head: weighted-av-final-embeds + complex_mp: True + graph_norm: True + hidden_channels: 352 + num_filters: 288 + num_gaussians: 68 + num_interactions: 5 + second_layer_MLP: False + skip_co: False + cutoff: 4.0 + optim: + batch_size: 256 + eval_batch_size: 256 + lr_initial: 0.002 + scheduler: LinearWarmupCosineAnnealingLR + max_epochs: 9 + eval_every: 0.4 \ No newline at end of file diff --git a/ocpmodels/tasks/task.py b/ocpmodels/tasks/task.py index 8a9e3d8be..c3c938eec 100644 --- a/ocpmodels/tasks/task.py +++ b/ocpmodels/tasks/task.py @@ -27,10 +27,10 @@ def setup(self, trainer): self.trainer.load_checkpoint(self.config["checkpoint"]) print() - # save checkpoint path to runner state for slurm resubmissions - self.chkpt_path = os.path.join( - self.trainer.config["checkpoint_dir"], "checkpoint.pt" - ) + # save checkpoint path to runner state for slurm resubmissions + self.chkpt_path = os.path.join( + self.trainer.config["checkpoint_dir"], "checkpoint.pt" + ) def run(self): raise NotImplementedError diff --git a/ocpmodels/trainers/single_trainer.py b/ocpmodels/trainers/single_trainer.py index 25f82ec9a..b9e2f921c 100644 --- a/ocpmodels/trainers/single_trainer.py +++ b/ocpmodels/trainers/single_trainer.py @@ -227,7 +227,11 @@ def train( # Calculate start_epoch from step instead of loading the epoch number # to prevent inconsistencies due to different batch size in checkpoint. 
- if self.config["continue_from_dir"] is not None and self.config["adsorbates"] not in {None, "all"}: + if ( + "continue_from_dir" in self.config + and self.config["continue_from_dir"] is not None + and self.config["adsorbates"] not in {None, "all"} + ): self.step = 0 start_epoch = self.step // n_train max_epochs = self.config["optim"]["max_epochs"] @@ -589,11 +593,15 @@ def compute_loss(self, preds, batch_list): # Energy loss energy_target = torch.cat( [ - batch.y_relaxed.to(self.device) - if self.task_name == "is2re" - else batch.deup_loss.to(self.device) - if self.task_name == "deup_is2re" - else batch.y.to(self.device) + ( + batch.y_relaxed.to(self.device) + if self.task_name == "is2re" + else ( + batch.deup_loss.to(self.device) + if self.task_name == "deup_is2re" + else batch.y.to(self.device) + ) + ) for batch in batch_list ], dim=0, @@ -706,11 +714,15 @@ def compute_metrics( target = { "energy": torch.cat( [ - batch.y_relaxed.to(self.device) - if self.task_name == "is2re" - else batch.deup_loss.to(self.device) - if self.task_name == "deup_is2re" - else batch.y.to(self.device) + ( + batch.y_relaxed.to(self.device) + if self.task_name == "is2re" + else ( + batch.deup_loss.to(self.device) + if self.task_name == "deup_is2re" + else batch.y.to(self.device) + ) + ) for batch in batch_list ], dim=0, From 8599de7e1b56b8a0a590fd6c9212396b358d3fe1 Mon Sep 17 00:00:00 2001 From: AlexDuvalinho Date: Wed, 24 Apr 2024 07:57:56 -0400 Subject: [PATCH 14/27] adapt configs for v0 deup-faenet training on deup-dataset --- .../exps/deup/uncertainty/faenet_test.yaml | 31 ++ configs/exps/deup/uncertainty/v1.yaml | 13 +- configs/models/deup_depfaenet.yaml | 273 ++++++++++++++++++ configs/models/deup_faenet.yaml | 53 ++-- configs/models/tasks/deup_is2re.yaml | 2 +- 5 files changed, 340 insertions(+), 32 deletions(-) create mode 100644 configs/exps/deup/uncertainty/faenet_test.yaml create mode 100644 configs/models/deup_depfaenet.yaml diff --git a/configs/exps/deup/uncertainty/faenet_test.yaml b/configs/exps/deup/uncertainty/faenet_test.yaml new file mode 100644 index 000000000..19742c31c --- /dev/null +++ b/configs/exps/deup/uncertainty/faenet_test.yaml @@ -0,0 +1,31 @@ +job: + mem: 32GB + cpus: 4 + gres: gpu:1 + partition: long + +default: + config: deup_faenet-deup_is2re-all + wandb_project: ocp-deup + wandb_tags: faenet, MC-D, 4616500-model, 4642835-dataset + test_ri: True + mode: train + model: + dropout_lowest_layer: null + first_trainable_layer: output + dropout_lin: 0.3 + cp_data_to_tmpdir: false + inference_time_loops: 1 + # restart_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4621042/ + # checkpoint: /network/scratch/a/alexandre.duval/ocp/runs/4621042/ + dataset: # mandatory if restart_from_dir is set + default_val: deup-val_ood_cat-val_ood_ads + deup-train-val_id: + src: /network/scratch/a/alexandre.duval/ocp/runs/4642835/deup_dataset + deup-val_ood_cat-val_ood_ads: + src: /network/scratch/a/alexandre.duval/ocp/runs/4642835/deup_dataset + deup_dataset: + create: False + +runs: + - note: deup-faenet d=0.2 (not trained with d) \ No newline at end of file diff --git a/configs/exps/deup/uncertainty/v1.yaml b/configs/exps/deup/uncertainty/v1.yaml index 4f69d7828..1f6a064c2 100644 --- a/configs/exps/deup/uncertainty/v1.yaml +++ b/configs/exps/deup/uncertainty/v1.yaml @@ -6,25 +6,24 @@ job: default: config: deup_faenet-deup_is2re-all - wandb_project: ocp-deup - wandb_tags: base-model, MC-D, 3264530 + wandb_tags: base-model, MC-D, 4616500-model, 4642835-dataset test_ri: True mode: train model: 
dropout_lowest_layer: null first_trainable_layer: output - dropout_lin: 0.7 + dropout_lin: 0.3 cp_data_to_tmpdir: false inference_time_loops: 1 - restart_from_dir: /network/scratch/s/schmidtv/ocp/runs/3264530 - checkpoint: /network/scratch/s/schmidtv/ocp/runs/3264530 + restart_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4621042/ + # checkpoint: /network/scratch/a/alexandre.duval/ocp/runs/4621042/ dataset: # mandatory if restart_from_dir is set default_val: deup-val_ood_cat-val_ood_ads deup-train-val_id: - src: /network/scratch/s/schmidtv/ocp/runs/3264530/deup_dataset + src: /network/scratch/a/alexandre.duval/ocp/runs/4642835/deup_dataset deup-val_ood_cat-val_ood_ads: - src: /network/scratch/s/schmidtv/ocp/runs/3264530/deup_dataset + src: /network/scratch/a/alexandre.duval/ocp/runs/4642835/deup_dataset deup_dataset: create: False diff --git a/configs/models/deup_depfaenet.yaml b/configs/models/deup_depfaenet.yaml new file mode 100644 index 000000000..24ab2587c --- /dev/null +++ b/configs/models/deup_depfaenet.yaml @@ -0,0 +1,273 @@ +default: + model: + name: deup_depfaenet + act: swish + dropout_lin: 0.0 + dropout_edge: 0.0 + dropout_lowest_layer: output # lowest layer where `dropout_lin` is applied. Can be `inter-{i}` or `output`. Defaults to `output`. + first_trainable_layer: dropout # lowest layer to NOT freeze. All previous layers will be frozen. Can be ``, `embed`, `inter-{i}`, `output`, or `dropout`. + # if it is `` then no layer is frozen. If it is `dropout` then it will be set to the layer before `dropout_lowest_layer`. + # Defaults to ``. + hidden_channels: 384 + num_filters: 480 + num_interactions: 5 + num_gaussians: 104 + cutoff: 6.0 + use_pbc: True + regress_forces: False + tag_hidden_channels: 64 # only for OC20 + pg_hidden_channels: 64 # period & group embedding hidden channels + phys_embeds: True # physics-aware embeddings for atoms + phys_hidden_channels: 0 + energy_head: weighted-av-final-embeds # Energy head: {False, weighted-av-initial-embeds, weighted-av-final-embeds} + skip_co: False # Skip connections {False, "add", "concat"} + second_layer_MLP: False # in EmbeddingBlock + complex_mp: True # 2-layer MLP in Interaction blocks + mp_type: base # Message Passing type {'base', 'simple', 'updownscale', 'updownscale_base'} + graph_norm: True # graph normalization layer + force_decoder_type: "mlp" # force head (`"simple"`, `"mlp"`, `"res"`, `"res_updown"`) + force_decoder_model_config: + simple: + hidden_channels: 128 + norm: batch1d # batch1d, layer or null + mlp: + hidden_channels: 256 + norm: batch1d # batch1d, layer or null + res: + hidden_channels: 128 + norm: batch1d # batch1d, layer or null + res_updown: + hidden_channels: 128 + norm: batch1d # batch1d, layer or null + deup_features: [s, energy_pred_std] + optim: + batch_size: 256 + eval_batch_size: 256 + max_epochs: 12 + scheduler: LinearWarmupCosineAnnealingLR + optimizer: AdamW + num_workers: 4 + warmup_steps: 6000 + warmup_factor: 0.2 + lr_initial: 0.002 + lr_gamma: 0.1 + energy_grad_coefficient: 10 + force_coefficient: 30 + energy_coefficient: 1 + lr_milestones: + - 18000 + - 27000 + - 37000 + epoch_fine_tune: 4 + + frame_averaging: "" # 2D, 3D, da, False + fa_method: "" # can be {None, full, random, det, e3, e3-random, e3-det} + +# ------------------- +# ----- IS2RE ----- +# ------------------- + +deup_is2re: # was: is2re + 10k: + optim: + lr_initial: 0.005 + lr_milestones: # epochs at which lr_initial <- lr_initial * lr_gamma + - 1562 + - 2343 + - 3125 + warmup_steps: 468 + max_epochs: 20 + + 100k: + 
model: + hidden_channels: 256 + optim: + lr_initial: 0.005 + lr_milestones: # epochs at which lr_initial <- lr_initial * lr_gamma + - 1562 + - 2343 + - 3125 + warmup_steps: 468 + max_epochs: 20 + + all: + model: + hidden_channels: 384 + num_interactions: 4 + optim: + batch_size: 256 + eval_batch_size: 256 + lr_initial: 0.001 + lr_gamma: 0.1 + lr_milestones: # steps at which lr_initial <- lr_initial * lr_gamma + - 18000 + - 27000 + - 37000 + warmup_steps: 6000 + max_epochs: 20 + +# ------------------ +# ----- S2EF ----- +# ------------------ + +# For 2 GPUs + +s2ef: + default: + model: + num_interactions: 4 + hidden_channels: 750 + num_gaussians: 200 + num_filters: 256 + regress_forces: "direct" + optim: + batch_size: 96 + eval_batch_size: 96 + warmup_factor: 0.2 + lr_gamma: 0.1 + lr_initial: 0.0001 + max_epochs: 15 + warmup_steps: 30000 + lr_milestones: + - 55000 + - 75000 + - 10000 + + 200k: {} + + # 1 gpus + 2M: + model: + num_interactions: 5 + hidden_channels: 1024 + num_gaussians: 200 + num_filters: 256 + optim: + batch_size: 192 + eval_batch_size: 192 + + 20M: {} + + all: {} + +qm9: + default: + model: + act: swish + att_heads: 1 + complex_mp: true + cutoff: 6.0 + energy_head: '' + graph_norm: true + graph_rewiring: null + hidden_channels: 400 + max_num_neighbors: 30 + mp_type: updownscale_base + num_filters: 480 + num_gaussians: 100 + num_interactions: 5 + otf_graph: false + pg_hidden_channels: 32 + phys_embeds: false + phys_hidden_channels: 0 + regress_forces: '' + second_layer_MLP: true + skip_co: true + tag_hidden_channels: 0 + use_pbc: false + + optim: + batch_size: 64 + es_min_abs_change: 1.0e-06 + es_patience: 20 + es_warmup_epochs: 600 + eval_batch_size: 64 + factor: 0.9 + loss_energy: mse + lr_gamma: 0.1 + lr_initial: 0.0003 + max_epochs: 1500 + min_lr: 1.0e-06 + mode: min + optimizer: AdamW + patience: 15 + scheduler: ReduceLROnPlateau + threshold: 0.0001 + threshold_mode: abs + verbose: true + warmup_factor: 0.2 + warmup_steps: 3000 + + 10k: {} + all: {} + +qm7x: + default: + model: # SOTA settings + act: swish + att_heads: 1 + complex_mp: true + cutoff: 5.0 + energy_head: false + force_decoder_model_config: + mlp: + hidden_channels: 256 + norm: batch1d + res: + hidden_channels: 128 + norm: batch1d + res_updown: + hidden_channels: 128 + norm: layer + simple: + hidden_channels: 128 + norm: batch1d + force_decoder_type: res_updown + graph_norm: false + hidden_channels: 500 + max_num_neighbors: 40 + mp_type: updownscale_base + num_filters: 400 + num_gaussians: 50 + num_interactions: 5 + otf_graph: false + pg_hidden_channels: 32 + phys_embeds: true + phys_hidden_channels: 0 + regress_forces: direct_with_gradient_target + second_layer_MLP: true + skip_co: false + tag_hidden_channels: 0 + use_pbc: false + + optim: + batch_size: 100 + energy_grad_coefficient: 5 + eval_batch_size: 100 + eval_every: 0.34 + factor: 0.75 + force_coefficient: 75 + loss_energy: mae + loss_force: mse + lr_gamma: 0.1 + lr_initial: 0.000193 + max_steps: 4000000 + min_lr: 1.0e-06 + mode: min + optimizer: AdamW + scheduler: ReduceLROnPlateau + threshold: 0.001 + threshold_mode: abs + verbose: true + warmup_factor: 0.2 + warmup_steps: 3000 + + all: {} + 1k: {} + +qm9: + default: + model: + use_pbc: False + all: {} + 10k: {} diff --git a/configs/models/deup_faenet.yaml b/configs/models/deup_faenet.yaml index bdc723bb5..f6e52681f 100644 --- a/configs/models/deup_faenet.yaml +++ b/configs/models/deup_faenet.yaml @@ -8,27 +8,24 @@ default: first_trainable_layer: dropout # lowest layer to NOT freeze. 
All previous layers will be frozen. Can be ``, `embed`, `inter-{i}`, `output`, or `dropout`. # if it is `` then no layer is frozen. If it is `dropout` then it will be set to the layer before `dropout_lowest_layer`. # Defaults to ``. - hidden_channels: 128 - num_filters: 100 - num_interactions: 3 - num_gaussians: 100 + hidden_channels: 384 + num_filters: 480 + num_interactions: 5 + num_gaussians: 104 cutoff: 6.0 use_pbc: True regress_forces: False - # drlab attributes: - tag_hidden_channels: 0 # 32 - pg_hidden_channels: 0 # 32 -> period & group embedding hidden channels - phys_embeds: False # True + tag_hidden_channels: 64 # only for OC20 + pg_hidden_channels: 64 # period & group embedding hidden channels + phys_embeds: True # physics-aware embeddings for atoms phys_hidden_channels: 0 - energy_head: False # can be {False, weighted-av-initial-embeds, weighted-av-final-embeds} - # faenet new features - skip_co: False # output skip connections {False, "add", "concat"} + energy_head: weighted-av-final-embeds # Energy head: {False, weighted-av-initial-embeds, weighted-av-final-embeds} + skip_co: False # Skip connections {False, "add", "concat"} second_layer_MLP: False # in EmbeddingBlock - complex_mp: False - mp_type: base # {'base', 'simple', 'updownscale', 'att', 'base_with_att', 'local_env'} - graph_norm: False # bool - att_heads: 1 # int - force_decoder_type: "mlp" # can be {"" or "simple"} | only used if regress_forces is True + complex_mp: True # 2-layer MLP in Interaction blocks + mp_type: base # Message Passing type {'base', 'simple', 'updownscale', 'updownscale_base'} + graph_norm: True # graph normalization layer + force_decoder_type: "mlp" # force head (`"simple"`, `"mlp"`, `"res"`, `"res_updown"`) force_decoder_model_config: simple: hidden_channels: 128 @@ -44,19 +41,27 @@ default: norm: batch1d # batch1d, layer or null deup_features: [s, energy_pred_std] optim: - batch_size: 64 - eval_batch_size: 64 + batch_size: 256 + eval_batch_size: 256 + max_epochs: 12 + scheduler: LinearWarmupCosineAnnealingLR + optimizer: AdamW num_workers: 4 - lr_gamma: 0.1 - lr_initial: 0.001 + warmup_steps: 6000 warmup_factor: 0.2 - max_epochs: 20 - energy_grad_coefficient: 5 + lr_initial: 0.002 + lr_gamma: 0.1 + energy_grad_coefficient: 10 force_coefficient: 30 energy_coefficient: 1 + lr_milestones: + - 18000 + - 27000 + - 37000 + epoch_fine_tune: 4 - frame_averaging: False # 2D, 3D, da, False - fa_method: False # can be {None, full, random, det, e3, e3-random, e3-det} + frame_averaging: "" # 2D, 3D, da, False + fa_method: "" # can be {None, full, random, det, e3, e3-random, e3-det} # ------------------- # ----- IS2RE ----- diff --git a/configs/models/tasks/deup_is2re.yaml b/configs/models/tasks/deup_is2re.yaml index 65aab2e31..fa85d99ab 100644 --- a/configs/models/tasks/deup_is2re.yaml +++ b/configs/models/tasks/deup_is2re.yaml @@ -41,7 +41,7 @@ default: n_samples: 7 ensemble_checkpoints: /network/scratch/a/alexandre.duval/ocp/runs/2935198 - ensemble_dropout: 0.7 + ensemble_dropout: 0.3 10k: From 58b992727c63e6156698c6dabdfb5c3d250f3d4a Mon Sep 17 00:00:00 2001 From: Christina Date: Thu, 25 Apr 2024 07:53:11 -0400 Subject: [PATCH 15/27] fix module load --- ocpmodels/common/utils.py | 18 +++++++++++------- ocpmodels/models/__init__.py | 6 +++++- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/ocpmodels/common/utils.py b/ocpmodels/common/utils.py index af7cddc22..99185a39a 100644 --- a/ocpmodels/common/utils.py +++ b/ocpmodels/common/utils.py @@ -755,7 +755,7 @@ def 
add_edge_distance_to_graph( # Copied from https://github.com/facebookresearch/mmf/blob/master/mmf/utils/env.py#L89. -def setup_imports(): +def setup_imports(skip_modules=[]): from ocpmodels.common.registry import registry try: @@ -803,10 +803,14 @@ def setup_imports(): splits = f.split(os.sep) file_name = splits[-1] module_name = file_name[: file_name.find(".py")] - importlib.import_module("ocpmodels.%s.%s" % (key[1:], module_name)) + if module_name not in skip_modules: + importlib.import_module("ocpmodels.%s.%s" % (key[1:], module_name)) # manual model imports - importlib.import_module("ocpmodels.models.gemnet_oc.gemnet_oc") + try: + importlib.import_module("ocpmodels.models.gemnet_oc.gemnet_oc") + except: + print("unable to load gemnet_oc") experimental_folder = os.path.join(root_folder, "../experimental/") if os.path.exists(experimental_folder): @@ -1797,7 +1801,7 @@ def make_script_trainer(str_args=[], overrides={}, silent=False, mode="train"): return trainer -def make_config_from_dir(path, mode, overrides={}, silent=None): +def make_config_from_dir(path, mode, overrides={}, silent=None, setup_imports=[]): """ Make a config from a directory. This is useful when restarting or continuing from a previous run. @@ -1834,11 +1838,11 @@ def make_config_from_dir(path, mode, overrides={}, silent=None): config = build_config(default_args, silent=silent) config = merge_dicts(config, overrides) - setup_imports() + setup_imports(setup_imports=setup_imports) return config -def make_trainer_from_dir(path, mode, overrides={}, silent=None): +def make_trainer_from_dir(path, mode, overrides={}, silent=None, skip_imports=[]): """ Make a trainer from a directory. @@ -1854,7 +1858,7 @@ def make_trainer_from_dir(path, mode, overrides={}, silent=None): Returns: Trainer: The loaded trainer. """ - config = make_config_from_dir(path, mode, overrides, silent) + config = make_config_from_dir(path, mode, overrides, silent, skip_imports) return registry.get_trainer_class(config["trainer"])(**config) diff --git a/ocpmodels/models/__init__.py b/ocpmodels/models/__init__.py index 9241e161f..8a56eaea4 100644 --- a/ocpmodels/models/__init__.py +++ b/ocpmodels/models/__init__.py @@ -8,7 +8,11 @@ from .dimenet import DimeNet # noqa: F401 from .faenet import FAENet # noqa: F401 from .depfaenet import DepFAENet # noqa: F401 -from .gemnet.gemnet import GemNetT # noqa: F401 + +try: + from .gemnet.gemnet import GemNetT # noqa: F401 +except: + print("unable to load gemnet") from .dimenet_plus_plus import DimeNetPlusPlus # noqa: F401 from .forcenet import ForceNet # noqa: F401 from .schnet import SchNet # noqa: F401 From 5a5524c59f467d406ab5fffc2dc391c685b18441 Mon Sep 17 00:00:00 2001 From: Christina Date: Thu, 25 Apr 2024 08:14:50 -0400 Subject: [PATCH 16/27] return hidden state in wrapper --- ocpmodels/common/gfn.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ocpmodels/common/gfn.py b/ocpmodels/common/gfn.py index 95257359f..0a1f45521 100644 --- a/ocpmodels/common/gfn.py +++ b/ocpmodels/common/gfn.py @@ -107,6 +107,7 @@ def forward( self, batch: Union[Batch, Data, List[Data], List[Batch]], preprocess: bool = True, + retrieve_hidden: bool = False, ): """Perform a forward pass of the model when frame averaging is applied. @@ -162,6 +163,8 @@ def forward( if preds["energy"].shape[-1] == 1: preds["energy"] = preds["energy"].view(-1) + if retrieve_hidden: + return preds return preds["energy"] # denormalize? 
def freeze(self): From 6594960177e4f3b14853ac62ffe146ef6092d5af Mon Sep 17 00:00:00 2001 From: vict0rsch Date: Thu, 25 Apr 2024 09:49:10 -0400 Subject: [PATCH 17/27] `scatter` `q` in `energy_forward` --- ocpmodels/models/faenet.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/ocpmodels/models/faenet.py b/ocpmodels/models/faenet.py index dc7dd1efd..7d5c6044d 100644 --- a/ocpmodels/models/faenet.py +++ b/ocpmodels/models/faenet.py @@ -7,17 +7,17 @@ import torch.nn.functional as F from torch import nn from torch.nn import Embedding, Linear -from torch_geometric.utils import dropout_edge from torch_geometric.nn import MessagePassing, radius_graph from torch_geometric.nn.norm import GraphNorm +from torch_geometric.utils import dropout_edge from torch_scatter import scatter from ocpmodels.common.registry import registry +from ocpmodels.common.utils import conditional_grad, get_pbc_distances from ocpmodels.models.base_model import BaseModel from ocpmodels.models.force_decoder import ForceDecoder from ocpmodels.models.utils.activations import swish from ocpmodels.modules.phys_embeddings import PhysEmbedding -from ocpmodels.common.utils import get_pbc_distances, conditional_grad class GaussianSmearing(nn.Module): @@ -751,6 +751,9 @@ def energy_forward(self, data, q=None): q = h.clone().detach() else: + # WARNING + # q which is NOT the hidden state h if it was stored as a scattered + # version of h. This works for GPs, NOT for MC-dropout h = q alpha = None @@ -763,6 +766,9 @@ def energy_forward(self, data, q=None): elif self.skip_co == "add": energy = sum(energy_skip_co) + if q and len(q) > len(energy): + q = scatter(q, batch, dim=0, reduce="mean") # N_graphs x hidden_channels + preds = { "energy": energy, "hidden_state": h, From 5ce2f3f7c074aa063238d153817afca1418be35f Mon Sep 17 00:00:00 2001 From: AlexDuvalinho Date: Thu, 25 Apr 2024 14:03:17 -0400 Subject: [PATCH 18/27] fix configs for depfaenet/faenet fine-tuning --- ...-training.yaml => depfaenet-finetune.yaml} | 46 +++++++++++-------- configs/exps/deup/gnn/faenet-finetune.yaml | 46 +++++++++++++++++++ 2 files changed, 74 insertions(+), 18 deletions(-) rename configs/exps/deup/gnn/{depfaenet-training.yaml => depfaenet-finetune.yaml} (78%) create mode 100644 configs/exps/deup/gnn/faenet-finetune.yaml diff --git a/configs/exps/deup/gnn/depfaenet-training.yaml b/configs/exps/deup/gnn/depfaenet-finetune.yaml similarity index 78% rename from configs/exps/deup/gnn/depfaenet-training.yaml rename to configs/exps/deup/gnn/depfaenet-finetune.yaml index d81ac5d38..1da2e29bc 100644 --- a/configs/exps/deup/gnn/depfaenet-training.yaml +++ b/configs/exps/deup/gnn/depfaenet-finetune.yaml @@ -13,6 +13,33 @@ default: graph_rewiring: "" frame_averaging: 2D fa_method: se3-random + is_disconnected: true + +runs: + + - config: depfaenet-is2re-all + note: Depfaenet per-ads-dataset + continue_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4647488 #4647466 #4023244 + adsorbates: "*O, *OH, *OH2, *H" + optim: + max_epochs: 10 + lr_initial: 0.0002 + + - config: depfaenet-is2re-all + note: Depfaenet per-ads-dataset + continue_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4647488 #4647466 # 4023244 + adsorbates: "*O, *OH, *OH2, *H" + optim: + max_epochs: 12 + lr_initial: 0.0001 + +- config: depfaenet-is2re-all + note: Depfaenet per-ads-dataset + continue_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4647466 # 4023244 + adsorbates: "*O, *OH, *OH2, *H" + graph_rewiring: "" + frame_averaging: 2D + fa_method: se3-random 
cp_data_to_tmpdir: True is_disconnected: true model: @@ -37,21 +64,4 @@ default: lr_initial: 0.002 scheduler: LinearWarmupCosineAnnealingLR eval_every: 0.4 - -runs: - - - config: depfaenet-is2re-all - note: Depfaenet per-ads-dataset - continue_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4023244 - adsorbates: "*O, *OH, *OH2, *H" - optim: - max_epochs: 10 - lr_initial: 0.0002 - - - config: depfaenet-is2re-all - note: Depfaenet per-ads-dataset - continue_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4023244 - adsorbates: "*O, *OH, *OH2, *H" - optim: - max_epochs: 12 - lr_initial: 0.0001 + max_epochs: 12 \ No newline at end of file diff --git a/configs/exps/deup/gnn/faenet-finetune.yaml b/configs/exps/deup/gnn/faenet-finetune.yaml new file mode 100644 index 000000000..fae81269a --- /dev/null +++ b/configs/exps/deup/gnn/faenet-finetune.yaml @@ -0,0 +1,46 @@ +job: + mem: 32GB + cpus: 4 + gres: gpu:1 + partition: long + time: 18:00:00 + +default: + test_ri: True + mode: train + wandb_tags: faenet, no-concat, with-tag0, dropout, fine-tuned + wandb_project: ocp-deup + optim: + batch_size: 256 + eval_batch_size: 256 + cp_data_to_tmpdir: True + +runs: + - config: faenet-is2re-all + note: "fine-tuned faenet" + continue_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4647489 + adsorbates: "*O, *OH, *OH2, *H" + frame_averaging: 2D + fa_method: se3-random + model: + mp_type: updownscale_base + phys_embeds: True + tag_hidden_channels: 32 + pg_hidden_channels: 96 + energy_head: weighted-av-final-embeds + complex_mp: True + graph_norm: True + hidden_channels: 384 + num_filters: 480 + num_gaussians: 104 + num_interactions: 5 + second_layer_MLP: False + skip_co: False + cutoff: 6.0 + dropout_lin: 0.3 + dropout_lowest_layer: output + optim: + lr_initial: 0.002 + scheduler: LinearWarmupCosineAnnealingLR + max_epochs: 14 + eval_every: 0.25 \ No newline at end of file From 606fcd07ae72aa3b80d48abce3fc62ed13593dbc Mon Sep 17 00:00:00 2001 From: Christina Date: Fri, 26 Apr 2024 02:33:56 -0400 Subject: [PATCH 19/27] quickfixes --- ocpmodels/common/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ocpmodels/common/utils.py b/ocpmodels/common/utils.py index 99185a39a..b7a39d391 100644 --- a/ocpmodels/common/utils.py +++ b/ocpmodels/common/utils.py @@ -755,7 +755,7 @@ def add_edge_distance_to_graph( # Copied from https://github.com/facebookresearch/mmf/blob/master/mmf/utils/env.py#L89. -def setup_imports(skip_modules=[]): +def setup_imports(skip_imports=[]): from ocpmodels.common.registry import registry try: @@ -803,7 +803,7 @@ def setup_imports(skip_modules=[]): splits = f.split(os.sep) file_name = splits[-1] module_name = file_name[: file_name.find(".py")] - if module_name not in skip_modules: + if module_name not in skip_imports: importlib.import_module("ocpmodels.%s.%s" % (key[1:], module_name)) # manual model imports @@ -1801,7 +1801,7 @@ def make_script_trainer(str_args=[], overrides={}, silent=False, mode="train"): return trainer -def make_config_from_dir(path, mode, overrides={}, silent=None, setup_imports=[]): +def make_config_from_dir(path, mode, overrides={}, silent=None, skip_imports=[]): """ Make a config from a directory. This is useful when restarting or continuing from a previous run. 
@@ -1838,7 +1838,7 @@ def make_config_from_dir(path, mode, overrides={}, silent=None, setup_imports=[] config = build_config(default_args, silent=silent) config = merge_dicts(config, overrides) - setup_imports(setup_imports=setup_imports) + setup_imports(skip_imports=skip_imports) return config From fcf265055d8f9c7f6ce146670eed92655af94f8c Mon Sep 17 00:00:00 2001 From: AlexDuvalinho Date: Fri, 26 Apr 2024 03:30:53 -0400 Subject: [PATCH 20/27] update configs deup-depfaenet --- configs/exps/deup/gnn/faenet-finetune.yaml | 5 +- configs/exps/deup/gnn/faenet-training.yaml | 5 +- .../exps/deup/uncertainty/deup_depfaenet.yaml | 61 +++++++++++++++++++ ocpmodels/datasets/deup_dataset_creator.py | 4 +- scripts/gnn_dev.py | 9 ++- 5 files changed, 73 insertions(+), 11 deletions(-) create mode 100644 configs/exps/deup/uncertainty/deup_depfaenet.yaml diff --git a/configs/exps/deup/gnn/faenet-finetune.yaml b/configs/exps/deup/gnn/faenet-finetune.yaml index fae81269a..de5180834 100644 --- a/configs/exps/deup/gnn/faenet-finetune.yaml +++ b/configs/exps/deup/gnn/faenet-finetune.yaml @@ -10,9 +10,10 @@ default: mode: train wandb_tags: faenet, no-concat, with-tag0, dropout, fine-tuned wandb_project: ocp-deup + graph_rewiring: "" optim: - batch_size: 256 - eval_batch_size: 256 + batch_size: 232 + eval_batch_size: 232 cp_data_to_tmpdir: True runs: diff --git a/configs/exps/deup/gnn/faenet-training.yaml b/configs/exps/deup/gnn/faenet-training.yaml index 0d6aa34d5..c3775e66e 100644 --- a/configs/exps/deup/gnn/faenet-training.yaml +++ b/configs/exps/deup/gnn/faenet-training.yaml @@ -11,13 +11,14 @@ default: wandb_tags: faenet, no-concat, with-tag0, dropout wandb_project: ocp-deup optim: - batch_size: 256 - eval_batch_size: 256 + batch_size: 200 + eval_batch_size: 200 cp_data_to_tmpdir: True runs: - config: faenet-is2re-all note: "top run no concat" + graph_rewiring: "" frame_averaging: 2D fa_method: se3-random model: diff --git a/configs/exps/deup/uncertainty/deup_depfaenet.yaml b/configs/exps/deup/uncertainty/deup_depfaenet.yaml new file mode 100644 index 000000000..7b3ccd8a1 --- /dev/null +++ b/configs/exps/deup/uncertainty/deup_depfaenet.yaml @@ -0,0 +1,61 @@ +job: + mem: 32GB + cpus: 4 + gres: gpu:1 + partition: long + +default: + config: deup_faenet-deup_is2re-all + wandb_project: ocp-deup + wandb_tags: deup-depfaenet, 4648581-model, 4657270-dataset + test_ri: True + mode: train + model: + dropout_lowest_layer: output + first_trainable_layer: output + dropout_lin: 0.3 + cp_data_to_tmpdir: false + inference_time_loops: 1 + restart_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4648581/ + checkpoint: /network/scratch/a/alexandre.duval/ocp/runs/4648581/ + dataset: # mandatory if restart_from_dir is set + default_val: deup-val_ood_cat-val_ood_ads + deup-train-val_id: + src: /network/scratch/a/alexandre.duval/ocp/runs/4657270/deup_dataset + deup-val_ood_cat-val_ood_ads: + src: /network/scratch/a/alexandre.duval/ocp/runs/4657270/deup_dataset + deup_dataset: + create: False + +runs: + - note: deup-depfaenet (with dropout) + graph_rewiring: "" + frame_averaging: 2D + fa_method: se3-random + cp_data_to_tmpdir: True + is_disconnected: true + model: + mp_type: updownscale_base + phys_embeds: True + tag_hidden_channels: 0 + pg_hidden_channels: 96 + energy_head: weighted-av-final-embeds + complex_mp: True + graph_norm: True + hidden_channels: 352 + num_filters: 288 + num_gaussians: 68 + num_interactions: 5 + second_layer_MLP: False + skip_co: False + cutoff: 4.0 + dropout_lin: 0.3 + optim: + batch_size: 256 + 
eval_batch_size: 256 + lr_initial: 0.002 + scheduler: LinearWarmupCosineAnnealingLR + eval_every: 0.4 + max_epochs: 12 + + - note: deup-depfaenet (without specifying configs) \ No newline at end of file diff --git a/ocpmodels/datasets/deup_dataset_creator.py b/ocpmodels/datasets/deup_dataset_creator.py index f3a4e3adc..fca3fea8b 100644 --- a/ocpmodels/datasets/deup_dataset_creator.py +++ b/ocpmodels/datasets/deup_dataset_creator.py @@ -441,13 +441,13 @@ def parse_args(): parser.add_argument( "--checkpoints", nargs="+", - default="/network/scratch/a/alexandre.duval/ocp/runs/4616500/", + default="/network/scratch/a/alexandre.duval/ocp/runs/4648581/", help="Paths to the checkpoints", ) parser.add_argument( "--dropout", type=float, - default=0.2, + default=0.3, help="Dropout value", ) return parser.parse_args() diff --git a/scripts/gnn_dev.py b/scripts/gnn_dev.py index bc2205553..bc3924fbe 100644 --- a/scripts/gnn_dev.py +++ b/scripts/gnn_dev.py @@ -16,7 +16,7 @@ if __name__ == "__main__": config = {} # Customize args - config["graph_rewiring"] = "remove-tag-0" + config["graph_rewiring"] = "" config["frame_averaging"] = "2D" config["fa_method"] = "random" # "random" config["test_ri"] = False @@ -29,10 +29,9 @@ str_args = sys.argv[1:] if all("config" not in arg for arg in str_args): str_args.append("--is_debug") - # str_args.append("--config=faenet-is2re-all") - str_args.append("--config=faenet-is2re-10k") - str_args.append("--adsorbates={'*O', '*OH', '*OH2', '*H'}") - # str_args.append("--is_disconnected=True") + str_args.append("--config=deup_depfaenet-deup_is2re-10k") + # str_args.append("--adsorbates={'*O', '*OH', '*OH2', '*H'}") + str_args.append("--is_disconnected=True") # str_args.append("--silent=0") warnings.warn( "No model / mode is given; chosen as default" + f"Using: {str_args[-1]}" From 4d73707b4399c29970ee68c6ed870d74660837e2 Mon Sep 17 00:00:00 2001 From: AlexDuvalinho Date: Fri, 26 Apr 2024 03:48:24 -0400 Subject: [PATCH 21/27] test use deup-dataset in an active learning framework --- scripts/active_learning.py | 97 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 scripts/active_learning.py diff --git a/scripts/active_learning.py b/scripts/active_learning.py new file mode 100644 index 000000000..9d8d33bb6 --- /dev/null +++ b/scripts/active_learning.py @@ -0,0 +1,97 @@ +""" +Copyright (c) Facebook, Inc. and its affiliates. + +This source code is licensed under the MIT license found in the +LICENSE file in the root directory of this source tree. 
+""" +import sys +import warnings +from pathlib import Path + +sys.path.append(str(Path(__file__).resolve().parent.parent)) + +from ocpmodels.common.utils import make_script_trainer, make_trainer_from_dir +from ocpmodels.common.gfn import FAENetWrapper +from ocpmodels.trainers import SingleTrainer +from ocpmodels.datasets.lmdb_dataset import DeupDataset +from ocpmodels.datasets.data_transforms import get_transforms + +if __name__ == "__main__": + + deup_dataset_chkpt = "/network/scratch/a/alexandre.duval/ocp/runs/4657270/deup_dataset" + model_chkpt = "/network/scratch/a/alexandre.duval/ocp/runs/4648581/checkpoints/best_checkpoint.pt" + + data_config = { + "default_val": "deup-val_ood_cat-val_ood_ads", + "deup-train-val_id": { + "src": deup_dataset_chkpt + }, + "deup-val_ood_cat-val_ood_ads": { + "src": deup_dataset_chkpt + }, + "train": { + "src": "/network/scratch/s/schmidtv/ocp/datasets/ocp/is2re/all/train/", + "normalize_labels": True, + }, + "val_id": { + "src": "/network/scratch/s/schmidtv/ocp/datasets/ocp/is2re/all/val_id/" + }, + "val_ood_cat": { + "src": "/network/scratch/s/schmidtv/ocp/datasets/ocp/is2re/all/val_ood_cat/" + }, + "val_ood_ads": { + "src": "/network/scratch/s/schmidtv/ocp/datasets/ocp/is2re/all/val_ood_ads/" + }, + "val_ood_both": { + "src": "/network/scratch/s/schmidtv/ocp/datasets/ocp/is2re/all/val_ood_both/" + }, + } + + trainer = make_trainer_from_dir( + model_chkpt, + mode="continue", + overrides={ + "is_debug": True, + "silent": True, + "cp_data_to_tmpdir": False, + "config": "depfaenet-deup_is2re-all", + "deup_dataset.create": False, + "dataset": data_config, + }, + silent=True, + ) + + wrapper = FAENetWrapper( + faenet=trainer.model, + transform=get_transforms(trainer.config), + frame_averaging=trainer.config.get("frame_averaging", ""), + trainer_config=trainer.config, + ) + + wrapper.freeze() + loaders = trainer.loaders + + data_gen = iter(loaders["deup-train-val_id"]) + batch = next(data_gen) + preds = wrapper(batch) + + # trainer.config["dataset"].update({ + # "deup-train-val_id": { + # "src": "/network/scratch/s/schmidtv/ocp/runs/3301084/deup_dataset" + # }, + # "deup-val_ood_cat-val_ood_ads": { + # "src": "/network/scratch/s/schmidtv/ocp/runs/3301084/deup_dataset" + # }, + # "default_val": "deup-val_ood_cat-val_ood_ads" + # }) + + # deup_dataset_path = "/network/scratch/a/alexandre.duval/ocp/runs/4642835/deup_dataset" + # deup_dataset = DeupDataset( + # { + # **trainer.config["dataset"], + # }, + # "deup-train-val_id", + # transform=get_transforms(trainer.config), + # ) + + # deup_sample = deup_dataset[0] \ No newline at end of file From 175567efaebe65587de13c74b1b8b01415bb509f Mon Sep 17 00:00:00 2001 From: AlexDuvalinho Date: Fri, 17 May 2024 10:58:25 -0400 Subject: [PATCH 22/27] deupdepfaenet configs --- configs/exps/deup/uncertainty/deup_depfaenet.yaml | 6 +++--- configs/models/deup_depfaenet.yaml | 2 +- configs/models/deup_faenet.yaml | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/configs/exps/deup/uncertainty/deup_depfaenet.yaml b/configs/exps/deup/uncertainty/deup_depfaenet.yaml index 7b3ccd8a1..07f4f142f 100644 --- a/configs/exps/deup/uncertainty/deup_depfaenet.yaml +++ b/configs/exps/deup/uncertainty/deup_depfaenet.yaml @@ -5,7 +5,7 @@ job: partition: long default: - config: deup_faenet-deup_is2re-all + config: deup_depfaenet-deup_is2re-all wandb_project: ocp-deup wandb_tags: deup-depfaenet, 4648581-model, 4657270-dataset test_ri: True @@ -16,8 +16,8 @@ default: dropout_lin: 0.3 cp_data_to_tmpdir: false 
inference_time_loops: 1 - restart_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4648581/ - checkpoint: /network/scratch/a/alexandre.duval/ocp/runs/4648581/ + # restart_from_dir: /network/scratch/a/alexandre.duval/ocp/runs/4648581/ + # checkpoint: /network/scratch/a/alexandre.duval/ocp/runs/4648581/ dataset: # mandatory if restart_from_dir is set default_val: deup-val_ood_cat-val_ood_ads deup-train-val_id: diff --git a/configs/models/deup_depfaenet.yaml b/configs/models/deup_depfaenet.yaml index 24ab2587c..be23501f7 100644 --- a/configs/models/deup_depfaenet.yaml +++ b/configs/models/deup_depfaenet.yaml @@ -39,7 +39,7 @@ default: res_updown: hidden_channels: 128 norm: batch1d # batch1d, layer or null - deup_features: [s, energy_pred_std] + deup_features: [s, energy_pred_std] # add q for density optim: batch_size: 256 eval_batch_size: 256 diff --git a/configs/models/deup_faenet.yaml b/configs/models/deup_faenet.yaml index f6e52681f..c11f6e450 100644 --- a/configs/models/deup_faenet.yaml +++ b/configs/models/deup_faenet.yaml @@ -39,7 +39,7 @@ default: res_updown: hidden_channels: 128 norm: batch1d # batch1d, layer or null - deup_features: [s, energy_pred_std] + deup_features: [s, energy_pred_std] # add q for density if it exists optim: batch_size: 256 eval_batch_size: 256 From 03f30388994907671668c6b77396f23bf1b84e3a Mon Sep 17 00:00:00 2001 From: AlexDuvalinho Date: Fri, 17 May 2024 11:01:41 -0400 Subject: [PATCH 23/27] fix issues with q + enforce graph-level deup-dataset --- ocpmodels/datasets/deup_dataset_creator.py | 2 ++ ocpmodels/models/depfaenet.py | 2 +- ocpmodels/models/deup_depfaenet.py | 20 +++++++++++--------- ocpmodels/models/deup_faenet.py | 15 ++++++++------- ocpmodels/models/faenet.py | 6 ++++-- 5 files changed, 26 insertions(+), 19 deletions(-) diff --git a/ocpmodels/datasets/deup_dataset_creator.py b/ocpmodels/datasets/deup_dataset_creator.py index fca3fea8b..8b6e193c5 100644 --- a/ocpmodels/datasets/deup_dataset_creator.py +++ b/ocpmodels/datasets/deup_dataset_creator.py @@ -329,6 +329,7 @@ def create_deup_dataset( pred_mean, batch.y_relaxed.to(pred_mean.device) ) # Store deup samples + assert len(preds["q"]) == len(batch) deup_samples += [ { "energy_target": batch.y_relaxed.clone(), @@ -481,6 +482,7 @@ def parse_args(): # base_config = make_config_from_conf_str("faenet-is2re-all") # base_datasets_config = base_config["dataset"] + # Load deup dataset deup_dataset = DeupDataset( { **base_datasets_config, diff --git a/ocpmodels/models/depfaenet.py b/ocpmodels/models/depfaenet.py index 87f76e08c..4e83dbc0e 100644 --- a/ocpmodels/models/depfaenet.py +++ b/ocpmodels/models/depfaenet.py @@ -92,6 +92,6 @@ def __init__(self, **kwargs): def energy_forward(self, data, q=None): # We need to save the tags so this step is necessary. 
self.output_block.tags_saver(data.tags) - pred = super().energy_forward(data) + pred = super().energy_forward(data, q) return pred diff --git a/ocpmodels/models/deup_depfaenet.py b/ocpmodels/models/deup_depfaenet.py index 8457acf45..619ff6a68 100644 --- a/ocpmodels/models/deup_depfaenet.py +++ b/ocpmodels/models/deup_depfaenet.py @@ -30,6 +30,7 @@ def __init__( ) def forward(self, h, edge_index, edge_weight, batch, alpha, data=None): + # If sample density is used as feature, we need to add the extra dimension if self._set_q_dim: assert data is not None assert "deup_q" in data.to_dict().keys() @@ -58,13 +59,14 @@ def forward(self, h, edge_index, edge_weight, batch, alpha, data=None): }: h = h * alpha - # Global pooling -- get final graph rep - out = scatter( - h, - batch, - dim=0, - reduce="mean" if self.deup_extra_dim > 0 else "add", - ) + # Pool into a graph rep if necessary + if len(h) > len(batch): + h = scatter( + h, + batch, + dim=0, + reduce="mean" if self.deup_extra_dim > 0 else "add", + ) # Concat graph representation with deup features (s, kde(q), std) # and apply MLPs @@ -76,7 +78,7 @@ def forward(self, h, edge_index, edge_weight, batch, alpha, data=None): + f" from the data dict ({data_keys})" ) out = torch.cat( - [out] + [h] + [data[f"deup_{k}"][:, None].float() for k in self.deup_features], dim=-1, ) @@ -87,7 +89,7 @@ def forward(self, h, edge_index, edge_weight, batch, alpha, data=None): return out @registry.register_model("deup_depfaenet") -class DeupFAENet(DepFAENet): +class DeupDepFAENet(DepFAENet): def __init__(self, *args, **kwargs): kwargs["dropout_edge"] = 0 super().__init__(*args, **kwargs) diff --git a/ocpmodels/models/deup_faenet.py b/ocpmodels/models/deup_faenet.py index 88a55964c..726e0e350 100644 --- a/ocpmodels/models/deup_faenet.py +++ b/ocpmodels/models/deup_faenet.py @@ -58,12 +58,13 @@ def forward(self, h, edge_index, edge_weight, batch, alpha, data=None): h = h * alpha # Global pooling -- get final graph rep - out = scatter( - h, - batch, - dim=0, - reduce="mean" if self.deup_extra_dim > 0 else "add", - ) + if len(h) > len(batch): + h = scatter( + h, + batch, + dim=0, + reduce="mean" if self.deup_extra_dim > 0 else "add", + ) # Concat graph representation with deup features (s, kde(q), std) # and apply MLPs @@ -75,7 +76,7 @@ def forward(self, h, edge_index, edge_weight, batch, alpha, data=None): + f" from the data dict ({data_keys})" ) out = torch.cat( - [out] + [h] + [data[f"deup_{k}"][:, None].float() for k in self.deup_features], dim=-1, ) diff --git a/ocpmodels/models/faenet.py b/ocpmodels/models/faenet.py index 7d5c6044d..78b9980cd 100644 --- a/ocpmodels/models/faenet.py +++ b/ocpmodels/models/faenet.py @@ -711,7 +711,7 @@ def energy_forward(self, data, q=None): edge_attr = edge_attr[edge_mask] rel_pos = rel_pos[edge_mask] - if q is None: + if not hasattr(data, "deup_q"): # Embedding block h, e = self.embed_block(z, rel_pos, edge_attr, data.tags) @@ -754,6 +754,7 @@ def energy_forward(self, data, q=None): # WARNING # q which is NOT the hidden state h if it was stored as a scattered # version of h. This works for GPs, NOT for MC-dropout + q = data.deup_q # No need to clone # TODO: check that it's not a problem (move to deup models) h = q alpha = None @@ -766,7 +767,8 @@ def energy_forward(self, data, q=None): elif self.skip_co == "add": energy = sum(energy_skip_co) - if q and len(q) > len(energy): + # Store graph-level representation. 
# TODO: maybe want node-level rep + if q is not None and len(q) > len(energy): # N_atoms x hidden_channels q = scatter(q, batch, dim=0, reduce="mean") # N_graphs x hidden_channels preds = { From ae7b17559354e3cc5e3af29bee4b363a32c7c987 Mon Sep 17 00:00:00 2001 From: AlexDuvalinho Date: Mon, 20 May 2024 06:21:08 -0400 Subject: [PATCH 24/27] random instead of randon in yaml --- configs/models/faenet.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/models/faenet.yaml b/configs/models/faenet.yaml index 3e66bba5f..97d113632 100644 --- a/configs/models/faenet.yaml +++ b/configs/models/faenet.yaml @@ -1,6 +1,6 @@ default: frame_averaging: "" # {"2D", "3D", "DA", ""} - fa_method: "" # {"", all, randon, det, se3-all, se3-randon, se3-det} + fa_method: "" # {"", all, random, det, se3-all, se3-random, se3-det} model: name: faenet act: swish @@ -69,7 +69,7 @@ is2re: default: graph_rewiring: remove-tag-0 frame_averaging: "2D" # {"2D", "3D", "DA", ""} - fa_method: "se3-random" # {"", all, randon, det, se3-all, se3-randon, se3-det} + fa_method: "se3-random" # {"", all, random, det, se3-all, se3-random, se3-det} # *** Important note *** # The total number of gpus used for this run was 1. # If the global batch size (num_gpus * batch_size) is modified From 7c2714cc51c35a09875c577c647371d8e9c7634c Mon Sep 17 00:00:00 2001 From: AlexDuvalinho Date: Mon, 20 May 2024 06:23:13 -0400 Subject: [PATCH 25/27] random, not stochastic --- ocpmodels/datasets/data_transforms.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ocpmodels/datasets/data_transforms.py b/ocpmodels/datasets/data_transforms.py index 17a63dfa5..db01cbb1e 100644 --- a/ocpmodels/datasets/data_transforms.py +++ b/ocpmodels/datasets/data_transforms.py @@ -41,11 +41,11 @@ class FrameAveraging(Transform): Can be 2D FA, 3D FA, Data Augmentation or no FA, respectively denoted by (`"2D"`, `"3D"`, `"DA"`, `""`) fa_method (str): the actual frame averaging technique used. - "stochastic" refers to sampling one frame at random (at each epoch), "det" + "random" refers to sampling one frame at random (at each epoch), "det" to chosing deterministically one frame, and "all" to using all frames. The prefix "se3-" refers to the SE(3) equivariant version of the method. "" - means that no frame averaging is used. (`""`, `"stochastic"`, `"all"`, - `"det"`, `"se3-stochastic"`, `"se3-all"`, `"se3-det"`) + means that no frame averaging is used. 
(`""`, `"random"`, `"all"`, + `"det"`, `"se3-random"`, `"se3-all"`, `"se3-det"`) Returns: (data.Data): updated data object with new positions (+ unit cell) attributes From b006540fe4027a8a82854fbd3b1901411908c234 Mon Sep 17 00:00:00 2001 From: AlexDuvalinho Date: Tue, 21 May 2024 09:59:08 -0400 Subject: [PATCH 26/27] signnet analysis (workshop submission) --- scripts/signnet.py | 116 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 scripts/signnet.py diff --git a/scripts/signnet.py b/scripts/signnet.py new file mode 100644 index 000000000..9d1dfb843 --- /dev/null +++ b/scripts/signnet.py @@ -0,0 +1,116 @@ +import sys +from pathlib import Path +import torch + +sys.path.append(str(Path(__file__).resolve().parent.parent)) + +from ocpmodels.common.utils import make_script_trainer +from ocpmodels.trainers import SingleTrainer +from torch_geometric.data import Batch + +if __name__ == "__main__": + config = {} + + # Customize args + config["graph_rewiring"] = "remove-tag-0" + config["frame_averaging"] = "3D" + config["fa_method"] = "all" + config["test_ri"] = False + # config["optim"] = {"batch_size": 1} + + str_args = sys.argv[1:] + if all("config" not in arg for arg in str_args): + str_args.append("--is_debug") + str_args.append("--config=faenet-is2re-10k") + + # Create trainer + trainer: SingleTrainer = make_script_trainer(str_args=str_args, overrides=config) + + for batch in trainer.loaders["train"]: + break + b = batch[0] + rotated_b = b.clone() + rotated_b = trainer.rotate_graph(rotated_b, rotation="z") + rotation_matrix = rotated_b["rot"] + rotated_b = rotated_b["batch_list"][0] + + # Check: X' = X R (or X = X' R^T) + assert torch.allclose(rotated_b[0].pos @ rotation_matrix.T, b[0].pos, atol=1e-04) + assert torch.allclose(b[0].pos @ rotation_matrix, rotated_b[0].pos, atol=1e-04) + # Check: X U_i = X' U_i (compare X_fa and X'fa, abs values to deal with different frames) + assert torch.allclose( + torch.abs(b[0].pos @ b[0].fa_rot[0].squeeze(0)), + torch.abs(rotated_b[0].pos @ rotated_b[0].fa_rot[0].squeeze(0)), + atol=10e-03, + ) + # Check: U_i' = R U_i + + # SignNet model + class SignNet(torch.nn.Module): + def __init__(self, in_channels=3, hidden_channels=12, out_channels=3): + super(SignNet, self).__init__() + self.mlp = torch.nn.Sequential( + torch.nn.Linear(in_channels, hidden_channels), + torch.nn.ReLU(), + torch.nn.Linear(hidden_channels, out_channels), + ) + torch.nn.init.xavier_uniform_(self.mlp[0].weight) + torch.nn.init.xavier_uniform_(self.mlp[2].weight) + self.mlp2 = torch.nn.Linear(3 * out_channels, 3 * out_channels) + + torch.nn.init.xavier_uniform_(self.mlp2.weight) + + def forward(self, x, second_mlp=False): + if second_mlp: + res = self.mlp(x) + self.mlp(-x) + res = res.view(-1) # flatten res + res = self.mlp2(res) + return res.view((3, -1)).T # reshape as eigenvector column matrix + return (self.mlp(x) + self.mlp(-x)).T + + signnet = SignNet() + second_mlp = True + + for i in range(len(b.sid)): + g = Batch.get_example(b, i) + rotated_g = Batch.get_example(rotated_b, i) + + # Test: X_fa = R X_fa' + torch.allclose(rotation_matrix @ rotated_g.fa_rot[0], g.fa_rot[0], atol=5e-01) + + # SignNet on eigenvector matrix U for g and rotated_g + # Need SignNet(U_i) = U*, for every frame U_i + # Eigenvectors are the columns of fa_rot. 
Need rows for SignNet MLPs + eigen = signnet(g.fa_rot[0].squeeze(0).T, second_mlp) + eigen_bis = signnet(g.fa_rot[1].squeeze(0).T, second_mlp) + assert torch.allclose(eigen, eigen_bis, atol=1e-04) + + # Compare with rotated graph + rot_eigen = signnet(rotated_g.fa_rot[0].squeeze(0).T, second_mlp) + # Check U*' = R U* + if torch.allclose(rot_eigen, eigen, atol=1e-4): + print("U* is invariant to rotations") + elif torch.allclose(rot_eigen, rotation_matrix @ eigen, atol=1e-4): + print("U* is equivariant to rotations") + else: + print("U* is neither invariant nor equivariant") + # Double-Check: X U* = X' U*' + new_pos = g.pos @ eigen + new_rotated_pos = rotated_g.pos @ rot_eigen + if not torch.allclose(new_pos, new_rotated_pos, atol=1e-4): + print("No equivariance: X U* != X' U*'") + + # Different eigenvalues matrix => want different U* + m = g.fa_rot[0].squeeze(0).T + torch.randn(3, 3) + e = signnet(m, second_mlp) + if torch.allclose(e, eigen, atol=1e-4): + print("Issue: distinct graph has same signnet eigenvectors") + + # Same but on real eigenvec matrix + next_g = Batch.get_example(b, i+1) + e = signnet(next_g.fa_rot[0].squeeze(0).T, second_mlp) + if torch.allclose(e, eigen, atol=1e-4): + print("Issue: distinct graph has same signnet eigenvectors") + + # Try with more complex network + # Repalce False by True in signnet above From cc503353961b78ea83294c9d72444702bec169b4 Mon Sep 17 00:00:00 2001 From: AlexDuvalinho Date: Thu, 31 Oct 2024 14:42:47 -0400 Subject: [PATCH 27/27] denormalise predictions --- ocpmodels/common/gfn.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/ocpmodels/common/gfn.py b/ocpmodels/common/gfn.py index 0a1f45521..0e4ba7389 100644 --- a/ocpmodels/common/gfn.py +++ b/ocpmodels/common/gfn.py @@ -20,6 +20,7 @@ def __init__( transform: Callable = None, frame_averaging: str = None, trainer_config: dict = None, + normalizers: dict = None, ): """ `FAENetWrapper` is a wrapper class for the FAENet model. It is used to perform @@ -31,6 +32,7 @@ def __init__( frame_averaging (str, optional): The frame averaging method to use. trainer_config (dict, optional): The trainer config used to create the model. Defaults to None. + normalizers (dict, optional): The normalizers used to create the model. """ super().__init__() @@ -39,6 +41,7 @@ def __init__( self.frame_averaging = frame_averaging self.trainer_config = trainer_config self._is_frozen = None + self.normalizers = normalizers @property def frozen(self): @@ -165,7 +168,15 @@ def forward( if retrieve_hidden: return preds - return preds["energy"] # denormalize? 
+ breakpoint() + + # Denormalize predictions + preds["energy"] = self.normalizers["target"].denorm( + preds["energy"], + ) + # preds["energy"] = preds["energy"].to(torch.float16) + + return preds["energy"] def freeze(self): """Freeze the model parameters.""" @@ -274,6 +285,7 @@ def prepare_for_gfn(ckpt_paths: dict, release: str) -> tuple: transform=get_transforms(trainer.config), frame_averaging=trainer.config.get("frame_averaging", ""), trainer_config=trainer.config, + normalizers=trainer.normalizers, ) wrapper.freeze() loaders = trainer.loaders @@ -288,10 +300,10 @@ def prepare_for_gfn(ckpt_paths: dict, release: str) -> tuple: from ocpmodels.common.gfn import prepare_for_gfn ckpt_paths = {"mila": "/path/to/releases_dir"} - release = "v2.3_graph_phys" + release = "0.0.1" # or ckpt_paths = { - "mila": "/network/scratch/s/schmidtv/ocp/runs/3789733/checkpoints/best_checkpoint.pt" + "mila": "/network/scratch/a/alexandre.duval/ocp/catalyst-ckpts/0.0.1/best_checkpoint.pt" } release = None wrapper, loaders = prepare_for_gfn(ckpt_paths, release)
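
The denormalisation added in the last patch inverts the mean/standard-deviation normalisation applied to the target energies during training (assuming the usual z-score style normaliser used by OCP-style trainers), so the wrapper returns predictions on the original energy scale. Below is a minimal, self-contained sketch of that behaviour; TargetNormalizer and the mean/std values are hypothetical stand-ins for the project's own normalizer object, and only the denorm call mirrors what gfn.py now does with self.normalizers["target"].

    import torch


    class TargetNormalizer:
        """Minimal z-score normalizer (hypothetical stand-in for the project's normalizer)."""

        def __init__(self, mean: float, std: float):
            self.mean = torch.tensor(mean)
            self.std = torch.tensor(std)

        def norm(self, x: torch.Tensor) -> torch.Tensor:
            # Map raw energies to the normalized scale used during training.
            return (x - self.mean) / self.std

        def denorm(self, x: torch.Tensor) -> torch.Tensor:
            # Invert the normalization: model outputs -> original energy scale.
            return x * self.std + self.mean


    if __name__ == "__main__":
        # Assumed statistics; the real mean/std come from the training-set targets.
        normalizers = {"target": TargetNormalizer(mean=-1.35, std=2.28)}

        model_out = torch.randn(8)  # normalized energy predictions for a batch of 8 graphs
        energies = normalizers["target"].denorm(model_out)

        # Round-trip check: norm(denorm(x)) recovers the model output.
        assert torch.allclose(normalizers["target"].norm(energies), model_out, atol=1e-6)

Performing the denormalisation inside FAENetWrapper.forward means downstream GFlowNet code always receives energies on the original scale, without needing access to the training-time statistics.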