
Commit

add verbose mode
joel99 committed May 29, 2024
1 parent 1547066 commit 065dc98
Showing 5 changed files with 33 additions and 13 deletions.
21 changes: 14 additions & 7 deletions decoder_demos/ndt2_sample.Dockerfile
@@ -26,21 +26,28 @@ ENV EVALUATION_LOC remote
# Note that Docker cannot easily import across symlinks, make sure data is not symlinked

# H1
# ADD ./local_data/ndt2_h1_sample_nokey.pth data/decoder.pth
# ADD ./local_data/ndt2_h1_sample.pth data/decoder.pth
# ADD ./local_data/ndt2_zscore_h1.pt data/zscore.pt
# ENV SPLIT "h1"
# ENV CONFIG_STEM falcon/h1/h1_100

# M1
ADD ./local_data/ndt2_m1_sample_continual.pth data/decoder.pth
ADD ./local_data/ndt2_zscore_m1.pt data/zscore.pt
ENV SPLIT "m1"
ENV CONFIG_STEM falcon/m1/m1_100

# M2
ADD ./local_data/ndt2_m2_sample_continual.pth data/decoder.pth
ADD ./local_data/ndt2_zscore_m2.pt data/zscore.pt
# # M2
# ADD ./local_data/ndt2_m2_sample_continual.pth data/decoder.pth
# ADD ./local_data/ndt2_zscore_m2.pt data/zscore.pt
# ENV SPLIT "m2"
# ENV CONFIG_STEM falcon/m2/m2_100

# Add runfile
RUN pwd
ADD ./decoder_demos/ndt2_sample.py decode.py
ADD ./decoder_demos/ndt2_decoder.py ndt2_decoder.py

ENV SPLIT "h1"
ENV BATCH_SIZE 16
ENV PHASE "test"

# Make sure this matches the mounted data volume path. Generally leave as is.
@@ -50,4 +57,4 @@ ENV EVAL_DATA_PATH "/dataset/evaluation_data"
# CMD specifies a default command to run when the container is launched.
# It can be overridden with any cmd e.g. sudo docker run -it my_image /bin/bash
CMD ["/bin/bash", "-c", \
"python decode.py --evaluation $EVALUATION_LOC --model-path data/decoder.pth --zscore-path data/zscore.pt --split $SPLIT --phase $PHASE"]
"python decode.py --evaluation $EVALUATION_LOC --model-path data/decoder.pth --config-stem $CONFIG_STEM --zscore-path data/zscore.pt --split $SPLIT --batch-size $BATCH_SIZE --phase $PHASE"]
2 changes: 1 addition & 1 deletion decoder_demos/ndt2_sample.py
@@ -53,7 +53,7 @@ def main():
task = getattr(FalconTask, args.split)
config = FalconConfig(task=task)
max_bins = 50 if task in [FalconTask.m1, FalconTask.m2] else 200 # h1

decoder = NDT2Decoder(
task_config=config,
model_ckpt_path=args.model_path,
20 changes: 16 additions & 4 deletions falcon_challenge/evaluator.py
@@ -235,6 +235,7 @@ def evaluate(
mask_dict['held_out'].append(dataset_mask)
else:
raise ValueError(f"Dataset {dataset} submitted but not found in held-in or held-out list of split {datasplit}.")

for in_or_out in pred_dict:
if len(pred_dict[in_or_out]) < len(DATASET_HELDINOUT_MAP[datasplit][in_or_out]):
raise ValueError(f"Missing predictions for {datasplit} {in_or_out}. User submitted: {user_submission[datasplit].keys()}. Expecting more like: {HELDIN_OR_OUT_MAP[datasplit][in_or_out]}.")
@@ -312,13 +313,17 @@ def simple_collater(batch, task):

class FalconEvaluator:

def __init__(self, eval_remote=False, split='h1'):
def __init__(self, eval_remote=False, split='h1', verbose=False):
r"""
verbose: Print out dataset specific metrics for movement tasks.
"""
self.eval_remote = eval_remote
assert split in ['h1', 'h2', 'm1', 'm2'], "Split must be h1, h2, m1, or m2."
if split in ['h1', 'm1', 'm2']:
self.continual = True
else:
self.continual = False
self.verbose = verbose
self.dataset: FalconTask = getattr(FalconTask, split)
self.cfg = FalconConfig(self.dataset)

@@ -554,9 +559,9 @@ def evaluate(
else:
for k, v in metrics.items():
logger.info("{}: {}".format(k, v))

@staticmethod
def compute_metrics_regression(preds, targets, eval_mask, dset_lens):
def compute_metrics_regression(preds, targets, eval_mask, dset_lens, verbose=False): # Verbose drop-in
dset_lens = np.cumsum([sum(dset_lens[key]) for key in sorted(dset_lens.keys())])
masked_points = np.cumsum(~eval_mask)
dset_lens = [0] + [dset_len - masked_points[dset_len - 1] for dset_len in dset_lens]
@@ -566,11 +571,18 @@ def compute_metrics_regression(preds, targets, eval_mask, dset_lens):
raise ValueError(f"Targets and predictions have different lengths: {targets.shape[0]} vs {preds.shape[0]}.")
r2_scores = [r2_score(targets[dset_lens[i]:dset_lens[i+1]], preds[dset_lens[i]:dset_lens[i+1]],
multioutput='variance_weighted') for i in range(len(dset_lens) - 1)]
if verbose:
dsets = sorted(dset_lens.keys())
print([f'{k}: {r2}' for k, r2 in zip(dsets, r2_scores)])
preds_dict = {k: preds[dset_lens[i]:dset_lens[i+1]] for i, k in enumerate(dsets)}
with open('preds.pkl', 'wb') as f:
pickle.dump(preds_dict, f)
return {
"R2 Mean": np.mean(r2_scores),
"R2 Std.": np.std(r2_scores)
}
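When verbose is set, the branch above also writes the per-dataset predictions to preds.pkl in the working directory; a quick sketch of inspecting that file after a run (per the code above, the dict maps dataset names to prediction arrays):

import pickle

with open("preds.pkl", "rb") as f:
    preds_dict = pickle.load(f)  # dataset name -> prediction array

for name, arr in preds_dict.items():
    print(name, arr.shape)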


@staticmethod
def compute_metrics_edit_distance(preds, targets, eval_mask):
if len(preds) != len(targets):
@@ -609,7 +621,7 @@ def compute_metrics(self, all_preds, all_targets, all_eval_mask=None):
all_eval_mask: array of shape (n_timesteps, k_dim). True if we should evaluate this timestep.
"""
if self.dataset in [FalconTask.h1, FalconTask.m1, FalconTask.m2]:
metrics = self.compute_metrics_regression(all_preds, all_targets, all_eval_mask)
metrics = self.compute_metrics_regression(all_preds, all_targets, all_eval_mask, verbose=self.verbose)
elif self.dataset in [FalconTask.h2]:
metrics = self.compute_metrics_edit_distance(all_preds, all_targets, all_eval_mask)
else:
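Putting the change together: the flag is passed at construction time and threaded down to compute_metrics_regression for the movement tasks. A minimal usage sketch, assuming nothing beyond the constructor shown above and the import path implied by this file:

from falcon_challenge.evaluator import FalconEvaluator

# verbose=True makes the movement-task metrics print one R2 per dataset
# (and dump preds.pkl) in addition to the aggregate "R2 Mean" / "R2 Std."
evaluator = FalconEvaluator(eval_remote=False, split="m1", verbose=True)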
2 changes: 1 addition & 1 deletion setup.py
@@ -2,7 +2,7 @@

setup(
name='falcon_challenge',
version='0.3.9',
version='0.3.10',

url='https://github.com/snel-repo/stability-benchmark',
author='Joel Ye',
1 change: 1 addition & 0 deletions test_docker_local.sh
@@ -22,4 +22,5 @@ done
docker run \
-v $(pwd)/data:/dataset/evaluation_data \
-e "EVALUATION_LOC=local" \
--gpus all \
${DOCKER_NAME}\
