diff --git a/configs/local_setup_comet.yml b/configs/local_setup_comet.yml new file mode 100644 index 000000000..9f898b265 --- /dev/null +++ b/configs/local_setup_comet.yml @@ -0,0 +1,33 @@ +# Suggested data paths when using GPT-NeoX locally +{ + "data_path": "/workspace/gpt-neox-main/data/enwik8/enwik8_text_document", + + # or for weighted datasets: + # "train-data-paths": ["data/enwik8/enwik8_text_document", "data/enwik8/enwik8_text_document"], + # "test-data-paths": ["data/enwik8/enwik8_text_document", "data/enwik8/enwik8_text_document"], + # "valid-data-paths": ["data/enwik8/enwik8_text_document", "data/enwik8/enwik8_text_document"], + # "train-data-weights": [1., 2.], + # "test-data-weights": [2., 1.], + # "valid-data-weights": [0.5, 0.4], + + # If weight_by_num_documents is True, Builds dataset weights from a multinomial distribution over groups of data according to the number of documents in each group. + # WARNING: setting this to True will override any user provided weights + # "weight_by_num_documents": false, + # "weighted_sampler_alpha": 0.3, + + "vocab_file": "/workspace/gpt-neox-main/data/gpt2-vocab.json", + "merge_file": "/workspace/gpt-neox-main/data/gpt2-merges.txt", + + "save": "checkpoints", + "load": "checkpoints", + "checkpoint_validation_with_forward_pass": False, + + "tensorboard_dir": "tensorboard", + "log_dir": "logs", + "use_comet": True, + "comet_workspace": "test_workspace", # CHANGE ME + "comet_project": "test_project", + "comet_experiment_name": "test_experiment", + "comet_tags": ["test_tag1", "test_tag2"], + "comet_others": {"test_others"}, +}