# Makefile (forked from allenai/dolma)
# Detect the host OS once, at parse time, and choose the matching commands for
# installing the native build prerequisites (cmake, protobuf, openssl).
#
# The *_SETUP values are stored WITHOUT surrounding quotes so that they can be
# used directly as recipe lines. Each one only installs when `which` cannot
# find the tool on PATH. On an unsupported OS they are empty, and make skips
# recipe lines that expand to nothing.
UNAME := $(shell uname)
ifeq ($(UNAME),Darwin)
  OS_MESSAGE := MacOS detected
  CMAKE_SETUP := which cmake || brew install cmake
  PROTOBUF_SETUP := which protoc || brew install protobuf
  OPENSSL_SETUP := which openssl || brew install openssl
else ifeq ($(UNAME),Linux)
  OS_MESSAGE := Linux detected
  CMAKE_SETUP := which cmake || sudo apt-get install --yes build-essential cmake
  PROTOBUF_SETUP := which protoc || sudo apt-get install --yes protobuf-compiler
  OPENSSL_SETUP := which openssl || sudo apt-get install --yes libssl-dev
else
  OS_MESSAGE := Unsupported OS; please install rust, cmake, protobuf, and openssl manually
  CMAKE_SETUP :=
  PROTOBUF_SETUP :=
  OPENSSL_SETUP :=
endif

# Install the build toolchain: the OS-specific native packages selected above,
# then cargo (via rustup) and maturin if they are not already on PATH.
#
# The install commands run as ordinary recipe lines rather than through
# $(shell ...): $(shell ...) would paste each command's stdout back into the
# recipe, where it would be executed as a command, and would ignore the
# command's exit status.
.PHONY: setup
setup:
	@echo "$(OS_MESSAGE): installing..."
	$(CMAKE_SETUP)
	$(PROTOBUF_SETUP)
	$(OPENSSL_SETUP)
	which cargo || curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
	which maturin || pip install maturin
# Publish the package by running `maturin publish`.
# Declared phony so a file named `publish` can never mask this command.
.PHONY: publish
publish:
	maturin publish
# Full end-to-end test run: install tooling, build the package, upload the S3
# fixtures, run the Python and Rust suites, then remove the fixtures.
# The steps are ordered only by their position in the prerequisite list, so
# run this target serially (without -j).
# Declared phony so a file or directory named `test` cannot disable it.
.PHONY: test
test: setup develop setup-test test-python test-rust clean-test
# Run the Python test suite under tests/python.
# -v gives verbose test names; -s disables pytest's output capturing.
.PHONY: test-python
test-python:
	pytest -vs tests/python
# Run the Rust test suite. Arguments after `--` are passed to the test
# harness; --nocapture lets the tests' own output through.
.PHONY: test-rust
test-rust:
	cargo test -- --nocapture
# Remove test artefacts: everything under the local tests/work directory and,
# recursively, every object under the mixer test prefix in S3.
.PHONY: clean-test
clean-test:
	rm -rf tests/work/*
	aws s3 rm --recursive s3://ai2-llm/pretraining-data/tests/mixer/
# Upload the test fixtures from tests/data to the mixer test prefix in S3:
# one documents file, four attribute files, and the directory of expected
# outputs (synced, skipping dotfiles at any depth).
#
# mixer_s3 is a target-specific variable, so the shared prefix is written once
# and is visible only inside this rule.
.PHONY: setup-test
setup-test: mixer_s3 := s3://ai2-llm/pretraining-data/tests/mixer
setup-test:
	aws s3 cp tests/data/documents.json.gz $(mixer_s3)/inputs/v0/documents/head/0000.json.gz
	aws s3 cp tests/data/pii-attributes.json.gz $(mixer_s3)/inputs/v0/attributes/pii/head/0000.json.gz
	aws s3 cp tests/data/toxicity-attributes.json.gz $(mixer_s3)/inputs/v0/attributes/toxicity/head/0000.json.gz
	aws s3 cp tests/data/sample-attributes.json.gz $(mixer_s3)/inputs/v0/attributes/sample/head/0000.json.gz
	aws s3 cp tests/data/duplicate-paragraphs.json.gz $(mixer_s3)/inputs/v0/attributes/duplicate_paragraphs/head/0000.json.gz
	aws s3 sync tests/data/expected $(mixer_s3)/expected --exclude ".*" --exclude "*/.*"
# Build the package for local development by running `maturin develop`,
# requesting the `dev` extras.
.PHONY: develop
develop:
	maturin develop --extras=dev
# Reformat the code base in place.
#   - Rust: rustfmt (2021 edition) on the .rs files directly under src/.
#   - Python: autopep8, then isort, then black, first on python/ and then on
#     tests/python/; within each line a failing tool stops the ones after it.
.PHONY: style
style:
	rustfmt --edition 2021 src/*.rs
	autopep8 --in-place --recursive python/ && isort python/ && black python/
	autopep8 --in-place --recursive tests/python/ && isort tests/python/ && black tests/python/