diff --git a/Makefile b/Makefile index d76f9d6..f37f55e 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,6 @@ ECR_URL_DEV:=222053980223.dkr.ecr.us-east-1.amazonaws.com/browsertrix-harvester- ### End of Terraform-generated header ### SHELL=/bin/bash DATETIME:=$(shell date -u +%Y%m%dT%H%M%SZ) -CURRENT_DATE=$(date +"%Y-%m-%d") ### Dependency commands ### install: # install python dependencies @@ -67,13 +66,14 @@ run-harvest-local: # Test Dev1 harvest run-harvest-dev: - CRAWL_NAME=test-harvest-ecs-$CURRENT_DATE aws ecs run-task \ + CRAWL_NAME=test-harvest-ecs-$(DATETIME); \ + aws ecs run-task \ --cluster timdex-dev \ --task-definition timdex-browsertrixharvester-dev \ --launch-type="FARGATE" \ --region us-east-1 \ --network-configuration '{"awsvpcConfiguration": {"subnets": ["subnet-0488e4996ddc8365b","subnet-022e9ea19f5f93e65"], "securityGroups": ["sg-044033bf5f102c544"]}}' \ - --overrides '{"containerOverrides": [ {"name":"browsertrix-harvester", "command": ["--verbose", "harvest", "--crawl-name", "'"$CRAWL_NAME"'", "--config-yaml-file", "/browsertrix-harvester/tests/fixtures/lib-website-homepage.yaml", "--metadata-output-file", "s3://timdex-extract-dev-222053980223/librarywebsite/'"$CRAWL_NAME"'.xml", "--wacz-output-file", "s3://timdex-extract-dev-222053980223/librarywebsite/'"$CRAWL_NAME"'.wacz", "--num-workers", "2"]}]}' + --overrides '{"containerOverrides": [ {"name":"browsertrix-harvester", "command": ["--verbose", "harvest", "--crawl-name", "'$$CRAWL_NAME'", "--config-yaml-file", "/browsertrix-harvester/tests/fixtures/lib-website-homepage.yaml", "--metadata-output-file", "s3://timdex-extract-dev-222053980223/librarywebsite/'$$CRAWL_NAME'.xml", "--wacz-output-file", "s3://timdex-extract-dev-222053980223/librarywebsite/'$$CRAWL_NAME'.wacz", "--num-workers", "2"]}]}' # Test local URL content parsing test-parse-url-content: diff --git a/README.md b/README.md index b043361..ffc209c 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ make run-harvest-dev ``` * 
Set AWS credentials are required in calling context * Kicks off an ECS Fargate task in Dev1 - * WACZ file and metadata file are written to S3 at `timdex-extract-dev-222053980223/librarywebsite/test-harvest-ecs-$CURRENT_DATE.xml|wacz` + * WACZ file and metadata file are written to S3 at `timdex-extract-dev-222053980223/librarywebsite/test-harvest-ecs-<DATETIME>.xml|wacz` ## CLI commands diff --git a/bin/test-harvest-ecs-dev1.sh b/bin/test-harvest-ecs-dev1.sh deleted file mode 100755 index 3542415..0000000 --- a/bin/test-harvest-ecs-dev1.sh +++ /dev/null @@ -1,13 +0,0 @@ -# extract date -export CURRENT_DATE=$(date +"%Y-%m-%d") -export CRAWL_NAME=test-harvest-ecs-$CURRENT_DATE -echo "Invoking test crawl as ECS task in Dev1 named: $CRAWL_NAME" - -# invoke ECS task -aws ecs run-task \ --cluster timdex-dev \ --task-definition timdex-browsertrixharvester-dev \ --launch-type="FARGATE" \ --region us-east-1 \ --network-configuration '{"awsvpcConfiguration": {"subnets": ["subnet-0488e4996ddc8365b","subnet-022e9ea19f5f93e65"], "securityGroups": ["sg-044033bf5f102c544"]}}' \ --overrides '{"containerOverrides": [ {"name":"browsertrix-harvester", "command": ["--verbose", "harvest", "--crawl-name", "'"$CRAWL_NAME"'", "--config-yaml-file", "/browsertrix-harvester/tests/fixtures/lib-website-homepage.yaml", "--metadata-output-file", "s3://timdex-extract-dev-222053980223/librarywebsite/'"$CRAWL_NAME"'.xml", "--wacz-output-file", "s3://timdex-extract-dev-222053980223/librarywebsite/'"$CRAWL_NAME"'.wacz", "--num-workers", "2"]}]}' \ No newline at end of file