diff --git a/deployment/gcp/README.md b/deployment/gcp/README.md
index bbc8b1c..aa05a17 100644
--- a/deployment/gcp/README.md
+++ b/deployment/gcp/README.md
@@ -100,12 +100,17 @@ Remember to change the `crawl.yaml` to point to `image: gcr.io/$PROJECT/openwpm`
 
 ## Deploy the redis server which we use for the work queue
 
-This will launch a 1GB Basic tier Google Cloud Memorystore for Redis instance ($0.049/GB/hour):
+Launch a 1GB Basic tier Google Cloud Memorystore for Redis instance ($0.049/GB/hour):
 ```
 gcloud redis instances create crawlredis --size=1 --region=us-central1 --redis-version=redis_4_0
 ```
 
-Next, use the following output:
+Launch a temporary redis-box pod deployed to the cluster which we use to interact with the above Redis instance:
+```
+kubectl apply -f redis-box.yaml
+```
+
+Use the following output:
 ```
 gcloud redis instances describe crawlredis --region=us-central1
 ```
@@ -178,9 +183,8 @@ Note that for the remainder of these instructions, `metadata.name` is assumed to
 
 #### Queue status
 
-Open a temporary instance and launch redis-cli:
+Launch redis-cli:
 ```
-kubectl apply -f redis-box.yaml
 kubectl exec -it redis-box -- sh -c "redis-cli -h $REDIS_HOST"
 ```
 
diff --git a/deployment/load_site_list_into_redis.sh b/deployment/load_site_list_into_redis.sh
index a818e27..5489fb8 100755
--- a/deployment/load_site_list_into_redis.sh
+++ b/deployment/load_site_list_into_redis.sh
@@ -24,7 +24,6 @@ echo "DEL $REDIS_QUEUE_NAME:processing" >> joblist.txt
 # awk #1 = Add the RPUSH command with the site value within single quotes
 cat "$SITE_LIST_CSV" | sed '1!G;h;$!d' | sed "s/'/\\\'/g" | awk -F ',' 'FNR > 0 {print "RPUSH '$REDIS_QUEUE_NAME' '\''"$1","$2"'\''"}' >> joblist.txt
 
-kubectl apply -f redis-box.yaml
 kubectl cp joblist.txt redis-box:/tmp/joblist.txt
 kubectl exec redis-box -- sh -c "cat /tmp/joblist.txt | redis-cli -h $REDIS_HOST --pipe"
 