From 5cf0a8caec8a935e85b2e145b906824d4e97b5d6 Mon Sep 17 00:00:00 2001 From: algo7 <11154774+algo7@users.noreply.github.com> Date: Sat, 9 Dec 2023 08:19:13 +0100 Subject: [PATCH 1/3] Add Proxy Pool section to the readme.md --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 14aa412..0b83d1e 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ A simple scraper for TripAdvisor reviews. - [Run the container provisioner](#run-the-container-provisioner) - [Visit the UI](#visit-the-ui) - [Live Demo](#live-demo) +- [Proxy Pool](#proxy-pool) ## How to Install Docker: 1. [Windows](https://docs.docker.com/desktop/windows/install/) @@ -83,4 +84,6 @@ The `docker-compose.yml` for the provisioner is located in the `container_provis The UI is accessible at `http://localhost:3000`. ## Live Demo -A live demo of the container provisioner is available at [https://algo7.tools](https://algo7.tools). \ No newline at end of file +A live demo of the container provisioner is available at [https://algo7.tools](https://algo7.tools). + +# Proxy Pool From 9c2a7509adbd01e3119ca23c0cf63e3286d000d4 Mon Sep 17 00:00:00 2001 From: algo7 <11154774+algo7@users.noreply.github.com> Date: Sat, 9 Dec 2023 08:20:03 +0100 Subject: [PATCH 2/3] Update VPN worker image in docker-compose file --- proxy_pool/docker-compose-dev.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/proxy_pool/docker-compose-dev.yml b/proxy_pool/docker-compose-dev.yml index 752eca7..e13bafb 100644 --- a/proxy_pool/docker-compose-dev.yml +++ b/proxy_pool/docker-compose-dev.yml @@ -1,7 +1,7 @@ version: '3.9' services: vpnch: - image: su:latest + image: ghcr.io/algo7/tripadvisor-review-scraper/vpn_worker:latest labels: # This label is used by the container_provisioner to identify the containers that are part of the proxy pool. 
- 'TaskOwner=PROXY' @@ -44,7 +44,7 @@ services: create_host_path: true vpnse: - image: su:latest + image: ghcr.io/algo7/tripadvisor-review-scraper/vpn_worker:latest labels: - 'TaskOwner=PROXY' - 'vpn.region=SE' @@ -79,7 +79,7 @@ services: create_host_path: true vpnuk: - image: su:latest + image: ghcr.io/algo7/tripadvisor-review-scraper/vpn_worker:latest labels: - 'TaskOwner=PROXY' - 'vpn.region=UK' @@ -114,7 +114,7 @@ services: create_host_path: true vpnbe: - image: su:latest + image: ghcr.io/algo7/tripadvisor-review-scraper/vpn_worker:latest labels: - 'TaskOwner=PROXY' - 'vpn.region=BE' From 67802b604edcda5722e67b1d98ce85596c948afc Mon Sep 17 00:00:00 2001 From: algo7 <11154774+algo7@users.noreply.github.com> Date: Sat, 9 Dec 2023 08:32:55 +0100 Subject: [PATCH 3/3] Add Proxy Pool service doc to readme --- README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.md b/README.md index 0b83d1e..d7dd086 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ A simple scraper for TripAdvisor reviews. - [Visit the UI](#visit-the-ui) - [Live Demo](#live-demo) - [Proxy Pool](#proxy-pool) + - [Running the Proxy Pool](#running-the-proxy-pool) ## How to Install Docker: 1. [Windows](https://docs.docker.com/desktop/windows/install/) @@ -87,3 +88,15 @@ The UI is accessible at `http://localhost:3000`. A live demo of the container provisioner is available at [https://algo7.tools](https://algo7.tools). # Proxy Pool +Proxy Pool is a docker image that runs both HTTP and SOCKS5 Proxies over OpenVPN (config to be provided by the user via docker bind mounts). `sockd`, `squid`, and `openvpn` client are managed by `supervisord` in the container. The service integrates with the Container Provisioner to provide a pool of proxies for the scraper to use. The container provisioner uses `docker-compose labels` to distinguish between different proxies. At this moment, the container provisioner only supports connecting to the Proxy Pool using HTTP proxies. 
Each service in the `docker-compose.yml` file represents a single proxy in the pool. The `docker-compose.yml` file for the proxy pool is located in the `proxy_pool` folder. + +The Proxy Pool service can also be used directly with the scraper. Just make sure that the `PROXY_ADDRESS` environment variable is set in the `docker-compose.yml` file for the scraper. + +## Running the Proxy Pool +1. Pull the latest Proxy Pool (`vpn_worker`) Docker image +```bash +docker pull ghcr.io/algo7/tripadvisor-review-scraper/vpn_worker:latest +``` +2. Create a docker-compose.yml file containing the configurations for each proxy (see the docker-compose.yml provided in the proxy_pool folder). +3. Place the OpenVPN config file of each proxy in the corresponding bind mount folder specified in the docker-compose.yml file. +4. Run `docker-compose up` to start the container. \ No newline at end of file