diff --git a/.github/workflows/mysql-to-es-batch-job.yml b/.github/workflows/mysql-to-es-batch-job.yml
new file mode 100644
index 0000000..e48e808
--- /dev/null
+++ b/.github/workflows/mysql-to-es-batch-job.yml
@@ -0,0 +1,38 @@
+# Builds the MySQL-to-Elasticsearch batch image and runs it on the production host.
+name: Mysql To ElasticSearch Batch Job
+
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - main
+  schedule:
+    # Every day at 02:00 (GitHub Actions evaluates cron in UTC).
+    - cron: '0 2 * * *'
+
+jobs:
+  batch-job:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Docker Build And Push
+        run: |
+          # Pass the password on stdin so it is not exposed as a command-line argument.
+          echo "${{ secrets.PASSWORD }}" | docker login -u ${{ secrets.USERNAME }} --password-stdin
+          docker build -f mysql-to-es.Dockerfile -t skku-mysql-to-es-batch .
+          docker tag skku-mysql-to-es-batch sinkyoungdeok/skku-mysql-to-es-batch
+          docker push sinkyoungdeok/skku-mysql-to-es-batch
+      - name: Deploy Prod
+        uses: appleboy/ssh-action@v0.1.4
+        with:
+          key: ${{ secrets.SSH_KEY }}
+          host: ${{ secrets.HOST_NAME }}
+          username: ubuntu
+          port: 22
+          script: |
+            # Pass the password on stdin so it is not exposed as a command-line argument.
+            echo "${{ secrets.PASSWORD }}" | docker login -u ${{ secrets.USERNAME }} --password-stdin
+            docker pull sinkyoungdeok/skku-mysql-to-es-batch
+
+            # --rm removes the finished container so nightly runs do not pile up.
+            docker run --rm --net ubuntu_default sinkyoungdeok/skku-mysql-to-es-batch
\ No newline at end of file
diff --git a/csv-to-es.py b/csv-to-es.py
index 53e425c..2999d6f 100644
--- a/csv-to-es.py
+++ b/csv-to-es.py
@@ -31,9 +31,12 @@
             "id": {"type": "long"},
             "name": {"type": "text", "analyzer": "korean"},
             "original_category": {"type": "text", "analyzer": "korean"},
-            "naver_review_count": {"type": "long"},
             "address": {"type": "text", "analyzer": "korean"},
-            "naver_rating": {"type": "float"},
+            "naver_review_count": {"type": "long"},
+            "naver_rating_avg": {"type": "float"},
+            "review_count": {"type": "long"},
+            "rating_avg": {"type": "float"},
+            "like_count": {"type": "long"},
             "number": {"type": "text"},
             "image_url": {"type": "text"},
             "category": {"type": "text", "analyzer": "korean"},
@@ -91,7 +94,7 @@
             "original_category": row['category'],
             "naver_review_count": row['review_count'].replace('+', ''),
             "address": row['address'],
-            "naver_rating": rating,
+            "naver_rating_avg": rating,
             "number": number,
             "image_url": restaurant_image_url,
             "category": row['custom_category'],
@@ -102,14 +105,14 @@
             data.pop("discount_content")
         if data.get("naver_review_count") is None:
             data.pop("naver_review_count")
-        if data.get("naver_rating") is None:
-            data.pop("naver_rating")
+        if data.get("naver_rating_avg") is None:
+            data.pop("naver_rating_avg")
         if data.get("number") is None:
             data.pop("number")
         if data.get("image_url") is None:
             data.pop("image_url")
 
-        response = es.index(index=index_name, id=row['name'], document=data)
+        response = es.index(index=index_name, id=row['id'], document=data)
         print(f"Indexed document ID: {response['_id']}, Result: {response['result']}")
 
 # 앨리어스 확인 및 설정
diff --git a/mysql-to-es-requirements.txt b/mysql-to-es-requirements.txt
new file mode 100644
index 0000000..f676724
--- /dev/null
+++ b/mysql-to-es-requirements.txt
@@ -0,0 +1,2 @@
+mysql-connector-python
+elasticsearch
\ No newline at end of file
diff --git a/mysql-to-es.Dockerfile b/mysql-to-es.Dockerfile
new file mode 100644
index 0000000..06d4f3a
--- /dev/null
+++ b/mysql-to-es.Dockerfile
@@ -0,0 +1,8 @@
+FROM python:3.8-slim
+
+COPY mysql-to-es-requirements.txt mysql-to-es-requirements.txt
+COPY mysql-to-es.py mysql-to-es.py
+
+RUN pip install -r mysql-to-es-requirements.txt
+
+ENTRYPOINT ["python3", "mysql-to-es.py"]
\ No newline at end of file
diff --git a/mysql-to-es.py b/mysql-to-es.py
new file mode 100644
index 0000000..eb7ca94
--- /dev/null
+++ b/mysql-to-es.py
@@ -0,0 +1,70 @@
+"""Batch job that copies restaurant statistics from MySQL into Elasticsearch."""
+import mysql.connector
+from elasticsearch import Elasticsearch, helpers
+
+# Connection settings for the source MySQL database.
+db_config = {
+    'user': 'skku-user',
+    'password': 'skku-pw',
+    'host': 'skku-db',
+    'database': 'skku',
+    'port': 3306
+}
+
+es = Elasticsearch(['http://es-singlenode:9200'])
+
+
+def fetch_restaurant_data():
+    """Return all rows of the ``restaurants`` table as a list of dicts.
+
+    Each dict has the keys ``id``, ``name``, ``rating_avg``,
+    ``review_count`` and ``like_count``.
+    """
+    query = """
+    SELECT id, name, rating_avg, review_count, like_count
+    FROM restaurants
+    """
+
+    connection = mysql.connector.connect(**db_config)
+    try:
+        cursor = connection.cursor(dictionary=True)
+        try:
+            cursor.execute(query)
+            return cursor.fetchall()
+        finally:
+            # Release the cursor even when the query fails.
+            cursor.close()
+    finally:
+        # Close the connection even when cursor creation or the query raises.
+        connection.close()
+
+
+def update_elasticsearch(data):
+    """Bulk-update the ``restaurant`` index with the given rows.
+
+    Args:
+        data: Iterable of dicts with the keys ``id``, ``rating_avg``,
+            ``review_count`` and ``like_count``.
+    """
+    actions = [
+        {
+            "_op_type": "update",
+            "_index": "restaurant",
+            "_id": restaurant['id'],
+            "doc": {
+                "rating_avg": restaurant['rating_avg'],
+                "review_count": restaurant['review_count'],
+                "like_count": restaurant['like_count']
+            },
+            # Create the document from ``doc`` when the id is not indexed yet.
+            "doc_as_upsert": True
+        }
+        for restaurant in data
+    ]
+
+    helpers.bulk(es, actions)
+
+
+restaurant_data = fetch_restaurant_data()
+update_elasticsearch(restaurant_data)
+print("Elasticsearch update complete")
diff --git a/mysql_batch/insert.py b/mysql_batch/insert.py
index 26c93b7..c16abab 100644
--- a/mysql_batch/insert.py
+++ b/mysql_batch/insert.py
@@ -18,11 +18,6 @@ def insert_into_restaurants(cursor, restaurant):
     );
     """
 
-    try:
-        rating = float(restaurant['rating'])
-    except ValueError:
-        rating = 0.0
-
     cursor.execute(insert_query, (
         restaurant['id'],
         restaurant['name'],
@@ -31,7 +26,7 @@ def insert_into_restaurants(cursor, restaurant):
         0,
         restaurant['address'],
         restaurant['number'],
-        rating,
+        0,
         restaurant['image_url'],
         0,
         restaurant['discount_content']