From 7f01aec4db64d13963ff70794909c2acc21d54e6 Mon Sep 17 00:00:00 2001 From: Bob Sin Date: Fri, 10 May 2024 23:22:25 +0900 Subject: [PATCH] =?UTF-8?q?[KAN-46]=20ES=20=EC=97=90=20=EB=A9=94=EB=89=B4?= =?UTF-8?q?=20=EB=8D=B0=EC=9D=B4=ED=84=B0=20=EB=B0=8F=20=EA=B8=B0=ED=83=80?= =?UTF-8?q?=20=EB=8D=B0=EC=9D=B4=ED=84=B0=EB=93=A4=20=EC=B6=94=EA=B0=80=20?= =?UTF-8?q?(#7)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [KAN-46] ES 에 메뉴 데이터 및 기타 데이터들 추가 * [KAN-46] ES 에 메뉴 데이터 및 기타 데이터들 추가 --- csv-to-es.py | 45 ++++++++++++++++++++++++++++++++++++++++++--- es.Dockerfile | 1 + 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/csv-to-es.py b/csv-to-es.py index 251d361..c883d0c 100644 --- a/csv-to-es.py +++ b/csv-to-es.py @@ -3,8 +3,8 @@ import pandas as pd from elasticsearch import Elasticsearch -file_path = 'restaurants.csv' -df = pd.read_csv(file_path) +restaurant_df = pd.read_csv('restaurants.csv') +menu_df = pd.read_csv('menus.csv') now = datetime.datetime.now() index_name = f"restaurant_{now.strftime('%Y_%m_%d_%H-%M')}" @@ -18,14 +18,53 @@ "properties": { "name": {"type": "text"}, "category": {"type": "text"}, + "review_count": {"type": "text"}, + "address": {"type": "text"}, + "rating": {"type": "float"}, + "number": {"type": "text"}, + "image_url": {"type": "text"}, + "custom_category": {"type": "text"}, + "menus": { + "type": "nested", + "properties": { + "menu_name": {"type": "text"}, + "price": {"type": "text"}, + "description": {"type": "text"}, + "is_representative": {"type": "text"}, + "image_url": {"type": "text"} + } + } } }) # 데이터 인덱싱 -for _, row in df.iterrows(): +for _, row in restaurant_df.iterrows(): + menus = menu_df[menu_df['restaurant_id'] == row['id']].to_dict('records') + + for menu in menus: + if pd.isna(menu['image_url']): + menu.pop('image_url') # image_url 필드가 NaN이면 제거 + + if pd.isna(row['image_url']): + restaurant_image_url = None # NaN 값을 None으로 설정 + else: + restaurant_image_url = row['image_url'] + + if pd.notna(row['rating']): + rating = float(row['rating']) + else: + rating = None + response = es.index(index=index_name, document={ "name": row['name'], "category": row['category'], + "review_count": row['review_count'], + "address": row['address'], + "rating": rating, + "number": row['number'], + "image_url": restaurant_image_url, + "custom_category": row['custom_category'], + "menus": menus, }) print(f"Indexed document ID: {response['_id']}, Result: {response['result']}") diff --git a/es.Dockerfile b/es.Dockerfile index 8631070..1722da0 100644 --- a/es.Dockerfile +++ b/es.Dockerfile @@ -3,6 +3,7 @@ FROM python:3.8-slim COPY es-requirements.txt es-requirements.txt COPY restaurants.csv restaurants.csv COPY csv-to-es.py csv-to-es.py +COPY menus.csv menus.csv RUN pip install -r es-requirements.txt