From a906163abd342a85e8c3fdb110937495a763bc39 Mon Sep 17 00:00:00 2001 From: Re-st Date: Wed, 22 Nov 2023 00:17:22 +0900 Subject: [PATCH] =?UTF-8?q?[scrap]=20age=5Fgroup=EC=9D=98=20analysis=20?= =?UTF-8?q?=EB=82=B4=EC=9A=A9=EC=9D=84=20MongoDB=EB=A1=9C=20=EC=98=AC?= =?UTF-8?q?=EB=A6=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- analysis/age/hist_groups.py | 67 ++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 30 deletions(-) diff --git a/analysis/age/hist_groups.py b/analysis/age/hist_groups.py index b7eda73..575f0e6 100644 --- a/analysis/age/hist_groups.py +++ b/analysis/age/hist_groups.py @@ -6,7 +6,7 @@ from sklearn.cluster import KMeans from matplotlib import cm from analysis.age.draw import make_scatterplot, make_hist - +from db.client import client def plot_young_and_old(youngest_cluster, oldest_cluster): try: @@ -79,6 +79,13 @@ def cluster(df, year, n_clst, method, cluster_by, outdir, font_name, folder_name 결과가 mongodb등으로 옮겨가야 하므로, 사용하지 않도록 바꿔야 함. """ os.makedirs(os.path.join(outdir, method), exist_ok=True) + database_list = client.list_database_names() + print("Available databases:", database_list) + db = client["agehist"] + level = "1level" if cluster_by == "sdName" else "2level" + main_collection = db[year + "_" + level + "_" + method] + # 기존 histogram 정보는 삭제 (나이별로 넣는 것이기 때문에 찌꺼기값 존재가능) + main_collection.delete_many({}) youngest_age = ("", 100) oldest_age = ("", 0) print(f"({year}), {n_clst} clusters") @@ -129,7 +136,6 @@ def cluster(df, year, n_clst, method, cluster_by, outdir, font_name, folder_name "maxAge": age + 1, "count": count, "ageGroup": age_group, - "color": colors[age_group] } for age, count, age_group in zip( range(df_clst['age'].min(), df_clst['age'].max() + 1), @@ -137,21 +143,22 @@ def cluster(df, year, n_clst, method, cluster_by, outdir, font_name, folder_name df_clst.groupby('age')['cluster_label'].first() ) ] + main_collection.insert_one({"name": area, "data": data}) - # 그리기 - package = ( - outdir, - df_clst, - year, - area, - n_clst, - method, - cluster_by, - folder_name, - colors, - font_name, - ) - make_hist(package) + # # 그리기 + # package = ( + # outdir, + # df_clst, + # year, + # area, + # n_clst, + # method, + # cluster_by, + # folder_name, + # colors, + # font_name, + # ) + # make_hist(package) print(f"Number of data points per cluster for {area}") for cluster_label in range(n_clst): @@ -162,17 +169,17 @@ def cluster(df, year, n_clst, method, cluster_by, outdir, font_name, folder_name print(f"Youngest in {youngest_age[0]}: {youngest_age[1]}") print(f"Oldest in {oldest_age[0]}: {oldest_age[1]}") - # 그리기 - package = ( - outdir, - df.shape[0], - year, - df_age, - n_clst, - method, - cluster_by, - folder_name, - colors, - font_name, - ) - make_scatterplot(package) + # # 그리기 + # package = ( + # outdir, + # df.shape[0], + # year, + # df_age, + # n_clst, + # method, + # cluster_by, + # folder_name, + # colors, + # font_name, + # ) + # make_scatterplot(package)