Skip to content

Commit

Permalink
[scrap] age_group의 analysis 내용을 MongoDB로 올림
Browse the repository at this point in the history
  • Loading branch information
Re-st committed Nov 21, 2023
1 parent 7fd022d commit a906163
Showing 1 changed file with 37 additions and 30 deletions.
67 changes: 37 additions & 30 deletions analysis/age/hist_groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from sklearn.cluster import KMeans
from matplotlib import cm
from analysis.age.draw import make_scatterplot, make_hist

from db.client import client

def plot_young_and_old(youngest_cluster, oldest_cluster):
try:
Expand Down Expand Up @@ -79,6 +79,13 @@ def cluster(df, year, n_clst, method, cluster_by, outdir, font_name, folder_name
결과가 mongodb등으로 옮겨가야 하므로, 사용하지 않도록 바꿔야 함.
"""
os.makedirs(os.path.join(outdir, method), exist_ok=True)
database_list = client.list_database_names()
print("Available databases:", database_list)
db = client["agehist"]
level = "1level" if cluster_by == "sdName" else "2level"
main_collection = db[year + "_" + level + "_" + method]
# 기존 histogram 정보는 삭제 (나이별로 넣는 것이기 때문에 찌꺼기값 존재가능)
main_collection.delete_many({})
youngest_age = ("", 100)
oldest_age = ("", 0)
print(f"({year}), {n_clst} clusters")
Expand Down Expand Up @@ -129,29 +136,29 @@ def cluster(df, year, n_clst, method, cluster_by, outdir, font_name, folder_name
"maxAge": age + 1,
"count": count,
"ageGroup": age_group,
"color": colors[age_group]
}
for age, count, age_group in zip(
range(df_clst['age'].min(), df_clst['age'].max() + 1),
df_clst.groupby('age').size(),
df_clst.groupby('age')['cluster_label'].first()
)
]
main_collection.insert_one({"name": area, "data": data})

# 그리기
package = (
outdir,
df_clst,
year,
area,
n_clst,
method,
cluster_by,
folder_name,
colors,
font_name,
)
make_hist(package)
# # 그리기
# package = (
# outdir,
# df_clst,
# year,
# area,
# n_clst,
# method,
# cluster_by,
# folder_name,
# colors,
# font_name,
# )
# make_hist(package)

print(f"Number of data points per cluster for {area}")
for cluster_label in range(n_clst):
Expand All @@ -162,17 +169,17 @@ def cluster(df, year, n_clst, method, cluster_by, outdir, font_name, folder_name
print(f"Youngest in {youngest_age[0]}: {youngest_age[1]}")
print(f"Oldest in {oldest_age[0]}: {oldest_age[1]}")

# 그리기
package = (
outdir,
df.shape[0],
year,
df_age,
n_clst,
method,
cluster_by,
folder_name,
colors,
font_name,
)
make_scatterplot(package)
# # 그리기
# package = (
# outdir,
# df.shape[0],
# year,
# df_age,
# n_clst,
# method,
# cluster_by,
# folder_name,
# colors,
# font_name,
# )
# make_scatterplot(package)

0 comments on commit a906163

Please sign in to comment.