Skip to content

Commit

Permalink
[analysis] 일부 수정
Browse files: browse the repository at this point in the history
  • Loading branch information
Re-st committed Nov 27, 2023
1 parent 2eba982 commit abb4816
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 26 deletions.
8 changes: 5 additions & 3 deletions analysis/age/hist_groups.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -197,10 +197,12 @@ def cluster(df_original, n_clst, basedic):
if basedic.method == "equal":
statcoll.delete_many(basedic.__dict__)
# 연도별로 데이터 찾아서 넣기!
years = [int(sgId//10000) for sgId in df_original["sgId"].unique()]
df_original["year"] = df_original["sgId"] // 10000
df_original = df_original[df_original["year"].isin([2010, 2014, 2018, 2022])]
years = df_original["year"].unique()
for year in years:
basedic.year = year
df = df_original[df_original["sgId"] // 10000 == year]
basedic.year = int(year)
df = df_original[df_original["year"] == year]
youngest_age = ("", 100)
oldest_age = ("", 0)
print(f"year {year}, {n_clst} clusters")
Expand Down
53 changes: 30 additions & 23 deletions analysis/age/main.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -28,34 +28,41 @@ def main(N=5, folder_name="To_be_filled"):
## 이 링크에 구현될 save_to_mongo함수 참고 : https://github.com/NewWays-TechForImpactKAIST/API-scrap-and-analysis/blob/bd817e9a15086d313d9615b2515a81e0dbd73850/API/utils.py#L34
## 1. 지역의회
# cluster_by = input("구역을 나눌 기준을 입력해주세요 (sdName 즉 시/도 또는 wiwName 즉 기초단체단위): ")
cluster_by = "sdName"
cluster_by = "wiwName"
assert cluster_by in ["sdName", "wiwName"]
level = 1 if cluster_by == "sdName" else 2
datadir = os.path.join(BASE_DIR, "_data", folder_name)
for d in os.listdir(datadir):
# for d in os.listdir(datadir):
# xlsx 파일을 읽어옵니다.
if not d.endswith(".xlsx"):
continue
df = pd.read_excel(os.path.join(datadir, d))
# 필요한 열만 추출합니다.
if level == 1:
df = df[["sgId", "sdName", "name", "age", "gender"]]
else:
df = df[["sgId", "sdName", "wiwName", "name", "age", "gender"]]
df = df.sort_values(by="age")
is_elected = (
True
if "당선" in d
else False
if "후보" in d
else ValueError("엑셀파일 이름에 '당선'이든지 '후보'가 있어야 합니다.")
# if not d.endswith(".xlsx"):
# continue
# df = pd.read_excel(os.path.join(datadir, d))
# d = "[당선][시도의원].xlsx"
d = "[당선][구시군의회의원].xlsx"
df_1 = pd.read_excel(os.path.join(datadir, d))
# d = "[당선][광역의원비례대표].xlsx"
d = "[당선][기초의원비례대표].xlsx"
df_2 = pd.read_excel(os.path.join(datadir, d))
df = pd.concat([df_1, df_2])
# 필요한 열만 추출합니다.
if level == 1:
df = df[["sgId", "sdName", "name", "age", "gender"]]
else:
df = df[["sgId", "sdName", "wiwName", "name", "age", "gender"]]
df = df.sort_values(by="age")
is_elected = (
True
if "당선" in d
else False
if "후보" in d
else ValueError("엑셀파일 이름에 '당선'이든지 '후보'가 있어야 합니다.")
)
councilorType = councilordict[d.split('[')[-1].split(']')[0]]
for method in ["kmeans", "equal"]:
basedic = BasicArgument(councilorType=councilorType, is_elected=is_elected, level=level, method=method)
cluster(
df, N, basedic
)
councilorType = councilordict[d.split('[')[-1].split(']')[0]]
for method in ["kmeans", "equal"]:
basedic = BasicArgument(councilorType=councilorType, is_elected=is_elected, level=level, method=method)
cluster(
df, N, basedic
)
## 2. 광역의회


Expand Down

0 comments on commit abb4816

Please sign in to comment.