Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
happycastle114 committed Nov 27, 2023
2 parents d1548ab + 1d948da commit e63e02e
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 13 deletions.
10 changes: 5 additions & 5 deletions analysis/age/hist_groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def insert_data_to_mongo(
)


def cluster(df_original, n_clst, basedic):
def cluster(df_original, n_clst, basedic, clean_flag=True):
"""구역별 그룹을 만듭니다.
df_original: 데이터프레임
n_clst: 그룹 수
Expand All @@ -194,11 +194,11 @@ def cluster(df_original, n_clst, basedic):
histcoll = statdb["age_hist"]
statcoll = statdb["age_stat"] # method = "equal"에서 써 줄 통계.
# 기존 histogram 정보는 삭제 (나이별로 넣는 것이기 때문에 찌꺼기값 존재가능)
histcoll.delete_many(basedic.__dict__)
if basedic.method == "equal":
statcoll.delete_many(basedic.__dict__)
if clean_flag:
histcoll.delete_many(basedic.__dict__)
if basedic.method == "equal":
statcoll.delete_many(basedic.__dict__)
# 연도별로 데이터 찾아서 넣기!
df_original["year"] = df_original["sgId"] // 10000
df_original = df_original[df_original["year"].isin([2010, 2014, 2018, 2022])]
years = df_original["year"].unique()
for year in years:
Expand Down
73 changes: 65 additions & 8 deletions analysis/age/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from analysis.age.most_common_age_group import most_common_age_group
from analysis.age.hist_groups import cluster
from analysis.age import BasicArgument
from db.client import client

# Suppress FutureWarning messages.
warnings.filterwarnings("ignore", category=FutureWarning)
Expand All @@ -23,9 +24,10 @@
"기초의원비례대표": "local_councilor",
}

# Handle to the "council" database; run_by_mongo reads its collections.
personDB = client["council"]

def run(cluster_by, filenames, N=5, folder_name="To_be_filled"):
## TO-DO: excel말고 mongodb에서 받아오도록 합니다.

def run_by_excel(cluster_by, filenames, N=5, folder_name="To_be_filled"):
assert cluster_by in ["sdName", "wiwName"]
level = 1 if cluster_by == "sdName" else 2
datadir = os.path.join(BASE_DIR, "_data", folder_name)
Expand All @@ -38,6 +40,7 @@ def run(cluster_by, filenames, N=5, folder_name="To_be_filled"):
else:
df = df[["sgId", "sdName", "wiwName", "name", "age", "gender"]]
df = df.sort_values(by="age")
df["year"] = df["sgId"] // 10000
is_elected = (
True
if "당선" in d
Expand All @@ -56,13 +59,67 @@ def run(cluster_by, filenames, N=5, folder_name="To_be_filled"):
cluster(df, N, basedic)


# def main(N=5):
# run_by_excel("sdName", ["[당선][시도의원].xlsx", "[당선][광역의원비례대표].xlsx"])
# run_by_excel("sdName", ["[후보][시도의원].xlsx", "[후보][광역의원비례대표].xlsx"])
# run_by_excel("sdName", ["[당선][구시군의회의원].xlsx", "[당선][기초의원비례대표].xlsx"])
# run_by_excel("sdName", ["[후보][구시군의회의원].xlsx", "[후보][기초의원비례대표].xlsx"])
# run_by_excel("wiwName", ["[당선][구시군의회의원].xlsx", "[당선][기초의원비례대표].xlsx"])
# run_by_excel("wiwName", ["[후보][구시군의회의원].xlsx", "[후보][기초의원비례대표].xlsx"])


def run_by_mongo(cluster_by, is_elected, councilorType, N=5):
    """Cluster councilor ages using records read from MongoDB.

    Reads every document of one collection in the ``council`` database,
    builds a DataFrame sorted by age and calls ``cluster`` once per grouping
    method ("kmeans" and "equal").

    Args:
        cluster_by: "sdName" (level 1) or "wiwName" (level 2).
        is_elected: True to read the collection named ``councilorType``;
            False to read ``councilorType + "_candidate"``.
        councilorType: Base collection name, e.g. "metro_councilor".
        N: Number of groups passed to ``cluster``.

    Raises:
        AssertionError: If ``cluster_by`` is not an accepted value.
        ValueError: If the selected collection holds no documents.
    """
    assert cluster_by in ["sdName", "wiwName"]
    level = 1 if cluster_by == "sdName" else 2

    # Candidates live in a sibling collection with a "_candidate" suffix.
    # NOTE(review): the suffixed name is also what is stored as councilorType
    # in BasicArgument (next to is_elected) -- confirm that readers of the
    # statistics expect the suffix there.
    collection_name = (
        councilorType if is_elected else councilorType + "_candidate"
    )

    # Level 2 additionally carries wiwName; everything else is shared.
    fields = ["year", "sdName", "name", "age", "gender"]
    if level == 2:
        fields.insert(2, "wiwName")

    data = [
        {field: person.get(field) for field in fields}
        for person in personDB[collection_name].find()
    ]
    if not data:
        # An empty frame has no "age" column, so sort_values would fail with
        # an unhelpful KeyError; fail early with a clear message instead.
        raise ValueError(
            f"no documents found in collection {collection_name!r}"
        )

    df = pd.DataFrame(data).sort_values(by="age")

    for method in ["kmeans", "equal"]:
        basedic = BasicArgument(
            councilorType=collection_name,
            is_elected=is_elected,
            level=level,
            method=method,
        )
        cluster(df, N, basedic, clean_flag=True)


def main(N=5):
    """Run the MongoDB-based age clustering for every configured job.

    Args:
        N: Number of groups forwarded to each ``run_by_mongo`` call.
    """
    # Sejong City is promoted at some point, so the sdName runs must come
    # first; that way it is fine even if the sdName data gets pushed out when
    # cluster starts (see the cluster function).
    jobs = (
        ("sdName", True, "metro_councilor"),
        ("sdName", False, "metro_councilor"),
        ("sdName", True, "local_councilor"),
        ("sdName", False, "local_councilor"),
        ("wiwName", True, "local_councilor"),
        ("wiwName", False, "local_councilor"),
    )
    for cluster_by, is_elected, councilor_type in jobs:
        run_by_mongo(
            cluster_by,
            is_elected=is_elected,
            councilorType=councilor_type,
            N=N,
        )


# Run the analysis only when executed as a script, so that importing this
# module does not trigger database reads and writes.
if __name__ == "__main__":
    main()

0 comments on commit e63e02e

Please sign in to comment.