-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
1,153 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,345 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from pathlib import Path\n", | ||
"import numpy as np" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"lines = Path('../results/gen7vgc2018_ns_32_poke2vec.model').read_text().split('\\n')\n", | ||
"index2poke = []\n", | ||
"n_vocab, n_units = map(int, lines[0].split())\n", | ||
"vec = np.zeros((n_vocab, n_units))\n", | ||
"\n", | ||
"for i, line in enumerate(lines[1:]):\n", | ||
" line = line.split()\n", | ||
" if len(line) == 0:\n", | ||
" break\n", | ||
" name, v = line[0], line[1:]\n", | ||
" v = list(map(float, v))\n", | ||
" index2poke.append(name)\n", | ||
" vec[i] = v" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"array([[-0.19213457, 0.39025375, -0.36565906, ..., -0.18254557,\n", | ||
" 0.10498768, -0.35236248],\n", | ||
" [-0.61358386, -0.14285038, 0.7308076 , ..., 1.9705235 ,\n", | ||
" -0.8620409 , 1.1889096 ],\n", | ||
" [-0.36718768, -1.8430903 , -0.50245476, ..., -1.2942839 ,\n", | ||
" 0.5975897 , 1.1119113 ],\n", | ||
" ...,\n", | ||
" [-0.23748733, 0.18743554, -0.20721523, ..., 0.08759026,\n", | ||
" -0.22725153, 0.0684476 ],\n", | ||
" [ 0.02403239, -0.33811316, -0.06650057, ..., -0.12179834,\n", | ||
" 0.23604423, 0.5437267 ],\n", | ||
" [-0.06221102, 0.0660575 , -1.3806828 , ..., 0.20800947,\n", | ||
" -0.54373425, 0.40150735]])" | ||
] | ||
}, | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"vec" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from gensim.models.keyedvectors import Vocab\n", | ||
"vocab = {x: Vocab(index=i) for i, x in enumerate(index2poke)}" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from gensim.models import KeyedVectors\n", | ||
"poke2vec = KeyedVectors(vec.shape[1])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"poke2vec.vectors = vec\n", | ||
"poke2vec.vocab = vocab\n", | ||
"poke2vec.index2entity = index2poke" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/html": [ | ||
"<div>\n", | ||
"<style scoped>\n", | ||
" .dataframe tbody tr th:only-of-type {\n", | ||
" vertical-align: middle;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe tbody tr th {\n", | ||
" vertical-align: top;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe thead th {\n", | ||
" text-align: right;\n", | ||
" }\n", | ||
"</style>\n", | ||
"<table border=\"1\" class=\"dataframe\">\n", | ||
" <thead>\n", | ||
" <tr style=\"text-align: right;\">\n", | ||
" <th></th>\n", | ||
" <th>Unnamed: 0</th>\n", | ||
" <th>Japanese</th>\n", | ||
" <th>English</th>\n", | ||
" <th>French</th>\n", | ||
" <th>Spanish</th>\n", | ||
" <th>German</th>\n", | ||
" <th>Italian</th>\n", | ||
" <th>Korean</th>\n", | ||
" <th>MandarinChinese</th>\n", | ||
" <th>CantoneseChinese</th>\n", | ||
" </tr>\n", | ||
" </thead>\n", | ||
" <tbody>\n", | ||
" <tr>\n", | ||
" <th>0</th>\n", | ||
" <td>0</td>\n", | ||
" <td>フシギダネ</td>\n", | ||
" <td>Bulbasaur</td>\n", | ||
" <td>Bulbizarre</td>\n", | ||
" <td>Bulbasaur</td>\n", | ||
" <td>Bisasam</td>\n", | ||
" <td>Bulbasaur</td>\n", | ||
" <td>이상해씨</td>\n", | ||
" <td>妙蛙种子</td>\n", | ||
" <td>妙蛙種子</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>1</th>\n", | ||
" <td>1</td>\n", | ||
" <td>フシギソウ</td>\n", | ||
" <td>Ivysaur</td>\n", | ||
" <td>Herbizarre</td>\n", | ||
" <td>Ivysaur</td>\n", | ||
" <td>Bisaknosp</td>\n", | ||
" <td>Ivysaur</td>\n", | ||
" <td>이상해풀</td>\n", | ||
" <td>妙蛙草</td>\n", | ||
" <td>妙蛙草</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>2</th>\n", | ||
" <td>2</td>\n", | ||
" <td>フシギバナ</td>\n", | ||
" <td>Venusaur</td>\n", | ||
" <td>Florizarre</td>\n", | ||
" <td>Venusaur</td>\n", | ||
" <td>Bisaflor</td>\n", | ||
" <td>Venusaur</td>\n", | ||
" <td>이상해꽃</td>\n", | ||
" <td>妙蛙花</td>\n", | ||
" <td>妙蛙花</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>3</th>\n", | ||
" <td>3</td>\n", | ||
" <td>ヒトカゲ</td>\n", | ||
" <td>Charmander</td>\n", | ||
" <td>Salamèche</td>\n", | ||
" <td>Charmander</td>\n", | ||
" <td>Glumanda</td>\n", | ||
" <td>Charmander</td>\n", | ||
" <td>파이리</td>\n", | ||
" <td>小火龙</td>\n", | ||
" <td>小火龍</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>4</th>\n", | ||
" <td>4</td>\n", | ||
" <td>リザード</td>\n", | ||
" <td>Charmeleon</td>\n", | ||
" <td>Reptincel</td>\n", | ||
" <td>Charmeleon</td>\n", | ||
" <td>Glutexo</td>\n", | ||
" <td>Charmeleon</td>\n", | ||
" <td>리자드</td>\n", | ||
" <td>火恐龙</td>\n", | ||
" <td>火恐龍</td>\n", | ||
" </tr>\n", | ||
" </tbody>\n", | ||
"</table>\n", | ||
"</div>" | ||
], | ||
"text/plain": [ | ||
" Unnamed: 0 Japanese English French Spanish German \\\n", | ||
"0 0 フシギダネ Bulbasaur Bulbizarre Bulbasaur Bisasam \n", | ||
"1 1 フシギソウ Ivysaur Herbizarre Ivysaur Bisaknosp \n", | ||
"2 2 フシギバナ Venusaur Florizarre Venusaur Bisaflor \n", | ||
"3 3 ヒトカゲ Charmander Salamèche Charmander Glumanda \n", | ||
"4 4 リザード Charmeleon Reptincel Charmeleon Glutexo \n", | ||
"\n", | ||
" Italian Korean MandarinChinese CantoneseChinese \n", | ||
"0 Bulbasaur 이상해씨 妙蛙种子 妙蛙種子 \n", | ||
"1 Ivysaur 이상해풀 妙蛙草 妙蛙草 \n", | ||
"2 Venusaur 이상해꽃 妙蛙花 妙蛙花 \n", | ||
"3 Charmander 파이리 小火龙 小火龍 \n", | ||
"4 Charmeleon 리자드 火恐龙 火恐龍 " | ||
] | ||
}, | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"import pandas as pd\n", | ||
"df = pd.read_csv('../pokemon.csv')\n", | ||
"df.head()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 8, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import re\n", | ||
"def to_id(name):\n", | ||
" return re.sub(r'[^a-z0-9]+', '', name.lower())" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 9, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"_to_ja = {to_id(y): x for x, y in zip(df.Japanese,df.English)}\n", | ||
"def to_ja(name):\n", | ||
" if name in _to_ja:\n", | ||
" return _to_ja[name]\n", | ||
" if name.startswith('vivillon'):\n", | ||
" return _to_ja['vivillon']\n", | ||
" if name.startswith('meowstic'):\n", | ||
" return 'ニャオニクス' + ' ♂' if name[-1] == 'm' else '♀'\n", | ||
" return name" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 10, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"thundurustherian 0.8745791912078857\n", | ||
"ライチュウ 0.7674826383590698\n", | ||
"naganadel 0.7397462129592896\n", | ||
"thundurus 0.7042409777641296\n", | ||
"rotomheat 0.6822828054428101\n", | ||
"フワライド 0.6556263566017151\n", | ||
"カプ・コケコ 0.5665922164916992\n", | ||
"トゲデマル 0.5359745025634766\n", | ||
"ジャローダ 0.4981088638305664\n", | ||
"デンジュモク 0.4966435134410858\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"ret = poke2vec.most_similar(positive=['zapdos'])\n", | ||
"for name, sim in ret:\n", | ||
" print(to_ja(name), sim)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 11, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"ライチュウ 0.7268299460411072\n", | ||
"naganadel 0.6770119071006775\n", | ||
"thundurus 0.672311007976532\n", | ||
"rotomheat 0.6123127937316895\n", | ||
"フワライド 0.5884656310081482\n", | ||
"ジャローダ 0.580478310585022\n", | ||
"rotomwash 0.5405862927436829\n", | ||
"カプ・コケコ 0.5096319317817688\n", | ||
"トゲデマル 0.5073633790016174\n", | ||
"トゲキッス 0.48859071731567383\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"ret = poke2vec.most_similar(positive=['thundurustherian', 'zapdos'], negative=['manectric'])\n", | ||
"for name, sim in ret:\n", | ||
" print(to_ja(name), sim)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.0" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.