Skip to content

Commit

Permalink
add notebooks
Browse files Browse the repository at this point in the history
  • Loading branch information
odanado committed Apr 8, 2018
1 parent 8cdad92 commit 7308137
Show file tree
Hide file tree
Showing 4 changed files with 1,153 additions and 0 deletions.
215 changes: 215 additions & 0 deletions notebooks/pca.ipynb

Large diffs are not rendered by default.

345 changes: 345 additions & 0 deletions notebooks/plot_log.ipynb

Large diffs are not rendered by default.

345 changes: 345 additions & 0 deletions notebooks/poke2vec.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,345 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"lines = Path('../results/gen7vgc2018_ns_32_poke2vec.model').read_text().split('\\n')\n",
"index2poke = []\n",
"n_vocab, n_units = map(int, lines[0].split())\n",
"vec = np.zeros((n_vocab, n_units))\n",
"\n",
"for i, line in enumerate(lines[1:]):\n",
" line = line.split()\n",
" if len(line) == 0:\n",
" break\n",
" name, v = line[0], line[1:]\n",
" v = list(map(float, v))\n",
" index2poke.append(name)\n",
" vec[i] = v"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[-0.19213457, 0.39025375, -0.36565906, ..., -0.18254557,\n",
" 0.10498768, -0.35236248],\n",
" [-0.61358386, -0.14285038, 0.7308076 , ..., 1.9705235 ,\n",
" -0.8620409 , 1.1889096 ],\n",
" [-0.36718768, -1.8430903 , -0.50245476, ..., -1.2942839 ,\n",
" 0.5975897 , 1.1119113 ],\n",
" ...,\n",
" [-0.23748733, 0.18743554, -0.20721523, ..., 0.08759026,\n",
" -0.22725153, 0.0684476 ],\n",
" [ 0.02403239, -0.33811316, -0.06650057, ..., -0.12179834,\n",
" 0.23604423, 0.5437267 ],\n",
" [-0.06221102, 0.0660575 , -1.3806828 , ..., 0.20800947,\n",
" -0.54373425, 0.40150735]])"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vec"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from gensim.models.keyedvectors import Vocab\n",
"vocab = {x: Vocab(index=i) for i, x in enumerate(index2poke)}"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"from gensim.models import KeyedVectors\n",
"poke2vec = KeyedVectors(vec.shape[1])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"poke2vec.vectors = vec\n",
"poke2vec.vocab = vocab\n",
"poke2vec.index2entity = index2poke"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Unnamed: 0</th>\n",
" <th>Japanese</th>\n",
" <th>English</th>\n",
" <th>French</th>\n",
" <th>Spanish</th>\n",
" <th>German</th>\n",
" <th>Italian</th>\n",
" <th>Korean</th>\n",
" <th>MandarinChinese</th>\n",
" <th>CantoneseChinese</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>フシギダネ</td>\n",
" <td>Bulbasaur</td>\n",
" <td>Bulbizarre</td>\n",
" <td>Bulbasaur</td>\n",
" <td>Bisasam</td>\n",
" <td>Bulbasaur</td>\n",
" <td>이상해씨</td>\n",
" <td>妙蛙种子</td>\n",
" <td>妙蛙種子</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>フシギソウ</td>\n",
" <td>Ivysaur</td>\n",
" <td>Herbizarre</td>\n",
" <td>Ivysaur</td>\n",
" <td>Bisaknosp</td>\n",
" <td>Ivysaur</td>\n",
" <td>이상해풀</td>\n",
" <td>妙蛙草</td>\n",
" <td>妙蛙草</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>フシギバナ</td>\n",
" <td>Venusaur</td>\n",
" <td>Florizarre</td>\n",
" <td>Venusaur</td>\n",
" <td>Bisaflor</td>\n",
" <td>Venusaur</td>\n",
" <td>이상해꽃</td>\n",
" <td>妙蛙花</td>\n",
" <td>妙蛙花</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>ヒトカゲ</td>\n",
" <td>Charmander</td>\n",
" <td>Salamèche</td>\n",
" <td>Charmander</td>\n",
" <td>Glumanda</td>\n",
" <td>Charmander</td>\n",
" <td>파이리</td>\n",
" <td>小火龙</td>\n",
" <td>小火龍</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>リザード</td>\n",
" <td>Charmeleon</td>\n",
" <td>Reptincel</td>\n",
" <td>Charmeleon</td>\n",
" <td>Glutexo</td>\n",
" <td>Charmeleon</td>\n",
" <td>리자드</td>\n",
" <td>火恐龙</td>\n",
" <td>火恐龍</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Unnamed: 0 Japanese English French Spanish German \\\n",
"0 0 フシギダネ Bulbasaur Bulbizarre Bulbasaur Bisasam \n",
"1 1 フシギソウ Ivysaur Herbizarre Ivysaur Bisaknosp \n",
"2 2 フシギバナ Venusaur Florizarre Venusaur Bisaflor \n",
"3 3 ヒトカゲ Charmander Salamèche Charmander Glumanda \n",
"4 4 リザード Charmeleon Reptincel Charmeleon Glutexo \n",
"\n",
" Italian Korean MandarinChinese CantoneseChinese \n",
"0 Bulbasaur 이상해씨 妙蛙种子 妙蛙種子 \n",
"1 Ivysaur 이상해풀 妙蛙草 妙蛙草 \n",
"2 Venusaur 이상해꽃 妙蛙花 妙蛙花 \n",
"3 Charmander 파이리 小火龙 小火龍 \n",
"4 Charmeleon 리자드 火恐龙 火恐龍 "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"df = pd.read_csv('../pokemon.csv')\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"def to_id(name):\n",
" return re.sub(r'[^a-z0-9]+', '', name.lower())"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"_to_ja = {to_id(y): x for x, y in zip(df.Japanese,df.English)}\n",
"def to_ja(name):\n",
" if name in _to_ja:\n",
" return _to_ja[name]\n",
" if name.startswith('vivillon'):\n",
" return _to_ja['vivillon']\n",
" if name.startswith('meowstic'):\n",
" return 'ニャオニクス' + ' ♂' if name[-1] == 'm' else '♀'\n",
" return name"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"thundurustherian 0.8745791912078857\n",
"ライチュウ 0.7674826383590698\n",
"naganadel 0.7397462129592896\n",
"thundurus 0.7042409777641296\n",
"rotomheat 0.6822828054428101\n",
"フワライド 0.6556263566017151\n",
"カプ・コケコ 0.5665922164916992\n",
"トゲデマル 0.5359745025634766\n",
"ジャローダ 0.4981088638305664\n",
"デンジュモク 0.4966435134410858\n"
]
}
],
"source": [
"ret = poke2vec.most_similar(positive=['zapdos'])\n",
"for name, sim in ret:\n",
" print(to_ja(name), sim)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ライチュウ 0.7268299460411072\n",
"naganadel 0.6770119071006775\n",
"thundurus 0.672311007976532\n",
"rotomheat 0.6123127937316895\n",
"フワライド 0.5884656310081482\n",
"ジャローダ 0.580478310585022\n",
"rotomwash 0.5405862927436829\n",
"カプ・コケコ 0.5096319317817688\n",
"トゲデマル 0.5073633790016174\n",
"トゲキッス 0.48859071731567383\n"
]
}
],
"source": [
"ret = poke2vec.most_similar(positive=['thundurustherian', 'zapdos'], negative=['manectric'])\n",
"for name, sim in ret:\n",
" print(to_ja(name), sim)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit 7308137

Please sign in to comment.