-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
377 additions
and
0 deletions.
There are no files selected for viewing
1 change: 1 addition & 0 deletions
1
computer-science/machine-learning/_src/deep-learning-from-scratch/.gitignore
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
deep-learning-from-scratch |
5 changes: 5 additions & 0 deletions
5
computer-science/machine-learning/_src/deep-learning-from-scratch/README.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# [O'Reilly Japan - ゼロから作るDeep Learning](https://www.oreilly.co.jp/books/9784873117584/) | ||
|
||
```powershell | ||
./latest.bat | ||
``` |
11 changes: 11 additions & 0 deletions
11
computer-science/machine-learning/_src/deep-learning-from-scratch/latest.bat
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
@echo off
REM Clone the book's sample-code repository, or update it when a checkout
REM already exists. Paths are resolved relative to the current directory.

REM setlocal keeps repo_path / repo_url from leaking into the caller's environment.
setlocal
SET "repo_path=deep-learning-from-scratch"
SET "repo_url=https://github.com/oreilly-japan/deep-learning-from-scratch.git"

IF NOT EXIST "%repo_path%" (
    echo Repository not found. Cloning...
    REM Name the target explicitly so the existence check and the clone always agree.
    git clone "%repo_url%" "%repo_path%"
) ELSE (
    echo Repository found. Updating...
    REM git -C runs the pull inside the checkout without changing the caller's directory.
    git -C "%repo_path%" pull
)

REM Propagate git's exit status so callers can detect a failed clone or pull.
SET "exit_code=%ERRORLEVEL%"
endlocal & exit /b %exit_code%
360 changes: 360 additions & 0 deletions
360
computer-science/machine-learning/_src/deep-learning-from-scratch/notebooks/ch07.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,360 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# 7章 畳み込みニューラルネットワーク" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 27, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import sys, os\n", | ||
"sys.path.append(os.pardir)\n", | ||
"sys.path.append(f\"{os.pardir}/deep-learning-from-scratch\")\n", | ||
"\n", | ||
"import numpy as np\n", | ||
"from beartype import beartype\n", | ||
"from collections import OrderedDict\n", | ||
"from nptyping import NDArray, Shape, Float, Int\n", | ||
"from common.util import im2col,col2im" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 18, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"(10, 1, 28, 28)" | ||
] | ||
}, | ||
"execution_count": 18, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"x = np.random.rand(10,1,28,28)\n", | ||
"x.shape\n", | ||
"# (10, 1, 28, 28) ミニバッチ, チャンネル, 高さ, 幅" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 19, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"(9, 75)\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"x1 = np.random.rand(1,3,7,7)\n", | ||
"col1 = im2col(x1, 5, 5, stride=1, pad=0)\n", | ||
"print(col1.shape) # (9, 75) フィルターの適用領域の数(=1*(((7-5)/1)+1)**2), 入力特徴マップの要素数" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 20, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"[[0.77557663 0.33806582 0.37237046 0.83112068 0.49650122 0.03914491\n", | ||
" 0.20773407]\n", | ||
" [0.51288944 0.50293884 0.85671902 0.72090202 0.27010591 0.5556635\n", | ||
" 0.59713498]\n", | ||
" [0.02457308 0.90281899 0.87469388 0.02009157 0.13361658 0.19458219\n", | ||
" 0.02499915]\n", | ||
" [0.0209891 0.9720402 0.55244596 0.95761181 0.27203654 0.97884945\n", | ||
" 0.37895102]\n", | ||
" [0.19315752 0.15809505 0.68105605 0.29481389 0.35691675 0.23714536\n", | ||
" 0.03193322]\n", | ||
" [0.61227175 0.0194606 0.16553847 0.74204764 0.42913091 0.26977271\n", | ||
" 0.19511624]\n", | ||
" [0.60038899 0.99727459 0.99125549 0.69182925 0.95594471 0.56926757\n", | ||
" 0.72815871]]\n", | ||
"[0.77557663 0.33806582 0.37237046 0.83112068 0.49650122 0.51288944\n", | ||
" 0.50293884 0.85671902 0.72090202 0.27010591 0.02457308 0.90281899\n", | ||
" 0.87469388 0.02009157 0.13361658 0.0209891 0.9720402 0.55244596\n", | ||
" 0.95761181 0.27203654 0.19315752 0.15809505 0.68105605 0.29481389\n", | ||
" 0.35691675 0.56177801 0.51579877 0.55239822 0.66654575 0.49598721\n", | ||
" 0.07402139 0.5829163 0.38529097 0.04022566 0.6660402 0.04880685\n", | ||
" 0.29328377 0.72894727 0.40273677 0.58444065 0.23715671 0.76501373\n", | ||
" 0.2617112 0.43930511 0.8667773 0.87877624 0.21587922 0.25574579\n", | ||
" 0.14144719 0.10852229 0.61531416 0.63418458 0.52175757 0.04859568\n", | ||
" 0.00995961 0.62378664 0.53930438 0.68877897 0.70155326 0.37456113\n", | ||
" 0.91382188 0.08136818 0.64643685 0.82147964 0.72842887 0.45922596\n", | ||
" 0.46440204 0.88385712 0.36370997 0.23976922 0.87628869 0.13027836\n", | ||
" 0.43845715 0.42018312 0.57520152]\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"print(x1[0][0])\n", | ||
"print(col1[0])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 21, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"(90, 75)\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"x2 = np.random.rand(10,3,7,7)\n", | ||
"col2 = im2col(x2, 5, 5, stride=1, pad=0)\n", | ||
"print(col2.shape) # (90, 75) フィルターの適用領域の数(=10*(((7-5)/1)+1)**2), 入力特徴マップの要素数" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 22, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def output_size(x_len, pad, filter_len, stride):\n", | ||
" rest = ((x_len + 2 * pad) - filter_len)\n", | ||
" if rest < 0:\n", | ||
" raise Exception(f\"Filter length {filter_len} is longer than input size {x_len} + {pad}!\")\n", | ||
" elif rest % stride != 0:\n", | ||
" raise Exception(f\"Rest length {rest} and stride {stride} are conflicted!\")\n", | ||
" else:\n", | ||
" return rest / stride + 1\n", | ||
"\n", | ||
"class Convolution:\n", | ||
" # 今回はフィルターのチャンネル数を3で固定している\n", | ||
" @beartype\n", | ||
" def __init__(self, W: NDArray[Shape['FN,3,FH,FW'],Float], b: NDArray[Shape['FN'], Float], stride=1, pad=0):\n", | ||
" # FN: Filter Number\n", | ||
" self.W = W\n", | ||
" self.b = b\n", | ||
" self.stride = stride\n", | ||
" self.pad = pad\n", | ||
"\n", | ||
" # 中間データ(backward時に使用)\n", | ||
" self.x = None \n", | ||
" self.col = None\n", | ||
" self.col_W = None\n", | ||
" \n", | ||
" # 重み・バイアスパラメータの勾配\n", | ||
" self.dW = None\n", | ||
" self.db = None\n", | ||
"\n", | ||
" @beartype\n", | ||
" def forward(self, x: NDArray[Shape['N,3,H,W'], Float]):\n", | ||
" FN, C, FH, FW = self.W.shape\n", | ||
" N, C, H, W = x.shape\n", | ||
" out_h = output_size(H, self.pad, FH, self.stride)\n", | ||
" out_w = output_size(W, self.pad, FW, self.stride)\n", | ||
"\n", | ||
" col: NDArray[Shape['N*out_h*out_w,3*FH*FW'], Float] = im2col(x, FH, FW, self.stride, self.pad)\n", | ||
" col_W: NDArray[Shape['3*FH*FW,FN'], Float] = self.W.reshape(FN, -1).T\n", | ||
" out: NDArray[Shape['N*out_h*out_w,FN', Float]] = np.dot(col, col_W) + self.b\n", | ||
" reshaped_out: NDArray[Shape['N,FN,out_h,out_w'], float] = out.reshape(N, out_h, out_w, FN).transpose(0,3,1,2)\n", | ||
"\n", | ||
" self.x = x\n", | ||
" self.col = col\n", | ||
" self.col_W = col_W\n", | ||
"\n", | ||
" return reshaped_out\n", | ||
"\n", | ||
" @beartype\n", | ||
" def backward(self, dout: NDArray[Shape['N,FN,out_h,out_w'], Float]):\n", | ||
" FN, C, FH, FW = self.W.shape\n", | ||
" dout_matrix: NDArray[Shape['N*out_h*out_w,FN'], Float] = dout.transpose(0,2,3,1).reshape(-1, FN)\n", | ||
"\n", | ||
" self.db: NDArray[Shape['1,N'], Float] = np.sum(dout_matrix, axis=0)\n", | ||
" dW_matrix: NDArray[Shape['C*FH*FW,FN'], Float] = np.dot(self.col.T, dout_matrix)\n", | ||
" self.dW = self.dW_matrix.transpose(1, 0).reshape(FN, C, FH, FW)\n", | ||
"\n", | ||
" dcol: NDArray[Shape['N*out_h*out_w,3*FH*FN'], Float] = np.dot(dout_matrix, self.col_W.T)\n", | ||
" dx: NDArray[Shape['N,3,H,W']] = col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad)\n", | ||
"\n", | ||
" return dx\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 23, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"class Pooling:\n", | ||
" def __init__(self, pool_h, pool_w, stride=2, pad=0):\n", | ||
" # pool_h, pool_wはそれぞれプーリング適用領域の高さ・幅。例えば3x3=9からmaxを取るなら、pool_h=3, pool_w=3\n", | ||
" self.pool_h = pool_h\n", | ||
" self.pool_w = pool_w\n", | ||
" self.stride = stride\n", | ||
" self.pad = pad\n", | ||
"\n", | ||
" @beartype\n", | ||
" def forward(self, x: NDArray[Shape['N,C,H,W'], Float]):\n", | ||
" # 出力特徴マップの奥行きを、対象が色ではないのにチャンネルと呼ぶのは個人的にまだ違和感があるが、そのうち慣れる。\n", | ||
" N, C, H, W = x.shape\n", | ||
" out_h = int(1 + (H - self.pool_h) / self.stride)\n", | ||
" out_w = int(1 + (W - self.pool_w) / self.stride)\n", | ||
"\n", | ||
" col: NDArray[Shape['N,C,H*W'], Float] = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)\n", | ||
" reshaped_col: NDArray[Shape['N*C,pool_h*pool_w'], Float] = out.reshaped(N*C, self.pool_h*self.pool_w)\n", | ||
" out: NDArray[Shape['N*C,1'], Float] = np.max(col, axis=1)\n", | ||
" reshaped_out: NDArray[Shape['N,C,out_h,out_w'], Float] = out.reshape(N, out_h, out_w, C).transpose(0,3,1,2)\n", | ||
"\n", | ||
" return reshaped_out\n", | ||
"\n", | ||
" @beartype\n", | ||
" def backward(self, dout: NDArray[Shape['N,C,out_h,out_w'], Float]):\n", | ||
" # TODO\n", | ||
" return dx\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 24, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Rectifyは電流の交流を整流にすることから名付けられた。電流の交流は正負の電流が交互に流れるが、整流にすると正の電流のみが流れる。\n", | ||
"class Relu:\n", | ||
" def __init__(self):\n", | ||
" self.mask = None\n", | ||
"\n", | ||
" @beartype\n", | ||
" def forward(self, x: NDArray[Shape['N'], Float]):\n", | ||
" self.mask = (x <= 0)\n", | ||
" out = x.copy()\n", | ||
" out[self.mask] = 0\n", | ||
"\n", | ||
" return out\n", | ||
"\n", | ||
" @beartype\n", | ||
" def backward(self, dout: NDArray[Shape['N'], Float]):\n", | ||
" dout[self.mask] = 0\n", | ||
" dx = dout\n", | ||
"\n", | ||
" return dx\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 25, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"class Affine:\n", | ||
" def __init__(self, W: NDArray[Shape['S,WS'], Float], b: NDArray[Shape['D'], Float]):\n", | ||
" self.W = W\n", | ||
" self.b = b\n", | ||
" self.x = None\n", | ||
" self.dW = None\n", | ||
" self.db = None\n", | ||
"\n", | ||
" @beartype\n", | ||
" def forward(self, x: NDArray[Shape['N,S'], Float]):\n", | ||
" self.x = x\n", | ||
" out = np.dot(x, self.W) + self.b\n", | ||
"\n", | ||
" return out\n", | ||
"\n", | ||
" @beartype\n", | ||
" def backward(self, dout: NDArray[Shape['N,WS'], Float]):\n", | ||
" dx: NDArray[Shape['N,S'], Float] = np.dot(dout, self.W.T)\n", | ||
" self.dW: NDArray[Shape['S,WS'], Float] = np.dot(self.x.T, dout)\n", | ||
" self.db: NDArray[Shape['1'], Float] = np.sum(dout, axis=0)\n", | ||
"\n", | ||
" return dx" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 26, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# 畳み込み層 → 全結合層(ReLU) → 全結合層(Softmax)を想定\n", | ||
"\n", | ||
"class SimpleConvNet:\n", | ||
" def __init__(self, input_dim=(1,28,28),\n", | ||
" # filter_size:5は、5x5を表す。正方形がメジャー。\n", | ||
" conv_param={'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1},\n", | ||
" hidden_size=100, output_size=10, weight_init_std=0.01):\n", | ||
" filter_num = conv_param['filter_num']\n", | ||
" filter_size = conv_param['filter_size']\n", | ||
" filter_pad = conv_param['pad']\n", | ||
" filter_stride = conv_param['stride']\n", | ||
" input_size = input_dim[1]\n", | ||
" conv_output_size = int(1 + (input_size + 2*filter_pad - filter_size) / filter_stride)\n", | ||
" pool_output_size = int(filter_num * (conv_output_size/2) * (conv_output_size/2))\n", | ||
"\n", | ||
" self.params = {}\n", | ||
" self.params['W1']: NDArray[Shape['FN,C,FS,FS'],Float] = weight_init_std * np.random.randn(filter_num, input_dim[0], filter_size, filter_size)\n", | ||
" self.params['b1'] = np.zeros[filter_num]\n", | ||
" self.params['W2']: NDArray[Shape['PS,HS']] = weight_init_std * np.random.randn(pool_output_size, hidden_size)\n", | ||
" self.params['b2'] = np.zeros[hidden_size]\n", | ||
" self.params['W3']: NDArray[Shape['HS,OS']] = weight_init_std * np.random.randn(hidden_size, output_size)\n", | ||
" self.params['b3'] = np.zeros[output_size]\n", | ||
"\n", | ||
" self.layers = OrderedDict()\n", | ||
" self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'], conv_param['stride'], conv_param['pad'])\n", | ||
" self.layers['Relu1'] = Relu()\n", | ||
" self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)\n", | ||
" self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])\n", | ||
" self.layers['Relu2'] = Relu()\n", | ||
" self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])\n", | ||
"\n", | ||
" self.last_layer = SoftmaxWithLoss()\n", | ||
"\n", | ||
" def predict(self, x: NDArray):\n", | ||
" for layer in self.layers.values():\n", | ||
" x = layer.forward(x)\n", | ||
" return x\n", | ||
"\n", | ||
" def loss(self, x, t):\n", | ||
" y = self.predict(x)\n", | ||
" return self.last_layer.forward(y,t)\n" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "til-machine-learning", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.11.7" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |