Skip to content

Commit

Permalink
chore: (wip) CNN
Browse files Browse the repository at this point in the history
  • Loading branch information
xhiroga committed Jan 12, 2024
1 parent 7eabfa6 commit 39eb576
Show file tree
Hide file tree
Showing 4 changed files with 377 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
deep-learning-from-scratch
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# [O'Reilly Japan - ゼロから作るDeep Learning](https://www.oreilly.co.jp/books/9784873117584/)

```powershell
./latest.bat
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
@echo off
REM Fetch the book's sample-code repository: clone it when it is missing,
REM otherwise pull the latest changes.
REM setlocal keeps repo_path from leaking into the caller's cmd session.
setlocal
SET "repo_path=deep-learning-from-scratch"

IF NOT EXIST "%repo_path%" (
    echo Repository not found. Cloning...
    REM Clone into an explicit target so the existence check and the clone agree.
    git clone https://github.com/oreilly-japan/deep-learning-from-scratch.git "%repo_path%" || exit /b 1
) ELSE (
    echo Repository found. Updating...
    REM git -C runs the pull inside the repository without changing the
    REM caller's working directory (the original used an unquoted cd).
    git -C "%repo_path%" pull || exit /b 1
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,360 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 7章 畳み込みニューラルネットワーク"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"import sys, os\n",
"sys.path.append(os.pardir)\n",
"sys.path.append(f\"{os.pardir}/deep-learning-from-scratch\")\n",
"\n",
"import numpy as np\n",
"from beartype import beartype\n",
"from collections import OrderedDict\n",
"from nptyping import NDArray, Shape, Float, Int\n",
"from common.layers import SoftmaxWithLoss\n",
"from common.util import im2col,col2im"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(10, 1, 28, 28)"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = np.random.rand(10,1,28,28)\n",
"x.shape\n",
"# (10, 1, 28, 28) ミニバッチ, チャンネル, 高さ, 幅"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(9, 75)\n"
]
}
],
"source": [
"x1 = np.random.rand(1,3,7,7)\n",
"col1 = im2col(x1, 5, 5, stride=1, pad=0)\n",
"print(col1.shape) # (9, 75): rows = number of filter positions (= 1 * ((7 - 5) / 1 + 1) ** 2); columns = values under one filter window (= 3 * 5 * 5)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[0.77557663 0.33806582 0.37237046 0.83112068 0.49650122 0.03914491\n",
" 0.20773407]\n",
" [0.51288944 0.50293884 0.85671902 0.72090202 0.27010591 0.5556635\n",
" 0.59713498]\n",
" [0.02457308 0.90281899 0.87469388 0.02009157 0.13361658 0.19458219\n",
" 0.02499915]\n",
" [0.0209891 0.9720402 0.55244596 0.95761181 0.27203654 0.97884945\n",
" 0.37895102]\n",
" [0.19315752 0.15809505 0.68105605 0.29481389 0.35691675 0.23714536\n",
" 0.03193322]\n",
" [0.61227175 0.0194606 0.16553847 0.74204764 0.42913091 0.26977271\n",
" 0.19511624]\n",
" [0.60038899 0.99727459 0.99125549 0.69182925 0.95594471 0.56926757\n",
" 0.72815871]]\n",
"[0.77557663 0.33806582 0.37237046 0.83112068 0.49650122 0.51288944\n",
" 0.50293884 0.85671902 0.72090202 0.27010591 0.02457308 0.90281899\n",
" 0.87469388 0.02009157 0.13361658 0.0209891 0.9720402 0.55244596\n",
" 0.95761181 0.27203654 0.19315752 0.15809505 0.68105605 0.29481389\n",
" 0.35691675 0.56177801 0.51579877 0.55239822 0.66654575 0.49598721\n",
" 0.07402139 0.5829163 0.38529097 0.04022566 0.6660402 0.04880685\n",
" 0.29328377 0.72894727 0.40273677 0.58444065 0.23715671 0.76501373\n",
" 0.2617112 0.43930511 0.8667773 0.87877624 0.21587922 0.25574579\n",
" 0.14144719 0.10852229 0.61531416 0.63418458 0.52175757 0.04859568\n",
" 0.00995961 0.62378664 0.53930438 0.68877897 0.70155326 0.37456113\n",
" 0.91382188 0.08136818 0.64643685 0.82147964 0.72842887 0.45922596\n",
" 0.46440204 0.88385712 0.36370997 0.23976922 0.87628869 0.13027836\n",
" 0.43845715 0.42018312 0.57520152]\n"
]
}
],
"source": [
"print(x1[0][0])\n",
"print(col1[0])"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(90, 75)\n"
]
}
],
"source": [
"x2 = np.random.rand(10,3,7,7)\n",
"col2 = im2col(x2, 5, 5, stride=1, pad=0)\n",
"print(col2.shape) # (90, 75): rows = number of filter positions (= 10 * ((7 - 5) / 1 + 1) ** 2); columns = values under one filter window (= 3 * 5 * 5)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"def output_size(x_len, pad, filter_len, stride):\n",
"    \"\"\"Return how many window positions fit along one axis.\n",
"\n",
"    Args:\n",
"        x_len: Input length along the axis, before padding.\n",
"        pad: Padding added to each side of the axis.\n",
"        filter_len: Length of the filter (window) along the axis.\n",
"        stride: Step between consecutive window positions.\n",
"\n",
"    Returns:\n",
"        The output length as an int, so it can be used as a reshape dimension.\n",
"\n",
"    Raises:\n",
"        ValueError: If stride is not positive, the filter is longer than the\n",
"            padded input, or the stride does not evenly divide the remainder.\n",
"    \"\"\"\n",
"    if stride <= 0:\n",
"        raise ValueError(f\"Stride {stride} must be positive!\")\n",
"    rest = (x_len + 2 * pad) - filter_len\n",
"    if rest < 0:\n",
"        raise ValueError(f\"Filter length {filter_len} is longer than input size {x_len} + 2 * {pad}!\")\n",
"    if rest % stride != 0:\n",
"        raise ValueError(f\"Rest length {rest} and stride {stride} are conflicted!\")\n",
"    # Floor division keeps the result an int; true division returned a float.\n",
"    return rest // stride + 1\n",
"\n",
"class Convolution:\n",
"    \"\"\"Convolution layer computed as a matrix product via im2col / col2im.\"\"\"\n",
"\n",
"    @beartype\n",
"    def __init__(self, W: NDArray[Shape['FN,C,FH,FW'], Float], b: NDArray[Shape['FN'], Float], stride=1, pad=0):\n",
"        \"\"\"Store the filters and hyper-parameters.\n",
"\n",
"        Args:\n",
"            W: Filter weights of shape (FN, C, FH, FW); FN is the filter number.\n",
"            b: Biases of shape (FN,).\n",
"            stride: Step between filter positions.\n",
"            pad: Padding forwarded to im2col / col2im.\n",
"        \"\"\"\n",
"        self.W = W\n",
"        self.b = b\n",
"        self.stride = stride\n",
"        self.pad = pad\n",
"\n",
"        # Intermediate data cached by forward() and consumed by backward().\n",
"        self.x = None\n",
"        self.col = None\n",
"        self.col_W = None\n",
"\n",
"        # Gradients of the weights and biases, filled in by backward().\n",
"        self.dW = None\n",
"        self.db = None\n",
"\n",
"    @beartype\n",
"    def forward(self, x: NDArray[Shape['N,C,H,W'], Float]):\n",
"        \"\"\"Convolve x with the filters and return an (N, FN, out_h, out_w) array.\n",
"\n",
"        Raises:\n",
"            ValueError: If the channel count of x differs from that of the filters.\n",
"        \"\"\"\n",
"        FN, C, FH, FW = self.W.shape\n",
"        N, x_channels, H, W = x.shape\n",
"        if x_channels != C:\n",
"            raise ValueError(f\"Input has {x_channels} channels but the filters expect {C}!\")\n",
"        # int() guards against a float result, which reshape would reject.\n",
"        out_h = int(output_size(H, self.pad, FH, self.stride))\n",
"        out_w = int(output_size(W, self.pad, FW, self.stride))\n",
"\n",
"        col = im2col(x, FH, FW, self.stride, self.pad)  # (N*out_h*out_w, C*FH*FW)\n",
"        col_W = self.W.reshape(FN, -1).T  # (C*FH*FW, FN)\n",
"        out = np.dot(col, col_W) + self.b  # (N*out_h*out_w, FN)\n",
"        reshaped_out = out.reshape(N, out_h, out_w, FN).transpose(0, 3, 1, 2)  # (N, FN, out_h, out_w)\n",
"\n",
"        self.x = x\n",
"        self.col = col\n",
"        self.col_W = col_W\n",
"\n",
"        return reshaped_out\n",
"\n",
"    @beartype\n",
"    def backward(self, dout: NDArray[Shape['N,FN,OH,OW'], Float]):\n",
"        \"\"\"Store dW / db and return the gradient with respect to the forward input.\n",
"\n",
"        Must be called after forward(), which caches x, col and col_W.\n",
"        \"\"\"\n",
"        FN, C, FH, FW = self.W.shape\n",
"        dout_matrix = dout.transpose(0, 2, 3, 1).reshape(-1, FN)  # (N*out_h*out_w, FN)\n",
"\n",
"        self.db = np.sum(dout_matrix, axis=0)  # (FN,)\n",
"        dW_matrix = np.dot(self.col.T, dout_matrix)  # (C*FH*FW, FN)\n",
"        # dW_matrix is a local; the original read a non-existent self.dW_matrix.\n",
"        self.dW = dW_matrix.transpose(1, 0).reshape(FN, C, FH, FW)\n",
"\n",
"        dcol = np.dot(dout_matrix, self.col_W.T)  # (N*out_h*out_w, C*FH*FW)\n",
"        dx = col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad)  # same shape as x\n",
"\n",
"        return dx\n"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"class Pooling:\n",
"    \"\"\"Max-pooling layer implemented with im2col / col2im.\"\"\"\n",
"\n",
"    def __init__(self, pool_h, pool_w, stride=2, pad=0):\n",
"        \"\"\"Store the pooling window and hyper-parameters.\n",
"\n",
"        Args:\n",
"            pool_h: Height of the pooling window.\n",
"            pool_w: Width of the pooling window. For example, taking the max\n",
"                over 3x3=9 values means pool_h=3, pool_w=3.\n",
"            stride: Step between window positions.\n",
"            pad: Padding forwarded to im2col / col2im.\n",
"        \"\"\"\n",
"        self.pool_h = pool_h\n",
"        self.pool_w = pool_w\n",
"        self.stride = stride\n",
"        self.pad = pad\n",
"\n",
"        # Cached by forward() and consumed by backward().\n",
"        self.x = None\n",
"        self.arg_max = None\n",
"\n",
"    @beartype\n",
"    def forward(self, x: NDArray[Shape['N,C,H,W'], Float]):\n",
"        \"\"\"Return the per-window maximum as an (N, C, out_h, out_w) array.\"\"\"\n",
"        # Author's note: calling the depth of an output feature map a channel,\n",
"        # even though it is not a colour, still feels odd, but one gets used to it.\n",
"        N, C, H, W = x.shape\n",
"        # Include the padding so the size agrees with what im2col produces.\n",
"        out_h = (H + 2 * self.pad - self.pool_h) // self.stride + 1\n",
"        out_w = (W + 2 * self.pad - self.pool_w) // self.stride + 1\n",
"\n",
"        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)  # (N*out_h*out_w, C*pool_h*pool_w)\n",
"        # One row per (window position, channel) pair so the max is taken per channel.\n",
"        reshaped_col = col.reshape(-1, self.pool_h * self.pool_w)  # (N*out_h*out_w*C, pool_h*pool_w)\n",
"        arg_max = np.argmax(reshaped_col, axis=1)\n",
"        out = np.max(reshaped_col, axis=1)  # (N*out_h*out_w*C,)\n",
"        reshaped_out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)  # (N, C, out_h, out_w)\n",
"\n",
"        self.x = x\n",
"        self.arg_max = arg_max\n",
"\n",
"        return reshaped_out\n",
"\n",
"    @beartype\n",
"    def backward(self, dout: NDArray[Shape['N,C,OH,OW'], Float]):\n",
"        \"\"\"Route each upstream gradient to the position that held the window maximum.\n",
"\n",
"        Must be called after forward(), which caches x and arg_max.\n",
"        \"\"\"\n",
"        dout = dout.transpose(0, 2, 3, 1)  # (N, out_h, out_w, C), matching forward's row order\n",
"        pool_size = self.pool_h * self.pool_w\n",
"\n",
"        # Only the arg-max slot of each window receives a gradient; the rest stay zero.\n",
"        dmax = np.zeros((dout.size, pool_size))\n",
"        dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten()\n",
"        dmax = dmax.reshape(dout.shape + (pool_size,))\n",
"\n",
"        dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)  # (N*out_h*out_w, C*pool_size)\n",
"        dx = col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad)\n",
"\n",
"        return dx\n"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"# \"Rectify\" is named after rectifying an alternating current: AC alternates between\n",
"# positive and negative flow, and after rectification only the positive part remains.\n",
"class Relu:\n",
"    \"\"\"ReLU activation: passes positive values and zeroes the rest.\"\"\"\n",
"\n",
"    def __init__(self):\n",
"        # Boolean mask of the positions where the forward input was <= 0.\n",
"        self.mask = None\n",
"\n",
"    @beartype\n",
"    def forward(self, x: NDArray[Shape['*, ...'], Float]):\n",
"        \"\"\"Return a copy of x with every non-positive element set to 0.\n",
"\n",
"        Arrays of any rank are accepted, since the layer is applied to both\n",
"        convolution and fully connected outputs.\n",
"        \"\"\"\n",
"        self.mask = (x <= 0)\n",
"        out = x.copy()\n",
"        out[self.mask] = 0\n",
"\n",
"        return out\n",
"\n",
"    @beartype\n",
"    def backward(self, dout: NDArray[Shape['*, ...'], Float]):\n",
"        \"\"\"Return dout with the gradient blocked wherever the forward input was <= 0.\"\"\"\n",
"        # Work on a copy so the caller's array is not modified in place.\n",
"        dx = dout.copy()\n",
"        dx[self.mask] = 0\n",
"\n",
"        return dx\n"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"class Affine:\n",
"    \"\"\"Fully connected layer computing x @ W + b.\"\"\"\n",
"\n",
"    def __init__(self, W: NDArray[Shape['S,WS'], Float], b: NDArray[Shape['WS'], Float]):\n",
"        \"\"\"Store the parameters.\n",
"\n",
"        Args:\n",
"            W: Weights of shape (S, WS).\n",
"            b: Biases of shape (WS,).\n",
"        \"\"\"\n",
"        self.W = W\n",
"        self.b = b\n",
"        self.x = None\n",
"        # Shape of the forward input, kept so backward() can restore it.\n",
"        self.original_x_shape = None\n",
"        self.dW = None\n",
"        self.db = None\n",
"\n",
"    @beartype\n",
"    def forward(self, x: NDArray[Shape['*, ...'], Float]):\n",
"        \"\"\"Return x @ W + b, flattening every axis after the first.\n",
"\n",
"        Flattening lets the layer follow a convolution or pooling layer whose\n",
"        output is 4-D.\n",
"\n",
"        Raises:\n",
"            ValueError: If x has fewer than 2 dimensions.\n",
"        \"\"\"\n",
"        if x.ndim < 2:\n",
"            raise ValueError(f\"Affine expects a batched input with at least 2 dimensions, got {x.ndim}!\")\n",
"        self.original_x_shape = x.shape\n",
"        x = x.reshape(x.shape[0], -1)  # (N, S)\n",
"        self.x = x\n",
"        out = np.dot(x, self.W) + self.b  # (N, WS)\n",
"\n",
"        return out\n",
"\n",
"    @beartype\n",
"    def backward(self, dout: NDArray[Shape['N,WS'], Float]):\n",
"        \"\"\"Store dW / db and return the gradient in the shape of the forward input.\n",
"\n",
"        Must be called after forward(), which caches x and its original shape.\n",
"        \"\"\"\n",
"        dx = np.dot(dout, self.W.T)  # (N, S)\n",
"        self.dW = np.dot(self.x.T, dout)  # (S, WS)\n",
"        self.db = np.sum(dout, axis=0)  # (WS,)\n",
"\n",
"        return dx.reshape(*self.original_x_shape)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"# Architecture: Conv -> ReLU -> Pool -> Affine -> ReLU -> Affine -> Softmax-with-loss.\n",
"\n",
"class SimpleConvNet:\n",
"    \"\"\"Small CNN with one convolution/pooling stage and two fully connected layers.\"\"\"\n",
"\n",
"    def __init__(self, input_dim=(1,28,28),\n",
"                 conv_param=None,\n",
"                 hidden_size=100, output_size=10, weight_init_std=0.01):\n",
"        \"\"\"Initialise the parameters and build the layer stack.\n",
"\n",
"        Args:\n",
"            input_dim: Input shape as (channels, height, width). Only the height\n",
"                is used for sizing, so the input is treated as square.\n",
"            conv_param: Dict with the keys 'filter_num', 'filter_size', 'pad' and\n",
"                'stride'. filter_size 5 means a 5x5 filter; square filters are\n",
"                the common case. Defaults to 30 filters of size 5, pad 0, stride 1.\n",
"            hidden_size: Number of units in the hidden fully connected layer.\n",
"            output_size: Number of output classes.\n",
"            weight_init_std: Standard deviation of the initial weights.\n",
"        \"\"\"\n",
"        # A None default avoids sharing one mutable dict between calls.\n",
"        if conv_param is None:\n",
"            conv_param = {'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1}\n",
"        filter_num = conv_param['filter_num']\n",
"        filter_size = conv_param['filter_size']\n",
"        filter_pad = conv_param['pad']\n",
"        filter_stride = conv_param['stride']\n",
"        input_size = input_dim[1]\n",
"        conv_output_size = (input_size + 2*filter_pad - filter_size) // filter_stride + 1\n",
"        # The 2x2 / stride-2 pooling halves each side; floor division keeps the\n",
"        # count correct when conv_output_size is odd.\n",
"        pool_output_size = filter_num * (conv_output_size // 2) * (conv_output_size // 2)\n",
"\n",
"        self.params = {}\n",
"        # W1: (filter_num, channels, filter_size, filter_size)\n",
"        self.params['W1'] = weight_init_std * np.random.randn(filter_num, input_dim[0], filter_size, filter_size)\n",
"        self.params['b1'] = np.zeros(filter_num)\n",
"        # W2: (pool_output_size, hidden_size)\n",
"        self.params['W2'] = weight_init_std * np.random.randn(pool_output_size, hidden_size)\n",
"        self.params['b2'] = np.zeros(hidden_size)\n",
"        # W3: (hidden_size, output_size)\n",
"        self.params['W3'] = weight_init_std * np.random.randn(hidden_size, output_size)\n",
"        self.params['b3'] = np.zeros(output_size)\n",
"\n",
"        self.layers = OrderedDict()\n",
"        self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'], conv_param['stride'], conv_param['pad'])\n",
"        self.layers['Relu1'] = Relu()\n",
"        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)\n",
"        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])\n",
"        self.layers['Relu2'] = Relu()\n",
"        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])\n",
"\n",
"        self.last_layer = SoftmaxWithLoss()\n",
"\n",
"    def predict(self, x: NDArray):\n",
"        \"\"\"Run x through every layer in order and return the final scores.\"\"\"\n",
"        for layer in self.layers.values():\n",
"            x = layer.forward(x)\n",
"        return x\n",
"\n",
"    def loss(self, x, t):\n",
"        \"\"\"Return the value of the last layer's forward pass for input x and labels t.\"\"\"\n",
"        y = self.predict(x)\n",
"        return self.last_layer.forward(y, t)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "til-machine-learning",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit 39eb576

Please sign in to comment.