chore: (wip) CNN

xhiroga · Jan 12, 2024 · 39eb576 · 39eb576
1 parent 7eabfa6
commit 39eb576
Show file tree

Hide file tree

Showing 4 changed files with 377 additions and 0 deletions.
diff --git a/computer-science/machine-learning/_src/deep-learning-from-scratch/.gitignore b/computer-science/machine-learning/_src/deep-learning-from-scratch/.gitignore
@@ -0,0 +1 @@
+deep-learning-from-scratch
diff --git a/computer-science/machine-learning/_src/deep-learning-from-scratch/README.md b/computer-science/machine-learning/_src/deep-learning-from-scratch/README.md
@@ -0,0 +1,5 @@
+# [O'Reilly Japan - ゼロから作るDeep Learning](https://www.oreilly.co.jp/books/9784873117584/)
+
+```powershell
+./latest.bat
+```
diff --git a/computer-science/machine-learning/_src/deep-learning-from-scratch/latest.bat b/computer-science/machine-learning/_src/deep-learning-from-scratch/latest.bat
@@ -0,0 +1,11 @@
+@echo off
+REM Clone the book's sample-code repository on first run; update it on later runs.
+REM SETLOCAL keeps repo_path out of the caller's environment.
+SETLOCAL
+SET "repo_path=deep-learning-from-scratch"
+
+IF NOT EXIST "%repo_path%" (
+    echo Repository not found. Cloning...
+    REM Clone into the same path that the existence check above looks at.
+    git clone https://github.com/oreilly-japan/deep-learning-from-scratch.git "%repo_path%"
+) ELSE (
+    echo Repository found. Updating...
+    REM git -C runs the pull inside the repository without changing the caller's working directory.
+    git -C "%repo_path%" pull
+)
diff --git a/computer-science/machine-learning/_src/deep-learning-from-scratch/notebooks/ch07.ipynb b/computer-science/machine-learning/_src/deep-learning-from-scratch/notebooks/ch07.ipynb
@@ -0,0 +1,360 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 7章 畳み込みニューラルネットワーク"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys, os\n",
+    "sys.path.append(os.pardir)\n",
+    "sys.path.append(f\"{os.pardir}/deep-learning-from-scratch\")\n",
+    "\n",
+    "import numpy as np\n",
+    "from beartype import beartype\n",
+    "from collections import OrderedDict\n",
+    "from nptyping import NDArray, Shape, Float, Int\n",
+    "from common.util import im2col,col2im"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(10, 1, 28, 28)"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "x = np.random.rand(10,1,28,28)\n",
+    "x.shape\n",
+    "# (10, 1, 28, 28) ミニバッチ, チャンネル, 高さ, 幅"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(9, 75)\n"
+     ]
+    }
+   ],
+   "source": [
+    "x1 = np.random.rand(1,3,7,7)\n",
+    "col1 = im2col(x1, 5, 5, stride=1, pad=0)\n",
+    "print(col1.shape)   # (9, 75) フィルターの適用領域の数(=1*(((7-5)/1)+1)**2), 入力特徴マップの要素数"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[0.77557663 0.33806582 0.37237046 0.83112068 0.49650122 0.03914491\n",
+      "  0.20773407]\n",
+      " [0.51288944 0.50293884 0.85671902 0.72090202 0.27010591 0.5556635\n",
+      "  0.59713498]\n",
+      " [0.02457308 0.90281899 0.87469388 0.02009157 0.13361658 0.19458219\n",
+      "  0.02499915]\n",
+      " [0.0209891  0.9720402  0.55244596 0.95761181 0.27203654 0.97884945\n",
+      "  0.37895102]\n",
+      " [0.19315752 0.15809505 0.68105605 0.29481389 0.35691675 0.23714536\n",
+      "  0.03193322]\n",
+      " [0.61227175 0.0194606  0.16553847 0.74204764 0.42913091 0.26977271\n",
+      "  0.19511624]\n",
+      " [0.60038899 0.99727459 0.99125549 0.69182925 0.95594471 0.56926757\n",
+      "  0.72815871]]\n",
+      "[0.77557663 0.33806582 0.37237046 0.83112068 0.49650122 0.51288944\n",
+      " 0.50293884 0.85671902 0.72090202 0.27010591 0.02457308 0.90281899\n",
+      " 0.87469388 0.02009157 0.13361658 0.0209891  0.9720402  0.55244596\n",
+      " 0.95761181 0.27203654 0.19315752 0.15809505 0.68105605 0.29481389\n",
+      " 0.35691675 0.56177801 0.51579877 0.55239822 0.66654575 0.49598721\n",
+      " 0.07402139 0.5829163  0.38529097 0.04022566 0.6660402  0.04880685\n",
+      " 0.29328377 0.72894727 0.40273677 0.58444065 0.23715671 0.76501373\n",
+      " 0.2617112  0.43930511 0.8667773  0.87877624 0.21587922 0.25574579\n",
+      " 0.14144719 0.10852229 0.61531416 0.63418458 0.52175757 0.04859568\n",
+      " 0.00995961 0.62378664 0.53930438 0.68877897 0.70155326 0.37456113\n",
+      " 0.91382188 0.08136818 0.64643685 0.82147964 0.72842887 0.45922596\n",
+      " 0.46440204 0.88385712 0.36370997 0.23976922 0.87628869 0.13027836\n",
+      " 0.43845715 0.42018312 0.57520152]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(x1[0][0])\n",
+    "print(col1[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(90, 75)\n"
+     ]
+    }
+   ],
+   "source": [
+    "x2 = np.random.rand(10,3,7,7)\n",
+    "col2 = im2col(x2, 5, 5, stride=1, pad=0)\n",
+    "print(col2.shape)   # (90, 75) フィルターの適用領域の数(=10*(((7-5)/1)+1)**2), 入力特徴マップの要素数"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def output_size(x_len, pad, filter_len, stride):\n",
+    "    rest = ((x_len + 2 * pad) - filter_len)\n",
+    "    if rest < 0:\n",
+    "        raise Exception(f\"Filter length {filter_len} is longer than input size {x_len} + {pad}!\")\n",
+    "    elif rest % stride != 0:\n",
+    "        raise Exception(f\"Rest length {rest} and stride {stride} are conflicted!\")\n",
+    "    else:\n",
+    "        return rest / stride + 1\n",
+    "\n",
+    "class Convolution:\n",
+    "    \"\"\"Convolution layer computed as one matrix product via im2col.\n",
+    "\n",
+    "    forward() stores the input and the expanded matrices; backward() uses them to\n",
+    "    fill self.dW / self.db and to return the gradient with respect to the input.\n",
+    "\n",
+    "    Shape names used below -- FN: number of filters, C: channels,\n",
+    "    FH/FW: filter height/width, N: batch size, OH/OW: output height/width.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    # Shape variables start with an uppercase letter; nptyping treats lowercase words as labels.\n",
+    "    @beartype\n",
+    "    def __init__(self, W: NDArray[Shape['FN,C,FH,FW'],Float], b: NDArray[Shape['FN'], Float], stride=1, pad=0):\n",
+    "        \"\"\"Store the filters W (FN, C, FH, FW), the bias b (FN,), the stride and the padding.\"\"\"\n",
+    "        self.W = W\n",
+    "        self.b = b\n",
+    "        self.stride = stride\n",
+    "        self.pad = pad\n",
+    "\n",
+    "        # Intermediate data kept by forward() for use in backward().\n",
+    "        self.x = None\n",
+    "        self.col = None\n",
+    "        self.col_W = None\n",
+    "\n",
+    "        # Gradients of the weights and the bias, filled in by backward().\n",
+    "        self.dW = None\n",
+    "        self.db = None\n",
+    "\n",
+    "    @beartype\n",
+    "    def forward(self, x: NDArray[Shape['N,C,H,W'], Float]):\n",
+    "        \"\"\"Convolve x (N, C, H, W) with the filters and return an array of shape (N, FN, OH, OW).\n",
+    "\n",
+    "        Raises:\n",
+    "            ValueError: If the channel count of x differs from that of the filters.\n",
+    "        \"\"\"\n",
+    "        FN, C, FH, FW = self.W.shape\n",
+    "        N, x_channels, H, W = x.shape\n",
+    "        if x_channels != C:\n",
+    "            raise ValueError(f\"Input has {x_channels} channels but the filters expect {C}!\")\n",
+    "\n",
+    "        # int() keeps the sizes usable as reshape dimensions even if output_size returns a float.\n",
+    "        out_h = int(output_size(H, self.pad, FH, self.stride))\n",
+    "        out_w = int(output_size(W, self.pad, FW, self.stride))\n",
+    "\n",
+    "        # col: (N*OH*OW, C*FH*FW) -- one row per filter application region.\n",
+    "        col = im2col(x, FH, FW, self.stride, self.pad)\n",
+    "        # col_W: (C*FH*FW, FN) -- one column per filter.\n",
+    "        col_W = self.W.reshape(FN, -1).T\n",
+    "        # out: (N*OH*OW, FN)\n",
+    "        out = np.dot(col, col_W) + self.b\n",
+    "        # Restore the batch/spatial axes, then move the filter axis to position 1.\n",
+    "        reshaped_out = out.reshape(N, out_h, out_w, FN).transpose(0,3,1,2)\n",
+    "\n",
+    "        self.x = x\n",
+    "        self.col = col\n",
+    "        self.col_W = col_W\n",
+    "\n",
+    "        return reshaped_out\n",
+    "\n",
+    "    @beartype\n",
+    "    def backward(self, dout: NDArray[Shape['N,FN,OH,OW'], Float]):\n",
+    "        \"\"\"Back-propagate dout (N, FN, OH, OW); set self.dW and self.db and return dx (N, C, H, W).\n",
+    "\n",
+    "        Raises:\n",
+    "            RuntimeError: If forward() has not been called yet.\n",
+    "        \"\"\"\n",
+    "        if self.x is None or self.col is None or self.col_W is None:\n",
+    "            raise RuntimeError(\"forward() must be called before backward()!\")\n",
+    "\n",
+    "        FN, C, FH, FW = self.W.shape\n",
+    "        # dout_matrix: (N*OH*OW, FN) -- same row layout as the forward product.\n",
+    "        dout_matrix = dout.transpose(0,2,3,1).reshape(-1, FN)\n",
+    "\n",
+    "        # db: (FN,)\n",
+    "        self.db = np.sum(dout_matrix, axis=0)\n",
+    "        # dW_matrix: (C*FH*FW, FN)\n",
+    "        dW_matrix = np.dot(self.col.T, dout_matrix)\n",
+    "        self.dW = dW_matrix.transpose(1, 0).reshape(FN, C, FH, FW)\n",
+    "\n",
+    "        # dcol: (N*OH*OW, C*FH*FW); col2im folds it back into the input shape.\n",
+    "        dcol = np.dot(dout_matrix, self.col_W.T)\n",
+    "        dx = col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad)\n",
+    "\n",
+    "        return dx\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Pooling:\n",
+    "    \"\"\"Max-pooling layer built on im2col / col2im.\n",
+    "\n",
+    "    Shape names used below -- N: batch size, C: channels, H/W: input height/width,\n",
+    "    OH/OW: output height/width.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self, pool_h, pool_w, stride=2, pad=0):\n",
+    "        \"\"\"Store the pooling window size, the stride and the padding.\"\"\"\n",
+    "        # pool_h and pool_w are the height and width of the pooling window.\n",
+    "        # E.g. to take the max over 3x3=9 values, use pool_h=3, pool_w=3.\n",
+    "        self.pool_h = pool_h\n",
+    "        self.pool_w = pool_w\n",
+    "        self.stride = stride\n",
+    "        self.pad = pad\n",
+    "\n",
+    "        # Intermediate data kept by forward() for use in backward().\n",
+    "        self.x = None\n",
+    "        self.arg_max = None\n",
+    "\n",
+    "    @beartype\n",
+    "    def forward(self, x: NDArray[Shape['N,C,H,W'], Float]):\n",
+    "        \"\"\"Max-pool x (N, C, H, W) per channel and return an array of shape (N, C, OH, OW).\"\"\"\n",
+    "        # Personal note: calling the depth of an output feature map a \"channel\" still feels odd\n",
+    "        # when it is not a colour, but I will get used to it.\n",
+    "        N, C, H, W = x.shape\n",
+    "        # Include the padding so the sizes agree with the rows im2col produces.\n",
+    "        out_h = (H + 2 * self.pad - self.pool_h) // self.stride + 1\n",
+    "        out_w = (W + 2 * self.pad - self.pool_w) // self.stride + 1\n",
+    "\n",
+    "        # col: (N*OH*OW, C*pool_h*pool_w), columns grouped by channel.\n",
+    "        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)\n",
+    "        # reshaped_col: (N*OH*OW*C, pool_h*pool_w) -- one row per pooling window and channel.\n",
+    "        reshaped_col = col.reshape(-1, self.pool_h * self.pool_w)\n",
+    "\n",
+    "        # Remember which element of each window won, for the backward pass.\n",
+    "        arg_max = np.argmax(reshaped_col, axis=1)\n",
+    "        out = np.max(reshaped_col, axis=1)\n",
+    "        reshaped_out = out.reshape(N, out_h, out_w, C).transpose(0,3,1,2)\n",
+    "\n",
+    "        self.x = x\n",
+    "        self.arg_max = arg_max\n",
+    "\n",
+    "        return reshaped_out\n",
+    "\n",
+    "    @beartype\n",
+    "    def backward(self, dout: NDArray[Shape['N,C,OH,OW'], Float]):\n",
+    "        \"\"\"Route dout (N, C, OH, OW) to the max position of each window and return dx (N, C, H, W).\n",
+    "\n",
+    "        Raises:\n",
+    "            RuntimeError: If forward() has not been called yet.\n",
+    "        \"\"\"\n",
+    "        if self.x is None or self.arg_max is None:\n",
+    "            raise RuntimeError(\"forward() must be called before backward()!\")\n",
+    "\n",
+    "        N, C, out_h, out_w = dout.shape\n",
+    "        pool_size = self.pool_h * self.pool_w\n",
+    "\n",
+    "        # Flatten in (N, OH, OW, C) order, the row order used by forward().\n",
+    "        dout_flat = dout.transpose(0,2,3,1).reshape(-1)\n",
+    "        # Only the element that produced the max receives the gradient; the rest stay 0.\n",
+    "        dmax = np.zeros((dout_flat.size, pool_size), dtype=dout.dtype)\n",
+    "        dmax[np.arange(dout_flat.size), self.arg_max] = dout_flat\n",
+    "\n",
+    "        # dcol: (N*OH*OW, C*pool_h*pool_w); col2im folds it back into the input shape.\n",
+    "        dcol = dmax.reshape(N * out_h * out_w, -1)\n",
+    "        dx = col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad)\n",
+    "\n",
+    "        return dx\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# \"Rectify\" comes from electrical rectification: alternating current flows in the positive and\n",
+    "# negative direction in turn, and rectifying it lets only the positive current through.\n",
+    "class Relu:\n",
+    "    \"\"\"ReLU activation: passes positive values through and zeroes the rest.\"\"\"\n",
+    "\n",
+    "    def __init__(self):\n",
+    "        # Boolean mask of the positions where the forward input was <= 0.\n",
+    "        self.mask = None\n",
+    "\n",
+    "    # 'Shape[*, ...]' accepts any number of dimensions: the layer is applied both to\n",
+    "    # 4-D convolution outputs and to 2-D affine outputs.\n",
+    "    @beartype\n",
+    "    def forward(self, x: NDArray[Shape['*, ...'], Float]):\n",
+    "        \"\"\"Return a copy of x with every element <= 0 replaced by 0.\"\"\"\n",
+    "        self.mask = (x <= 0)\n",
+    "        out = x.copy()\n",
+    "        out[self.mask] = 0\n",
+    "\n",
+    "        return out\n",
+    "\n",
+    "    @beartype\n",
+    "    def backward(self, dout: NDArray[Shape['*, ...'], Float]):\n",
+    "        \"\"\"Return dout with the gradient zeroed wherever the forward input was <= 0.\n",
+    "\n",
+    "        Raises:\n",
+    "            RuntimeError: If forward() has not been called yet.\n",
+    "        \"\"\"\n",
+    "        if self.mask is None:\n",
+    "            raise RuntimeError(\"forward() must be called before backward()!\")\n",
+    "\n",
+    "        # Work on a copy so the caller's gradient array is not modified in place.\n",
+    "        dx = dout.copy()\n",
+    "        dx[self.mask] = 0\n",
+    "\n",
+    "        return dx\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Affine:\n",
+    "    \"\"\"Fully connected layer: out = x . W + b.\n",
+    "\n",
+    "    Shape names used below -- N: batch size, S: input features, WS: output features.\n",
+    "    Inputs with more than two dimensions are flattened to (N, S) in forward() and the\n",
+    "    gradient returned by backward() is reshaped back to the original input shape.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self, W: NDArray[Shape['S,WS'], Float], b: NDArray[Shape['WS'], Float]):\n",
+    "        \"\"\"Store the weights W (S, WS) and the bias b (WS,).\"\"\"\n",
+    "        self.W = W\n",
+    "        self.b = b\n",
+    "\n",
+    "        # Intermediate data kept by forward() for use in backward().\n",
+    "        self.x = None\n",
+    "        self.original_x_shape = None\n",
+    "\n",
+    "        # Gradients of the weights and the bias, filled in by backward().\n",
+    "        self.dW = None\n",
+    "        self.db = None\n",
+    "\n",
+    "    @beartype\n",
+    "    def forward(self, x: NDArray[Shape['*, ...'], Float]):\n",
+    "        \"\"\"Flatten x to (N, S) and return x . W + b with shape (N, WS).\n",
+    "\n",
+    "        Raises:\n",
+    "            ValueError: If x has fewer than two dimensions.\n",
+    "        \"\"\"\n",
+    "        if x.ndim < 2:\n",
+    "            raise ValueError(f\"Affine expects a batch dimension, got shape {x.shape}!\")\n",
+    "\n",
+    "        # Remember the incoming shape so backward() can return a gradient of the same shape.\n",
+    "        self.original_x_shape = x.shape\n",
+    "        self.x = x.reshape(x.shape[0], -1)\n",
+    "        out = np.dot(self.x, self.W) + self.b\n",
+    "\n",
+    "        return out\n",
+    "\n",
+    "    @beartype\n",
+    "    def backward(self, dout: NDArray[Shape['N,WS'], Float]):\n",
+    "        \"\"\"Set self.dW (S, WS) and self.db (WS,) and return dx in the shape of the forward input.\n",
+    "\n",
+    "        Raises:\n",
+    "            RuntimeError: If forward() has not been called yet.\n",
+    "        \"\"\"\n",
+    "        if self.x is None:\n",
+    "            raise RuntimeError(\"forward() must be called before backward()!\")\n",
+    "\n",
+    "        dx = np.dot(dout, self.W.T)\n",
+    "        self.dW = np.dot(self.x.T, dout)\n",
+    "        self.db = np.sum(dout, axis=0)\n",
+    "\n",
+    "        return dx.reshape(*self.original_x_shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Layout: Conv -> ReLU -> Pool -> Affine -> ReLU -> Affine, with SoftmaxWithLoss as the loss layer.\n",
+    "\n",
+    "class SimpleConvNet:\n",
+    "    \"\"\"Small CNN: one convolution + pooling stage followed by two fully connected layers.\n",
+    "\n",
+    "    Args:\n",
+    "        input_dim: Input shape as (channels, height, width). Only input_dim[1] is used\n",
+    "            as the spatial size, so height and width are assumed to be equal.\n",
+    "        conv_param: Dict with the keys 'filter_num', 'filter_size', 'pad' and 'stride'.\n",
+    "            A filter_size of 5 means a 5x5 filter; square filters are the common case.\n",
+    "        hidden_size: Number of units in the hidden fully connected layer.\n",
+    "        output_size: Number of output classes.\n",
+    "        weight_init_std: Standard deviation used to scale the random initial weights.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self, input_dim=(1,28,28),\n",
+    "        conv_param={'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1},\n",
+    "        hidden_size=100, output_size=10, weight_init_std=0.01):\n",
+    "        # SoftmaxWithLoss is not defined or imported anywhere else in this notebook.\n",
+    "        # NOTE(review): assumes the cloned book repository provides common.layers.SoftmaxWithLoss -- confirm.\n",
+    "        from common.layers import SoftmaxWithLoss\n",
+    "\n",
+    "        # conv_param is only read, never modified, so the shared default dict stays intact.\n",
+    "        filter_num = conv_param['filter_num']\n",
+    "        filter_size = conv_param['filter_size']\n",
+    "        filter_pad = conv_param['pad']\n",
+    "        filter_stride = conv_param['stride']\n",
+    "        input_size = input_dim[1]\n",
+    "\n",
+    "        # Integer arithmetic throughout: these values become array dimensions.\n",
+    "        conv_output_size = (input_size + 2*filter_pad - filter_size) // filter_stride + 1\n",
+    "        # The 2x2, stride-2 pooling layer below halves each spatial side (rounding down).\n",
+    "        pooled_side = conv_output_size // 2\n",
+    "        pool_output_size = filter_num * pooled_side * pooled_side\n",
+    "\n",
+    "        self.params = {}\n",
+    "        # W1: (filter_num, channels, filter_size, filter_size)\n",
+    "        self.params['W1'] = weight_init_std * np.random.randn(filter_num, input_dim[0], filter_size, filter_size)\n",
+    "        self.params['b1'] = np.zeros(filter_num)\n",
+    "        # W2: (pool_output_size, hidden_size)\n",
+    "        self.params['W2'] = weight_init_std * np.random.randn(pool_output_size, hidden_size)\n",
+    "        self.params['b2'] = np.zeros(hidden_size)\n",
+    "        # W3: (hidden_size, output_size)\n",
+    "        self.params['W3'] = weight_init_std * np.random.randn(hidden_size, output_size)\n",
+    "        self.params['b3'] = np.zeros(output_size)\n",
+    "\n",
+    "        # The insertion order is the forward order used by predict().\n",
+    "        self.layers = OrderedDict()\n",
+    "        self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'], filter_stride, filter_pad)\n",
+    "        self.layers['Relu1'] = Relu()\n",
+    "        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)\n",
+    "        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])\n",
+    "        self.layers['Relu2'] = Relu()\n",
+    "        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])\n",
+    "\n",
+    "        self.last_layer = SoftmaxWithLoss()\n",
+    "\n",
+    "    def predict(self, x: NDArray):\n",
+    "        \"\"\"Run x through every layer in order and return the output of the last one.\"\"\"\n",
+    "        for layer in self.layers.values():\n",
+    "            x = layer.forward(x)\n",
+    "        return x\n",
+    "\n",
+    "    def loss(self, x, t):\n",
+    "        \"\"\"Return the result of the loss layer's forward() for input x and targets t.\"\"\"\n",
+    "        y = self.predict(x)\n",
+    "        return self.last_layer.forward(y,t)\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "til-machine-learning",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}