From c07d1e1d2312ef78eb36b2ddd432428701651104 Mon Sep 17 00:00:00 2001 From: Trung Trinh Date: Sat, 4 May 2024 17:11:23 +0300 Subject: [PATCH] update --- _layouts/default.html | 3 +++ assets/input_gradient_kernel.svg | 1 + assets/rde_wgd.svg | 1 + index.md | 25 ++++++++----------------- 4 files changed, 13 insertions(+), 17 deletions(-) create mode 100644 assets/input_gradient_kernel.svg create mode 100644 assets/rde_wgd.svg diff --git a/_layouts/default.html b/_layouts/default.html index 5a977f8..b8b5220 100644 --- a/_layouts/default.html +++ b/_layouts/default.html @@ -34,6 +34,9 @@ .my_orange { color: #ED7D31; } + .my_deepred{ + color: rgb(196, 78, 82) + } blockquote p strong { color: #ED7D31; } diff --git a/assets/input_gradient_kernel.svg b/assets/input_gradient_kernel.svg new file mode 100644 index 0000000..bdb9eb1 --- /dev/null +++ b/assets/input_gradient_kernel.svg @@ -0,0 +1 @@ +Compare the input gradients of two particleswith respect to the same input.Take the average over all samples in the mini-batch. \ No newline at end of file diff --git a/assets/rde_wgd.svg b/assets/rde_wgd.svg new file mode 100644 index 0000000..e53851a --- /dev/null +++ b/assets/rde_wgd.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/index.md b/index.md index 962fb65..e2ab614 100644 --- a/index.md +++ b/index.md @@ -23,22 +23,7 @@ Please cite our work if you find it useful: > **Description:** Train an ensemble \\(\\{\boldsymbol{\theta}\_i\\}_{i=1}^M\\) using Wasserstein gradient descent [2], which employs a kernelized repulsion term to diversify the particles to cover the Bayes posterior \\(p(\boldsymbol{\theta} \| \mathcal{D}) \\). 
-\begin{equation} -\boldsymbol{\theta}\_i^{(t+1)} = \boldsymbol{\theta}\_i^{(t)} + \eta\_t\bigg( - {\color{red} -\underbrace{ -\nabla\_{\boldsymbol{\theta}\_i^{(t)}} \log p(\boldsymbol{\theta}\_i^{(t)} \| \mathcal{D}) -}\_{\text{Driving force}}} - - - {\color[RGB]{68,114,196} - \underbrace{\frac{ - \sum\_{j=1}^N \nabla\_{\boldsymbol{\theta}\_i^{(t)}} k(\boldsymbol{\theta}\_i^{(t)}, \boldsymbol{\theta}\_j^{(t)}) - }{ - \sum\_{j=1}^N k(\boldsymbol{\theta}\_i^{(t)}, \boldsymbol{\theta}\_j^{(t)}) - }}\_{\text{Repulsion force}} - } - \bigg) -\end{equation} +drawing - The driving force directs the particles towards high density regions of the posterior - The repulsion force pushes the particles away from each other to enforce diversity. @@ -54,4 +39,10 @@ Please cite our work if you find it useful: Possible advantages: - Each member is guaranteed to represent a different function; - The issues of weight- and function-space repulsion are avoided; -- Each member is encouraged to learn different features, which can improve robustness. \ No newline at end of file +- Each member is encouraged to learn different features, which can improve robustness. + +# Defining the input-gradient kernel \\(k\\) + +Given a base kernel \\(\kappa\\), we define the kernel in the input-gradient space for a minibatch of training samples \\(\mathcal{B}=\\{(\mathbf{x}\_b, y\_b)\\}\_{b=1}^B\\) as follows: + +drawing