Commit 0324cb0

Initial commit for version 1.0.0

dan64 committed Jun 1, 2024
1 parent 8db5ea2 commit 0324cb0

Showing 31 changed files with 5,325 additions and 0 deletions.
59 changes: 59 additions & 0 deletions README.md
@@ -0,0 +1,59 @@
# ProPainter
Improving Propagation and Transformer for Video Inpainting using VapourSynth, based on [ProPainter](https://github.com/sczhou/ProPainter).

The VapourSynth filter version has the advantage of transforming the images directly in memory, without needing to use the filesystem to store the video frames. Using VapourSynth, the filter is faster and has no limitation on the number of frames that can be processed.

## Dependencies
- [PyTorch](https://pytorch.org/get-started) 2.4.0 or later
- [VapourSynth](http://www.vapoursynth.com/) R68 or later


## Installation
```
pip install vspropainter-x.x.x-py3-none-any.whl
```
## Models Download
The models are not installed with the package; they must be downloaded from the ProPainter GitHub repository.

The models to download are:

- [ProPainter.pth](https://github.com/sczhou/ProPainter/releases/download/v0.1.0/ProPainter.pth)
- [raft-things.pth](https://github.com/sczhou/ProPainter/releases/download/v0.1.0/raft-things.pth)
- [recurrent_flow_completion.pth](https://github.com/sczhou/ProPainter/releases/download/v0.1.0/recurrent_flow_completion.pth)

The _model files_ have to be copied into the **weights** directory, usually located in:

.\Lib\site-packages\vspropainter\weights
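
If in doubt about the exact location, a small sketch like the following (assuming a standard pip installation) prints the expected **weights** directory:

```python
# Prints the directory where the model files are expected
# (assumes vspropainter was installed with pip into site-packages).
import os
import vspropainter

weights_dir = os.path.join(os.path.dirname(vspropainter.__file__), "weights")
print(weights_dir)
```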

## Usage
```python
from vspropainter import propainter

# adjust the color space to RGB24 (full range) for vsProPainter
clip = core.resize.Bicubic(clip=clip, format=vs.RGB24, matrix_in_s="709", range_s="full")

# ProPainter using a mask image
clip = propainter(clip, img_mask="sample.png")

# ProPainter using a clip mask
clipMask = core.lsmas.LWLibavSource(source="sample_mask.mp4", format="RGB24", cache=0)
clip = propainter(clip, clip_mask=clipMask)
```
See `__init__.py` for a description of the parameters.

## Memory optimization and inference speed-up

Video inpainting typically requires a significant amount of GPU memory. The filter offers several features that enable memory-efficient inference and help avoid out-of-memory errors. You can use the following options to reduce memory usage further:

- Reduce the number of local neighbors by decreasing the parameter *neighbor_length* (default 10).
- Reduce the number of global references by increasing the parameter *ref_stride* (default 10).
- Set the parameter *enable_fp16* to **True** to use fp16 (half precision) during inference.
- Reduce the length of the frame sequence processed by the model by decreasing the parameter *length* (default 100).
- Set a smaller mask region via the parameter *mask_region*. The mask region can be specified as a tuple in the format (width, height, left, top). Reducing the mask region significantly speeds up inference, especially on HD movies, but the region must be large enough for the inference to work properly; if the output is poor, increase its size.

With the only exception of the parameter *length*, the options that reduce memory usage also speed up inference.

If the mask is not able to completely remove the masked object, you can increase the parameter *mask_dilation* to enlarge the mask. A combined example is shown below.
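
Here is a minimal sketch of a memory-saving call putting these options together (the parameter names are those listed above; the specific values are illustrative assumptions, not tuned recommendations):

```python
from vspropainter import propainter

# Hypothetical memory-saving configuration: fewer local neighbors, sparser
# global references, half-precision inference, shorter frame sequences and
# a restricted mask region (all values are illustrative only).
clip = propainter(
    clip,
    img_mask="sample.png",
    neighbor_length=8,                # fewer local neighbors (default 10)
    ref_stride=20,                    # sparser global references (default 10)
    enable_fp16=True,                 # fp16 (half precision) inference
    length=50,                        # shorter frame sequences (default 100)
    mask_region=(960, 540, 160, 90),  # (width, height, left, top)
)
```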




35 changes: 35 additions & 0 deletions pyproject.toml
@@ -0,0 +1,35 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.build]
exclude = [
]

[project]
name = "vspropainter"
version = "1.0.0"
description = "ProPainter function for VapourSynth"
readme = "README.md"
requires-python = ">=3.10"
license = {file = "LICENSE"}
authors = [{name = "Dan64", email = "whitedan64@gmail.com"}]
keywords = ["ProPainter", "VapourSynth"]
classifiers = [
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3 :: Only",
"Topic :: Multimedia :: Video"
]
dependencies = [
"numpy>=1.26.4",
"nvidia-cuda-runtime-cu12>=12.5.39"
"torchvision>=0.19.0",
"torch>=2.4.0",
"Pillow>=10.1.0",
"VapourSynth>=68",
]

[project.urls]
"Homepage" = "https://github.com/dan64/vs-propainter"
"Bug Tracker" = "https://github.com/dan64/vs-propainter/issues"
2 changes: 2 additions & 0 deletions vspropainter/RAFT/__init__.py
@@ -0,0 +1,2 @@
# from .demo import RAFT_infer
from .raft import RAFT
111 changes: 111 additions & 0 deletions vspropainter/RAFT/corr.py
@@ -0,0 +1,111 @@
import torch
import torch.nn.functional as F
from .utils.utils import bilinear_sampler, coords_grid

try:
    import alt_cuda_corr
except ImportError:
    # alt_cuda_corr is an optional compiled CUDA extension; fall back to the
    # pure PyTorch CorrBlock when it is not compiled.
    pass

class CorrBlock:
    def __init__(self, fmap1, fmap2, num_levels=4, radius=4):
        self.num_levels = num_levels
        self.radius = radius
        self.corr_pyramid = []

        # all pairs correlation
        corr = CorrBlock.corr(fmap1, fmap2)

        batch, h1, w1, dim, h2, w2 = corr.shape
        corr = corr.reshape(batch*h1*w1, dim, h2, w2)

        self.corr_pyramid.append(corr)
        for i in range(self.num_levels-1):
            corr = F.avg_pool2d(corr, 2, stride=2)
            self.corr_pyramid.append(corr)

    def __call__(self, coords):
        r = self.radius
        coords = coords.permute(0, 2, 3, 1)
        batch, h1, w1, _ = coords.shape

        out_pyramid = []
        for i in range(self.num_levels):
            corr = self.corr_pyramid[i]
            dx = torch.linspace(-r, r, 2*r+1)
            dy = torch.linspace(-r, r, 2*r+1)
            delta = torch.stack(torch.meshgrid(dy, dx, indexing='ij'), axis=-1).to(coords.device)

            centroid_lvl = coords.reshape(batch*h1*w1, 1, 1, 2) / 2**i
            delta_lvl = delta.view(1, 2*r+1, 2*r+1, 2)
            coords_lvl = centroid_lvl + delta_lvl

            corr = bilinear_sampler(corr, coords_lvl)
            corr = corr.view(batch, h1, w1, -1)
            out_pyramid.append(corr)

        out = torch.cat(out_pyramid, dim=-1)
        return out.permute(0, 3, 1, 2).contiguous().float()

    @staticmethod
    def corr(fmap1, fmap2):
        batch, dim, ht, wd = fmap1.shape
        fmap1 = fmap1.view(batch, dim, ht*wd)
        fmap2 = fmap2.view(batch, dim, ht*wd)

        corr = torch.matmul(fmap1.transpose(1, 2), fmap2)
        corr = corr.view(batch, ht, wd, 1, ht, wd)
        return corr / torch.sqrt(torch.tensor(dim).float())


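# NOTE: CorrLayer depends on `correlation_cudaz`, a compiled CUDA extension
# that is not imported in this module; as written, this class is unreachable
# dead code unless that extension is built and made importable.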
class CorrLayer(torch.autograd.Function):
    @staticmethod
    def forward(ctx, fmap1, fmap2, coords, r):
        fmap1 = fmap1.contiguous()
        fmap2 = fmap2.contiguous()
        coords = coords.contiguous()
        ctx.save_for_backward(fmap1, fmap2, coords)
        ctx.r = r
        corr, = correlation_cudaz.forward(fmap1, fmap2, coords, ctx.r)
        return corr

    @staticmethod
    def backward(ctx, grad_corr):
        fmap1, fmap2, coords = ctx.saved_tensors
        grad_corr = grad_corr.contiguous()
        fmap1_grad, fmap2_grad, coords_grad = \
            correlation_cudaz.backward(fmap1, fmap2, coords, grad_corr, ctx.r)
        return fmap1_grad, fmap2_grad, coords_grad, None


class AlternateCorrBlock:
    def __init__(self, fmap1, fmap2, num_levels=4, radius=4):
        self.num_levels = num_levels
        self.radius = radius

        self.pyramid = [(fmap1, fmap2)]
        for i in range(self.num_levels):
            fmap1 = F.avg_pool2d(fmap1, 2, stride=2)
            fmap2 = F.avg_pool2d(fmap2, 2, stride=2)
            self.pyramid.append((fmap1, fmap2))

    def __call__(self, coords):
        coords = coords.permute(0, 2, 3, 1)
        B, H, W, _ = coords.shape

        corr_list = []
        for i in range(self.num_levels):
            r = self.radius
            fmap1_i = self.pyramid[0][0].permute(0, 2, 3, 1)
            fmap2_i = self.pyramid[i][1].permute(0, 2, 3, 1)

            coords_i = (coords / 2**i).reshape(B, 1, H, W, 2).contiguous()
            corr, = alt_cuda_corr.forward(fmap1_i, fmap2_i, coords_i, r)
            corr_list.append(corr.squeeze(1))

        corr = torch.stack(corr_list, dim=1)
        corr = corr.reshape(B, -1, H, W)
        return corr / 16.0