Commit 12dcbef: readme update

Alexey Borsky committed Apr 19, 2023
1 parent b112d6b commit 12dcbef

Showing 3 changed files with 78 additions and 87 deletions.
88 changes: 44 additions & 44 deletions compute_flow.py
@@ -10,62 +10,62 @@
import argparse

def main(args):
    W, H = args.width, args.height
    # Open the input video file
    input_video = cv2.VideoCapture(args.input_video)

    # Get useful info from the source video
    fps = int(input_video.get(cv2.CAP_PROP_FPS))
    total_frames = int(input_video.get(cv2.CAP_PROP_FRAME_COUNT))

    prev_frame = None

    # Create an empty HDF5 file
    with h5py.File(args.output_file, 'w') as f: pass

    # Open the file for writing flow maps into it
    with h5py.File(args.output_file, 'a') as f:
        flow_maps = f.create_dataset('flow_maps', shape=(0, 2, H, W, 2), maxshape=(None, 2, H, W, 2), dtype=np.float16)

        for ind in tqdm(range(total_frames)):
            # Read the next frame from the input video
            if not input_video.isOpened(): break
            ret, cur_frame = input_video.read()
            if not ret: break

            cur_frame = cv2.resize(cur_frame, (W, H))

            if prev_frame is not None:
                next_flow, prev_flow, occlusion_mask, frame1_bg_removed, frame2_bg_removed = RAFT_estimate_flow(prev_frame, cur_frame)

                # Grow the dataset by one frame pair and write the flow data into the file
                flow_maps.resize(ind, axis=0)
                flow_maps[ind-1, 0] = next_flow
                flow_maps[ind-1, 1] = prev_flow

                occlusion_mask = np.clip(occlusion_mask * 0.2 * 255, 0, 255).astype(np.uint8)

                if args.visualize:
                    # Show the last written frame - useful to catch any issue with the process
                    img_show = cv2.hconcat([cur_frame, frame2_bg_removed, occlusion_mask])
                    cv2.imshow('Out img', img_show)
                    if cv2.waitKey(1) & 0xFF == ord('q'): exit() # press Q to close the script while processing

            prev_frame = cur_frame.copy()

    # Release the input video file
    input_video.release()

    # Close all windows
    if args.visualize: cv2.destroyAllWindows()

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input_video', help="Path to input video file", required=True)
    parser.add_argument('-o', '--output_file', help="Path to output flow file. Stored in *.h5 format", required=True)
    parser.add_argument('-W', '--width', help='Width of the generated flow maps', default=1024, type=int)
    parser.add_argument('-H', '--height', help='Height of the generated flow maps', default=576, type=int)
    parser.add_argument('-v', '--visualize', action='store_true', help='Show processed frames and occlusion maps')
    args = parser.parse_args()

    main(args)
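
For context, a minimal read-back sketch (not part of this commit): the script would be run along the lines of `python3 compute_flow.py -i input.mp4 -o flow.h5` (placeholder paths), and the resulting HDF5 dataset follows the layout created above — shape (N, 2, H, W, 2), with the forward flow at index 0 and the backward flow at index 1 for each consecutive frame pair.

```
import h5py
import numpy as np

# 'flow.h5' is a placeholder; use whatever was passed to --output_file.
with h5py.File('flow.h5', 'r') as f:
    flow_maps = f['flow_maps']                # shape: (N, 2, H, W, 2), float16
    next_flow = np.float32(flow_maps[0, 0])   # forward flow for the first frame pair
    prev_flow = np.float32(flow_maps[0, 1])   # backward flow for the same pair
    print(flow_maps.shape, next_flow.shape)
```
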
66 changes: 33 additions & 33 deletions flow_utils.py
@@ -15,51 +15,51 @@
fgbg = cv2.createBackgroundSubtractorMOG2(history=500, varThreshold=16, detectShadows=True)

def background_subtractor(frame, fgbg):
    fgmask = fgbg.apply(frame)
    return cv2.bitwise_and(frame, frame, mask=fgmask)

def RAFT_estimate_flow(frame1, frame2, device='cuda', subtract_background=True):
    global RAFT_model
    if RAFT_model is None:
        args = argparse.Namespace(**{
            'model': 'RAFT/models/raft-things.pth',
            'mixed_precision': True,
            'small': False,
            'alternate_corr': False,
            'path': ""
        })

        RAFT_model = torch.nn.DataParallel(RAFT(args))
        RAFT_model.load_state_dict(torch.load(args.model))

        RAFT_model = RAFT_model.module
        RAFT_model.to(device)
        RAFT_model.eval()

    if subtract_background:
        frame1 = background_subtractor(frame1, fgbg)
        frame2 = background_subtractor(frame2, fgbg)

    with torch.no_grad():
        frame1_torch = torch.from_numpy(frame1).permute(2, 0, 1).float()[None].to(device)
        frame2_torch = torch.from_numpy(frame2).permute(2, 0, 1).float()[None].to(device)

        padder = InputPadder(frame1_torch.shape)
        image1, image2 = padder.pad(frame1_torch, frame2_torch)

        # estimate optical flow in both directions
        _, next_flow = RAFT_model(image1, image2, iters=20, test_mode=True)
        _, prev_flow = RAFT_model(image2, image1, iters=20, test_mode=True)

        next_flow = next_flow[0].permute(1, 2, 0).cpu().numpy()
        prev_flow = prev_flow[0].permute(1, 2, 0).cpu().numpy()

        # forward-backward consistency: the sum of forward and backward flow
        # is near zero where the flow is reliable and large in occluded regions
        fb_flow = next_flow + prev_flow
        fb_norm = np.linalg.norm(fb_flow, axis=2)

        occlusion_mask = fb_norm[..., None].repeat(3, axis=-1)

    return next_flow, prev_flow, occlusion_mask, frame1, frame2

# ... rest of the file ...
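
A hedged usage sketch (not from the repo) showing how RAFT_estimate_flow could be called on two frames, and how its soft occlusion mask could be turned into a binary one; the file paths and the 1.0-pixel cutoff are illustrative assumptions.

```
import cv2
import numpy as np

# Illustrative inputs - any two consecutive video frames of equal size.
frame1 = cv2.imread('frame1.png')
frame2 = cv2.imread('frame2.png')

next_flow, prev_flow, occlusion_mask, f1, f2 = RAFT_estimate_flow(frame1, frame2)

# occlusion_mask holds the forward-backward error norm repeated over 3
# channels; pixels with a large error are likely occluded. The 1.0-pixel
# threshold is an assumed value, not one taken from the repo.
binary_occlusion = (occlusion_mask[..., 0] > 1.0).astype(np.uint8) * 255
cv2.imwrite('occlusion.png', binary_occlusion)
```
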

11 changes: 1 addition & 10 deletions readme.md
@@ -93,20 +93,11 @@ Go to the **txt2vid.py** file and change main parameters (OUTPUT_VIDEO, PROMPT, 
python3 txt2vid.py
```


<!--
## Last version changes: v0.4
* Fixed issue with extreme blur accumulating at the static parts of the video.
* The order of processing was changed to achieve the best quality at different domains.
* Optical flow computation isolated into a separate script for better GPU memory management. Check out the instructions for a new processing pipeline.
-->


## Last version changes: v0.5
* Fixed an issue with the wrong direction of the optical flow applied to an image.
* Added a text-to-video mode via the txt2vid.py script. Make sure to update the new dependencies for this script to work!
* Added a threshold on the optical flow before processing each frame, to remove white noise that might appear, as suggested by [@alexfredo](https://github.com/alexfredo) (see the sketch after this list).
* Background removal at the flow computation stage, implemented by [@CaptnSeraph](https://github.com/CaptnSeraph); it should reduce the ghosting effect in most videos processed with the vid2vid script.
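
A minimal sketch of what such a flow threshold could look like (illustrative only; the function name and the 0.5-pixel cutoff are assumptions, not the exact code from the scripts):

```
import numpy as np

def threshold_flow(flow, eps=0.5):
    # Zero out near-zero flow vectors that are likely just estimation noise.
    magnitude = np.linalg.norm(flow, axis=-1, keepdims=True)  # (H, W, 1)
    return np.where(magnitude < eps, 0.0, flow)               # (H, W, 2)
```
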

<!--
## Potential improvements
