Commit 12dcbef: readme update

Alexey Borsky committed Apr 19, 2023
1 parent b112d6b commit 12dcbef

Showing 3 changed files with 78 additions and 87 deletions.
88 changes: 44 additions & 44 deletions compute_flow.py
@@ -10,62 +10,62 @@
import argparse

def main(args):
    W, H = args.width, args.height
    # Open the input video file
    input_video = cv2.VideoCapture(args.input_video)

    # Get useful info from the source video
    fps = int(input_video.get(cv2.CAP_PROP_FPS))
    total_frames = int(input_video.get(cv2.CAP_PROP_FRAME_COUNT))

    prev_frame = None

    # Create an empty HDF5 file
    with h5py.File(args.output_file, 'w') as f: pass

    # Open the file for writing flow maps into it
    with h5py.File(args.output_file, 'a') as f:
        flow_maps = f.create_dataset('flow_maps', shape=(0, 2, H, W, 2), maxshape=(None, 2, H, W, 2), dtype=np.float16)

        for ind in tqdm(range(total_frames)):
            # Read the next frame from the input video
            if not input_video.isOpened(): break
            ret, cur_frame = input_video.read()
            if not ret: break

            cur_frame = cv2.resize(cur_frame, (W, H))

            if prev_frame is not None:
                next_flow, prev_flow, occlusion_mask, frame1_bg_removed, frame2_bg_removed = RAFT_estimate_flow(prev_frame, cur_frame)

                # Grow the dataset by one frame pair and write the flow data into the file
                flow_maps.resize(ind, axis=0)
                flow_maps[ind-1, 0] = next_flow
                flow_maps[ind-1, 1] = prev_flow

                occlusion_mask = np.clip(occlusion_mask * 0.2 * 255, 0, 255).astype(np.uint8)

                if args.visualize:
                    # Show the last written frame - useful to catch any issue with the process
                    img_show = cv2.hconcat([cur_frame, frame2_bg_removed, occlusion_mask])
                    cv2.imshow('Out img', img_show)
                    if cv2.waitKey(1) & 0xFF == ord('q'): exit() # press Q to close the script while processing

            prev_frame = cur_frame.copy()

    # Release the input video file
    input_video.release()

    # Close all windows
    if args.visualize: cv2.destroyAllWindows()

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input_video', help="Path to input video file", required=True)
    parser.add_argument('-o', '--output_file', help="Path to output flow file. Stored in *.h5 format", required=True)
    parser.add_argument('-W', '--width', help='Width of the generated flow maps', default=1024, type=int)
    parser.add_argument('-H', '--height', help='Height of the generated flow maps', default=576, type=int)
    parser.add_argument('-v', '--visualize', action='store_true', help='Show processed frames and occlusion maps')
    args = parser.parse_args()

    main(args)
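
For context, a minimal read-back sketch (not part of this commit): the script would be run along the lines of `python3 compute_flow.py -i input.mp4 -o flow.h5` (placeholder paths), and the resulting HDF5 dataset follows the layout created above — shape (N, 2, H, W, 2), with the forward flow at index 0 and the backward flow at index 1 for each consecutive frame pair.

```
import h5py
import numpy as np

# 'flow.h5' is a placeholder; use whatever was passed to --output_file.
with h5py.File('flow.h5', 'r') as f:
    flow_maps = f['flow_maps']                # shape: (N, 2, H, W, 2), float16
    next_flow = np.float32(flow_maps[0, 0])   # forward flow for the first frame pair
    prev_flow = np.float32(flow_maps[0, 1])   # backward flow for the same pair
    print(flow_maps.shape, next_flow.shape)
```
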
66 changes: 33 additions & 33 deletions flow_utils.py
@@ -15,51 +15,51 @@
fgbg = cv2.createBackgroundSubtractorMOG2(history=500, varThreshold=16, detectShadows=True)

def background_subtractor(frame, fgbg):
    fgmask = fgbg.apply(frame)
    return cv2.bitwise_and(frame, frame, mask=fgmask)

def RAFT_estimate_flow(frame1, frame2, device='cuda', subtract_background=True):
    global RAFT_model
    if RAFT_model is None:
        args = argparse.Namespace(**{
            'model': 'RAFT/models/raft-things.pth',
            'mixed_precision': True,
            'small': False,
            'alternate_corr': False,
            'path': ""
        })

        RAFT_model = torch.nn.DataParallel(RAFT(args))
        RAFT_model.load_state_dict(torch.load(args.model))

        RAFT_model = RAFT_model.module
        RAFT_model.to(device)
        RAFT_model.eval()

    if subtract_background:
        frame1 = background_subtractor(frame1, fgbg)
        frame2 = background_subtractor(frame2, fgbg)

    with torch.no_grad():
        frame1_torch = torch.from_numpy(frame1).permute(2, 0, 1).float()[None].to(device)
        frame2_torch = torch.from_numpy(frame2).permute(2, 0, 1).float()[None].to(device)

        padder = InputPadder(frame1_torch.shape)
        image1, image2 = padder.pad(frame1_torch, frame2_torch)

        # estimate optical flow in both directions
        _, next_flow = RAFT_model(image1, image2, iters=20, test_mode=True)
        _, prev_flow = RAFT_model(image2, image1, iters=20, test_mode=True)

        next_flow = next_flow[0].permute(1, 2, 0).cpu().numpy()
        prev_flow = prev_flow[0].permute(1, 2, 0).cpu().numpy()

        # forward-backward consistency: the sum of forward and backward flow
        # is near zero where the flow is reliable and large in occluded regions
        fb_flow = next_flow + prev_flow
        fb_norm = np.linalg.norm(fb_flow, axis=2)

        occlusion_mask = fb_norm[..., None].repeat(3, axis=-1)

    return next_flow, prev_flow, occlusion_mask, frame1, frame2

# ... rest of the file ...
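
A hedged usage sketch (not from the repo) showing how RAFT_estimate_flow could be called on two frames, and how its soft occlusion mask could be turned into a binary one; the file paths and the 1.0-pixel cutoff are illustrative assumptions.

```
import cv2
import numpy as np

# Illustrative inputs - any two consecutive video frames of equal size.
frame1 = cv2.imread('frame1.png')
frame2 = cv2.imread('frame2.png')

next_flow, prev_flow, occlusion_mask, f1, f2 = RAFT_estimate_flow(frame1, frame2)

# occlusion_mask holds the forward-backward error norm repeated over 3
# channels; pixels with a large error are likely occluded. The 1.0-pixel
# threshold is an assumed value, not one taken from the repo.
binary_occlusion = (occlusion_mask[..., 0] > 1.0).astype(np.uint8) * 255
cv2.imwrite('occlusion.png', binary_occlusion)
```
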

11 changes: 1 addition & 10 deletions readme.md
@@ -93,20 +93,11 @@ Go to the **txt2vid.py** file and change main parameters (OUTPUT_VIDEO, PROMPT, 
python3 txt2vid.py
```


<!--
## Last version changes: v0.4
* Fixed issue with extreme blur accumulating at the static parts of the video.
* The order of processing was changed to achieve the best quality at different domains.
* Optical flow computation isolated into a separate script for better GPU memory management. Check out the instructions for a new processing pipeline.
-->


## Last version changes: v0.5
* Fixed an issue with the wrong direction of the optical flow applied to an image.
* Added a text-to-video mode via the txt2vid.py script. Make sure to update the new dependencies for this script to work!
* Added a threshold on the optical flow before processing each frame, to remove white noise that might appear, as suggested by [@alexfredo](https://github.com/alexfredo) (see the sketch after this list).
* Background removal at the flow computation stage, implemented by [@CaptnSeraph](https://github.com/CaptnSeraph); it should reduce the ghosting effect in most videos processed with the vid2vid script.
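
A minimal sketch of what such a flow threshold could look like (illustrative only; the function name and the 0.5-pixel cutoff are assumptions, not the exact code from the scripts):

```
import numpy as np

def threshold_flow(flow, eps=0.5):
    # Zero out near-zero flow vectors that are likely just estimation noise.
    magnitude = np.linalg.norm(flow, axis=-1, keepdims=True)  # (H, W, 1)
    return np.where(magnitude < eps, 0.0, flow)               # (H, W, 2)
```
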

<!--
## Potential improvements
