critical fixes
volotat committed May 14, 2023
1 parent cd400ea commit 9849e63
Showing 4 changed files with 46 additions and 10 deletions.
2 changes: 2 additions & 0 deletions readme.md
@@ -71,3 +71,5 @@ To install the extension go to 'Extensions' tab in [Automatic1111 web-ui](https:
* Added ability to export current parameters in a human readable form as a json.
* Interpolation mode in the flow-applying stage is set to ‘nearest’ to reduce overtime image blurring.
* Added ControlNet to txt2vid mode, as well as fixing issue #86, thanks to [@mariaWitch](https://github.com/mariaWitch)
* Fixed a major issue where ControlNet used the wrong input images, which made vid2vid results much worse than they should have been.
* Text to video mode now supports a video as guidance for ControlNet, allowing much stronger video stylizations.
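
The last two entries are the core of this commit: each ControlNet unit now receives the current guidance frame before every generation step. A rough sketch of that flow (illustrative only, not part of the diff; set_CNs_input_image and txt2img are the helpers from scripts/core/utils.py changed below, while stylize_with_control_video and its arguments are invented for this example):

import cv2
from PIL import Image

from scripts.core import utils  # helpers patched in this commit

def stylize_with_control_video(args_dict, video_path, width, height):
    # Feed one frame of the guidance video to ControlNet per generated frame.
    video = cv2.VideoCapture(video_path)
    while video.isOpened():
        ok, frame = video.read()
        if not ok:
            break
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # OpenCV reads BGR
        frame = cv2.resize(frame, (width, height))      # match the SD resolution
        utils.set_CNs_input_image(args_dict, Image.fromarray(frame))
        processed_frames, _, _, _ = utils.txt2img(args_dict)
        yield processed_frames[0]                       # one stylized frame
    video.release()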
8 changes: 7 additions & 1 deletion scripts/base_ui.py
@@ -71,7 +71,7 @@ def inputs_ui():

with gr.Tab('vid2vid') as tab_vid2vid:
    with gr.Row():
        gr.HTML('Put your video here:')
        gr.HTML('Input video (each frame will be used as initial image for SD and as input image to CN): *REQUIRED')
    with gr.Row():
        v2v_file = gr.File(label="Input video", interactive=True, file_count="single", file_types=["video"], elem_id="vid_to_vid_chosen_file")

@@ -110,7 +110,13 @@ def inputs_ui():
    v2v_custom_inputs = scripts.scripts_img2img.setup_ui()

with gr.Tab('txt2vid') as tab_txt2vid:
    with gr.Row():
        gr.HTML('Control video (each frame will be used as input image to CN): *NOT REQUIRED')
    with gr.Row():
        t2v_file = gr.File(label="Input video", interactive=True, file_count="single", file_types=["video"], elem_id="tex_to_vid_chosen_file")

    t2v_width, t2v_height, t2v_prompt, t2v_n_prompt, t2v_cfg_scale, t2v_seed, t2v_processing_strength, t2v_fix_frame_strength, t2v_sampler_index, t2v_steps = setup_common_values('txt2vid', t2v_args)

    with gr.Row():
        t2v_length = gr.Slider(label='Length (in frames)', minimum=10, maximum=2048, step=10, value=40, interactive=True)
        t2v_fps = gr.Slider(label='Video FPS', minimum=4, maximum=64, step=4, value=12, interactive=True)
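
Registering t2v_file above is only half of the wiring: Gradio passes all component values positionally, and get_component_names() in scripts/core/utils.py (updated below) supplies the matching names. A simplified sketch of that pairing, assuming the real utils.args_to_dict works by position (the actual helper may differ):

def args_to_dict(*args):
    # Pair each raw Gradio value with its registered component name.
    names = get_component_names()  # defined in scripts/core/utils.py below
    return dict(zip(names, args))

# get_mode_args('t2v', ...) then appears to strip the 't2v_' prefix,
# which is why txt2vid.py below reads the upload as args_dict['file'].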
41 changes: 34 additions & 7 deletions scripts/core/txt2vid.py
@@ -44,18 +44,30 @@ def FloweR_load_model(w, h):
    # Move the model to the device
    FloweR_model = FloweR_model.to(DEVICE)


def read_frame_from_video(input_video):
    if input_video is None: return None

    # Reading video file
    if input_video.isOpened():
        ret, cur_frame = input_video.read()
        if cur_frame is not None:
            cur_frame = cv2.cvtColor(cur_frame, cv2.COLOR_BGR2RGB)
    else:
        cur_frame = None
        input_video.release()
        input_video = None

    return cur_frame

def start_process(*args):
    processing_start_time = time.time()
    args_dict = utils.args_to_dict(*args)
    args_dict = utils.get_mode_args('t2v', args_dict)

    #utils.set_CNs_input_image(args_dict, Image.fromarray(curr_frame))
    processed_frames, _, _, _ = utils.txt2img(args_dict)
    processed_frame = np.array(processed_frames[0])
    processed_frame = np.clip(processed_frame, 0, 255).astype(np.uint8)
    init_frame = processed_frame.copy()
    # Open the input video file
    input_video = None
    if args_dict['file'] is not None:
        input_video = cv2.VideoCapture(args_dict['file'].name)

    # Create an output video file with the same fps, width, and height as the input video
    output_video_name = f'outputs/sd-cn-animation/txt2vid/{datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}.mp4'
@@ -69,6 +81,16 @@ def save_result_to_image(image, ind):
        if args_dict['save_frames_check']:
            cv2.imwrite(os.path.join(output_video_folder, f'{ind:05d}.png'), cv2.cvtColor(image, cv2.COLOR_RGB2BGR))

    if input_video is not None:
        curr_video_frame = read_frame_from_video(input_video)
        curr_video_frame = cv2.resize(curr_video_frame, (args_dict['width'], args_dict['height']))
        utils.set_CNs_input_image(args_dict, Image.fromarray(curr_video_frame))

    processed_frames, _, _, _ = utils.txt2img(args_dict)
    processed_frame = np.array(processed_frames[0])
    processed_frame = np.clip(processed_frame, 0, 255).astype(np.uint8)
    init_frame = processed_frame.copy()

    output_video = cv2.VideoWriter(output_video_name, cv2.VideoWriter_fourcc(*'mp4v'), args_dict['fps'], (args_dict['width'], args_dict['height']))
    output_video.write(cv2.cvtColor(processed_frame, cv2.COLOR_RGB2BGR))

@@ -125,7 +147,11 @@ def save_result_to_image(image, ind):
        args_dict['mask_img'] = Image.fromarray(pred_occl)
        args_dict['seed'] = -1

        #utils.set_CNs_input_image(args_dict, Image.fromarray(curr_frame))
        if input_video is not None:
            curr_video_frame = read_frame_from_video(input_video)
            curr_video_frame = cv2.resize(curr_video_frame, (args_dict['width'], args_dict['height']))
            utils.set_CNs_input_image(args_dict, Image.fromarray(curr_video_frame))

        processed_frames, _, _, _ = utils.img2img(args_dict)
        processed_frame = np.array(processed_frames[0])
        processed_frame = skimage.exposure.match_histograms(processed_frame, init_frame, channel_axis=None)
@@ -150,6 +176,7 @@ def save_result_to_image(image, ind):
        stat = f"Frame: {ind + 2} / {args_dict['length']}; " + utils.get_time_left(ind+2, args_dict['length'], processing_start_time)
        yield stat, curr_frame, pred_occl, warped_frame, processed_frame, None, gr.Button.update(interactive=False), gr.Button.update(interactive=True)

    if input_video is not None: input_video.release()
    output_video.release()
    FloweR_clear_memory()
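
One detail of the loop above worth noting: every img2img result is histogram-matched against the very first generated frame (init_frame), which keeps colors from drifting over the course of the video. A minimal standalone demonstration of that call, on synthetic data rather than the extension's frames:

import numpy as np
import skimage.exposure

rng = np.random.default_rng(0)
init_frame = rng.integers(0, 256, (64, 64, 3), dtype=np.uint8)       # color reference
processed_frame = rng.integers(0, 256, (64, 64, 3), dtype=np.uint8)  # new frame

# channel_axis=None matches the intensity distribution over all channels jointly,
# mirroring the call in txt2vid.py above.
matched = skimage.exposure.match_histograms(processed_frame, init_frame, channel_axis=None)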

5 changes: 3 additions & 2 deletions scripts/core/utils.py
@@ -10,7 +10,7 @@ def get_component_names():
        'v2v_sampler_index', 'v2v_steps', 'v2v_override_settings',
        'v2v_occlusion_mask_blur', 'v2v_occlusion_mask_trailing', 'v2v_occlusion_mask_flow_multiplier', 'v2v_occlusion_mask_difo_multiplier', 'v2v_occlusion_mask_difs_multiplier',
        'v2v_step_1_processing_mode', 'v2v_step_1_blend_alpha', 'v2v_step_1_seed', 'v2v_step_2_seed',
        't2v_width', 't2v_height', 't2v_prompt', 't2v_n_prompt', 't2v_cfg_scale', 't2v_seed', 't2v_processing_strength', 't2v_fix_frame_strength',
        't2v_file', 't2v_width', 't2v_height', 't2v_prompt', 't2v_n_prompt', 't2v_cfg_scale', 't2v_seed', 't2v_processing_strength', 't2v_fix_frame_strength',
        't2v_sampler_index', 't2v_steps', 't2v_length', 't2v_fps',
        'glo_save_frames_check'
    ]
@@ -121,7 +121,8 @@ def get_mode_args(mode, args_dict):
def set_CNs_input_image(args_dict, image):
    for script_input in args_dict['script_inputs']:
        if type(script_input).__name__ == 'UiControlNetUnit':
            script_input.batch_images = [image]
            script_input.batch_images = [np.array(image)]
            script_input.image = np.array(image)

import time
import datetime
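
This change is the heart of the 'wrong input images' fix: each ControlNet unit's image attribute is now set alongside batch_images, and both receive NumPy arrays rather than a PIL image. A small self-test of that behavior (assuming set_CNs_input_image from the diff above is in scope; the UiControlNetUnit below is a minimal stand-in, which works because the helper matches the class by name only):

import numpy as np
from PIL import Image

class UiControlNetUnit:
    # Stand-in with just the attributes set_CNs_input_image touches;
    # the real class comes from the ControlNet extension.
    def __init__(self):
        self.image = None
        self.batch_images = []

unit = UiControlNetUnit()
args_dict = {'script_inputs': [unit]}

set_CNs_input_image(args_dict, Image.new('RGB', (512, 512)))
assert isinstance(unit.image, np.ndarray)            # now set directly...
assert isinstance(unit.batch_images[0], np.ndarray)  # ...and as an array batch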
