-
Notifications
You must be signed in to change notification settings - Fork 21
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(component,video): add task to embed audio to video (#939)
What this PR does: - Implement a pipeline task to embed an audio input to a video input. The end result is the original video with audio stream being replaced by the provided audio. CC @xiaofei-du @pinglin
- Loading branch information
1 parent
804e56a
commit 1aa40c2
Showing
11 changed files
with
360 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
package video | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"os" | ||
"path/filepath" | ||
|
||
"github.com/google/uuid" | ||
|
||
ffmpeg "github.com/warmans/ffmpeg-go" | ||
|
||
"github.com/instill-ai/pipeline-backend/pkg/component/base" | ||
"github.com/instill-ai/pipeline-backend/pkg/data" | ||
) | ||
|
||
func embedAudio(ctx context.Context, job *base.Job) error { | ||
var inputStruct embedAudioInput | ||
if err := job.Input.ReadData(ctx, &inputStruct); err != nil { | ||
return fmt.Errorf("reading input data: %w", err) | ||
} | ||
|
||
// Create temporary input video file | ||
tempInputVideoFile, err := os.CreateTemp("", "temp-input-video-*.mp4") | ||
if err != nil { | ||
return fmt.Errorf("creating temp input video file: %w", err) | ||
} | ||
defer func() { | ||
_ = os.Remove(tempInputVideoFile.Name()) | ||
}() | ||
|
||
videoBytes, err := inputStruct.Video.Binary() | ||
if err != nil { | ||
return fmt.Errorf("getting video bytes: %w", err) | ||
} | ||
|
||
if err := os.WriteFile(tempInputVideoFile.Name(), videoBytes.ByteArray(), 0600); err != nil { | ||
return fmt.Errorf("writing to temp input video file: %w", err) | ||
} | ||
|
||
// Create temporary input audio file | ||
tempInputAudioFile, err := os.CreateTemp("", "temp-input-audio-*.mp3") | ||
if err != nil { | ||
return fmt.Errorf("creating temp input audio file: %w", err) | ||
} | ||
defer func() { | ||
_ = os.Remove(tempInputAudioFile.Name()) | ||
}() | ||
|
||
audioBytes, err := inputStruct.Audio.Binary() | ||
if err != nil { | ||
return fmt.Errorf("getting audio bytes: %w", err) | ||
} | ||
|
||
if err := os.WriteFile(tempInputAudioFile.Name(), audioBytes.ByteArray(), 0600); err != nil { | ||
return fmt.Errorf("writing to temp input audio file: %w", err) | ||
} | ||
|
||
// Embed audio to video and write to a file | ||
outputVideoFilePath, err := embedAudioToVideo(tempInputVideoFile.Name(), tempInputAudioFile.Name()) | ||
if err != nil { | ||
return err | ||
} | ||
defer func() { | ||
_ = os.Remove(outputVideoFilePath) | ||
}() | ||
|
||
// Read the output video file and export to standard output | ||
outputVideoBytes, err := os.ReadFile(outputVideoFilePath) | ||
if err != nil { | ||
return fmt.Errorf("reading output video file: %w", err) | ||
} | ||
|
||
outputVideoData, err := data.NewVideoFromBytes(outputVideoBytes, "video/mp4", fmt.Sprintf("video-%s.mp4", uuid.New().String())) | ||
if err != nil { | ||
return fmt.Errorf("creating output video data: %w", err) | ||
} | ||
|
||
outputData := embedAudioOutput{ | ||
Video: outputVideoData, | ||
} | ||
|
||
if err := job.Output.WriteData(ctx, outputData); err != nil { | ||
return fmt.Errorf("writing output data: %w", err) | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func embedAudioToVideo(inputVideoFile string, inputAudioFile string) (string, error) { | ||
outputFilePath := filepath.Join(os.TempDir(), fmt.Sprintf("video-%s.mp4", uuid.New().String())) | ||
|
||
input := []*ffmpeg.Stream{ffmpeg.Input(inputVideoFile), ffmpeg.Input(inputAudioFile)} | ||
|
||
// https://www.mux.com/articles/merge-audio-and-video-files-with-ffmpeg | ||
// Workaround for multiple maps https://github.com/u2takey/ffmpeg-go/issues/1#issuecomment-2507904461 | ||
err := ffmpeg.Output(input, outputFilePath, ffmpeg.KwArgs{ | ||
"c:v": "copy", | ||
"c:a": "aac", | ||
"map_0": "0:v:0", | ||
"map_1": "1:a:0", | ||
}).OverWriteOutput().Run() | ||
|
||
if err != nil { | ||
return "", fmt.Errorf("embedding audio to video: %w", err) | ||
} | ||
|
||
return outputFilePath, nil | ||
} |
Oops, something went wrong.