Skip to content

Commit

Permalink
Refactor media datatype sniffing logic
Browse files Browse the repository at this point in the history
  • Loading branch information
arash77 committed Apr 25, 2024
1 parent 371e227 commit 230b734
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 18 deletions.
4 changes: 2 additions & 2 deletions lib/galaxy/config/sample/datatypes_conf.xml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -937,7 +937,7 @@
<datatype extension="mp4" type="galaxy.datatypes.media:Mp4" display_in_upload="true" mimetype="video/mp4"/>
<datatype extension="flv" type="galaxy.datatypes.media:Flv" display_in_upload="true" mimetype="video/flv"/>
<datatype extension="webm" type="galaxy.datatypes.media:Webm" display_in_upload="true" mimetype="video/webm"/>
<datatype extension="mpeg" type="galaxy.datatypes.media:Mpeg" display_in_upload="true" mimetype="video/mpeg"/>
<datatype extension="mpg" type="galaxy.datatypes.media:Mpg" display_in_upload="true" mimetype="video/mpeg"/>
<datatype extension="mov" type="galaxy.datatypes.media:Mov" display_in_upload="true" mimetype="video/quicktime"/>
<datatype extension="avi" type="galaxy.datatypes.media:Avi" display_in_upload="true" mimetype="video/x-msvideo"/>
<datatype extension="wmv" type="galaxy.datatypes.media:Wmv" display_in_upload="true" mimetype="video/x-ms-wmv"/>
Expand Down Expand Up @@ -1251,7 +1251,7 @@
<sniffer type="galaxy.datatypes.media:Mp4"/>
<sniffer type="galaxy.datatypes.media:Flv"/>
<sniffer type="galaxy.datatypes.media:Ogg"/>
<sniffer type="galaxy.datatypes.media:Mpeg"/>
<sniffer type="galaxy.datatypes.media:Mpg"/>
<sniffer type="galaxy.datatypes.media:Mov"/>
<sniffer type="galaxy.datatypes.media:Avi"/>
<sniffer type="galaxy.datatypes.media:Wmv"/>
Expand Down
56 changes: 40 additions & 16 deletions lib/galaxy/datatypes/media.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import (
List,
Tuple,
cast,
)

from galaxy.datatypes.binary import Binary
Expand Down Expand Up @@ -88,7 +89,7 @@ def ffprobe(path):
"wma": {"offset": 0, "hex": ["30 26 B2 75"]},
"wmv": {"offset": 0, "hex": ["30 26 B2 75"]},
"avi": {"offset": 8, "hex": ["41 56 49"]},
"mpeg": {
"mpg": {
"offset": 0,
"hex": [
"00 00 01 B0",
Expand All @@ -113,9 +114,11 @@ def ffprobe(path):

def _get_file_format_from_magic_number(filename: str, file_ext: str) -> bool:
    """Report whether ``filename`` starts with a signature registered for ``file_ext``.

    The ``magic_number`` table entry for ``file_ext`` supplies a byte
    ``offset`` and a list of ``hex`` signatures. Eight bytes are read from the
    file at that offset and compared against each signature in turn.

    :param filename: path of the file to inspect; opened in binary mode.
    :param file_ext: key into ``magic_number`` selecting the offset/signatures.
    :returns: ``True`` if the bytes read begin with any of the signatures.
    :raises KeyError: if ``file_ext`` has no entry in ``magic_number``
        (assuming the table is a plain mapping).
    :raises OSError: if the file cannot be opened or read.
    """
    signature = magic_number[file_ext]
    with open(filename, "rb") as f:
        # The table values are loosely typed, so narrow them for the type checker.
        f.seek(cast(int, signature["offset"]))
        head = f.read(8)
    # bytes.fromhex ignores the spaces separating the hex pairs in the table.
    return any(head.startswith(bytes.fromhex(hex_code)) for hex_code in cast(List[str], signature["hex"]))


class Audio(Binary):
Expand Down Expand Up @@ -270,10 +273,10 @@ def sniff(self, filename: str) -> bool:
if which("ffprobe"):
metadata, streams = ffprobe(filename)
vp_check = any(
stream["codec_name"] in ["vp8", "vp9"] for stream in streams if stream["codec_type"] == "video"
stream["codec_name"] in ["av1", "vp8", "vp9"] for stream in streams if stream["codec_type"] == "video"
)
return _get_file_format_from_magic_number(filename, "mkv") and not vp_check
return False
return "matroska" in metadata["format_name"].split(",") and not vp_check
return _get_file_format_from_magic_number(filename, "mkv")


class Mp4(Video):
Expand All @@ -287,13 +290,19 @@ class Mp4(Video):
file_ext = "mp4"

def sniff(self, filename: str) -> bool:
    """Decide whether ``filename`` should be treated as ``mp4``.

    Without ``ffprobe`` available, the decision rests on the magic-number
    check alone. With it, ``"mp4"`` must also appear in the comma-separated
    ``format_name`` that ``ffprobe`` reports.
    """
    if not which("ffprobe"):
        return _get_file_format_from_magic_number(filename, "mp4")
    container_info, _ = ffprobe(filename)
    reported_formats = container_info["format_name"].split(",")
    if "mp4" not in reported_formats:
        return False
    # NOTE(review): the extra signature check presumably disambiguates
    # containers that share one ffprobe format list - confirm.
    return _get_file_format_from_magic_number(filename, "mp4")


class Flv(Video):
    """Video datatype registered under the ``flv`` extension."""

    file_ext = "flv"

    def sniff(self, filename: str) -> bool:
        """Decide whether ``filename`` should be treated as ``flv``.

        When ``ffprobe`` is found, the answer is whether ``"flv"`` is one of
        the comma-separated names in the reported ``format_name``. Otherwise
        the magic-number check is used.
        """
        ffprobe_available = which("ffprobe")
        if ffprobe_available:
            container_info, _ = ffprobe(filename)
            format_names = container_info["format_name"].split(",")
            return "flv" in format_names
        return _get_file_format_from_magic_number(filename, "flv")


Expand All @@ -310,6 +319,9 @@ class Mp3(Audio):
file_ext = "mp3"

def sniff(self, filename: str) -> bool:
    """Decide whether ``filename`` should be treated as ``mp3``.

    Prefers ``ffprobe`` when it is found: the file matches if ``"mp3"`` is
    listed in the comma-separated ``format_name``. Falls back to the
    magic-number check when ``ffprobe`` is not available.
    """
    if not which("ffprobe"):
        return _get_file_format_from_magic_number(filename, "mp3")
    probe_format, _ = ffprobe(filename)
    detected = probe_format["format_name"].split(",")
    return "mp3" in detected


Expand Down Expand Up @@ -362,6 +374,9 @@ class Ogg(Audio):
file_ext = "ogg"

def sniff(self, filename: str) -> bool:
    """Decide whether ``filename`` should be treated as ``ogg``.

    With ``ffprobe`` found, the result is whether ``"ogg"`` occurs among the
    comma-separated entries of the reported ``format_name``. Without it, the
    magic-number check decides.
    """
    have_ffprobe = bool(which("ffprobe"))
    if have_ffprobe:
        format_section, _ = ffprobe(filename)
        return "ogg" in format_section["format_name"].split(",")
    return _get_file_format_from_magic_number(filename, "ogg")


Expand All @@ -372,30 +387,39 @@ def sniff(self, filename: str) -> bool:
if which("ffprobe"):
metadata, streams = ffprobe(filename)
vp_check = any(
stream["codec_name"] in ["vp8", "vp9"] for stream in streams if stream["codec_type"] == "video"
stream["codec_name"] in ["av1", "vp8", "vp9"] for stream in streams if stream["codec_type"] == "video"
)
return _get_file_format_from_magic_number(filename, "webm") and vp_check
return False
return "webm" in metadata["format_name"].split(",") and vp_check
return _get_file_format_from_magic_number(filename, "webm")


# NOTE(review): this span carries two variants of one datatype class back to
# back (`Mpeg` with file_ext "mpeg", then `Mpg` with file_ext "mpg"). It looks
# like the before/after of a rename - confirm which name is current before
# relying on either.
class Mpeg(Video):
file_ext = "mpeg"
class Mpg(Video):
file_ext = "mpg"

# sniff: decides whether `filename` matches this datatype.
def sniff(self, filename: str) -> bool:
# Variant that consults only the magic-number table, under the "mpeg" key.
return _get_file_format_from_magic_number(filename, "mpeg")
# Variant that asks ffprobe first: the file matches when "mpegvideo" is one of
# the comma-separated names in the reported format_name.
if which("ffprobe"):
metadata, streams = ffprobe(filename)
return "mpegvideo" in metadata["format_name"].split(",")
# Fallback when ffprobe is not found: magic-number table, under the "mpg" key.
return _get_file_format_from_magic_number(filename, "mpg")


class Mov(Video):
    """Video datatype registered under the ``mov`` extension."""

    file_ext = "mov"

    def sniff(self, filename: str) -> bool:
        """Decide whether ``filename`` should be treated as ``mov``.

        The magic-number check is always the final word. When ``ffprobe`` is
        found, the file is rejected first unless ``"mov"`` appears in the
        comma-separated ``format_name`` it reports.
        """
        if which("ffprobe"):
            probe_format, _ = ffprobe(filename)
            if "mov" not in probe_format["format_name"].split(","):
                return False
        return _get_file_format_from_magic_number(filename, "mov")


class Avi(Video):
    """Video datatype registered under the ``avi`` extension."""

    file_ext = "avi"

    def sniff(self, filename: str) -> bool:
        """Decide whether ``filename`` should be treated as ``avi``.

        Falls back to the magic-number check when ``ffprobe`` is not found.
        Otherwise the file matches if ``"avi"`` is listed in the
        comma-separated ``format_name`` reported by ``ffprobe``.
        """
        if not which("ffprobe"):
            return _get_file_format_from_magic_number(filename, "avi")
        format_info, _ = ffprobe(filename)
        listed_formats = format_info["format_name"].split(",")
        return "avi" in listed_formats


Expand All @@ -406,8 +430,8 @@ def sniff(self, filename: str) -> bool:
if which("ffprobe"):
metadata, streams = ffprobe(filename)
is_video = "video" in [stream["codec_type"] for stream in streams]
return _get_file_format_from_magic_number(filename, "wmv") and is_video
return False
return "asf" in metadata["format_name"].split(",") and is_video
return _get_file_format_from_magic_number(filename, "wmv")


class Wma(Audio):
Expand All @@ -417,5 +441,5 @@ def sniff(self, filename: str) -> bool:
if which("ffprobe"):
metadata, streams = ffprobe(filename)
is_audio = "video" not in [stream["codec_type"] for stream in streams]
return _get_file_format_from_magic_number(filename, "wma") and is_audio
return False
return "asf" in metadata["format_name"].split(",") and is_audio
return _get_file_format_from_magic_number(filename, "wma")

0 comments on commit 230b734

Please sign in to comment.