yutto-dev · SigureMo · May 10, 2023 · May 9, 2023 · May 9, 2023 · May 10, 2023
diff --git a/README.md b/README.md
@@ -307,7 +307,7 @@ yutto <url> -c "d8bc7493%2C2843925707%2C08c3e*81"
 #### 存放子路径模板
 
 -  参数 `-tp` 或 `--subpath-template`
--  可选参数变量 `title | id | name | username | series_title | pubdate` （以后可能会有更多）
+-  可选参数变量 `title | id | name | username | series_title | pubdate | download_date | owner_uid` （以后可能会有更多）
 -  默认值 `"{auto}"`
 
 通过配置子路径模板可以灵活地控制视频存放位置。
@@ -316,18 +316,19 @@ yutto <url> -c "d8bc7493%2C2843925707%2C08c3e*81"
 
 另外，该功能语法由 Python format 函数模板语法提供，所以也支持一些高级的用法，比如 `{id:0>3}{name}`。
 
-值得注意的是，并不是所有变量在各种场合下都会提供，比如 `username` 变量当前仅在 UP 主全部投稿视频/收藏夹才提供，在其它情况下不应使用它。各变量详细作用域描述见下表：
+值得注意的是，并不是所有变量在各种场合下都会提供，比如 `username`、`owner_uid` 变量当前仅在 UP 主全部投稿视频/收藏夹才提供，在其它情况下不应使用它们。各变量详细作用域描述见下表：
 
 <!-- prettier-ignore -->
 |Variable|Description|Scope|
 |-|-|-|
 |title|系列视频总标题（番剧名/投稿视频标题）|全部|
 |id|系列视频单 p 顺序标号|全部|
 |name|系列视频单 p 标题|全部|
-|username|UP 主用户名|个人空间、收藏夹、合集、视频列表下载|
+|username|UP主用户名|个人空间、收藏夹、合集、视频列表下载|
 |series_title|合集标题|收藏夹、视频合集、视频列表下载|
 |pubdate|投稿日期|仅投稿视频|
 |download_date|下载日期|全部|
+|owner_uid|UP主UID|个人空间、收藏夹、合集、视频列表下载|
 
 > **Note**
 >

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -26,9 +26,9 @@ python = "^3.9.0"
 aiohttp = "^3.8.3"
 aiofiles = "^23.0.0"
 biliass = "1.3.7"
-dicttoxml = "^1.7.15"
 colorama = { version = "^0.4.6", markers = "sys_platform == 'win32'" }
 typing-extensions = "^4.4.0"
+dict2xml = "1.7.3"
 
 [tool.poetry.group.dev.dependencies]
 pytest = "^7.2.2"

diff --git a/tests/test_api/test_ugc_video.py b/tests/test_api/test_ugc_video.py
@@ -68,12 +68,14 @@ async def test_get_ugc_video_list():
         assert ugc_video_list[0]["cid"] == CId("222190584")
         assert ugc_video_list[0]["metadata"] is not None
         assert ugc_video_list[0]["metadata"]["title"] == "bilili 特性以及使用方法简单介绍"
+        assert ugc_video_list[0]["metadata"]["website"] == "https://www.bilibili.com/video/BV1vZ4y1M7mQ"
 
         assert ugc_video_list[1]["id"] == 2
         assert ugc_video_list[1]["name"] == "bilili 环境配置方法"
         assert ugc_video_list[1]["cid"] == CId("222200470")
         assert ugc_video_list[1]["metadata"] is not None
         assert ugc_video_list[1]["metadata"]["title"] == "bilili 环境配置方法"
+        assert ugc_video_list[0]["metadata"]["website"] == "https://www.bilibili.com/video/BV1vZ4y1M7mQ"
 
 
 @pytest.mark.api

diff --git a/yutto/api/bangumi.py b/yutto/api/bangumi.py
@@ -165,8 +165,12 @@ def _parse_bangumi_metadata(item: dict[str, Any]) -> MetaData:
         show_title=item["share_copy"],
         plot=item["share_copy"],
         thumb=item["cover"],
-        premiered=get_time_str_by_stamp(item["pub_time"]),
+        premiered=get_time_str_by_stamp(item["pub_time"], "%Y-%m-%d"),
         dateadded=get_time_str_by_now(),
         source="",  # TODO
+        actor=[],  # TODO
+        genre=[],  # TODO
+        tag=[],  # TODO
+        website="",  # TODO
         original_filename="",  # TODO
     )
diff --git a/yutto/api/cheese.py b/yutto/api/cheese.py
@@ -138,8 +138,12 @@ def _parse_cheese_metadata(item: dict[str, Any]) -> MetaData:
         show_title=item["title"],  # 无此字段，用 title 代替
         plot=item["title"],  # 无此字段，用 title 代替
         thumb=item["cover"],
-        premiered=get_time_str_by_stamp(item["release_date"]),
+        premiered=get_time_str_by_stamp(item["release_date"], "%Y-%m-%d"),
         dateadded=get_time_str_by_now(),
         source="",  # TODO
+        actor=[],  # TODO
+        genre=[],  # TODO
+        tag=[],  # TODO
+        website="",  # TODO
         original_filename="",  # TODO
     )
diff --git a/yutto/api/ugc_video.py b/yutto/api/ugc_video.py
@@ -2,7 +2,7 @@
 
 import json
 import re
-from typing import TypedDict
+from typing import Any, TypedDict
 
 from aiohttp import ClientSession
 
@@ -24,7 +24,7 @@
 )
 from yutto.utils.console.logger import Logger
 from yutto.utils.fetcher import Fetcher
-from yutto.utils.metadata import MetaData
+from yutto.utils.metadata import Actor, MetaData
 from yutto.utils.time import get_time_str_by_now, get_time_str_by_stamp
 
 
@@ -45,6 +45,9 @@ class _UgcVideoInfo(TypedDict):
     pubdate: int
     description: str
     pages: list[_UgcVideoPageInfo]
+    genre: list[str]
+    actor: list[Actor]
+    tag: list[str]
 
 
 class UgcVideoListItem(TypedDict):
@@ -62,6 +65,17 @@ class UgcVideoList(TypedDict):
     pages: list[UgcVideoListItem]
 
 
+async def get_ugc_video_tag(session: ClientSession, avid: AvId) -> list[str]:
+    """Fetch the tag names attached to a UGC video.
+
+    Queries the archive-tags API for ``avid`` and returns the ``tag_name`` of
+    every entry in the response's ``data`` list, in response order.
+
+    Raises:
+        NotFoundError: If no JSON is returned or the response ``code`` is non-zero.
+    """
+    url_template = "http://api.bilibili.com/x/tag/archive/tags?aid={aid}&bvid={bvid}"
+    response = await Fetcher.fetch_json(session, url_template.format(**avid.to_dict()))
+    request_failed = response is None or response["code"] != 0
+    if request_failed:
+        raise NotFoundError(f"无法获取视频 {avid} 标签")
+    return [entry["tag_name"] for entry in response["data"]]
+
+
 async def get_ugc_video_info(session: ClientSession, avid: AvId) -> _UgcVideoInfo:
     regex_ep = re.compile(r"https?://www\.bilibili\.com/bangumi/play/ep(?P<episode_id>\d+)")
     info_api = "http://api.bilibili.com/x/web-interface/view?aid={aid}&bvid={bvid}"
@@ -81,6 +95,10 @@ async def get_ugc_video_info(session: ClientSession, avid: AvId) -> _UgcVideoInf
     episode_id = EpisodeId("")
     if res_json_data.get("redirect_url") and (ep_match := regex_ep.match(res_json_data["redirect_url"])):
         episode_id = EpisodeId(ep_match.group("episode_id"))
+
+    actors = _parse_actor_info(res_json_data)
+    genres = _parse_genre_info(res_json_data)
+    tags: list[str] = await get_ugc_video_tag(session, avid)
     return {
         "avid": BvId(res_json_data["bvid"]),
         "aid": AId(str(res_json_data["aid"])),
@@ -99,6 +117,9 @@ async def get_ugc_video_info(session: ClientSession, avid: AvId) -> _UgcVideoInf
             }
             for page in res_json_data["pages"]
         ],
+        "actor": actors,
+        "tag": tags,
+        "genre": genres,
     }
 
 
@@ -241,19 +262,65 @@ async def get_ugc_video_subtitles(session: ClientSession, avid: AvId, cid: CId)
     return []
 
 
-def _parse_ugc_video_metadata(video_info: _UgcVideoInfo, page_info: _UgcVideoPageInfo) -> MetaData:
+def _parse_ugc_video_metadata(
+    video_info: _UgcVideoInfo,
+    page_info: _UgcVideoPageInfo,
+) -> MetaData:
+    """Assemble the MetaData record for one page of a UGC video.
+
+    Page-level fields (title, show title, thumbnail) come from ``page_info``;
+    the plot, premiere date, actors, genres, tags and website URL come from
+    ``video_info``. ``dateadded`` is the current time, and ``source`` and
+    ``original_filename`` are left empty for now.
+    """
     return MetaData(
         title=page_info["part"],
         show_title=page_info["part"],
         plot=video_info["description"],
+        # Prefer the page's own first frame; fall back to the video-level picture when it is None.
         thumb=page_info["first_frame"] if page_info["first_frame"] is not None else video_info["picture"],
-        premiered=get_time_str_by_stamp(video_info["pubdate"]),
+        premiered=get_time_str_by_stamp(video_info["pubdate"], "%Y-%m-%d"),
         dateadded=get_time_str_by_now(),
+        actor=video_info["actor"],
+        genre=video_info["genre"],
+        tag=video_info["tag"],
         source="",  # TODO
         original_filename="",  # TODO
+        website=video_info["bvid"].to_url(),
     )
 
 
+def _parse_actor_info(video_info: dict[str, Any]) -> list[Actor]:
+    """Build the actor list of a video from its raw info dict.
+
+    A non-empty ``staff`` list takes precedence: each member becomes one Actor
+    whose ``order`` is its position in that list. Otherwise a non-empty
+    ``owner`` dict yields a single Actor with the role "UP主". When neither is
+    present a warning is logged and an empty list is returned.
+
+    Args:
+        video_info: Raw video info dict, read for its ``staff`` and ``owner`` keys.
+
+    Returns:
+        The actors found, possibly empty.
+    """
+    staff = video_info.get("staff")
+    if staff and isinstance(staff, list):
+        staff_list: list[dict[str, Any]] = staff
+        return [
+            Actor(
+                name=member["name"],
+                role=member["title"],
+                thumb=member["face"],
+                profile=f"https://space.bilibili.com/{member['mid']}",
+                order=index,
+            )
+            for index, member in enumerate(staff_list)
+        ]
+
+    owner = video_info.get("owner")
+    if owner and isinstance(owner, dict):
+        owner_info: dict[str, Any] = owner
+        return [
+            Actor(
+                name=owner_info["name"],
+                role="UP主",
+                thumb=owner_info["face"],
+                profile=f"https://space.bilibili.com/{owner_info['mid']}",
+                order=0,
+            )
+        ]
+
+    Logger.warning("未找到演职人员信息")
+    return []
+
+
+def _parse_genre_info(video_info: dict[str, Any]) -> list[str]:
+    """Return the video's genres: ``[tname]`` when ``tname`` is a non-empty string, else ``[]``."""
+    category = video_info.get("tname")
+    if not category or not isinstance(category, str):
+        return []
+    return [category]
+
+
 def _is_meaningless_name(name: str) -> bool:
     """检测名称是否为无意义的名称"""
     # name 为空

diff --git a/yutto/extractor/common.py b/yutto/extractor/common.py
@@ -58,6 +58,7 @@ async def extract_bangumi_data(
             "series_title": UNKNOWN,
             "pubdate": UNKNOWN,
             "download_date": bangumi_info["metadata"]["dateadded"],
+            "owner_uid": UNKNOWN,
         }
         subpath_variables_base.update(subpath_variables)
         subpath = resolve_path_template(args.subpath_template, auto_subpath_template, subpath_variables_base)
@@ -103,6 +104,7 @@ async def extract_cheese_data(
             "series_title": UNKNOWN,
             "pubdate": UNKNOWN,
             "download_date": UNKNOWN,
+            "owner_uid": UNKNOWN,
         }
         subpath_variables_base.update(subpath_variables)
         subpath = resolve_path_template(args.subpath_template, auto_subpath_template, subpath_variables_base)
@@ -139,6 +141,11 @@ async def extract_ugc_video_data(
         subtitles = await get_ugc_video_subtitles(session, avid, cid) if args.require_subtitle else []
         danmaku = await get_danmaku(session, cid, args.danmaku_format) if args.require_danmaku else EmptyDanmakuData
         metadata = ugc_video_info["metadata"] if args.require_metadata else None
+        owner_uid: str = (
+            ugc_video_info["metadata"]["actor"][0]["profile"].split("/")[-1]
+            if ugc_video_info["metadata"]["actor"]
+            else UNKNOWN
+        )
         subpath_variables_base: PathTemplateVariableDict = {
             "id": id,
             "name": name,
@@ -147,6 +154,7 @@ async def extract_ugc_video_data(
             "series_title": UNKNOWN,
             "pubdate": UNKNOWN,
             "download_date": ugc_video_info["metadata"]["dateadded"],
+            "owner_uid": owner_uid,
         }
         subpath_variables_base.update(subpath_variables)
         subpath = resolve_path_template(args.subpath_template, auto_subpath_template, subpath_variables_base)

diff --git a/yutto/processor/path_resolver.py b/yutto/processor/path_resolver.py
@@ -6,7 +6,9 @@
 
 from yutto.utils.console.logger import Logger
 
-PathTemplateVariable = Literal["title", "id", "name", "username", "series_title", "pubdate", "download_date"]
+# Names accepted as variables in a path template.
+PathTemplateVariable = Literal[
+    "title", "id", "name", "username", "series_title", "pubdate", "download_date", "owner_uid"
+]
+# Maps each template variable to the value substituted for it.
 PathTemplateVariableDict = dict[PathTemplateVariable, Union[int, str]]
+# Placeholder value for a template variable — presumably used when the variable is unavailable; confirm at call sites.
 UNKNOWN: str = "unknown_variable"
 

diff --git a/yutto/utils/metadata.py b/yutto/utils/metadata.py
@@ -4,7 +4,15 @@
 from typing import TypedDict
 from xml.dom.minidom import parseString  # type: ignore
 
-import dicttoxml  # type: ignore
+from dict2xml import dict2xml  # type: ignore
+
+
+class Actor(TypedDict):
+    """A credited person attached to a video's metadata (one entry of ``MetaData.actor``)."""
+
+    name: str  # display name of the person
+    role: str  # role label, e.g. a staff title or "UP主"
+    thumb: str  # image for the person — presumably an avatar URL; confirm against the API
+    profile: str  # URL of the person's profile page
+    order: int  # zero-based position in the actor list
 
 
 class MetaData(TypedDict):
@@ -14,16 +22,17 @@ class MetaData(TypedDict):
     thumb: str
     premiered: str
     dateadded: str
+    actor: list[Actor]
+    genre: list[str]
+    tag: list[str]
     source: str
     original_filename: str
+    website: str
 
 
 def write_metadata(metadata: MetaData, video_path: Path):
+    """Serialize ``metadata`` to XML and write it beside the video as an ``.nfo`` file.
+
+    The output path is ``video_path`` with its suffix replaced by ``.nfo``. The
+    XML is wrapped in an ``episodedetails`` root element, indented with two
+    spaces, and written as UTF-8.
+    """
     metadata_path = video_path.with_suffix(".nfo")
-    custom_root = "episodedetails"
-
-    xml_content = dicttoxml.dicttoxml(metadata, custom_root=custom_root, attr_type=False)  # type: ignore
-    dom = parseString(xml_content)  # type: ignore
-    pretty_content = dom.toprettyxml()  # type: ignore
+    custom_root = "episodedetails"  # TODO: use a different root name for each video type
+    xml_content = dict2xml(metadata, wrap=custom_root, indent="  ")  # type: ignore
     with metadata_path.open("w", encoding="utf-8") as f:  # type: ignore
-        f.write(pretty_content)  # type: ignore
+        f.write(xml_content)  # type: ignore