-
Notifications
You must be signed in to change notification settings - Fork 100
/
index_celebdf.py
85 lines (66 loc) · 3.19 KB
/
index_celebdf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
"""
Index Celeb-DF v2
Image and Sound Processing Lab - Politecnico di Milano
Nicolò Bonettini
Edoardo Daniele Cannas
Sara Mandelli
Luca Bondi
Paolo Bestagini
"""
import argparse
from multiprocessing import Pool
from pathlib import Path
import numpy as np
import pandas as pd
from isplutils.utils import extract_meta_av, extract_meta_cv
def main():
    """Build (or load) the Celeb-DF v2 videos DataFrame and print class counts.

    Command-line arguments:
        --source: dataset root directory (required). It must contain
            ``List_of_testing_videos.txt``; ``*.mp4`` files are searched
            recursively beneath it.
        --videodataset: pickle file of the videos DataFrame
            (default ``data/celebdf_videos.pkl``). When the file already
            exists it is loaded instead of being rebuilt.

    A freshly built DataFrame has one row per video and these columns:
        path: video path relative to the source directory.
        height, width, frames: metadata returned by ``extract_meta_av``
            (``extract_meta_cv`` is used when ``frames`` comes back as 0).
        class: first component of ``path``.
        label: True is FAKE (class ``Celeb-synthesis``), False is REAL.
        name: file name without suffix.
        original: for fake videos, index of the row whose name is made of the
            1st and 3rd ``_``-separated tokens of the fake's name (presumably
            the real source video); -1 for all other rows.
        test: whether ``path`` is listed in the testing split file.

    Raises:
        FileNotFoundError: if the split file is missing, or if no ``*.mp4``
            file is found under the source directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--source', type=Path, help='Source dir',
                        required=True)
    parser.add_argument('--videodataset', type=Path, default='data/celebdf_videos.pkl',
                        help='Path to save the videos DataFrame')
    args = parser.parse_args()

    ## Parameters parsing
    source_dir: Path = args.source
    videodataset_path: Path = args.videodataset

    # Create output folder (if it doesn't exist)
    videodataset_path.parent.mkdir(parents=True, exist_ok=True)

    ## DataFrame
    if videodataset_path.exists():
        print('Loading video DataFrame')
        # NOTE: unpickling can execute arbitrary code; only load index files
        # that were produced by this script.
        df_videos = pd.read_pickle(videodataset_path)
    else:
        print('Creating video DataFrame')

        split_file = source_dir.joinpath('List_of_testing_videos.txt')
        if not split_file.exists():
            raise FileNotFoundError('Unable to find "List_of_testing_videos.txt" in {}'.format(source_dir))
        # The split file is a plain "<label> <path>" list: with header=0 its
        # first line would be consumed as column names and that video would
        # never be flagged as a test video, hence header=None.
        test_videos_df = pd.read_csv(split_file, delimiter=' ', header=None, index_col=1)

        video_paths = [f.relative_to(source_dir) for f in source_dir.rglob('*.mp4')]
        if not video_paths:
            # Fail early with a clear message instead of an opaque np.stack error
            raise FileNotFoundError('Unable to find any "*.mp4" video in {}'.format(source_dir))
        df_videos = pd.DataFrame({'path': video_paths})

        df_videos['height'] = df_videos['width'] = df_videos['frames'] = np.zeros(len(df_videos), dtype=np.uint16)
        # Extract the metadata of all the videos in parallel
        with Pool() as p:
            meta = p.map(extract_meta_av, df_videos['path'].map(lambda x: str(source_dir.joinpath(x))))
        meta = np.stack(meta)
        df_videos.loc[:, ['height', 'width', 'frames']] = meta

        # Fix for videos that av cannot decode properly
        for idx, record in df_videos[df_videos['frames'] == 0].iterrows():
            meta = extract_meta_cv(str(source_dir.joinpath(record['path'])))
            df_videos.loc[idx, ['height', 'width', 'frames']] = meta

        df_videos['class'] = df_videos['path'].map(lambda x: x.parts[0]).astype('category')
        df_videos['label'] = df_videos['class'].map(
            lambda x: x == 'Celeb-synthesis')  # True is FAKE, False is REAL
        df_videos['name'] = df_videos['path'].map(lambda x: x.with_suffix('').name)

        def original_index(fake_name):
            # 'a_b_c' -> index of the first row named 'a_c'
            tokens = fake_name.split('_')
            original_name = '_'.join([tokens[0], tokens[2]])
            return df_videos.index[np.flatnonzero(df_videos['name'] == original_name)[0]]

        df_videos['original'] = -1 * np.ones(len(df_videos), dtype=np.int16)
        # Element-wise comparison on purpose: `is True` would not work on a Series
        fake_mask = df_videos['label'] == True
        df_videos.loc[fake_mask, 'original'] = df_videos.loc[fake_mask, 'name'].map(original_index)

        # Compare with forward slashes, as written in the split file, so the
        # match does not depend on the platform's path separator.
        df_videos['test'] = df_videos['path'].map(lambda x: x.as_posix()).isin(test_videos_df.index)

        print('Saving video DataFrame to {}'.format(videodataset_path))
        df_videos.to_pickle(str(videodataset_path))

    print('Real videos: {:d}'.format(int((df_videos['label'] == 0).sum())))
    print('Fake videos: {:d}'.format(int((df_videos['label'] == 1).sum())))
# Script entry point: run the indexing only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()