forked from polimi-ispl/icpr2020dfdc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
index_ffpp.py
92 lines (71 loc) · 3.64 KB
/
index_ffpp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
"""
Index FaceForensics++
Video Face Manipulation Detection Through Ensemble of CNNs
Image and Sound Processing Lab - Politecnico di Milano
Nicolò Bonettini
Edoardo Daniele Cannas
Sara Mandelli
Luca Bondi
Paolo Bestagini
"""
import argparse
import sys
from multiprocessing import Pool
from pathlib import Path
import numpy as np
import pandas as pd
from isplutils.utils import extract_meta_av, extract_meta_cv
def parse_args(argv):
    """Parse the command-line options of the FaceForensics++ indexing script.

    :param argv: list of argument strings (without the program name)
    :return: the ``argparse.Namespace`` with two ``Path`` attributes:
        ``source`` (dataset root directory) and ``videodataset`` (pickle file
        where the videos DataFrame is stored)
    """
    # One (flag, keyword-arguments) entry per option, registered in this order.
    # String defaults are converted by argparse through the option's ``type``.
    option_table = (
        ('--source', {
            'type': Path,
            'help': 'Source dir',
            'default': 'dataset/ffpp/faceforensics',
        }),
        ('--videodataset', {
            'type': Path,
            'default': 'data/ffpp_videos.pkl',
            'help': 'Path to save the videos DataFrame',
        }),
    )

    cli = argparse.ArgumentParser()
    for flag, spec in option_table:
        cli.add_argument(flag, **spec)

    return cli.parse_args(argv)
def _list_video_paths(source_dir):
    """Return the paths, relative to ``source_dir``, of the videos to index.

    Every ``*.mp4`` file found recursively below ``source_dir`` is considered;
    files whose relative path contains ``mask`` or ``raw`` are skipped.

    :param source_dir: root directory of the dataset
    :return: list of ``Path`` objects relative to ``source_dir``
    """
    root = Path(source_dir)
    relative_paths = (video.relative_to(root) for video in root.rglob('*.mp4'))
    # The filter looks at the path *relative* to the dataset root: testing the
    # full path would discard every video whenever the root itself happens to
    # contain 'raw' or 'mask' (e.g. '/data/raw/ffpp').
    return [path for path in relative_paths
            if 'mask' not in str(path) and 'raw' not in str(path)]


def _build_video_dataframe(source_dir):
    """Scan ``source_dir`` and build the DataFrame describing every video.

    Columns written: ``path`` (relative to ``source_dir``), ``height``,
    ``width``, ``frames``, ``class``, ``label`` (True is FAKE, False is REAL),
    ``source``, ``quality``, ``name`` and ``original`` (index of the row holding
    the original video of a fake one, -1 otherwise).

    :param source_dir: root directory of the dataset
    :return: the videos DataFrame
    :raises ValueError: if no video to index is found below ``source_dir``
    :raises IndexError: if the original of a fake video is not in the dataset
    """
    source_dir = Path(source_dir)
    paths = _list_video_paths(source_dir)
    if not paths:
        # Fail early with an explicit message instead of letting np.stack choke
        # on an empty list after a worker pool has been spawned for nothing.
        raise ValueError('No videos to index found in {}'.format(source_dir))

    df_videos = pd.DataFrame({'path': paths})

    df_videos['height'] = df_videos['width'] = df_videos['frames'] = np.zeros(len(df_videos), dtype=np.uint16)
    with Pool() as pool:
        meta = pool.map(extract_meta_av, [str(source_dir.joinpath(path)) for path in paths])
    df_videos.loc[:, ['height', 'width', 'frames']] = np.stack(meta)

    # Fix for videos that av cannot decode properly
    for idx, record in df_videos[df_videos['frames'] == 0].iterrows():
        meta = extract_meta_cv(str(source_dir.joinpath(record['path'])))
        df_videos.loc[idx, ['height', 'width', 'frames']] = meta

    # The first three components of the relative path give class, source and quality
    df_videos['class'] = df_videos['path'].map(lambda x: x.parts[0]).astype('category')
    df_videos['label'] = df_videos['class'].map(
        lambda x: True if x == 'manipulated_sequences' else False)  # True is FAKE, False is REAL
    df_videos['source'] = df_videos['path'].map(lambda x: x.parts[1]).astype('category')
    df_videos['quality'] = df_videos['path'].map(lambda x: x.parts[2]).astype('category')
    df_videos['name'] = df_videos['path'].map(lambda x: x.with_suffix('').parts[-1])

    df_videos['original'] = -1 * np.ones(len(df_videos), dtype=np.int16)

    # Map every name to the index of the FIRST row carrying it, built once so
    # that each fake video is resolved with a dict lookup instead of a scan of
    # the whole 'name' column.
    first_index_by_name = {}
    for idx, name in zip(df_videos.index, df_videos['name']):
        first_index_by_name.setdefault(name, idx)

    def index_of(original_name):
        try:
            return first_index_by_name[original_name]
        except KeyError:
            raise IndexError(
                'Original video "{}" not found in the dataset'.format(original_name)) from None

    is_fake = df_videos['label'] == True  # noqa: E712 - element-wise comparison on a Series
    is_dfd = df_videos['source'] == 'DeepFakeDetection'

    # Fake names outside DeepFakeDetection start with the original's name
    # followed by '_': '<original>_<...>'
    fake_other = is_fake & ~is_dfd
    df_videos.loc[fake_other, 'original'] = df_videos.loc[fake_other, 'name'].map(
        lambda x: index_of(x.split('_')[0]))

    # DeepFakeDetection fakes: the original is named '<first token>__<second "__" field>'
    fake_dfd = is_fake & is_dfd
    df_videos.loc[fake_dfd, 'original'] = df_videos.loc[fake_dfd, 'name'].map(
        lambda x: index_of(x.split('_')[0] + '__' + x.split('__')[1]))

    return df_videos


def main(argv):
    """Index the FaceForensics++ videos into a pickled DataFrame.

    If the DataFrame file already exists it is simply loaded; otherwise the
    source directory is scanned, the DataFrame is built and saved. In both
    cases the number of real and fake videos is printed.

    :param argv: list of command-line argument strings (without the program name)
    """
    # Parameters parsing
    args = parse_args(argv)
    source_dir: Path = args.source
    videodataset_path: Path = args.videodataset

    # Create output folder (if doesn't exist)
    videodataset_path.parent.mkdir(parents=True, exist_ok=True)

    # DataFrame
    if videodataset_path.exists():
        print('Loading video DataFrame')
        # NOTE: unpickling can execute arbitrary code; only load index files
        # produced by this script or coming from a trusted source.
        df_videos = pd.read_pickle(videodataset_path)
    else:
        print('Creating video DataFrame')
        df_videos = _build_video_dataframe(source_dir)

        print('Saving video DataFrame to {}'.format(videodataset_path))
        df_videos.to_pickle(str(videodataset_path))

    print('Real videos: {:d}'.format(sum(df_videos['label'] == 0)))
    print('Fake videos: {:d}'.format(sum(df_videos['label'] == 1)))
# Script entry point: hand the command-line arguments (program name stripped)
# over to main() only when the file is executed directly, not when imported.
if __name__ == '__main__':
    cli_argv = sys.argv[1:]
    main(cli_argv)