-
Notifications
You must be signed in to change notification settings - Fork 183
/
4_adding_a_dataset.py
174 lines (143 loc) · 6.07 KB
/
4_adding_a_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
"""
====================================
Tutorial 4: Creating a dataset class
====================================
"""
# Authors: Pedro L. C. Rodrigues, Sylvain Chevallier
#
# https://github.com/plcrodrigues/Workshop-MOABB-BCI-Graz-2019
import mne
import numpy as np
from pyriemann.classification import MDM
from pyriemann.estimation import Covariances
from scipy.io import loadmat, savemat
from sklearn.pipeline import make_pipeline
from moabb.datasets import download as dl
from moabb.datasets.base import BaseDataset
from moabb.evaluations import WithinSessionEvaluation
from moabb.paradigms import LeftRightImagery
##############################################################################
# Creating some Data
# ------------------
#
# To illustrate the creation of a dataset class in MOABB, we first create an
# example dataset saved in .mat file. It contains a single fake recording on
# 8 channels lasting for 150 seconds (sampling frequency 256 Hz). We have
# included the script that creates this dataset and have uploaded it online.
# The fake dataset is available on the
# `Zenodo website <https://sandbox.zenodo.org/record/369543>`_
def create_example_dataset():
"""Create a fake example for a dataset."""
sfreq = 256
t_recording = 150
t_trial = 1 # duration of a trial
intertrial = 2 # time between end of a trial and the next one
n_chan = 8
x = np.zeros((n_chan + 1, t_recording * sfreq)) # electrodes + stimulus
stim = np.zeros(t_recording * sfreq)
t_offset = 1.0 # offset where the trials start
n_trials = 40
rep = np.linspace(0, 4 * t_trial, t_trial * sfreq)
signal = np.sin(2 * np.pi / t_trial * rep)
for n in range(n_trials):
label = n % 2 + 1 # alternate between class 0 and class 1
tn = int(t_offset * sfreq + n * (t_trial + intertrial) * sfreq)
stim[tn] = label
noise = 0.1 * np.random.randn(n_chan, len(signal))
x[:-1, tn : (tn + t_trial * sfreq)] = label * signal + noise
x[-1, :] = stim
return x, sfreq
# Create the fake data
for subject in [1, 2, 3]:
x, fs = create_example_dataset()
filename = "subject_" + str(subject).zfill(2) + ".mat"
mdict = {}
mdict["x"] = x
mdict["fs"] = fs
savemat(filename, mdict)
##############################################################################
# Creating a Dataset Class
# ------------------------
#
# We will create now a dataset class using the fake data simulated with the
# code from above. For this, we first need to import the right classes from
# MOABB:
#
# - ``dl`` is a very useful script that downloads automatically a dataset online
# if it is not yet available in the user's computer. The script knows where
# to download the files because we create a global variable telling the URL
# where to fetch the data.
# - ``BaseDataset`` is the basic class that we overload to create our dataset.
#
# The global variable with the dataset's URL should specify an online
# repository where all the files are stored.
ExampleDataset_URL = "https://sandbox.zenodo.org/record/369543/files/"
##############################################################################
# The ``ExampleDataset`` needs to implement only 3 functions:
#
# - ``__init__`` for indicating the parameter of the dataset
# - ``_get_single_subject_data`` to define how to process the data once they
# have been downloaded
# - ``data_path`` to define how the data are downloaded.
class ExampleDataset(BaseDataset):
"""Dataset used to exemplify the creation of a dataset class in MOABB.
The data samples have been simulated and has no physiological
meaning whatsoever.
"""
def __init__(self):
super().__init__(
subjects=[1, 2, 3],
sessions_per_subject=1,
events={"left_hand": 1, "right_hand": 2},
code="ExampleDataset",
interval=[0, 0.75],
paradigm="imagery",
doi="",
)
def _get_single_subject_data(self, subject):
"""Return data for a single subject."""
file_path_list = self.data_path(subject)
data = loadmat(file_path_list[0])
x = data["x"]
fs = data["fs"]
ch_names = ["ch" + str(i) for i in range(8)] + ["stim"]
ch_types = ["eeg" for i in range(8)] + ["stim"]
info = mne.create_info(ch_names, fs, ch_types)
raw = mne.io.RawArray(x, info)
sessions = {}
sessions["0"] = {}
sessions["0"]["0"] = raw
return sessions
def data_path(
self, subject, path=None, force_update=False, update_path=None, verbose=None
):
"""Download the data from one subject."""
if subject not in self.subject_list:
raise (ValueError("Invalid subject number"))
url = "{:s}subject_0{:d}.mat".format(ExampleDataset_URL, subject)
path = dl.data_dl(url, "ExampleDataset")
return [path] # it has to return a list
##############################################################################
# Using the ExampleDataset
# ------------------------
#
# Now that the `ExampleDataset` is defined, it could be instantiated directly.
# The rest of the code follows the steps described in the previous tutorials.
dataset = ExampleDataset()
paradigm = LeftRightImagery()
X, labels, meta = paradigm.get_data(dataset=dataset, subjects=[1])
evaluation = WithinSessionEvaluation(
paradigm=paradigm, datasets=dataset, overwrite=False, suffix="newdataset"
)
pipelines = {}
pipelines["MDM"] = make_pipeline(Covariances("oas"), MDM(metric="riemann"))
scores = evaluation.process(pipelines)
print(scores)
##############################################################################
# Pushing on MOABB Github
# -----------------------
#
# If you want to make your dataset available to everyone, you could upload
# your data on public server (like Zenodo or Figshare) and signal that you
# want to add your dataset to MOABB in the `dedicated issue <https://github.com/NeuroTechX/moabb/issues/1>`_. # noqa: E501
# You could then follow the instructions on `how to contribute <https://github.com/NeuroTechX/moabb/blob/master/CONTRIBUTING.md>`_ # noqa: E501