end2end_process.py
from preprocessing import (SectionExtractor, SentenceTokenizer,
                           ExtraneousSentenceRemover, ReportLabeler)
from semantic_mapping import (DateTimeMapper, SemanticMapper, StopWordRemover,
                              NegexSmearer, ExtenderPreserver, ExtenderRemover)
from sklearn.base import TransformerMixin
from sklearn.pipeline import make_pipeline
import itertools
import pickle


def read_replacements(replacement_file_path):
    # Load a pickled list of replacement rules, closing the file handle afterwards.
    with open(replacement_file_path, 'rb') as f:
        return pickle.load(f)


class EndToEndProcessor(TransformerMixin):
    def __init__(self, replacement_paths, sections=["impression", "findings", "clinical_history"]):
        # Merge the replacement rules from every supplied file into one flat list.
        replacements = [read_replacements(path) for path in replacement_paths]
        replacements = list(itertools.chain.from_iterable(replacements))
        replacement_mapper = SemanticMapper(replacements)
        # Chain section extraction, sentence-level cleanup, and semantic mapping
        # into a single sklearn pipeline. DateTimeMapper, ExtenderPreserver and
        # ExtenderRemover are assumed to take no required constructor arguments,
        # like the other steps.
        self.pipeline = make_pipeline(
            SectionExtractor(sections=sections),
            SentenceTokenizer(), ExtraneousSentenceRemover(), ReportLabeler(),
            DateTimeMapper(), ExtenderPreserver(), replacement_mapper,
            StopWordRemover(), NegexSmearer(), ExtenderRemover())

    def fit(self, *_):
        # Stateless: nothing to learn, but defining fit lets this transformer
        # be composed into other sklearn pipelines.
        return self

    def transform(self, reports, *_):
        return self.pipeline.transform(reports)
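A minimal usage sketch, assuming hypothetical file names and a placeholder report string (the real pickled replacement files and report texts come from the surrounding project):

# Hypothetical example: build the processor from two pickled replacement lists
# and push a batch of raw report strings through the full pipeline.
processor = EndToEndProcessor(
    replacement_paths=["anatomy_replacements.pkl", "finding_replacements.pkl"],
    sections=["impression", "findings"])
processed = processor.transform(
    ["IMPRESSION: No acute intracranial abnormality."])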