-
Notifications
You must be signed in to change notification settings - Fork 0
/
run.R
50 lines (42 loc) · 1.92 KB
/
run.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# Dependency bootstrap ----
# pkgLoad() (presumably defined in src/pkg.R) verifies that every package the
# workflow needs is installed and installs whatever is missing.
source("src/pkg.R")
pkgLoad()

# Workflow parameters ----
# All settings used further down are read from config.ini, resolved relative
# to the current working directory.
library(ini)
config <- read.ini("config.ini")
# Person name strings ----
# Read the name strings that will be matched, then parse them for the tests
# that follow.
source("src/extract_strings.R")

## `data` is kept in memory for the rest of the run: once matching is done,
## the matched strings are linked back to their specimens through it.
data <- extract_strings(
  path = config$source$data,             # data file location
  columns_list = config$source$columns,  # properties to import
  property = config$source$property,     # property with the names
  data_type = config$source$data_type    # type of data file
)

## Split every name string into first name, last name and initials, trying
## to interpret different syntaxes and teams (uses the dwc_agent ruby gem).
parsed_names <- parse_strings(data, config$source$property)
# Geonames data ----
# Imported from the location configured as `wikifile` in the source section
# of the config.
source("src/import_geonames.R")
geonames <- import_geonames(config$source$wikifile)
# Matching ----
source("src/matching.R")

## Work out which set of cores on this machine can be used for parallel
## computing.
cores <- assess_cores(config$matching$cores)

## Run the matching of the parsed names against the geonames data.
## Arguments are passed positionally, so their order must be preserved.
matching_results <- match_wrapper(
  parsed_names,
  geonames,
  cores,
  config$matching$rmode,
  config$matching$parallel_cut,
  config$source$data
)
## Apply the rule set that filters the raw matches. The outcome is also
## converted to a tibble, which makes exporting the results easier.
processed_results <- matches_process(matching_results, parsed_names)
# Export ----
# Write the matched names out in the export format specified in the config.
source("src/export.R")
processed_results %>%
  export(
    data = data,
    property = config$source$property,
    foldername = config$source$data,
    export_type = config$export
  )