Skip to content

Commit

Permalink
Fix genmcf test and enable stat-checks and local-resolution by default
Browse files Browse the repository at this point in the history
  • Loading branch information
pradh authored Sep 24, 2021
1 parent 7529566 commit 7eea7f2
Show file tree
Hide file tree
Showing 15 changed files with 840 additions and 209 deletions.
14 changes: 7 additions & 7 deletions tool/src/main/java/org/datacommons/tool/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,26 +51,26 @@ class Main {
+ "loaded into memory. Defaults to true.")
public boolean doExistenceChecks;

// TODO: Default to LOCAL after some trials.
@CommandLine.Option(
names = {"-r", "--resolution"},
defaultValue = "NONE",
defaultValue = "LOCAL",
scope = CommandLine.ScopeType.INHERIT,
description =
"Specifies the mode of resolution to use: ${COMPLETION-CANDIDATES}. For no resolution,"
+ " set NONE. To lookup external IDs (like ISO) in DC, resolve local references "
+ "and generated DCIDs, set FULL. To just resolve local references and generate "
+ "DCIDs, set LOCAL. Note that FULL mode may be slower since it makes "
+ "(batched) DC Recon API calls and two passes over your CSV files. Default to NONE.")
+ "(batched) DC Recon API calls and two passes over your CSV files. Default to "
+ "LOCAL.")
public Args.ResolutionMode resolutionMode = Args.ResolutionMode.NONE;

// TODO: Default to true after some trials.
@CommandLine.Option(
names = {"-s", "--stat-checks"},
defaultValue = "false",
defaultValue = "true",
scope = CommandLine.ScopeType.INHERIT,
description =
"Checks integrity of time series by checking for holes, variance in values, etc.")
"Checks integrity of time series by checking for holes, variance in values, etc. "
+ "Defaults to true.")
public boolean doStatChecks;

@CommandLine.Option(
Expand All @@ -86,7 +86,7 @@ class Main {
names = {"-n", "--num-threads"},
defaultValue = "1",
scope = CommandLine.ScopeType.INHERIT,
description = "Number of concurrent threads used for processing CSVs.")
description = "Number of concurrent threads used for processing CSVs. Defaults to 1.")
public int numThreads;

public static void main(String... args) {
Expand Down
52 changes: 35 additions & 17 deletions tool/src/test/java/org/datacommons/tool/GenMcfTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import static java.nio.file.StandardCopyOption.REPLACE_EXISTING;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import com.google.common.truth.Expect;
import java.io.File;
Expand All @@ -25,6 +26,8 @@
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.commons.io.FilenameUtils;
import org.datacommons.util.TmcfCsvParser;
import org.junit.Rule;
import org.junit.Test;
Expand All @@ -42,6 +45,14 @@
public class GenMcfTest {
@Rule public TemporaryFolder testFolder = new TemporaryFolder();
@Rule public final Expect expect = Expect.create();
// Number of output files that must be checked for each test directory, keyed by the directory
// name. Every test directory has to be listed here, and the count has to be updated whenever a
// checked file is added to a test case, so that checks are not skipped without anyone noticing.
private static final Map<String, Integer> EXPECTED_FILES_TO_CHECK =
    Map.of(
        "fataltmcf", 1,
        "resolution", 4,
        "statchecks", 2,
        "successtmcf", 2);

@Test
public void GenMcfTest() throws IOException {
Expand All @@ -53,39 +64,43 @@ public void GenMcfTest() throws IOException {
CommandLine cmd = new CommandLine(app);
File[] testDirectories = new File(resourceFile("genmcf")).listFiles(File::isDirectory);
for (File directory : testDirectories) {
System.err.println("Processing " + directory.getName());
String testName = directory.getName();
System.err.println(testName + ": BEGIN");
assertTrue(EXPECTED_FILES_TO_CHECK.containsKey(testName));
List<String> argsList = new ArrayList<>();
argsList.add("genmcf");
File[] inputFiles = new File(Path.of(directory.getPath(), "input").toString()).listFiles();
List<String> expectedOutputFiles =
new ArrayList<>(
List.of("report.json", "instance_mcf_nodes.mcf", "failed_instance_mcf_nodes.mcf"));
for (File inputFile : inputFiles) {
argsList.add(inputFile.getPath());
String fName = inputFile.getName();
if (fName.endsWith(".csv") || fName.endsWith(".tsv")) {
expectedOutputFiles.add(
"table_mcf_nodes_" + FilenameUtils.removeExtension(fName) + ".mcf");
expectedOutputFiles.add(
"failed_table_mcf_nodes_" + FilenameUtils.removeExtension(fName) + ".mcf");
}
}
argsList.add("--resolution=FULL");
argsList.add("--stat-checks");
argsList.add(
"--output-dir=" + Paths.get(testFolder.getRoot().getPath(), directory.getName()));
argsList.add("--output-dir=" + Paths.get(testFolder.getRoot().getPath(), testName));
String[] args = argsList.toArray(new String[argsList.size()]);
cmd.execute(args);

List<String> files =
List.of(
"report.json",
"table_nodes.mcf",
"failed_table_nodes.mcf",
"nodes.mcf",
"failed_nodes.mcf");

Integer numChecked = 0;
if (goldenFilesPrefix != null && !goldenFilesPrefix.isEmpty()) {
for (var f : files) {
Path actual = TestUtil.getTestFilePath(testFolder, directory.getName(), f);
for (var f : expectedOutputFiles) {
Path actual = TestUtil.getTestFilePath(testFolder, testName, f);
if (!f.equals("report.json") && !new File(actual.toString()).exists()) continue;

Path golden = Path.of(goldenFilesPrefix, "genmcf", directory.getName(), "output", f);
Path golden = Path.of(goldenFilesPrefix, "genmcf", testName, "output", f);
Files.copy(actual, golden, REPLACE_EXISTING);
numChecked++;
}
} else {
for (var f : files) {
Path actual = TestUtil.getTestFilePath(testFolder, directory.getName(), f);
for (var f : expectedOutputFiles) {
Path actual = TestUtil.getTestFilePath(testFolder, testName, f);
if (!f.equals("report.json") && !new File(actual.toString()).exists()) continue;

Path expected = TestUtil.getOutputFilePath(directory.getPath(), f);
Expand All @@ -97,8 +112,11 @@ public void GenMcfTest() throws IOException {
org.datacommons.util.TestUtil.mcfFromFile(expected.toString()),
org.datacommons.util.TestUtil.mcfFromFile(actual.toString()));
}
numChecked++;
}
}
assertEquals(numChecked, EXPECTED_FILES_TO_CHECK.get(testName));
System.err.println(testName + ": PASSED");
}
}

Expand Down
5 changes: 3 additions & 2 deletions tool/src/test/java/org/datacommons/tool/LintTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,14 @@ public void LintTest() throws IOException {
CommandLine cmd = new CommandLine(app);
File[] testDirectories = new File(resourceFile("lint")).listFiles(File::isDirectory);
for (File directory : testDirectories) {
System.err.println("Processing " + directory.getName());
String testName = directory.getName();
System.err.println(testName + ": BEGIN");
List<String> argsList = new ArrayList<>();
argsList.add("lint");
File[] inputFiles = new File(Path.of(directory.getPath(), "input").toString()).listFiles();
for (File inputFile : inputFiles) {
argsList.add(inputFile.getPath());
}
argsList.add("--stat-checks");
argsList.add(
"--output-dir=" + Paths.get(testFolder.getRoot().getPath(), directory.getName()));
String[] args = argsList.toArray(new String[argsList.size()]);
Expand All @@ -64,6 +64,7 @@ public void LintTest() throws IOException {
TestUtil.readStringFromPath(expectedReportPath),
TestUtil.readStringFromPath(actualReportPath));
}
System.err.println(testName + ": PASSED");
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,24 +115,24 @@
"file": "FatalTmcf.tmcf",
"lineNumber": "19"
},
"userMessage": "No definition found for a referenced 'E:' value :: reference: 'E:SVTest->E13', property: 'dcid' node: 'E:SVTest->E3'",
"userMessage": "No definition found for a referenced 'E:' value :: reference: 'E:SVTest->E10', property: 'dcid' node: 'E:SVTest->E3'",
"counterKey": "Sanity_TmcfMissingEntityDef"
}, {
"level": "LEVEL_ERROR",
"location": {
"file": "FatalTmcf.tmcf",
"lineNumber": "19"
},
"userMessage": "No definition found for a referenced 'E:' value :: reference: 'E:SVTest->E10', property: 'dcid' node: 'E:SVTest->E3'",
"counterKey": "Sanity_TmcfMissingEntityDef"
"userMessage": "Column referred to in TMCF is missing from CSV header :: column: 'dcid1', node: 'E:SVTest->E3'",
"counterKey": "Sanity_TmcfMissingColumn"
}, {
"level": "LEVEL_ERROR",
"location": {
"file": "FatalTmcf.tmcf",
"lineNumber": "19"
},
"userMessage": "Column referred to in TMCF is missing from CSV header :: column: 'dcid1', node: 'E:SVTest->E3'",
"counterKey": "Sanity_TmcfMissingColumn"
"userMessage": "No definition found for a referenced 'E:' value :: reference: 'E:SVTest->E13', property: 'dcid' node: 'E:SVTest->E3'",
"counterKey": "Sanity_TmcfMissingEntityDef"
}, {
"level": "LEVEL_ERROR",
"location": {
Expand Down
Loading

0 comments on commit 7eea7f2

Please sign in to comment.