diff --git a/pom.xml b/pom.xml
index b89d66f2..4e2995ea 100644
--- a/pom.xml
+++ b/pom.xml
@@ -504,7 +504,7 @@
3.8.5
- [11.0.11,18.0)
+ [17.0.0,22.0)
diff --git a/topicgenerator/pom.xml b/topicgenerator/pom.xml
index df05d6ee..ae6ca26e 100644
--- a/topicgenerator/pom.xml
+++ b/topicgenerator/pom.xml
@@ -114,6 +114,18 @@
jtokkit
1.0.0
+
+ org.apache.commons
+ commons-lang3
+
+
+ com.google.guava
+ guava
+
+
+ commons-io
+ commons-io
+
com.theokanning.openai-gpt3-java
diff --git a/topicgenerator/src/main/java/io/dockstore/topicgenerator/client/cli/TopicGeneratorClient.java b/topicgenerator/src/main/java/io/dockstore/topicgenerator/client/cli/TopicGeneratorClient.java
index 012e16c0..40e87ae8 100644
--- a/topicgenerator/src/main/java/io/dockstore/topicgenerator/client/cli/TopicGeneratorClient.java
+++ b/topicgenerator/src/main/java/io/dockstore/topicgenerator/client/cli/TopicGeneratorClient.java
@@ -9,6 +9,7 @@
import com.beust.jcommander.JCommander;
import com.beust.jcommander.MissingCommandException;
import com.beust.jcommander.ParameterException;
+import com.google.common.collect.Lists;
import com.knuddels.jtokkit.Encodings;
import com.knuddels.jtokkit.api.Encoding;
import com.knuddels.jtokkit.api.EncodingRegistry;
@@ -32,7 +33,9 @@
import io.dockstore.topicgenerator.client.cli.TopicGeneratorCommandLineArgs.GenerateTopicsCommand;
import io.dockstore.topicgenerator.client.cli.TopicGeneratorCommandLineArgs.GenerateTopicsCommand.OutputCsvHeaders;
import io.dockstore.topicgenerator.client.cli.TopicGeneratorCommandLineArgs.UploadTopicsCommand;
+import io.dockstore.topicgenerator.helper.ChuckNorrisFilter;
import io.dockstore.topicgenerator.helper.OpenAIHelper;
+import io.dockstore.topicgenerator.helper.StringFilter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
@@ -59,6 +62,7 @@ public class TopicGeneratorClient {
private static final int MAX_CONTEXT_LENGTH = 16385;
private static final EncodingRegistry REGISTRY = Encodings.newDefaultEncodingRegistry();
private static final Encoding ENCODING = REGISTRY.getEncodingForModel(AI_MODEL);
+ private final List stringFilters = Lists.newArrayList(new ChuckNorrisFilter("en"), new ChuckNorrisFilter("fr-CA-u-sd-caqc"));
TopicGeneratorClient() {
}
@@ -229,6 +233,11 @@ private void uploadTopics(TopicGeneratorConfig topicGeneratorConfig, String inpu
// This command's input CSV headers are the generate-topic command's output headers
final String trsId = entryWithAITopic.get(GenerateTopicsCommand.OutputCsvHeaders.trsId);
final String aiTopic = entryWithAITopic.get(GenerateTopicsCommand.OutputCsvHeaders.aiTopic);
+ boolean caughtByFilter = assessTopic(aiTopic);
+ if (caughtByFilter) {
+ LOG.info("Topic for {} was deemed offensive, please review above", trsId);
+ continue;
+ }
final String version = entryWithAITopic.get(OutputCsvHeaders.version);
try {
extendedGa4GhApi.updateAITopic(new UpdateAITopicRequest().aiTopic(aiTopic), version, trsId);
@@ -239,6 +248,16 @@ private void uploadTopics(TopicGeneratorConfig topicGeneratorConfig, String inpu
}
}
+ /**
+  * Runs an AI topic sentence through each filter in {@code stringFilters}, in list order,
+  * and stops at the first filter whose {@code assessTopic} call returns true.
+  *
+  * @param aiTopic the topic sentence to check
+  * @return true if some filter returned true for the topic (the filter class and the topic
+  *         are logged at INFO level); false if no filter did
+  */
+ private boolean assessTopic(String aiTopic) {
+     for (StringFilter filter : this.stringFilters) {
+         if (filter.assessTopic(aiTopic)) {
+             // Parameterized logging: same rendered message, but no string building when INFO is disabled
+             LOG.info("{} blocked a topic sentence, please review: {}", filter.getClass(), aiTopic);
+             return true;
+         }
+     }
+     return false;
+ }
+
private Iterable readCsvFile(String inputCsvFilePath, Class extends Enum>> csvHeaders) {
// Read CSV file
Iterable csvRecords = null;
diff --git a/topicgenerator/src/test/java/io/dockstore/topicgenerator/client/cli/TopicGeneratorClientIT.java b/topicgenerator/src/test/java/io/dockstore/topicgenerator/client/cli/TopicGeneratorClientIT.java
index 725ce8f8..02843970 100644
--- a/topicgenerator/src/test/java/io/dockstore/topicgenerator/client/cli/TopicGeneratorClientIT.java
+++ b/topicgenerator/src/test/java/io/dockstore/topicgenerator/client/cli/TopicGeneratorClientIT.java
@@ -66,4 +66,19 @@ void testUploadAITopics() {
workflow = workflowsApi.getPublishedWorkflow(32L, null);
assertNotNull(workflow.getTopicAI());
}
+
+ /**
+  * Runs the "upload-topics" command against a CSV of offensive AI topics and checks that
+  * the published workflow's AI topic is null both before and after the upload.
+  */
+ @Test
+ void testCensoredAITopics() {
+     final WorkflowsApi workflowsApi = new WorkflowsApi(CommonTestUtilities.getOpenAPIWebClient(true, ADMIN_USERNAME, testingPostgres));
+
+     // Precondition: the workflow has no AI topic before the upload is attempted
+     final Workflow workflowBeforeUpload = workflowsApi.getPublishedWorkflow(32L, null);
+     assertNull(workflowBeforeUpload.getTopicAI());
+
+     // The CSV is modelled after the output file of the "generate-topics" command and contains 1 row
+     final String offensiveTopicsCsvPath = ResourceHelpers.resourceFilePath("offensive-generated-ai-topics.csv");
+     final String[] uploadArgs = {"--config", CONFIG_FILE_PATH, "upload-topics", "--aiTopics", offensiveTopicsCsvPath};
+     TopicGeneratorClient.main(uploadArgs);
+
+     // The AI topic is expected to still be unset after the upload command has run
+     final Workflow workflowAfterUpload = workflowsApi.getPublishedWorkflow(32L, null);
+     assertNull(workflowAfterUpload.getTopicAI());
+ }
}