diff --git a/pom.xml b/pom.xml
index b89d66f2..4e2995ea 100644
--- a/pom.xml
+++ b/pom.xml
@@ -504,7 +504,7 @@
     3.8.5
-    [11.0.11,18.0)
+    [17.0.0,22.0)
diff --git a/topicgenerator/pom.xml b/topicgenerator/pom.xml
index df05d6ee..ae6ca26e 100644
--- a/topicgenerator/pom.xml
+++ b/topicgenerator/pom.xml
@@ -114,6 +114,18 @@
       jtokkit
       1.0.0
+
+      org.apache.commons
+      commons-lang3
+
+
+      com.google.guava
+      guava
+
+
+      commons-io
+      commons-io
+
       com.theokanning.openai-gpt3-java
diff --git a/topicgenerator/src/main/java/io/dockstore/topicgenerator/client/cli/TopicGeneratorClient.java b/topicgenerator/src/main/java/io/dockstore/topicgenerator/client/cli/TopicGeneratorClient.java
index 012e16c0..40e87ae8 100644
--- a/topicgenerator/src/main/java/io/dockstore/topicgenerator/client/cli/TopicGeneratorClient.java
+++ b/topicgenerator/src/main/java/io/dockstore/topicgenerator/client/cli/TopicGeneratorClient.java
@@ -9,6 +9,7 @@
 import com.beust.jcommander.JCommander;
 import com.beust.jcommander.MissingCommandException;
 import com.beust.jcommander.ParameterException;
+import com.google.common.collect.Lists;
 import com.knuddels.jtokkit.Encodings;
 import com.knuddels.jtokkit.api.Encoding;
 import com.knuddels.jtokkit.api.EncodingRegistry;
@@ -32,7 +33,9 @@
 import io.dockstore.topicgenerator.client.cli.TopicGeneratorCommandLineArgs.GenerateTopicsCommand;
 import io.dockstore.topicgenerator.client.cli.TopicGeneratorCommandLineArgs.GenerateTopicsCommand.OutputCsvHeaders;
 import io.dockstore.topicgenerator.client.cli.TopicGeneratorCommandLineArgs.UploadTopicsCommand;
+import io.dockstore.topicgenerator.helper.ChuckNorrisFilter;
 import io.dockstore.topicgenerator.helper.OpenAIHelper;
+import io.dockstore.topicgenerator.helper.StringFilter;
 import java.io.FileReader;
 import java.io.FileWriter;
 import java.io.IOException;
@@ -59,6 +62,7 @@ public class TopicGeneratorClient {
     private static final int MAX_CONTEXT_LENGTH = 16385;
     private static final EncodingRegistry REGISTRY = Encodings.newDefaultEncodingRegistry();
     private static final Encoding ENCODING = REGISTRY.getEncodingForModel(AI_MODEL);
+    private final List<StringFilter> stringFilters = Lists.newArrayList(new ChuckNorrisFilter("en"), new ChuckNorrisFilter("fr-CA-u-sd-caqc"));
 
     TopicGeneratorClient() {
     }
@@ -229,6 +233,11 @@ private void uploadTopics(TopicGeneratorConfig topicGeneratorConfig, String inpu
             // This command's input CSV headers are the generate-topic command's output headers
             final String trsId = entryWithAITopic.get(GenerateTopicsCommand.OutputCsvHeaders.trsId);
             final String aiTopic = entryWithAITopic.get(GenerateTopicsCommand.OutputCsvHeaders.aiTopic);
+            boolean caughtByFilter = assessTopic(aiTopic);
+            if (caughtByFilter) {
+                LOG.info("Topic for {} was deemed offensive, please review above", trsId);
+                continue;
+            }
             final String version = entryWithAITopic.get(OutputCsvHeaders.version);
             try {
                 extendedGa4GhApi.updateAITopic(new UpdateAITopicRequest().aiTopic(aiTopic), version, trsId);
@@ -239,6 +248,22 @@ private void uploadTopics(TopicGeneratorConfig topicGeneratorConfig, String inpu
         }
     }
 
+    /**
+     * Checks an AI-generated topic against every configured string filter.
+     *
+     * @param aiTopic the topic sentence to check
+     * @return true as soon as one filter flags the topic (the filter class and topic are logged), false if none do
+     */
+    private boolean assessTopic(String aiTopic) {
+        for (StringFilter filter : this.stringFilters) {
+            if (filter.assessTopic(aiTopic)) {
+                LOG.info("{} blocked a topic sentence, please review: {}", filter.getClass(), aiTopic);
+                return true;
+            }
+        }
+        return false;
+    }
+
     private Iterable readCsvFile(String inputCsvFilePath, Class> csvHeaders) {
         // Read CSV file
         Iterable csvRecords = null;
diff --git a/topicgenerator/src/test/java/io/dockstore/topicgenerator/client/cli/TopicGeneratorClientIT.java b/topicgenerator/src/test/java/io/dockstore/topicgenerator/client/cli/TopicGeneratorClientIT.java
index 725ce8f8..02843970 100644
--- a/topicgenerator/src/test/java/io/dockstore/topicgenerator/client/cli/TopicGeneratorClientIT.java
+++ b/topicgenerator/src/test/java/io/dockstore/topicgenerator/client/cli/TopicGeneratorClientIT.java
@@ -66,4 +66,19 @@ void testUploadAITopics() {
         workflow = workflowsApi.getPublishedWorkflow(32L, null);
         assertNotNull(workflow.getTopicAI());
     }
+
+    @Test
+    void testCensoredAITopics() {
+        final ApiClient apiClient = CommonTestUtilities.getOpenAPIWebClient(true, ADMIN_USERNAME, testingPostgres);
+        final WorkflowsApi workflowsApi = new WorkflowsApi(apiClient);
+
+        Workflow workflow = workflowsApi.getPublishedWorkflow(32L, null);
+        assertNull(workflow.getTopicAI());
+        // This file is modelled after the output file from the "generate-topics" command. It contains 1 row
+        String aiTopicsFilePath = ResourceHelpers.resourceFilePath("offensive-generated-ai-topics.csv");
+
+        TopicGeneratorClient.main(new String[] {"--config", CONFIG_FILE_PATH, "upload-topics", "--aiTopics", aiTopicsFilePath});
+        workflow = workflowsApi.getPublishedWorkflow(32L, null);
+        assertNull(workflow.getTopicAI());
+    }
 }