Skip to content

Commit

Permalink
add a small simpler filter set
Browse files Browse the repository at this point in the history
  • Loading branch information
denis-yuen committed Aug 7, 2024
1 parent da37df9 commit 7836907
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 1 deletion.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -504,7 +504,7 @@
<version>3.8.5</version>
</requireMavenVersion>
<requireJavaVersion>
<version>[11.0.11,18.0)</version>
<version>[17.0.0,22.0)</version>
</requireJavaVersion>
<banDuplicatePomDependencyVersions />
<bannedDependencies>
Expand Down
12 changes: 12 additions & 0 deletions topicgenerator/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,18 @@
<artifactId>jtokkit</artifactId>
<version>1.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</dependency>
<dependency>
<!-- https://github.com/TheoKanning/openai-java -->
<groupId>com.theokanning.openai-gpt3-java</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import com.beust.jcommander.JCommander;
import com.beust.jcommander.MissingCommandException;
import com.beust.jcommander.ParameterException;
import com.google.common.collect.Lists;
import com.knuddels.jtokkit.Encodings;
import com.knuddels.jtokkit.api.Encoding;
import com.knuddels.jtokkit.api.EncodingRegistry;
Expand All @@ -32,7 +33,9 @@
import io.dockstore.topicgenerator.client.cli.TopicGeneratorCommandLineArgs.GenerateTopicsCommand;
import io.dockstore.topicgenerator.client.cli.TopicGeneratorCommandLineArgs.GenerateTopicsCommand.OutputCsvHeaders;
import io.dockstore.topicgenerator.client.cli.TopicGeneratorCommandLineArgs.UploadTopicsCommand;
import io.dockstore.topicgenerator.helper.ChuckNorrisFilter;
import io.dockstore.topicgenerator.helper.OpenAIHelper;
import io.dockstore.topicgenerator.helper.StringFilter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
Expand All @@ -59,6 +62,7 @@ public class TopicGeneratorClient {
private static final int MAX_CONTEXT_LENGTH = 16385;
private static final EncodingRegistry REGISTRY = Encodings.newDefaultEncodingRegistry();
private static final Encoding ENCODING = REGISTRY.getEncodingForModel(AI_MODEL);
// Filters that assessTopic(String) runs each AI topic through; a topic flagged by any one of them is not uploaded.
// NOTE(review): the constructor arguments look like language/locale tags ("en", a Quebec French tag) - confirm against ChuckNorrisFilter.
private final List<StringFilter> stringFilters = Lists.newArrayList(new ChuckNorrisFilter("en"), new ChuckNorrisFilter("fr-CA-u-sd-caqc"));

/**
 * Package-private no-argument constructor. The body is intentionally empty; instance state comes from the field initializers.
 */
TopicGeneratorClient() {
}
Expand Down Expand Up @@ -229,6 +233,11 @@ private void uploadTopics(TopicGeneratorConfig topicGeneratorConfig, String inpu
// This command's input CSV headers are the generate-topic command's output headers
final String trsId = entryWithAITopic.get(GenerateTopicsCommand.OutputCsvHeaders.trsId);
final String aiTopic = entryWithAITopic.get(GenerateTopicsCommand.OutputCsvHeaders.aiTopic);
boolean caughtByFilter = assessTopic(aiTopic);
if (caughtByFilter) {
LOG.info("Topic for {} was deemed offensive, please review above", trsId);
continue;
}
final String version = entryWithAITopic.get(OutputCsvHeaders.version);
try {
extendedGa4GhApi.updateAITopic(new UpdateAITopicRequest().aiTopic(aiTopic), version, trsId);
Expand All @@ -239,6 +248,16 @@ private void uploadTopics(TopicGeneratorConfig topicGeneratorConfig, String inpu
}
}

/**
 * Runs an AI-generated topic through every configured {@link StringFilter}.
 *
 * <p>Filters are consulted in list order and evaluation stops at the first filter that flags the topic.
 * The flagging filter's class and the topic itself are logged so the topic can be reviewed.
 *
 * @param aiTopic the topic sentence to check
 * @return {@code true} if any filter flagged the topic, {@code false} if no filter did
 */
private boolean assessTopic(String aiTopic) {
    for (StringFilter filter : this.stringFilters) {
        if (filter.assessTopic(aiTopic)) {
            // Parameterized logging (consistent with the other LOG calls in this class) instead of string concatenation;
            // the rendered message is unchanged.
            LOG.info("{} blocked a topic sentence, please review: {}", filter.getClass(), aiTopic);
            return true;
        }
    }
    return false;
}

private Iterable<CSVRecord> readCsvFile(String inputCsvFilePath, Class<? extends Enum<?>> csvHeaders) {
// Read CSV file
Iterable<CSVRecord> csvRecords = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,4 +66,19 @@ void testUploadAITopics() {
workflow = workflowsApi.getPublishedWorkflow(32L, null);
assertNotNull(workflow.getTopicAI());
}

/**
 * Uploads the "offensive-generated-ai-topics.csv" fixture through the upload-topics command and verifies that the
 * published workflow's AI topic is unset both before and after the upload.
 */
@Test
void testCensoredAITopics() {
    final ApiClient webClient = CommonTestUtilities.getOpenAPIWebClient(true, ADMIN_USERNAME, testingPostgres);
    final WorkflowsApi api = new WorkflowsApi(webClient);

    // Precondition: the workflow has no AI topic yet
    final Workflow before = api.getPublishedWorkflow(32L, null);
    assertNull(before.getTopicAI());

    // Fixture shaped like the output of the "generate-topics" command; per the original note it holds a single row
    final String offensiveTopicsCsv = ResourceHelpers.resourceFilePath("offensive-generated-ai-topics.csv");
    final String[] cliArgs = {"--config", CONFIG_FILE_PATH, "upload-topics", "--aiTopics", offensiveTopicsCsv};
    TopicGeneratorClient.main(cliArgs);

    // The AI topic must still be absent after the upload attempt
    final Workflow after = api.getPublishedWorkflow(32L, null);
    assertNull(after.getTopicAI());
}
}

0 comments on commit 7836907

Please sign in to comment.