Skip to content

Commit

Permalink
Update regex segment annotator
Browse files (browse the repository at this point in the history)
  • Loading branch information
mccullen committed Aug 27, 2021
1 parent 1e610fc commit f8d7930
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 18 deletions.
Original file line number Diff line number Diff line change
@@ -1,24 +1,19 @@
package icapa.ae;

import javafx.geometry.Pos;
import javafx.util.Pair;
import org.apache.ctakes.typesystem.type.textsem.DiseaseDisorderMention;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;

import java.lang.annotation.Annotation;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class TagSectionAnnotator extends JCasAnnotator_ImplBase {
public class RegexSegmentAnnotator extends JCasAnnotator_ImplBase {
public static final String PARAM_START_REGEX = "StartRegex";
@ConfigurationParameter(
name = PARAM_START_REGEX,
Expand All @@ -33,12 +28,21 @@ public class TagSectionAnnotator extends JCasAnnotator_ImplBase {
)
private String _endRegex;

public static final String PARAM_HEADER = "Header";
public static final String PARAM_SEGMENT_ID = "SegmentId";
@ConfigurationParameter(
name = PARAM_HEADER,
description = "Header"
name = PARAM_SEGMENT_ID,
description = "Segment id"
)
private String _header;
private String _segmentId;

public static final String PARAM_PREFERRED_TEXT = "PreferredText";
@ConfigurationParameter(
name = PARAM_PREFERRED_TEXT,
description = "Tag name",
mandatory = false,
defaultValue = ""
)
private String _preferredText;

private Pattern _startPattern;
private Pattern _endPattern;
Expand All @@ -48,6 +52,8 @@ public void initialize(UimaContext context) throws ResourceInitializationExcepti
super.initialize(context);
_startPattern = Pattern.compile(_startRegex);
_endPattern = Pattern.compile(_endRegex);
// If preferred text is not set, default to segment id
_preferredText = _preferredText.equals("") ? _segmentId : _preferredText;
}

// Utility class to mark the start/end of the section tag
Expand Down Expand Up @@ -103,8 +109,10 @@ public int compare(Position o1, Position o2) {

// Add the segment to the cas
Segment segment = new Segment(jCas);
segment.setId(_header);
segment.setPreferredText(_header);
segment.setId(_segmentId);
segment.setPreferredText(_preferredText);
segment.setBegin(startMatcher.end());
segment.setEnd(endMatcher.start());
segment.addToIndexes();
}
}
Expand Down Expand Up @@ -138,7 +146,7 @@ public int compare(Position o1, Position o2) {
if (index < tags.size() && index >= 0) {
Position tag = tags.get(index);
if (ia.getBegin() >= tag._start && ia.getEnd() <= tag._end) {
ia.setSegmentID(_header);
ia.setSegmentID(_segmentId);
}
}
});
Expand Down
10 changes: 5 additions & 5 deletions src/test/java/TagSectionAnnotatorTests.java
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import icapa.ae.TagSectionAnnotator;
import icapa.ae.RegexSegmentAnnotator;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
Expand Down Expand Up @@ -46,10 +46,10 @@ public void testSectionizer() throws Exception {
a.addToIndexes();

AnalysisEngine ae = AnalysisEngineFactory.createEngine(
TagSectionAnnotator.class,
TagSectionAnnotator.PARAM_HEADER, "Test",
TagSectionAnnotator.PARAM_START_REGEX, "XXX",
TagSectionAnnotator.PARAM_END_REGEX, "YYY");
RegexSegmentAnnotator.class,
RegexSegmentAnnotator.PARAM_PREFERRED_TEXT, "Test",
RegexSegmentAnnotator.PARAM_START_REGEX, "XXX",
RegexSegmentAnnotator.PARAM_END_REGEX, "YYY");

SimplePipeline.runPipeline(jCas, ae);

Expand Down

0 comments on commit f8d7930

Please sign in to comment.