Skip to content

Commit

Permalink
Make identifier column user-configurable
Browse files (browse the repository at this point in the history)
  • Loading branch information
qqndrew committed Feb 23, 2022
1 parent 06b71e9 commit c46eaff
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 5 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number | Diff line number | Diff line change
Expand Up @@ -6,7 +6,7 @@

<groupId>org.ohnlp.medtagger</groupId>
<artifactId>medtagger</artifactId>
<version>1.0.29</version>
<version>1.0.30</version>
<description>The MedTagger biomedical information extraction pipeline</description>


Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -27,7 +27,6 @@
import org.ohnlp.backbone.api.exceptions.ComponentInitializationException;
import org.ohnlp.medtagger.ae.AhoCorasickLookupAnnotator;
import org.ohnlp.medtagger.context.RuleContextAnnotator;
import org.ohnlp.medtagger.ie.ae.MedTaggerIEAnnotator;
import org.ohnlp.medtagger.type.ConceptMention;
import org.ohnlp.typesystem.type.textspan.Segment;
import org.ohnlp.typesystem.type.textspan.Sentence;
Expand All @@ -53,13 +52,15 @@ public class MedTaggerBackboneTransform extends Transform {
private String inputField;
private String resources;
private RunMode mode;
private String noteIdField;

@Override
public void initFromConfig(JsonNode config) throws ComponentInitializationException {
try {
this.inputField = config.get("input").asText();
this.mode = config.has("mode") ? RunMode.valueOf(config.get("mode").textValue().toUpperCase(Locale.ROOT)) : RunMode.STANDALONE;
this.resources = config.get("ruleset").asText();
this.noteIdField = config.has("identifier_col") ? config.get("identifier_col").asText() : "note_id";
} catch (Throwable t) {
throw new ComponentInitializationException(t);
}
Expand All @@ -68,23 +69,25 @@ public void initFromConfig(JsonNode config) throws ComponentInitializationExcept
@Override
public PCollection<Row> expand(PCollection<Row> input) {
return input.apply("MedTagger Concept Extraction",
ParDo.of(new MedTaggerPipelineFunction(this.inputField, this.resources, this.mode)));
ParDo.of(new MedTaggerPipelineFunction(this.inputField, this.resources, this.mode, this.noteIdField)));
}

private static class MedTaggerPipelineFunction extends DoFn<Row, Row> {
private final String resourceFolder;
private final String textField;
private final RunMode mode;
private final String noteIdField;

// UIMA components are not serializable, and thus must be initialized per-executor via the @Setup annotation
private transient AnalysisEngine aae;
private transient ResourceManager resMgr;
private transient CAS cas;

public MedTaggerPipelineFunction(String textField, String resourceFolder, RunMode mode) {
public MedTaggerPipelineFunction(String textField, String resourceFolder, RunMode mode, String noteIdField) {
this.textField = textField;
this.resourceFolder = resourceFolder;
this.mode = mode;
this.noteIdField = noteIdField;
}

@Setup
Expand Down Expand Up @@ -168,7 +171,7 @@ public void processElement(@Element Row input, OutputReceiver<Row> output) {
List<Schema.Field> fields = new LinkedList<>(input.getSchema().getFields());
fields.add(Schema.Field.of("nlp_output_json", Schema.FieldType.STRING));
Schema schema = Schema.of(fields.toArray(new Schema.Field[0]));
Object id = input.getBaseValue("note_id");
Object id = input.getBaseValue(this.noteIdField);
String text = input.getString(this.textField);
cas.reset();
cas.setDocumentText(text);
Expand Down

0 comments on commit c46eaff

Please sign in to comment.