diff --git a/plans/entities.plan b/plans/entities.plan index 8523ddb14c67d45b2edc40e07b71539e55ef52cd..b54e298c9be0c2cd2f9ec8e337322c542616a18a 100644 --- a/plans/entities.plan +++ b/plans/entities.plan @@ -131,12 +131,12 @@ </param--> <param name="taxid_microorganisms"> - <alias module="taxa.microorganisms.taxids" param="mappingFile"/> + <alias module="taxa" param="taxid_microorganisms"/> <alias module="microorganisms-after-strains.taxids" param="mappingFile"/> </param> <param name="taxa+id_full"> - <alias module="taxa.dict" param="dictFile"/> + <alias module="taxa" param="taxaDict"/> </param> @@ -236,8 +236,9 @@ <setFeatures/> </set-year> - <taxa file="plans/taxa.plan"/> - + <taxa file="plans/taxa.plan"> + <sectionFilter>@name == "title" or @name == "abstract" or @name == "text"</sectionFilter> + </taxa> <!-- Project stopwords --> <stopwordsprojector class="TabularProjector"> diff --git a/plans/map_microorganisms.plan b/plans/map_microorganisms.plan index 06d8a3c4c19b3b40370497547b51feae2cd9e95d..c4d294564f78b782289d1560ea24a0de6f93208f 100644 --- a/plans/map_microorganisms.plan +++ b/plans/map_microorganisms.plan @@ -8,19 +8,15 @@ <alias module="print-mapping" param="corpusFile"/> </param> - <param name="taxid_microorganisms"> + <param name="taxid_microorganisms"> <alias module="microorganisms-after-strains.taxids" param="mappingFile"/> - <alias module="taxa.microorganisms.taxids" param="mappingFile"/> + <alias module="taxa" param="taxid_microorganisms"/> </param> <param name="taxa+id_full"> - <alias module="taxa.dict" param="dictFile"/> + <alias module="taxa" param="taxaDict"/> </param> - <param name="NCBI_taxa_ontobiotope"> - <alias module="taxa.dict" param="dictFile"/> - </param> - <read class="TextFileReader"> @@ -28,7 +24,7 @@ <linesLimit>1</linesLimit> </read> - <taxa file="plans/taxa_generic.plan"/> + <taxa file="plans/taxa.plan"/> <syntax file="plans/syntax.plan"/> diff --git a/plans/taxa.plan b/plans/taxa.plan index 
df5d51b3df34d67169a3dd693646582639e464de..a250a16daf1dbfc3268802f2108ad01825c6bf49 100644 --- a/plans/taxa.plan +++ b/plans/taxa.plan @@ -1,9 +1,23 @@ <alvisnlp-plan id="taxa"> + <param name="taxaDict"> + <alias module="dict" param="dictFile"/> + </param> + + <param name="compiledDict"> + <alias module="dict" param="trieSource"/> + </param> + + <param name="sectionFilter"> + <alias module="dict" param="sectionFilter"/> + </param> + + <param name="taxid_microorganisms"> + <alias module="microorganisms.taxids" param="mappingFile"/> + </param> <dict class="TabularProjector"> - <sectionFilter>@name == "title" or @name == "abstract" or @name == "text"</sectionFilter> <targetLayerName>taxa</targetLayerName> - <dictFile>ancillaries/extended-microorganisms-taxonomy/taxa+id_full.txt</dictFile> + <!--<dictFile>ancillaries/extended-microorganisms-taxonomy/taxa+id_full.txt</dictFile>--> <!--<trieSource>ancillaries/extended-microorganisms-taxonomy/taxa+id_full.trie</trieSource>--> <matchStartCaseInsensitive/> <valueFeatures>,taxid,canonical-name,path,pos,rank,species-taxid,species-name</valueFeatures> diff --git a/plans/taxa_generic.plan b/plans/taxa_generic.plan deleted file mode 100644 index c7b8d74c4df46fd58f2dcf47104289316b3cf390..0000000000000000000000000000000000000000 --- a/plans/taxa_generic.plan +++ /dev/null @@ -1,85 +0,0 @@ -<alvisnlp-plan id="taxa"> - <dict class="TabularProjector"> - <targetLayerName>taxa</targetLayerName> - <matchStartCaseInsensitive/> - <valueFeatures>,taxid,canonical-name,path,pos,rank,species-taxid,species-name</valueFeatures> - <constantAnnotationFeatures>source=NCBI</constantAnnotationFeatures> - </dict> - - <disambiguate> - <not-ambiguous class="Action"> - <target>documents.sections.layer:taxa[not span:taxa]</target> - <action>set:feat:not-ambiguous("yes")</action> - <setFeatures/> - </not-ambiguous> - - <disambiguate-coreferences class="Action"> - <target>documents.sections.layer:taxa[@not-ambiguous != "yes" and $ as 
x.(section.nav:sections-before.layer:taxa|before:taxa)[@not-ambiguous == "yes" and @taxid == x.@taxid]]</target> - <action>set:feat:not-ambiguous("yes")</action> - <setFeatures/> - </disambiguate-coreferences> - - <disambiguate-coreferences-more-specific class="Action"> - <target>documents.sections.layer:taxa[@not-ambiguous != "yes" and $ as x.(section.nav:sections-before.layer:taxa|before:taxa)[@not-ambiguous == "yes" and x.@path ^= @path]]</target> - <action>set:feat:not-ambiguous("yes")</action> - <setFeatures/> - </disambiguate-coreferences-more-specific> - - <disambiguate-coreference-more-general class="Action"> - <target>documents.sections.layer:taxa[@not-ambiguous != "yes" and $ as x.(section.nav:sections-before.layer:taxa|before:taxa)[@not-ambiguous == "yes" and @path ^= x.@path]]</target> - <action>set:feat:not-ambiguous("yes")</action> - <setFeatures/> - </disambiguate-coreference-more-general> - - <remove-alternative-to-not-ambiguous class="Action"> - <target>documents.sections.layer:taxa[@not-ambiguous == "yes"].span:taxa[@not-ambiguous != "yes"]</target> - <action>remove:taxa</action> - <removeFromLayer/> - </remove-alternative-to-not-ambiguous> - </disambiguate> - - <!-- - --> - - <!-- tag microorganisms --> - <microorganisms> - <taxids class="FileMapper"> - <target>documents.sections.layer:taxa</target> - <form>@taxid</form> - <targetFeatures>microorganism</targetFeatures> - </taxids> - - <layer class="Action"> - <target>documents.sections.layer:taxa[@microorganism]</target> - <action>add:microorganism</action> - <addToLayer/> - </layer> - - <overlaps class="RemoveOverlaps"> - <layerName>microorganism</layerName> - </overlaps> - </microorganisms> - - <!-- tag bacteria --> - <bacteria> - <tag class="Action"> - <target>documents.sections.layer:taxa[@microorganism and @path ^= "/ncbi:1/ncbi:131567/ncbi:2/"]</target> - <action>set:feat:bacteria("true")|add:bacteria</action> - <setFeatures/> - <addToLayer/> - </tag> - - <check class="Assert"> - 
<target>$</target> - <assertion>documents.sections.layer:bacteria</assertion> - <message>"no bacteria, maybe the Eubacteria path is wrong"</message> - <severe>false</severe> - </check> - - <overlaps class="RemoveOverlaps"> - <layerName>bacteria</layerName> - </overlaps> - </bacteria> - - <strains file="plans/strains-1.plan"/> -</alvisnlp-plan>