diff --git a/.gitignore b/.gitignore
index 74a7b99b5c3ded20292a5509436601ca3ee36790..cf2817aecc6ae86c9d0f7f2cab2e8f9d5356996f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,13 +3,19 @@ corpora/cirm/mapped_habitats.txt
 corpora/cirm/mapped_taxids.txt
 corpora/cirm/mapped_yeast_habitats.txt
 corpora/cirm/mapped_yeast_taxa.txt
+corpora/cirm/mapped_cfbp_habitats.txt
+corpora/cirm/mapped_cfbp_taxa.txt
 corpora/cirm/taxa.txt
 corpora/cirm/test-yeast.txt
 corpora/cirm/test.txt
 corpora/cirm/yeast_habitats.txt
 corpora/cirm/yeast_taxa.txt
+corpora/cirm/cfbp_habitats.txt
+corpora/cirm/cfbp_taxa.txt
 corpora/cirm/Levures_2021/Florilege_21012021.tsv
 corpora/cirm/BIA_2021/florilege_export_final_17_02_21.tsv
+corpora/cirm/CFBP_2020/CFPB_22_sept_2020_Pathotype.tsv
+corpora/cirm/CFBP_2020/CFPB_22_sept_2020_Type.tsv
 corpora/dsmz/habitats.txt
 corpora/dsmz/mapped_habitats.txt
 corpora/dsmz/mapped_taxids.txt
@@ -69,3 +75,4 @@ yatea-train/
 softwares/alvisir-install/
 softwares/*.sif
 softwares/obo-utils
+log/
diff --git a/corpora/cirm/CFBP_2020/CFPB_22_sept_2020_Pathotype.xlsx b/corpora/cirm/CFBP_2020/CFPB_22_sept_2020_Pathotype.xlsx
new file mode 100755
index 0000000000000000000000000000000000000000..faec4fdd0c3d8602a81c51958085387f25bd4f0f
Binary files /dev/null and b/corpora/cirm/CFBP_2020/CFPB_22_sept_2020_Pathotype.xlsx differ
diff --git a/corpora/cirm/CFBP_2020/CFPB_22_sept_2020_Type.xlsx b/corpora/cirm/CFBP_2020/CFPB_22_sept_2020_Type.xlsx
new file mode 100755
index 0000000000000000000000000000000000000000..f4b1962bd3283627180014102f4c31e1cdd4470f
Binary files /dev/null and b/corpora/cirm/CFBP_2020/CFPB_22_sept_2020_Type.xlsx differ
diff --git a/docs/3-pipeline.svg b/docs/3-pipeline.svg
index 08a1334f7c77de730d559465ae1228b65b8e0092..2a89b15f2cfd166ddf57745d593c2a71bea7195a 100644
--- a/docs/3-pipeline.svg
+++ b/docs/3-pipeline.svg
@@ -4,115 +4,165 @@
 <!-- Generated by graphviz version 2.38.0 (20140413.2041)
  -->
 <!-- Title: snakemake_dag Pages: 1 -->
-<svg width="647pt" height="260pt"
- viewBox="0.00 0.00 647.00 260.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="1109pt" height="260pt"
+ viewBox="0.00 0.00 1109.18 260.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 256)">
 <title>snakemake_dag</title>
-<polygon fill="white" stroke="none" points="-4,4 -4,-256 643,-256 643,4 -4,4"/>
+<polygon fill="white" stroke="none" points="-4,4 -4,-256 1105.18,-256 1105.18,4 -4,4"/>
 <!-- 0 -->
 <g id="node1" class="node"><title>0</title>
-<path fill="none" stroke="#56d86b" stroke-width="2" d="M319.5,-36C319.5,-36 289.5,-36 289.5,-36 283.5,-36 277.5,-30 277.5,-24 277.5,-24 277.5,-12 277.5,-12 277.5,-6 283.5,-0 289.5,-0 289.5,-0 319.5,-0 319.5,-0 325.5,-0 331.5,-6 331.5,-12 331.5,-12 331.5,-24 331.5,-24 331.5,-30 325.5,-36 319.5,-36"/>
-<text text-anchor="middle" x="304.5" y="-15.5" font-family="sans" font-size="10.00">all</text>
+<path fill="none" stroke="#68d856" stroke-width="2" d="M475.684,-36C475.684,-36 445.684,-36 445.684,-36 439.684,-36 433.684,-30 433.684,-24 433.684,-24 433.684,-12 433.684,-12 433.684,-6 439.684,-0 445.684,-0 445.684,-0 475.684,-0 475.684,-0 481.684,-0 487.684,-6 487.684,-12 487.684,-12 487.684,-24 487.684,-24 487.684,-30 481.684,-36 475.684,-36"/>
+<text text-anchor="middle" x="460.684" y="-15.5" font-family="sans" font-size="10.00">all</text>
 </g>
 <!-- 1 -->
 <g id="node2" class="node"><title>1</title>
-<path fill="none" stroke="#70d856" stroke-width="2" d="M268,-108C268,-108 177,-108 177,-108 171,-108 165,-102 165,-96 165,-96 165,-84 165,-84 165,-78 171,-72 177,-72 177,-72 268,-72 268,-72 274,-72 280,-78 280,-84 280,-84 280,-96 280,-96 280,-102 274,-108 268,-108"/>
-<text text-anchor="middle" x="222.5" y="-87.5" font-family="sans" font-size="10.00">format_cirm_results</text>
+<path fill="none" stroke="#8fd856" stroke-width="2" d="M225.184,-108C225.184,-108 134.184,-108 134.184,-108 128.184,-108 122.184,-102 122.184,-96 122.184,-96 122.184,-84 122.184,-84 122.184,-78 128.184,-72 134.184,-72 134.184,-72 225.184,-72 225.184,-72 231.184,-72 237.184,-78 237.184,-84 237.184,-84 237.184,-96 237.184,-96 237.184,-102 231.184,-108 225.184,-108"/>
+<text text-anchor="middle" x="179.684" y="-87.5" font-family="sans" font-size="10.00">format_cirm_results</text>
 </g>
 <!-- 1&#45;&gt;0 -->
 <g id="edge1" class="edge"><title>1&#45;&gt;0</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M242.77,-71.6966C252.998,-62.9655 265.561,-52.2405 276.678,-42.7503"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="279.13,-45.259 284.463,-36.1043 274.585,-39.935 279.13,-45.259"/>
+<path fill="none" stroke="grey" stroke-width="2" d="M237.227,-74.6655C293.024,-60.7659 376.017,-40.0915 423.586,-28.2415"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="424.603,-31.5952 433.46,-25.7817 422.911,-24.8028 424.603,-31.5952"/>
 </g>
 <!-- 2 -->
 <g id="node3" class="node"><title>2</title>
-<path fill="none" stroke="#d88556" stroke-width="2" d="M448,-108C448,-108 325,-108 325,-108 319,-108 313,-102 313,-96 313,-96 313,-84 313,-84 313,-78 319,-72 325,-72 325,-72 448,-72 448,-72 454,-72 460,-78 460,-84 460,-84 460,-96 460,-96 460,-102 454,-108 448,-108"/>
-<text text-anchor="middle" x="386.5" y="-87.5" font-family="sans" font-size="10.00">format_cirm_yeast_results</text>
+<path fill="none" stroke="#d85656" stroke-width="2" d="M522.184,-108C522.184,-108 399.184,-108 399.184,-108 393.184,-108 387.184,-102 387.184,-96 387.184,-96 387.184,-84 387.184,-84 387.184,-78 393.184,-72 399.184,-72 399.184,-72 522.184,-72 522.184,-72 528.184,-72 534.184,-78 534.184,-84 534.184,-84 534.184,-96 534.184,-96 534.184,-102 528.184,-108 522.184,-108"/>
+<text text-anchor="middle" x="460.684" y="-87.5" font-family="sans" font-size="10.00">format_cirm_yeast_results</text>
 </g>
 <!-- 2&#45;&gt;0 -->
 <g id="edge2" class="edge"><title>2&#45;&gt;0</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M366.23,-71.6966C356.002,-62.9655 343.439,-52.2405 332.322,-42.7503"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="334.415,-39.935 324.537,-36.1043 329.87,-45.259 334.415,-39.935"/>
+<path fill="none" stroke="grey" stroke-width="2" d="M460.684,-71.6966C460.684,-63.9827 460.684,-54.7125 460.684,-46.1124"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="464.184,-46.1043 460.684,-36.1043 457.184,-46.1044 464.184,-46.1043"/>
 </g>
 <!-- 3 -->
 <g id="node4" class="node"><title>3</title>
-<path fill="none" stroke="#56b1d8" stroke-width="2" d="M137,-180C137,-180 12,-180 12,-180 6,-180 1.42109e-14,-174 1.42109e-14,-168 1.42109e-14,-168 1.42109e-14,-156 1.42109e-14,-156 1.42109e-14,-150 6,-144 12,-144 12,-144 137,-144 137,-144 143,-144 149,-150 149,-156 149,-156 149,-168 149,-168 149,-174 143,-180 137,-180"/>
-<text text-anchor="middle" x="74.5" y="-159.5" font-family="sans" font-size="10.00">map_cirm_microorganisms</text>
+<path fill="none" stroke="#56a9d8" stroke-width="2" d="M858.184,-108C858.184,-108 741.184,-108 741.184,-108 735.184,-108 729.184,-102 729.184,-96 729.184,-96 729.184,-84 729.184,-84 729.184,-78 735.184,-72 741.184,-72 741.184,-72 858.184,-72 858.184,-72 864.184,-72 870.184,-78 870.184,-84 870.184,-84 870.184,-96 870.184,-96 870.184,-102 864.184,-108 858.184,-108"/>
+<text text-anchor="middle" x="799.684" y="-87.5" font-family="sans" font-size="10.00">format_cirm_cfbp_results</text>
 </g>
-<!-- 3&#45;&gt;1 -->
-<g id="edge3" class="edge"><title>3&#45;&gt;1</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M110.705,-143.876C130.758,-134.392 155.872,-122.513 177.214,-112.419"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="178.869,-115.508 186.413,-108.068 175.876,-109.18 178.869,-115.508"/>
+<!-- 3&#45;&gt;0 -->
+<g id="edge3" class="edge"><title>3&#45;&gt;0</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M729.031,-74.4109C658.665,-59.881 553.362,-38.1371 497.91,-26.6867"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="498.333,-23.2003 487.832,-24.6057 496.917,-30.0557 498.333,-23.2003"/>
 </g>
 <!-- 4 -->
 <g id="node5" class="node"><title>4</title>
-<path fill="none" stroke="#56d8c9" stroke-width="2" d="M265.5,-180C265.5,-180 179.5,-180 179.5,-180 173.5,-180 167.5,-174 167.5,-168 167.5,-168 167.5,-156 167.5,-156 167.5,-150 173.5,-144 179.5,-144 179.5,-144 265.5,-144 265.5,-144 271.5,-144 277.5,-150 277.5,-156 277.5,-156 277.5,-168 277.5,-168 277.5,-174 271.5,-180 265.5,-180"/>
-<text text-anchor="middle" x="222.5" y="-159.5" font-family="sans" font-size="10.00">map_cirm_habitats</text>
+<path fill="none" stroke="#56d892" stroke-width="2" d="M164.684,-252C164.684,-252 36.6842,-252 36.6842,-252 30.6842,-252 24.6842,-246 24.6842,-240 24.6842,-240 24.6842,-228 24.6842,-228 24.6842,-222 30.6842,-216 36.6842,-216 36.6842,-216 164.684,-216 164.684,-216 170.684,-216 176.684,-222 176.684,-228 176.684,-228 176.684,-240 176.684,-240 176.684,-246 170.684,-252 164.684,-252"/>
+<text text-anchor="middle" x="100.684" y="-231.5" font-family="sans" font-size="10.00">get_cirm_bia_taxa_habitats</text>
 </g>
 <!-- 4&#45;&gt;1 -->
 <g id="edge4" class="edge"><title>4&#45;&gt;1</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M222.5,-143.697C222.5,-135.983 222.5,-126.712 222.5,-118.112"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="226,-118.104 222.5,-108.104 219,-118.104 226,-118.104"/>
+<path fill="none" stroke="grey" stroke-width="2" d="M49.0357,-215.965C32.5446,-207.803 16.1342,-196.177 6.68419,-180 -1.38637,-166.185 -2.82928,-156.864 6.68419,-144 30.6711,-111.564 74.1296,-98.3336 111.487,-93.2024"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="112.328,-96.6266 121.84,-91.9606 111.494,-89.6764 112.328,-96.6266"/>
 </g>
 <!-- 5 -->
 <g id="node6" class="node"><title>5</title>
-<path fill="none" stroke="#56d89a" stroke-width="2" d="M465.5,-180C465.5,-180 307.5,-180 307.5,-180 301.5,-180 295.5,-174 295.5,-168 295.5,-168 295.5,-156 295.5,-156 295.5,-150 301.5,-144 307.5,-144 307.5,-144 465.5,-144 465.5,-144 471.5,-144 477.5,-150 477.5,-156 477.5,-156 477.5,-168 477.5,-168 477.5,-174 471.5,-180 465.5,-180"/>
-<text text-anchor="middle" x="386.5" y="-159.5" font-family="sans" font-size="10.00">map_cirm_yeast_microorganisms</text>
+<path fill="none" stroke="#56d0d8" stroke-width="2" d="M173.684,-180C173.684,-180 27.6842,-180 27.6842,-180 21.6842,-180 15.6842,-174 15.6842,-168 15.6842,-168 15.6842,-156 15.6842,-156 15.6842,-150 21.6842,-144 27.6842,-144 27.6842,-144 173.684,-144 173.684,-144 179.684,-144 185.684,-150 185.684,-156 185.684,-156 185.684,-168 185.684,-168 185.684,-174 179.684,-180 173.684,-180"/>
+<text text-anchor="middle" x="100.684" y="-159.5" font-family="sans" font-size="10.00">map_cirm_bia_microorganisms</text>
 </g>
-<!-- 5&#45;&gt;2 -->
-<g id="edge5" class="edge"><title>5&#45;&gt;2</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M386.5,-143.697C386.5,-135.983 386.5,-126.712 386.5,-118.112"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="390,-118.104 386.5,-108.104 383,-118.104 390,-118.104"/>
+<!-- 4&#45;&gt;5 -->
+<g id="edge13" class="edge"><title>4&#45;&gt;5</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M100.684,-215.697C100.684,-207.983 100.684,-198.712 100.684,-190.112"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="104.184,-190.104 100.684,-180.104 97.1843,-190.104 104.184,-190.104"/>
 </g>
 <!-- 6 -->
 <g id="node7" class="node"><title>6</title>
-<path fill="none" stroke="#d8b456" stroke-width="2" d="M627,-180C627,-180 508,-180 508,-180 502,-180 496,-174 496,-168 496,-168 496,-156 496,-156 496,-150 502,-144 508,-144 508,-144 627,-144 627,-144 633,-144 639,-150 639,-156 639,-156 639,-168 639,-168 639,-174 633,-180 627,-180"/>
-<text text-anchor="middle" x="567.5" y="-159.5" font-family="sans" font-size="10.00">map_cirm_yeast_habitats</text>
+<path fill="none" stroke="#56d8b9" stroke-width="2" d="M301.684,-180C301.684,-180 215.684,-180 215.684,-180 209.684,-180 203.684,-174 203.684,-168 203.684,-168 203.684,-156 203.684,-156 203.684,-150 209.684,-144 215.684,-144 215.684,-144 301.684,-144 301.684,-144 307.684,-144 313.684,-150 313.684,-156 313.684,-156 313.684,-168 313.684,-168 313.684,-174 307.684,-180 301.684,-180"/>
+<text text-anchor="middle" x="258.684" y="-159.5" font-family="sans" font-size="10.00">map_cirm_habitats</text>
 </g>
-<!-- 6&#45;&gt;2 -->
-<g id="edge6" class="edge"><title>6&#45;&gt;2</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M523.222,-143.876C498.149,-134.179 466.604,-121.98 440.135,-111.743"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="441.223,-108.411 430.634,-108.068 438.698,-114.94 441.223,-108.411"/>
+<!-- 4&#45;&gt;6 -->
+<g id="edge14" class="edge"><title>4&#45;&gt;6</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M139.335,-215.876C160.935,-206.307 188.035,-194.3 210.951,-184.148"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="212.433,-187.319 220.159,-180.068 209.598,-180.919 212.433,-187.319"/>
+</g>
+<!-- 5&#45;&gt;1 -->
+<g id="edge5" class="edge"><title>5&#45;&gt;1</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M120.212,-143.697C130.066,-134.965 142.17,-124.24 152.88,-114.75"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="155.217,-117.356 160.381,-108.104 150.575,-112.117 155.217,-117.356"/>
+</g>
+<!-- 6&#45;&gt;1 -->
+<g id="edge6" class="edge"><title>6&#45;&gt;1</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M239.156,-143.697C229.302,-134.965 217.198,-124.24 206.488,-114.75"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="208.793,-112.117 198.988,-108.104 204.151,-117.356 208.793,-112.117"/>
 </g>
 <!-- 7 -->
 <g id="node8" class="node"><title>7</title>
-<path fill="none" stroke="#5682d8" stroke-width="2" d="M105.5,-252C105.5,-252 43.5,-252 43.5,-252 37.5,-252 31.5,-246 31.5,-240 31.5,-240 31.5,-228 31.5,-228 31.5,-222 37.5,-216 43.5,-216 43.5,-216 105.5,-216 105.5,-216 111.5,-216 117.5,-222 117.5,-228 117.5,-228 117.5,-240 117.5,-240 117.5,-246 111.5,-252 105.5,-252"/>
-<text text-anchor="middle" x="74.5" y="-231.5" font-family="sans" font-size="10.00">get_cirm_taxa</text>
+<path fill="none" stroke="#567bd8" stroke-width="2" d="M530.684,-252C530.684,-252 390.684,-252 390.684,-252 384.684,-252 378.684,-246 378.684,-240 378.684,-240 378.684,-228 378.684,-228 378.684,-222 384.684,-216 390.684,-216 390.684,-216 530.684,-216 530.684,-216 536.684,-216 542.684,-222 542.684,-228 542.684,-228 542.684,-240 542.684,-240 542.684,-246 536.684,-252 530.684,-252"/>
+<text text-anchor="middle" x="460.684" y="-231.5" font-family="sans" font-size="10.00">get_cirm_yeast_taxa_habitats</text>
 </g>
-<!-- 7&#45;&gt;3 -->
-<g id="edge7" class="edge"><title>7&#45;&gt;3</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M74.5,-215.697C74.5,-207.983 74.5,-198.712 74.5,-190.112"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="78.0001,-190.104 74.5,-180.104 71.0001,-190.104 78.0001,-190.104"/>
+<!-- 7&#45;&gt;2 -->
+<g id="edge7" class="edge"><title>7&#45;&gt;2</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M404.248,-215.96C387.248,-207.895 370.511,-196.33 360.684,-180 352.434,-166.291 352.434,-157.709 360.684,-144 368.746,-130.604 381.457,-120.415 395.148,-112.734"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="396.965,-115.735 404.248,-108.04 393.756,-109.514 396.965,-115.735"/>
 </g>
 <!-- 8 -->
 <g id="node9" class="node"><title>8</title>
-<path fill="none" stroke="#d85656" stroke-width="2" d="M260,-252C260,-252 185,-252 185,-252 179,-252 173,-246 173,-240 173,-240 173,-228 173,-228 173,-222 179,-216 185,-216 185,-216 260,-216 260,-216 266,-216 272,-222 272,-228 272,-228 272,-240 272,-240 272,-246 266,-252 260,-252"/>
-<text text-anchor="middle" x="222.5" y="-231.5" font-family="sans" font-size="10.00">get_cirm_habitat</text>
+<path fill="none" stroke="#56d86b" stroke-width="2" d="M539.684,-180C539.684,-180 381.684,-180 381.684,-180 375.684,-180 369.684,-174 369.684,-168 369.684,-168 369.684,-156 369.684,-156 369.684,-150 375.684,-144 381.684,-144 381.684,-144 539.684,-144 539.684,-144 545.684,-144 551.684,-150 551.684,-156 551.684,-156 551.684,-168 551.684,-168 551.684,-174 545.684,-180 539.684,-180"/>
+<text text-anchor="middle" x="460.684" y="-159.5" font-family="sans" font-size="10.00">map_cirm_yeast_microorganisms</text>
 </g>
-<!-- 8&#45;&gt;4 -->
-<g id="edge8" class="edge"><title>8&#45;&gt;4</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M222.5,-215.697C222.5,-207.983 222.5,-198.712 222.5,-190.112"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="226,-190.104 222.5,-180.104 219,-190.104 226,-190.104"/>
+<!-- 7&#45;&gt;8 -->
+<g id="edge15" class="edge"><title>7&#45;&gt;8</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M460.684,-215.697C460.684,-207.983 460.684,-198.712 460.684,-190.112"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="464.184,-190.104 460.684,-180.104 457.184,-190.104 464.184,-190.104"/>
 </g>
 <!-- 9 -->
 <g id="node10" class="node"><title>9</title>
-<path fill="none" stroke="#9fd856" stroke-width="2" d="M434,-252C434,-252 339,-252 339,-252 333,-252 327,-246 327,-240 327,-240 327,-228 327,-228 327,-222 333,-216 339,-216 339,-216 434,-216 434,-216 440,-216 446,-222 446,-228 446,-228 446,-240 446,-240 446,-246 440,-252 434,-252"/>
-<text text-anchor="middle" x="386.5" y="-231.5" font-family="sans" font-size="10.00">get_cirm_yeast_taxa</text>
+<path fill="none" stroke="#d87d56" stroke-width="2" d="M701.184,-180C701.184,-180 582.184,-180 582.184,-180 576.184,-180 570.184,-174 570.184,-168 570.184,-168 570.184,-156 570.184,-156 570.184,-150 576.184,-144 582.184,-144 582.184,-144 701.184,-144 701.184,-144 707.184,-144 713.184,-150 713.184,-156 713.184,-156 713.184,-168 713.184,-168 713.184,-174 707.184,-180 701.184,-180"/>
+<text text-anchor="middle" x="641.684" y="-159.5" font-family="sans" font-size="10.00">map_cirm_yeast_habitats</text>
+</g>
+<!-- 7&#45;&gt;9 -->
+<g id="edge16" class="edge"><title>7&#45;&gt;9</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M504.962,-215.876C530.035,-206.179 561.58,-193.98 588.049,-183.743"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="589.486,-186.94 597.55,-180.068 586.961,-180.411 589.486,-186.94"/>
 </g>
-<!-- 9&#45;&gt;5 -->
-<g id="edge9" class="edge"><title>9&#45;&gt;5</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M386.5,-215.697C386.5,-207.983 386.5,-198.712 386.5,-190.112"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="390,-190.104 386.5,-180.104 383,-190.104 390,-190.104"/>
+<!-- 8&#45;&gt;2 -->
+<g id="edge8" class="edge"><title>8&#45;&gt;2</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M460.684,-143.697C460.684,-135.983 460.684,-126.712 460.684,-118.112"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="464.184,-118.104 460.684,-108.104 457.184,-118.104 464.184,-118.104"/>
+</g>
+<!-- 9&#45;&gt;2 -->
+<g id="edge9" class="edge"><title>9&#45;&gt;2</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M597.407,-143.876C572.333,-134.179 540.788,-121.98 514.319,-111.743"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="515.407,-108.411 504.818,-108.068 512.882,-114.94 515.407,-108.411"/>
 </g>
 <!-- 10 -->
 <g id="node11" class="node"><title>10</title>
-<path fill="none" stroke="#ced856" stroke-width="2" d="M621,-252C621,-252 514,-252 514,-252 508,-252 502,-246 502,-240 502,-240 502,-228 502,-228 502,-222 508,-216 514,-216 514,-216 621,-216 621,-216 627,-216 633,-222 633,-228 633,-228 633,-240 633,-240 633,-246 627,-252 621,-252"/>
-<text text-anchor="middle" x="567.5" y="-231.5" font-family="sans" font-size="10.00">get_cirm_yeast_habitat</text>
-</g>
-<!-- 10&#45;&gt;6 -->
-<g id="edge10" class="edge"><title>10&#45;&gt;6</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M567.5,-215.697C567.5,-207.983 567.5,-198.712 567.5,-190.112"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="571,-190.104 567.5,-180.104 564,-190.104 571,-190.104"/>
+<path fill="none" stroke="#d8cb56" stroke-width="2" d="M924.684,-252C924.684,-252 790.684,-252 790.684,-252 784.684,-252 778.684,-246 778.684,-240 778.684,-240 778.684,-228 778.684,-228 778.684,-222 784.684,-216 790.684,-216 790.684,-216 924.684,-216 924.684,-216 930.684,-216 936.684,-222 936.684,-228 936.684,-228 936.684,-240 936.684,-240 936.684,-246 930.684,-252 924.684,-252"/>
+<text text-anchor="middle" x="857.684" y="-231.5" font-family="sans" font-size="10.00">get_cirm_cfbp_taxa_habitats</text>
+</g>
+<!-- 10&#45;&gt;3 -->
+<g id="edge10" class="edge"><title>10&#45;&gt;3</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M803.665,-215.962C786.91,-207.851 770.327,-196.258 760.684,-180 748.445,-159.365 761.841,-134.189 776.166,-115.887"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="779.021,-117.928 782.725,-108.002 773.639,-113.452 779.021,-117.928"/>
+</g>
+<!-- 11 -->
+<g id="node12" class="node"><title>11</title>
+<path fill="none" stroke="#b6d856" stroke-width="2" d="M933.684,-180C933.684,-180 781.684,-180 781.684,-180 775.684,-180 769.684,-174 769.684,-168 769.684,-168 769.684,-156 769.684,-156 769.684,-150 775.684,-144 781.684,-144 781.684,-144 933.684,-144 933.684,-144 939.684,-144 945.684,-150 945.684,-156 945.684,-156 945.684,-168 945.684,-168 945.684,-174 939.684,-180 933.684,-180"/>
+<text text-anchor="middle" x="857.684" y="-159.5" font-family="sans" font-size="10.00">map_cirm_cfbp_microorganisms</text>
+</g>
+<!-- 10&#45;&gt;11 -->
+<g id="edge17" class="edge"><title>10&#45;&gt;11</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M857.684,-215.697C857.684,-207.983 857.684,-198.712 857.684,-190.112"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="861.184,-190.104 857.684,-180.104 854.184,-190.104 861.184,-190.104"/>
+</g>
+<!-- 12 -->
+<g id="node13" class="node"><title>12</title>
+<path fill="none" stroke="#d8a456" stroke-width="2" d="M1089.18,-180C1089.18,-180 976.184,-180 976.184,-180 970.184,-180 964.184,-174 964.184,-168 964.184,-168 964.184,-156 964.184,-156 964.184,-150 970.184,-144 976.184,-144 976.184,-144 1089.18,-144 1089.18,-144 1095.18,-144 1101.18,-150 1101.18,-156 1101.18,-156 1101.18,-168 1101.18,-168 1101.18,-174 1095.18,-180 1089.18,-180"/>
+<text text-anchor="middle" x="1032.68" y="-159.5" font-family="sans" font-size="10.00">map_cirm_cfbp_habitats</text>
+</g>
+<!-- 10&#45;&gt;12 -->
+<g id="edge18" class="edge"><title>10&#45;&gt;12</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M900.494,-215.876C924.63,-206.222 954.968,-194.087 980.49,-183.878"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="982.029,-187.032 990.013,-180.068 979.429,-180.533 982.029,-187.032"/>
+</g>
+<!-- 11&#45;&gt;3 -->
+<g id="edge11" class="edge"><title>11&#45;&gt;3</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M843.347,-143.697C836.394,-135.305 827.914,-125.07 820.284,-115.861"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="822.932,-113.571 813.856,-108.104 817.541,-118.038 822.932,-113.571"/>
+</g>
+<!-- 12&#45;&gt;3 -->
+<g id="edge12" class="edge"><title>12&#45;&gt;3</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M975.984,-143.966C943.004,-134.057 901.252,-121.514 866.659,-111.121"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="867.223,-107.636 856.639,-108.111 865.209,-114.34 867.223,-107.636"/>
 </g>
 </g>
 </svg>
diff --git a/docs/3-process-cirm-data.md b/docs/3-process-cirm-data.md
index b161dcb7b1500fdd082546a0a0406cd7cbafc8cb..38c0554686522b82e06be609d2f04d609c024942 100644
--- a/docs/3-process-cirm-data.md
+++ b/docs/3-process-cirm-data.md
@@ -1,5 +1,5 @@
 ## About
-The pipeline extracts microorganisms, habitats from CIRM-BIA and CIRM-Levures data sources.
+The pipeline extracts microorganisms and habitats from the CIRM-BIA, CIRM-Levures and CIRM-CFBP data sources.
 
  <img align="right" width="460" src="3-pipeline.svg">
 <!--![pipeline that processes and prepares data from cirm](3-pipeline.svg)-->
@@ -38,11 +38,13 @@ The pipeline handles the following resources :
 * inputs
     * `corpora/cirm/BIA_2021/florilege_export_final_17_02_21.xlsx` 
     * `corpora/cirm/Levures_2021/Florilege_21012021.xlsx`
+    * `corpora/cirm/CFBP_2020/CFPB_22_sept_2020_Type.xlsx`
     * `ancillaries/OntoBiotope_BioNLP-OST-2019-Habitat.obo`
     * `ancillaries/OntoBiotope_BioNLP-OST-2019-Phenotype.obo` ?
 * outputs
-    * `corpora/florilege/cirm/cirm-results.txt`
+    * `corpora/florilege/cirm/cirm-bia-results.txt`
     * `corpora/florilege/cirm/cirm-yeast-results.txt`
+    * `corpora/florilege/cirm/cirm-cfbp-results.txt`
 * programs
     * `alvisnlp singularity container`
     * `python env`
diff --git a/process_CIRM_corpus.snakefile b/process_CIRM_corpus.snakefile
index 4dc3bdf08755701b7034db80b7dcd9efa0c27057..3da26f833a9c8b6513c2dc0191ea88643b934f56 100644
--- a/process_CIRM_corpus.snakefile
+++ b/process_CIRM_corpus.snakefile
@@ -9,13 +9,14 @@ all
 '''
 rule all:
 	input:
-		'corpora/florilege/cirm/cirm-results.txt',
-		'corpora/florilege/cirm/cirm-yeast-results.txt'
+		'corpora/florilege/cirm/cirm-bia-results.txt',
+		'corpora/florilege/cirm/cirm-yeast-results.txt',
+		'corpora/florilege/cirm/cirm-cfbp-results.txt'
 
 '''
 get taxa and habitats (CIRM BIA)
 '''
-rule get_cirm_taxa:
+rule get_cirm_bia_taxa_habitats:
 	input:
 		file='corpora/cirm/BIA_2021/florilege_export_final_17_02_21.xlsx'
 	params:
@@ -23,8 +24,8 @@ rule get_cirm_taxa:
 		strain_index='1',
 		habitat_index='15'
 	output:
-		taxa='corpora/cirm/taxa.txt',
-		habitats='corpora/cirm/habitats.txt',
+		taxa='corpora/cirm/bia_taxa.txt',
+		habitats='corpora/cirm/bia_habitats.txt',
 		tsv='corpora/cirm/BIA_2021/florilege_export_final_17_02_21.tsv'
 	conda: 'softwares/envs/python3_pandas_env.yaml'
 	shell: """
@@ -34,7 +35,7 @@ rule get_cirm_taxa:
 '''
 get taxa and habitats (CIRM Levures)
 '''
-rule get_cirm_yeast_taxa:
+rule get_cirm_yeast_taxa_habitats:
 	input:
 		file='corpora/cirm/Levures_2021/Florilege_21012021.xlsx'
 	params:
@@ -49,14 +50,34 @@ rule get_cirm_yeast_taxa:
 		python3.7 softwares/Florilege/scripts/preprocess-cirm-yeast.py --input {input.file} --taxa-index {params.taxa_index} --habitat-index {params.habitat_index} --taxa-outfile {output.taxa} --habitat-outfile {output.habitats} --tsv-outfile {output.tsv}
 	      """
 
+'''
+get taxa and habitats (CIRM CFBP)
+'''
+rule get_cirm_cfbp_taxa_habitats:
+	input:
+		file='corpora/cirm/CFBP_2020/CFPB_22_sept_2020_Type.xlsx'
+	params:
+		taxa_index='1',
+		strain_index='0',
+		habitat_index='9,10,14,23'
+	output:
+		taxa='corpora/cirm/cfbp_taxa.txt',
+		habitats='corpora/cirm/cfbp_habitats.txt',
+		tsv='corpora/cirm/CFBP_2020/CFPB_22_sept_2020_Type.tsv'
+	conda: 'softwares/envs/python3_pandas_env.yaml'
+	shell: """
+		python3.7 softwares/Florilege/scripts/preprocess-cirm-cfbp.py --input {input.file} --taxa-index {params.taxa_index} --strain-index {params.strain_index} --habitat-index {params.habitat_index} --taxa-outfile {output.taxa} --habitat-outfile {output.habitats} --tsv-outfile {output.tsv}
+	      """
+
+
 '''
 map microorganisms
 '''
-rule map_cirm_microorganisms:
+rule map_cirm_bia_microorganisms:
 	input:
-		taxa='corpora/cirm/taxa.txt'
+		taxa='corpora/cirm/bia_taxa.txt'
 	output:
-		mapped_taxaids='corpora/cirm/mapped_taxids.txt'
+		mapped_taxaids='corpora/cirm/mapped_bia_taxa.txt'
 	params:
 		plan='plans/map_microorganisms.plan',
 		taxid_microorganisms='ancillaries/ncbi-taxonomy-prefix/taxid_microorganisms.txt',
@@ -91,14 +112,36 @@ rule map_cirm_yeast_microorganisms:
 	{params.plan}
 	"""
 
+
+'''
+map microorganisms (CIRM CFBP)
+'''
+rule map_cirm_cfbp_microorganisms:
+	input:
+		taxa='corpora/cirm/cfbp_taxa.txt'
+	output:
+		mapped_taxa='corpora/cirm/mapped_cfbp_taxa.txt'
+	params:
+		plan='plans/map_microorganisms.plan',
+		taxid_microorganisms='ancillaries/ncbi-taxonomy-prefix/taxid_microorganisms.txt',
+		taxa_id_full='ancillaries/ncbi-taxonomy-prefix/taxa+id_full.txt'
+	singularity:config["SINGULARITY_IMG"]
+	shell: """alvisnlp -J-Xmx32g -cleanTmp -verbose \
+	-alias input {input.taxa} \
+	-alias output {output.mapped_taxa} \
+	-alias taxid_microorganisms {params.taxid_microorganisms} \
+	-alias taxa+id_full {params.taxa_id_full} \
+	{params.plan}
+	"""
+
 '''
 map habitats of microorganisms
 '''
 rule map_cirm_habitats:
 	input:
-		habitats='corpora/cirm/habitats.txt'
+		habitats='corpora/cirm/bia_habitats.txt'
 	output:
-		mapped_habitats='corpora/cirm/mapped_habitats.txt'
+		mapped_habitats='corpora/cirm/mapped_bia_habitats.txt'
 	params:
 		plan='plans/map_habitats.plan',
 		onto='ancillaries/BioNLP-OST+EnovFood-Habitat.obo',
@@ -137,16 +180,39 @@ rule map_cirm_yeast_habitats:
 	{params.plan}
 	"""
 
+'''
+map habitats of microorganisms (CIRM CFBP)
+'''
+rule map_cirm_cfbp_habitats:
+	input:
+		habitats='corpora/cirm/cfbp_habitats.txt'
+	output:
+		mapped_habitats='corpora/cirm/mapped_cfbp_habitats.txt'
+	params:
+		plan='plans/map_habitats.plan',
+		onto='ancillaries/BioNLP-OST+EnovFood-Habitat.obo',
+		tomap='ancillaries/BioNLP-OST+EnovFood-Habitat.tomap',
+		graylist='ancillaries/graylist_extended.heads',
+		emptywords='ancillaries/stopwords_EN.ttg'
+	singularity:config["SINGULARITY_IMG"]
+	shell: """alvisnlp -J-Xmx32g -cleanTmp -verbose \
+	-alias input {input.habitats} \
+	-alias output {output.mapped_habitats} \
+	-alias ontobiotope {params.onto} \
+	-xalias '<ontobiotope-tomap empty-words="{params.emptywords}" graylist="{params.graylist}" whole-proxy-distance="false">{params.tomap}</ontobiotope-tomap>' \
+	{params.plan}
+	"""
+
 '''
 format results
 '''
 rule format_cirm_results:
 	input:
 		file='corpora/cirm/BIA_2021/florilege_export_final_17_02_21.tsv',
-		taxa='corpora/cirm/mapped_taxids.txt',
-		habitats='corpora/cirm/mapped_habitats.txt'
+		taxa='corpora/cirm/mapped_bia_taxa.txt',
+		habitats='corpora/cirm/mapped_bia_habitats.txt'
 	output:
-		result='corpora/florilege/cirm/cirm-results.txt'
+		result='corpora/florilege/cirm/cirm-bia-results.txt'
 	params:
 		taxa_index='2',
 		strain_index='1',
@@ -170,4 +236,21 @@ rule format_cirm_yeast_results:
 	conda: 'softwares/envs/obo-utils-env.yaml'
 	shell: 'python softwares/Florilege/scripts/format-cirm-yeast-results.py --cirm {input.file} --taxa {input.taxa} --habitats {input.habitats} --taxa-index {params.taxa_index} --habitat-index {params.habitat_index} > {output.result}'
 
+'''
+format results (CIRM CFBP)
+'''
+rule format_cirm_cfbp_results:
+	input:
+		file='corpora/cirm/CFBP_2020/CFPB_22_sept_2020_Type.tsv',
+		taxa='corpora/cirm/mapped_cfbp_taxa.txt',
+		habitats='corpora/cirm/mapped_cfbp_habitats.txt'
+	output:
+		result='corpora/florilege/cirm/cirm-cfbp-results.txt'
+	params:
+		taxa_index='1',
+		strain_index='0',
+		habitat_index='9,10,14,23'
+	conda: 'softwares/envs/obo-utils-env.yaml'
+	shell: 'python softwares/Florilege/scripts/format-cirm-cfbp-results.py --cirm {input.file} --taxa {input.taxa} --habitats {input.habitats} --taxa-index {params.taxa_index} --strain-index {params.strain_index} --habitat-index {params.habitat_index} > {output.result}'
+
 
diff --git a/softwares/Florilege/scripts/format-cirm-cfbp-results.py b/softwares/Florilege/scripts/format-cirm-cfbp-results.py
new file mode 100644
index 0000000000000000000000000000000000000000..0364f40085f0e69906ed00a37ed8494835b8cecf
--- /dev/null
+++ b/softwares/Florilege/scripts/format-cirm-cfbp-results.py
@@ -0,0 +1,104 @@
+import re
+import argparse
+
+# Command-line options; the defaults point at the CIRM CFBP "Type" files.
+parser = argparse.ArgumentParser()
+parser.add_argument('--cirm', action='store', default='corpora/cirm/CFBP_2020/CFPB_22_sept_2020_Type.tsv', help='CIRM file')
+parser.add_argument('--taxa', action='store', default='corpora/cirm/mapped_cfbp_taxa.txt', help='mapped taxa file')
+parser.add_argument('--habitats', action='store', default='corpora/cirm/mapped_cfbp_habitats.txt', help='mapped habitat file')
+parser.add_argument('--taxa-index', action='store', default='1', help='index of taxa column')
+parser.add_argument('--strain-index', action='store', default='0', help='index of strain column')
+parser.add_argument('--habitat-index', action='store', default='9,10,14,23', help='index(es) of habitat column(s) (comma-delimited if multiple indexes)')
+
+args = parser.parse_args()
+
+# Column indexes arrive as text and are zero-based positions in each tab-split row.
+cirm_file, taxa_file, habitat_file = args.cirm, args.taxa, args.habitats
+taxa_index = int(args.taxa_index)
+strain_index = int(args.strain_index)
+habitat_indexes = [int(index) for index in args.habitat_index.split(',')]
+
+# Map each habitat surface form to the set of concept records mapped to it.
+# A kept line is "<surface form>\t<rest>", where <rest> starts with a non-blank
+# character; <rest> is stored verbatim, so one form may carry several records.
+habitat_dict = {}
+habitat_line = re.compile(r'([^\t]+)\t(\S.+)')  # compiled once, not per line
+# "with" guarantees the file is closed even if reading fails part-way.
+with open(habitat_file, "r") as hf:
+    for line in hf:
+        m = habitat_line.match(line.rstrip())
+        if m is None:
+            continue  # lines that do not fit the pattern are skipped
+        habitat, habitat_info = m.group(1), m.group(2)
+        # setdefault creates the set the first time a surface form is seen
+        habitat_dict.setdefault(habitat, set()).add(habitat_info)
+
+# Map each taxon surface form to its mapping record (the text after the first
+# tab); when a surface form occurs on several lines the last one wins.
+taxa_dict = {}
+taxon_line = re.compile(r'([^\t]+)\t(\S.+)')  # compiled once, not per line
+# "with" guarantees the file is closed even if reading fails part-way.
+with open(taxa_file, "r") as tf:
+    for line in tf:
+        m = taxon_line.match(line.rstrip())
+        if m:
+            # group(1) = surface form, group(2) = remainder of the line
+            taxa_dict[m.group(1)] = m.group(2)
+
+def add_entry(mappings, habitat, concepts, taxid, taxon, name, path):
+    """Record one taxon/habitat pairing per concept in ``mappings``.
+
+    Concepts are 4-field tab-separated records (first field unused); entries are
+    keyed "<taxid>-<concept id>". New keys store the concept and taxon details,
+    known keys only gain surface forms. ``mappings`` is updated in place and returned.
+    """
+    for record in concepts:
+        _, concept_id, concept_name, concept_path = record.split('\t')
+        entry = mappings.get(taxid + "-" + concept_id)
+        if entry is not None:
+            entry['habitat']['surface'].add(habitat)
+            entry['taxon']['surface'].add(taxon)
+            continue
+        mappings[taxid + "-" + concept_id] = {
+            'habitat': {'concept_id': concept_id, 'concept_name': concept_name,
+                        'concept_path': concept_path, 'surface': {habitat}},
+            'taxon': {'taxid': taxid, 'canonical_name': name, 'path': path,
+                      'surface': {taxon}}}
+    return mappings
+
+
+# Pair the taxon of every CIRM row with that row's habitats and collect the unique pairs.
+unique_mappings = {}
+# "with" guarantees the file is closed even if a row fails to parse.
+with open(cirm_file, "r") as cf:
+    next(cf, None)  # skip the header row (a no-op on an empty file)
+    for line in cf:
+        fields = line.rstrip("\n").split("\t")
+        taxon = re.sub(r'\s+', ' ', fields[taxa_index]).strip().replace('"', '')
+        taxon2 = re.sub(r'( [0-9\.]+)$', r'', taxon)  # taxon without a trailing " <digits/dots>" token
+        strain = re.sub(r'\s+', ' ', fields[strain_index]).strip()
+        # Candidate surface forms, tried in this order; the first one present in
+        # taxa_dict is used: taxon + code, taxon2 + code, bare code, taxon, taxon2.
+        codes = ["CFBP " + strain, "CFBP" + strain, "CFBP:" + strain]
+        variants = [t + " " + c for t in (taxon, taxon2) for c in codes] + codes + [taxon, taxon2]
+        matched = next((v for v in variants if v in taxa_dict), None)
+        if matched is None:
+            continue  # no mapped taxon for this row: nothing to report
+        taxid, name, path = taxa_dict[matched].split("\t")
+        for index in habitat_indexes:
+            # collapse whitespace, trim, then drop one trailing period
+            habitat = re.sub(r'\.$', '', re.sub(r'\s+', ' ', fields[index]).strip())
+            if habitat in habitat_dict:
+                add_entry(unique_mappings, habitat, habitat_dict[habitat], taxid, matched, name, path)
+
+# Emit one tab-separated line per unique taxon/habitat-concept pair. Surface
+# forms are kept in sets, whose iteration order can change between runs, so they
+# are sorted before being joined with "|" to make the output reproducible.
+for mapping in unique_mappings.values():
+    taxon_part, habitat_part = mapping['taxon'], mapping['habitat']
+    print("\t".join(('|'.join(sorted(taxon_part['surface'])),
+                     taxon_part['canonical_name'], taxon_part['taxid'], taxon_part['path'],
+                     '|'.join(sorted(habitat_part['surface'])),
+                     habitat_part['concept_id'], habitat_part['concept_name'],
+                     habitat_part['concept_path'])))
+
diff --git a/softwares/Florilege/scripts/preprocess-cirm-cfbp.py b/softwares/Florilege/scripts/preprocess-cirm-cfbp.py
new file mode 100644
index 0000000000000000000000000000000000000000..c24e2f5276bf4c67b317f7368c8956c3939c1f89
--- /dev/null
+++ b/softwares/Florilege/scripts/preprocess-cirm-cfbp.py
@@ -0,0 +1,51 @@
+import pandas as pd
+import argparse
+import re
+
+# Command-line options; column indexes are zero-based positions in the sheet.
+parser = argparse.ArgumentParser()
+parser.add_argument('--input', action='store', default='corpora/cirm/CFBP_2020/CFPB_22_sept_2020_Type.xlsx', help='CIRM file')
+parser.add_argument('--taxa-index', action='store', default='1', help='index of taxa column')
+parser.add_argument('--strain-index', action='store', default='0', help='index of strain column')
+parser.add_argument('--habitat-index', action='store', default='9,10,14,23', help='index(es) of habitat column(s) (comma-delimited if multiple indexes)')
+parser.add_argument('--taxa-outfile', action='store', default='corpora/cirm/cfbp_taxa.txt', help='output list of taxa')
+parser.add_argument('--habitat-outfile', action='store', default='corpora/cirm/cfbp_habitats.txt', help='output list of habitats')
+parser.add_argument('--tsv-outfile', action='store', default='corpora/cirm/CFBP_2020/CFPB_22_sept_2020_Type.tsv', help='output tab-delimited converted file')
+args = parser.parse_args()
+
+taxa_index = int(args.taxa_index)
+strain_index = int(args.strain_index)
+habitat_indexes = [int(index) for index in args.habitat_index.split(',')]  # a list, not a one-shot map() iterator
+
+cirm_data = pd.read_excel(args.input, dtype='object')
+
+# Collect every spelling under which a strain may be looked up: the taxon name
+# (as written, and without a trailing numeric token) alone or followed by the
+# strain code written "CFBP <n>", "CFBP<n>" or "CFBP:<n>", plus the bare code.
+# Whitespace is collapsed and trimmed the same way format-cirm-cfbp-results.py
+# does when it rebuilds these spellings, so that its look-ups can match the
+# names written here even when a cell has stray or repeated blanks.
+taxa_dict = set()
+taxon_column = cirm_data.iloc[:, taxa_index]
+strain_column = cirm_data.iloc[:, strain_index]
+for raw_taxon, raw_strain in zip(taxon_column, strain_column):
+    taxon = re.sub(r'\s+', ' ', str(raw_taxon)).strip().replace('"', '')
+    strain = re.sub(r'\s+', ' ', str(raw_strain)).strip()
+    taxon2 = re.sub(r'( [0-9\.]+)$', r'', taxon)
+    codes = ["CFBP " + strain, "CFBP" + strain, "CFBP:" + strain]
+    taxa_dict.update(codes)
+    for name in (taxon, taxon2):
+        taxa_dict.update([name] + [name + " " + code for code in codes])
+
+# Sorted so the generated list is identical from one run to the next.
+with open(args.taxa_outfile, 'w') as f:
+    f.write("\n".join(sorted(taxa_dict)) + "\n")
+
+# Same clean-up as format-cirm-cfbp-results.py applies before its look-ups
+# (collapse whitespace, trim, drop one trailing period); str() guards against
+# non-text cells, which dtype='object' may leave as numbers or dates.
+with open(args.habitat_outfile, 'w') as f:
+    for index in habitat_indexes:
+        for habitat in cirm_data.iloc[:, index].dropna().unique():
+            f.write(re.sub(r'\.$', '', re.sub(r'\s+', ' ', str(habitat)).strip()) + "\n")
+cirm_data.to_csv(args.tsv_outfile, sep="\t", index=False)  # tab-delimited copy of the sheet