1 // javac -cp ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar
2 // ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/org/forester/applications/tax_code_cleaner.java
4 // /home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/:/home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar
5 // org.forester.applications.tax_code_cleaner
7 package org.forester.applications;
10 import java.util.regex.Pattern;
12 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
13 import org.forester.io.parsers.phyloxml.PhyloXmlParser;
14 import org.forester.io.writers.PhylogenyWriter;
15 import org.forester.phylogeny.Phylogeny;
16 import org.forester.phylogeny.PhylogenyNode;
17 import org.forester.phylogeny.data.Taxonomy;
18 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
19 import org.forester.phylogeny.factories.PhylogenyFactory;
20 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
21 import org.forester.util.CommandLineArguments;
22 import org.forester.util.ForesterUtil;
24 public class tax_code_cleaner {
// NOTE(review): string constant "b_"; the name suggests a prefix, but no use
// of it appears in the nearby code -- confirm it is still needed.
26 private final static String BASE = "b_";
/**
 * Command-line entry point.
 *
 * Obtains an input and an output location from the command-line arguments
 * (positions 0 and 1, via CommandLineArguments.getFile), parses the input
 * with an XSD-validating phyloXML parser, passes every node of the first
 * parsed phylogeny to processNode (visited in post-order), and writes that
 * phylogeny to the output location as phyloXML.
 *
 * NOTE(review): the declarations of `in`, `out` and `i`, the opening of the
 * try block, and the closing braces are not among the lines shown here --
 * confirm them against the complete file.
 *
 * @param args command-line arguments; positions 0 and 1 are read as the
 *             input and the output respectively
 */
28 public static void main( final String args[] ) {
32 CommandLineArguments cla = null;
33 cla = new CommandLineArguments( args );
// Positional arguments: 0 is what gets parsed, 1 is where the result is written.
34 in = cla.getFile( 0 );
35 out = cla.getFile( 1 );
// Disabled guard against an already existing output. With it commented out,
// nothing in the lines shown here stops the writer from being handed an
// existing `out` -- NOTE(review): confirm whether overwriting is intended.
36 // if ( out.exists() ) {
37 // System.out.println( out + " already exists" );
40 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
// Parser is created through the XSD-validating factory method.
41 final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParserXsdValidating();
42 final Phylogeny[] phylogenies_0 = factory.create( in, xml_parser );
// Only the first phylogeny returned by the factory is processed and written;
// any further elements of the array are ignored.
43 final Phylogeny phylogeny_0 = phylogenies_0[ 0 ];
44 final PhylogenyNodeIterator it = phylogeny_0.iteratorPostorder();
// Hand every node to processNode, in the order the post-order iterator yields them.
46 while ( it.hasNext() ) {
47 final PhylogenyNode node = it.next();
// NOTE(review): `i` is declared (and possibly updated) on lines not shown here.
48 processNode( node, i );
51 final PhylogenyWriter writer = new PhylogenyWriter();
// NOTE(review): the meaning of the trailing 0 is defined by PhylogenyWriter,
// presumably an indentation setting -- confirm against its documentation.
52 writer.toPhyloXML( out, phylogeny_0, 0 );
// Any exception raised in the guarded section is reported by printing only
// its localized message to standard output; no stack trace is printed here.
54 catch ( final Exception e ) {
55 System.out.println( e.getLocalizedMessage() );
61 private static void processNode( final PhylogenyNode node, final int i ) throws PhyloXmlDataFormatException {
62 if ( node.isExternal() ) {
63 if ( node.getNodeData().isHasTaxonomy() ) {
64 final Taxonomy t = node.getNodeData().getTaxonomy();
65 if ( !ForesterUtil.isEmpty( t.getTaxonomyCode() ) ) {
66 final String tc = t.getTaxonomyCode();
67 if ( tc.equals( "ACRALC" ) ) {
68 t.setScientificName( "Acremonium alcalophilum" );
69 t.setTaxonomyCode( "AALXX" );
71 else if ( tc.equals( "AMPQU" ) ) {
72 t.setScientificName( "Amphimedon queenslandica" );
73 t.setTaxonomyCode( "AMPQE" );
75 else if ( tc.equals( "AQUAE" ) ) {
76 t.setScientificName( "Aquifex aeolicus (strain VF5)" );
78 else if ( tc.equals( "ASTSPC" ) ) {
79 t.setScientificName( "Asterochloris sp. Cgr/DA1pho" );
80 t.setTaxonomyCode( "ASCXX" );
82 else if ( tc.equals( "BAUCOM" ) ) {
83 t.setScientificName( "Baudoinia compniacensis" );
84 t.setTaxonomyCode( "BCOXX" );
86 else if ( tc.equals( "CAP" ) ) {
87 t.setScientificName( "Capitella sp.1" );
88 t.setTaxonomyCode( "CTEXX" );
90 else if ( tc.equals( "CAPOWC" ) ) {
91 t.setScientificName( "Capsaspora owczarzaki (strain ATCC 30864)" );
92 t.setTaxonomyCode( "CAPO3" );
94 else if ( tc.equals( "CHLVUL" ) ) {
95 t.setScientificName( "Chlorella variabilis" );
96 t.setTaxonomyCode( "CHLVA" );
98 else if ( tc.equals( "CITCLE" ) ) {
99 t.setScientificName( "Citrus clementina" );
100 t.setTaxonomyCode( "CCLXX" );
102 else if ( tc.equals( "CLAGRA" ) ) {
103 t.setScientificName( "Cladonia grayi" );
104 t.setTaxonomyCode( "" );
106 else if ( tc.equals( "COEREV" ) ) {
107 t.setScientificName( "Coemansia reversa" );
108 t.setTaxonomyCode( "CREXX" );
110 else if ( tc.equals( "CONPUT" ) ) {
111 t.setScientificName( "Coniophora puteana" );
112 t.setTaxonomyCode( "CPUXX" );
114 else if ( tc.equals( "DICSQU" ) ) {
115 t.setScientificName( "Dichomitus squalens" );
116 t.setTaxonomyCode( "DICSQ" );
118 else if ( tc.equals( "FOMPIN" ) ) {
119 t.setScientificName( "Fomitopsis pinicola" );
120 t.setTaxonomyCode( "FPIXX" );
122 else if ( tc.equals( "GONPRO" ) ) {
123 t.setScientificName( "Gonapodya prolifera" );
124 t.setTaxonomyCode( "GONPR" );
126 else if ( tc.equals( "GYMLUX" ) ) {
127 t.setScientificName( "Gymnopus luxurians" );
128 t.setTaxonomyCode( "" );
130 else if ( tc.equals( "HYDPIN" ) ) {
131 t.setScientificName( "Hydnomerulius pinastri" );
132 t.setTaxonomyCode( "" );
134 else if ( tc.equals( "JAAARG" ) ) {
135 t.setScientificName( "Jaapia argillacea" );
136 t.setTaxonomyCode( "" );
138 else if ( tc.equals( "MYCPOP" ) ) {
139 t.setScientificName( "Mycosphaerella populorum" );
140 t.setTaxonomyCode( "MYCPS" );
142 else if ( tc.equals( "MYCTHE" ) ) {
143 t.setScientificName( "Myceliophthora thermophila" );
144 t.setTaxonomyCode( "THIHA" );
146 else if ( tc.equals( "OIDMAI" ) ) {
147 t.setScientificName( "Oidiodendron maius" );
148 t.setTaxonomyCode( "" );
150 else if ( tc.equals( "PANVIR" ) ) {
151 t.setScientificName( "Panicum virgatum" );
152 t.setTaxonomyCode( "PANVG" );
154 else if ( tc.equals( "PIRSPE" ) ) {
155 t.setScientificName( "Piromyces sp. E2" );
156 t.setTaxonomyCode( "PIRSE" );
158 else if ( tc.equals( "SAICOM" ) ) {
159 t.setScientificName( "Saitoella complicata" );
160 t.setTaxonomyCode( "" );
162 else if ( tc.equals( "SERLAC" ) ) {
163 t.setScientificName( "Serpula lacrymans" );
164 t.setTaxonomyCode( "SERL9" );
166 else if ( tc.equals( "SPHARC" ) ) {
167 t.setScientificName( "Sphaeroforma arctica" );
168 t.setTaxonomyCode( "SARXX" );
170 else if ( tc.equals( "THETRA" ) ) {
171 t.setScientificName( "Thecamonas trahens" );
172 t.setTaxonomyCode( "TTRXX" );
174 else if ( tc.equals( "THITER" ) ) {
175 t.setScientificName( "Thielavia terrestris (strain ATCC 38088 / NRRL 8126)" );
176 t.setTaxonomyCode( "THITE" );
178 else if ( tc.equals( "WOLCOC" ) ) {
179 t.setScientificName( "Wolfiporia cocos MD-104 SS10" );
180 t.setTaxonomyCode( "WOLCO" );
182 else if ( tc.equals( "XANPAR" ) ) {
183 t.setScientificName( "Xanthoria parietina 46-1" );
184 t.setTaxonomyCode( "" );
186 else if ( tc.length() == 6 ) {
187 final Pattern p = Pattern.compile( "[A-Z9][A-Z]{2}[A-Z0-9]{2}\\d" );
188 if ( p.matcher( tc ).matches() ) {
189 t.setTaxonomyCode( tc.substring( 0, 5 ) );