in progress...
[jalview.git] / forester / java / src / org / forester / application / decoratorX.java
1 // java -Xmx2048m -cp
2 // ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar
3 // org.forester.application.decoratorX
4 // RRMa_ALL_plus_RRMa_ee3_50_hmmalign_05_40_fme_with_seqs_2.phylo.xml
5 // nature12311-s3_cz_4.txt x1 x2
6
7 package org.forester.application;
8
9 import java.io.File;
10
11 import org.forester.io.parsers.phyloxml.PhyloXmlParser;
12 import org.forester.io.writers.PhylogenyWriter;
13 import org.forester.phylogeny.Phylogeny;
14 import org.forester.phylogeny.PhylogenyNode;
15 import org.forester.phylogeny.data.Annotation;
16 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
17 import org.forester.phylogeny.factories.PhylogenyFactory;
18 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
19 import org.forester.util.BasicTable;
20 import org.forester.util.BasicTableParser;
21 import org.forester.util.CommandLineArguments;
22
23 public class decoratorX {
24
25     private static final int SEQ_NAME_COLUMN = 1;
26     private static final int SPECIES_COLUMN  = 2;
27     private static final int SEQ_COLUMN      = 3;
28     private static final int TARGET_COLUMN   = 4;
29
30     public static void main( final String args[] ) {
31         File intree = null;
32         File outtree1 = null;
33         File outtree2 = null;
34         File intable = null;
35         try {
36             CommandLineArguments cla = null;
37             cla = new CommandLineArguments( args );
38             intree = cla.getFile( 0 );
39             intable = cla.getFile( 1 );
40             outtree1 = cla.getFile( 2 );
41             outtree2 = cla.getFile( 3 );
42             if ( outtree1.exists() ) {
43                 System.out.println( outtree1 + " already exists" );
44                 System.exit( -1 );
45             }
46             if ( outtree2.exists() ) {
47                 System.out.println( outtree2 + " already exists" );
48                 System.exit( -1 );
49             }
50             final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
51             final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParserXsdValidating();
52             final Phylogeny phy = factory.create( intree, xml_parser )[ 0 ];
53             final BasicTable<String> t = BasicTableParser.parse( intable, '\t' );
54             final PhylogenyNodeIterator it = phy.iteratorExternalForward();
55             int i = 0;
56             while ( it.hasNext() ) {
57                 final PhylogenyNode node = it.next();
58                 processNode( node, t );
59                 i++;
60             }
61             final PhylogenyWriter writer1 = new PhylogenyWriter();
62             writer1.toPhyloXML( outtree1, phy, 0 );
63             final PhylogenyNodeIterator it2 = phy.iteratorExternalForward();
64             while ( it2.hasNext() ) {
65                 final PhylogenyNode node = it2.next();
66                 processNode2( node, phy );
67             }
68             final PhylogenyWriter writer2 = new PhylogenyWriter();
69             writer2.toPhyloXML( outtree2, phy, 0 );
70         }
71         catch ( final Exception e ) {
72             System.out.println( e.getLocalizedMessage() );
73             System.exit( -1 );
74         }
75     }
76
77     private static void processNode( final PhylogenyNode node, final BasicTable<String> t ) throws Exception {
78         final String node_seq = node.getNodeData().getSequence().getMolecularSequence().toUpperCase();
79         boolean found = false;
80         String found_row = "";
81         String found_protein_name = "";
82         String found_species = "";
83         for( int row = 0; row < t.getNumberOfRows(); ++row ) {
84             final String table_seq = t.getValueAsString( SEQ_COLUMN, row ).toUpperCase();
85             if ( table_seq.contains( node_seq ) ) {
86                 if ( found ) {
87                     if ( !found_protein_name.equals( t.getValueAsString( SEQ_NAME_COLUMN, row ) )
88                             || !found_species.equals( t.getValueAsString( SPECIES_COLUMN, row ) ) ) {
89                         throw new Exception( "Sequence from node " + node + " is not unique: " + node_seq + "\n"
90                                 + "Already found in row " + found_row );
91                     }
92                 }
93                 else {
94                     found = true;
95                     found_row = t.getRowAsString( row, ", " );
96                     found_protein_name = t.getValueAsString( SEQ_NAME_COLUMN, row );
97                     found_species = t.getValueAsString( SPECIES_COLUMN, row );
98                 }
99                 final Annotation annotation = new Annotation( "target", t.getValueAsString( TARGET_COLUMN, row ) );
100                 node.getNodeData().getSequence().addAnnotation( annotation );
101                 System.out.println( node + "->" + annotation );
102             }
103         }
104     }
105
106     private static void processNode2( final PhylogenyNode node, final Phylogeny t ) {
107         if ( ( node.getNodeData().getSequence().getAnnotations() == null )
108                 || node.getNodeData().getSequence().getAnnotations().isEmpty() ) {
109             t.deleteSubtree( node, true );
110         }
111     }
112 }