inprogress
[jalview.git] / forester / java / src / org / forester / application / decoratorX.java
1 // java -Xmx2048m -cp
2 // ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar
3 // org.forester.application.decoratorX
4 // RRMa_ALL_plus_RRMa_ee3_50_hmmalign_05_40_fme_with_seqs_2.phylo.xml
5 // nature12311-s3_cz_4.txt x
6
7 package org.forester.application;
8
9 import java.io.File;
10
11 import org.forester.io.parsers.phyloxml.PhyloXmlParser;
12 import org.forester.io.writers.PhylogenyWriter;
13 import org.forester.phylogeny.Phylogeny;
14 import org.forester.phylogeny.PhylogenyNode;
15 import org.forester.phylogeny.data.Annotation;
16 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
17 import org.forester.phylogeny.factories.PhylogenyFactory;
18 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
19 import org.forester.util.BasicTable;
20 import org.forester.util.BasicTableParser;
21 import org.forester.util.CommandLineArguments;
22
23 public class decoratorX {
24
25     private static final int SEQ_NAME_COLUMN = 1;
26     private static final int SPECIES_COLUMN  = 2;
27     private static final int SEQ_COLUMN      = 3;
28     private static final int TARGET_COLUMN   = 4;
29
30     public static void main( final String args[] ) {
31         File intree = null;
32         File outtree1 = null;
33         File outtree2 = null;
34         File intable = null;
35         try {
36             CommandLineArguments cla = null;
37             cla = new CommandLineArguments( args );
38             intree = cla.getFile( 0 );
39             intable = cla.getFile( 1 );
40             outtree1 = cla.getFile( 2 );
41             outtree2 = cla.getFile( 3 );
42             if ( outtree1.exists() ) {
43                 System.out.println( outtree1 + " already exists" );
44                 System.exit( -1 );
45             }
46             if ( outtree2.exists() ) {
47                 System.out.println( outtree2 + " already exists" );
48                 System.exit( -1 );
49             }
50             final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
51             final PhyloXmlParser xml_parser = new PhyloXmlParser();
52             final Phylogeny phy = factory.create( intree, xml_parser )[ 0 ];
53             final BasicTable<String> t = BasicTableParser.parse( intable, '\t' );
54             //  System.out.println( t.toString() );
55             final PhylogenyNodeIterator it = phy.iteratorExternalForward();
56             int i = 0;
57             while ( it.hasNext() ) {
58                 final PhylogenyNode node = it.next();
59                 processNode( node, t );
60                 i++;
61             }
62             final PhylogenyWriter writer1 = new PhylogenyWriter();
63             writer1.toPhyloXML( outtree1, phy, 0 );
64             final PhylogenyNodeIterator it2 = phy.iteratorExternalForward();
65             while ( it2.hasNext() ) {
66                 final PhylogenyNode node = it2.next();
67                 processNode2( node, phy );
68             }
69             final PhylogenyWriter writer2 = new PhylogenyWriter();
70             writer2.toPhyloXML( outtree2, phy, 0 );
71         }
72         catch ( final Exception e ) {
73             System.out.println( e.getLocalizedMessage() );
74             System.exit( -1 );
75         }
76     }
77
78     private static void processNode( final PhylogenyNode node, final BasicTable<String> t ) throws Exception {
79         final String node_seq = node.getNodeData().getSequence().getMolecularSequence().toUpperCase();
80         boolean found = false;
81         String found_row = "";
82         String found_protein_name = "";
83         String found_species = "";
84         for( int row = 0; row < t.getNumberOfRows(); ++row ) {
85             final String table_seq = t.getValueAsString( SEQ_COLUMN, row ).toUpperCase();
86             if ( table_seq.contains( node_seq ) ) {
87                 if ( found ) {
88                     if ( !found_protein_name.equals( t.getValueAsString( SEQ_NAME_COLUMN, row ) )
89                             || !found_species.equals( t.getValueAsString( SPECIES_COLUMN, row ) ) ) {
90                         throw new Exception( "Sequence from node " + node + " is not unique: " + node_seq + "\n"
91                                 + "Already found in row " + found_row );
92                     }
93                 }
94                 else {
95                     found = true;
96                     found_row = t.getRowAsString( row, ", " );
97                     found_protein_name = t.getValueAsString( SEQ_NAME_COLUMN, row );
98                     found_species = t.getValueAsString( SPECIES_COLUMN, row );
99                 }
100                 final Annotation annotation = new Annotation( "target", t.getValueAsString( TARGET_COLUMN, row ) );
101                 node.getNodeData().getSequence().addAnnotation( annotation );
102                 System.out.println( node + "->" + annotation );
103             }
104         }
105         // if ( !found ) {
106         //     throw new Exception( "Sequence from node " + node + " not found: " + node_seq );
107         // }
108     }
109
110     private static void processNode2( final PhylogenyNode node, final Phylogeny t ) {
111         if ( ( node.getNodeData().getSequence().getAnnotations() == null )
112                 || node.getNodeData().getSequence().getAnnotations().isEmpty() ) {
113             t.deleteSubtree( node, true );
114         }
115     }
116 }