inprogress
[jalview.git] / forester / java / src / org / forester / application / decoratorX.java
index 916a4b7..58a2b00 100644 (file)
@@ -2,7 +2,7 @@
 // ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar
 // org.forester.application.decoratorX
 // RRMa_ALL_plus_RRMa_ee3_50_hmmalign_05_40_fme_with_seqs_2.phylo.xml
-// nature12311-s3_cz_4.txt x
+// nature12311-s3_cz_4.txt x1 x2
 
 package org.forester.application;
 
@@ -22,28 +22,35 @@ import org.forester.util.CommandLineArguments;
 
 public class decoratorX {
 
-    private static final int SEQ_COLUMN    = 3;
-    private static final int TARGET_COLUMN = 4;
+    private static final int SEQ_NAME_COLUMN = 1;
+    private static final int SPECIES_COLUMN  = 2;
+    private static final int SEQ_COLUMN      = 3;
+    private static final int TARGET_COLUMN   = 4;
 
     public static void main( final String args[] ) {
         File intree = null;
-        File outtree = null;
+        File outtree1 = null;
+        File outtree2 = null;
         File intable = null;
         try {
             CommandLineArguments cla = null;
             cla = new CommandLineArguments( args );
             intree = cla.getFile( 0 );
             intable = cla.getFile( 1 );
-            outtree = cla.getFile( 2 );
-            if ( outtree.exists() ) {
-                System.out.println( outtree + " already exists" );
+            outtree1 = cla.getFile( 2 );
+            outtree2 = cla.getFile( 3 );
+            if ( outtree1.exists() ) {
+                System.out.println( outtree1 + " already exists" );
+                System.exit( -1 );
+            }
+            if ( outtree2.exists() ) {
+                System.out.println( outtree2 + " already exists" );
                 System.exit( -1 );
             }
             final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
-            final PhyloXmlParser xml_parser = new PhyloXmlParser();
+            final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParserXsdValidating();
             final Phylogeny phy = factory.create( intree, xml_parser )[ 0 ];
             final BasicTable<String> t = BasicTableParser.parse( intable, '\t' );
-            //  System.out.println( t.toString() );
             final PhylogenyNodeIterator it = phy.iteratorExternalForward();
             int i = 0;
             while ( it.hasNext() ) {
@@ -51,17 +58,18 @@ public class decoratorX {
                 processNode( node, t );
                 i++;
             }
+            final PhylogenyWriter writer1 = new PhylogenyWriter();
+            writer1.toPhyloXML( outtree1, phy, 0 );
             final PhylogenyNodeIterator it2 = phy.iteratorExternalForward();
             while ( it2.hasNext() ) {
                 final PhylogenyNode node = it2.next();
                 processNode2( node, phy );
             }
-            final PhylogenyWriter writer = new PhylogenyWriter();
-            writer.toPhyloXML( outtree, phy, 0 );
+            final PhylogenyWriter writer2 = new PhylogenyWriter();
+            writer2.toPhyloXML( outtree2, phy, 0 );
         }
         catch ( final Exception e ) {
             System.out.println( e.getLocalizedMessage() );
-            e.printStackTrace();
             System.exit( -1 );
         }
     }
@@ -69,21 +77,30 @@ public class decoratorX {
     private static void processNode( final PhylogenyNode node, final BasicTable<String> t ) throws Exception {
         final String node_seq = node.getNodeData().getSequence().getMolecularSequence().toUpperCase();
         boolean found = false;
-        for( int col = 0; col < t.getNumberOfRows(); ++col ) {
-            final String table_seq = t.getValueAsString( SEQ_COLUMN, col ).toUpperCase();
+        String found_row = "";
+        String found_protein_name = "";
+        String found_species = "";
+        for( int row = 0; row < t.getNumberOfRows(); ++row ) {
+            final String table_seq = t.getValueAsString( SEQ_COLUMN, row ).toUpperCase();
             if ( table_seq.contains( node_seq ) ) {
                 if ( found ) {
-                    // throw new Exception( "Sequence from node " + node + " is not unique: " + node_seq );
+                    if ( !found_protein_name.equals( t.getValueAsString( SEQ_NAME_COLUMN, row ) )
+                            || !found_species.equals( t.getValueAsString( SPECIES_COLUMN, row ) ) ) {
+                        throw new Exception( "Sequence from node " + node + " is not unique: " + node_seq + "\n"
+                                + "Already found in row " + found_row );
+                    }
+                }
+                else {
+                    found = true;
+                    found_row = t.getRowAsString( row, ", " );
+                    found_protein_name = t.getValueAsString( SEQ_NAME_COLUMN, row );
+                    found_species = t.getValueAsString( SPECIES_COLUMN, row );
                 }
-                found = true;
-                final Annotation annotation = new Annotation( "target", t.getValueAsString( TARGET_COLUMN, col ) );
+                final Annotation annotation = new Annotation( "target", t.getValueAsString( TARGET_COLUMN, row ) );
                 node.getNodeData().getSequence().addAnnotation( annotation );
                 System.out.println( node + "->" + annotation );
             }
         }
-        // if ( !found ) {
-        //     throw new Exception( "Sequence from node " + node + " not found: " + node_seq );
-        // }
     }
 
     private static void processNode2( final PhylogenyNode node, final Phylogeny t ) {