forester/java/src/org/forester/application/decoratorX.java

   1 // java -Xmx2048m -cp
   2 // ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar
   3 // org.forester.application.decoratorX
   4 // RRMa_ALL_plus_RRMa_ee3_50_hmmalign_05_40_fme_with_seqs_2.phylo.xml
   5 // nature12311-s3_cz_4.txt x1 x2
   6
   7 package org.forester.application;
   8
   9 import java.io.File;
  10
  11 import org.forester.io.parsers.phyloxml.PhyloXmlParser;
  12 import org.forester.io.writers.PhylogenyWriter;
  13 import org.forester.phylogeny.Phylogeny;
  14 import org.forester.phylogeny.PhylogenyNode;
  15 import org.forester.phylogeny.data.Annotation;
  16 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
  17 import org.forester.phylogeny.factories.PhylogenyFactory;
  18 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
  19 import org.forester.util.BasicTable;
  20 import org.forester.util.BasicTableParser;
  21 import org.forester.util.CommandLineArguments;
  22
  23 public class decoratorX {
  24
  25     private static final int SEQ_NAME_COLUMN = 1;
  26     private static final int SPECIES_COLUMN  = 2;
  27     private static final int SEQ_COLUMN      = 3;
  28     private static final int TARGET_COLUMN   = 4;
  29
  30     public static void main( final String args[] ) {
  31         File intree = null;
  32         File outtree1 = null;
  33         File outtree2 = null;
  34         File intable = null;
  35         try {
  36             CommandLineArguments cla = null;
  37             cla = new CommandLineArguments( args );
  38             intree = cla.getFile( 0 );
  39             intable = cla.getFile( 1 );
  40             outtree1 = cla.getFile( 2 );
  41             outtree2 = cla.getFile( 3 );
  42             if ( outtree1.exists() ) {
  43                 System.out.println( outtree1 + " already exists" );
  44                 System.exit( -1 );
  45             }
  46             if ( outtree2.exists() ) {
  47                 System.out.println( outtree2 + " already exists" );
  48                 System.exit( -1 );
  49             }
  50             final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
  51             final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParserXsdValidating();
  52             final Phylogeny phy = factory.create( intree, xml_parser )[ 0 ];
  53             final BasicTable<String> t = BasicTableParser.parse( intable, '\t' );
  54             final PhylogenyNodeIterator it = phy.iteratorExternalForward();
  55             int i = 0;
  56             while ( it.hasNext() ) {
  57                 final PhylogenyNode node = it.next();
  58                 processNode( node, t );
  59                 i++;
  60             }
  61             final PhylogenyWriter writer1 = new PhylogenyWriter();
  62             writer1.toPhyloXML( outtree1, phy, 0 );
  63             final PhylogenyNodeIterator it2 = phy.iteratorExternalForward();
  64             while ( it2.hasNext() ) {
  65                 final PhylogenyNode node = it2.next();
  66                 processNode2( node, phy );
  67             }
  68             final PhylogenyWriter writer2 = new PhylogenyWriter();
  69             writer2.toPhyloXML( outtree2, phy, 0 );
  70         }
  71         catch ( final Exception e ) {
  72             System.out.println( e.getLocalizedMessage() );
  73             System.exit( -1 );
  74         }
  75     }
  76
  77     private static void processNode( final PhylogenyNode node, final BasicTable<String> t ) throws Exception {
  78         final String node_seq = node.getNodeData().getSequence().getMolecularSequence().toUpperCase();
  79         boolean found = false;
  80         String found_row = "";
  81         String found_protein_name = "";
  82         String found_species = "";
  83         for( int row = 0; row < t.getNumberOfRows(); ++row ) {
  84             final String table_seq = t.getValueAsString( SEQ_COLUMN, row ).toUpperCase();
  85             if ( table_seq.contains( node_seq ) ) {
  86                 if ( found ) {
  87                     if ( !found_protein_name.equals( t.getValueAsString( SEQ_NAME_COLUMN, row ) )
  88                             || !found_species.equals( t.getValueAsString( SPECIES_COLUMN, row ) ) ) {
  89                         throw new Exception( "Sequence from node " + node + " is not unique: " + node_seq + "\n"
  90                                 + "Already found in row " + found_row );
  91                     }
  92                 }
  93                 else {
  94                     found = true;
  95                     found_row = t.getRowAsString( row, ", " );
  96                     found_protein_name = t.getValueAsString( SEQ_NAME_COLUMN, row );
  97                     found_species = t.getValueAsString( SPECIES_COLUMN, row );
  98                 }
  99                 final Annotation annotation = new Annotation( "target", t.getValueAsString( TARGET_COLUMN, row ) );
 100                 node.getNodeData().getSequence().addAnnotation( annotation );
 101                 System.out.println( node + "->" + annotation );
 102             }
 103         }
 104     }
 105
 106     private static void processNode2( final PhylogenyNode node, final Phylogeny t ) {
 107         if ( ( node.getNodeData().getSequence().getAnnotations() == null )
 108                 || node.getNodeData().getSequence().getAnnotations().isEmpty() ) {
 109             t.deleteSubtree( node, true );
 110         }
 111     }
 112 }