X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fapplication%2FdecoratorX.java;h=58a2b00e815280103713cccff30382c9d2a7e3d8;hb=327fc7c7a9a46cbe8983b0ca2a54cc6f45cd1695;hp=c05ce86ba4cff7d44a7c9d3f638b853e9d7b498d;hpb=a752511548a95937c3056e5a7e34e329ae193891;p=jalview.git diff --git a/forester/java/src/org/forester/application/decoratorX.java b/forester/java/src/org/forester/application/decoratorX.java index c05ce86..58a2b00 100644 --- a/forester/java/src/org/forester/application/decoratorX.java +++ b/forester/java/src/org/forester/application/decoratorX.java @@ -1,8 +1,8 @@ // java -Xmx2048m -cp // ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar -// org.forester.application.decorator2 +// org.forester.application.decoratorX // RRMa_ALL_plus_RRMa_ee3_50_hmmalign_05_40_fme_with_seqs_2.phylo.xml -// nature12311-s3_cz_4.txt x +// nature12311-s3_cz_4.txt x1 x2 package org.forester.application; @@ -22,64 +22,91 @@ import org.forester.util.CommandLineArguments; public class decoratorX { - private static final int SEQ_COLUMN = 3; - private static final int TARGET_COLUMN = 4; + private static final int SEQ_NAME_COLUMN = 1; + private static final int SPECIES_COLUMN = 2; + private static final int SEQ_COLUMN = 3; + private static final int TARGET_COLUMN = 4; public static void main( final String args[] ) { File intree = null; - File outtree = null; + File outtree1 = null; + File outtree2 = null; File intable = null; try { CommandLineArguments cla = null; cla = new CommandLineArguments( args ); intree = cla.getFile( 0 ); intable = cla.getFile( 1 ); - outtree = cla.getFile( 2 ); - if ( outtree.exists() ) { - System.out.println( outtree + " already exists" ); + outtree1 = cla.getFile( 2 ); + outtree2 = cla.getFile( 3 ); + if ( outtree1.exists() ) { + System.out.println( outtree1 + " already exists" ); + System.exit( -1 ); + } + if ( outtree2.exists() ) { + System.out.println( outtree2 + " already exists" ); System.exit( -1 ); } final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final PhyloXmlParser xml_parser = new PhyloXmlParser(); + final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParserXsdValidating(); final Phylogeny phy = factory.create( intree, xml_parser )[ 0 ]; final BasicTable t = BasicTableParser.parse( intable, '\t' ); - System.out.println( t.toString() ); - final PhylogenyNodeIterator it = phy.iteratorPostorder(); + final PhylogenyNodeIterator it = phy.iteratorExternalForward(); int i = 0; while ( it.hasNext() ) { final PhylogenyNode node = it.next(); - if ( node.isExternal() ) { - processNode( node, t ); - } + processNode( node, t ); i++; } - final PhylogenyWriter writer = new PhylogenyWriter(); - writer.toPhyloXML( outtree, phy, 0 ); + final PhylogenyWriter writer1 = new PhylogenyWriter(); + writer1.toPhyloXML( outtree1, phy, 0 ); + final PhylogenyNodeIterator it2 = phy.iteratorExternalForward(); + while ( it2.hasNext() ) { + final PhylogenyNode node = it2.next(); + processNode2( node, phy ); + } + final PhylogenyWriter writer2 = new PhylogenyWriter(); + writer2.toPhyloXML( outtree2, phy, 0 ); } catch ( final Exception e ) { System.out.println( e.getLocalizedMessage() ); - e.printStackTrace(); System.exit( -1 ); } } - private static void processNode( final PhylogenyNode node, BasicTable t ) throws Exception { - String node_seq = node.getNodeData().getSequence().getMolecularSequence().toUpperCase(); + private static void processNode( final PhylogenyNode node, final BasicTable t ) throws Exception { + final String node_seq = node.getNodeData().getSequence().getMolecularSequence().toUpperCase(); boolean found = false; - for( int col = 0; col < t.getNumberOfRows(); ++col ) { - String table_seq = t.getValueAsString( SEQ_COLUMN, col ).toUpperCase(); + String found_row = ""; + String found_protein_name = ""; + String found_species = ""; + for( int row = 0; row < t.getNumberOfRows(); ++row ) { + final String table_seq = t.getValueAsString( SEQ_COLUMN, row ).toUpperCase(); if ( table_seq.contains( node_seq ) ) { if ( found ) { - throw new Exception( "Sequence from node " + node + " is not unique: " + node_seq ); + if ( !found_protein_name.equals( t.getValueAsString( SEQ_NAME_COLUMN, row ) ) + || !found_species.equals( t.getValueAsString( SPECIES_COLUMN, row ) ) ) { + throw new Exception( "Sequence from node " + node + " is not unique: " + node_seq + "\n" + + "Already found in row " + found_row ); + } } - found = true; - Annotation annotation = new Annotation( "target:" + t.getValueAsString( TARGET_COLUMN, col ) ); + else { + found = true; + found_row = t.getRowAsString( row, ", " ); + found_protein_name = t.getValueAsString( SEQ_NAME_COLUMN, row ); + found_species = t.getValueAsString( SPECIES_COLUMN, row ); + } + final Annotation annotation = new Annotation( "target", t.getValueAsString( TARGET_COLUMN, row ) ); node.getNodeData().getSequence().addAnnotation( annotation ); System.out.println( node + "->" + annotation ); } } - // if ( !found ) { - // throw new Exception( "Sequence from node " + node + " not found: " + node_seq ); - // } + } + + private static void processNode2( final PhylogenyNode node, final Phylogeny t ) { + if ( ( node.getNodeData().getSequence().getAnnotations() == null ) + || node.getNodeData().getSequence().getAnnotations().isEmpty() ) { + t.deleteSubtree( node, true ); + } } }