From a6973e954d3086547591afe91f6ea76624eb45c8 Mon Sep 17 00:00:00 2001
From: "cmzmasek@gmail.com"
Date: Thu, 22 Nov 2012 03:25:18 +0000
Subject: [PATCH] "rio" work

---
 .../src/org/forester/datastructures/IntMatrix.java |   81 ++++++++++++++++++++
 .../org/forester/phylogeny/PhylogenyMethods.java   |    2 +-
 forester/java/src/org/forester/sdi/RIO.java        |   70 +++++++++++++++-
 3 files changed, 150 insertions(+), 3 deletions(-)
 create mode 100644 forester/java/src/org/forester/datastructures/IntMatrix.java

diff --git a/forester/java/src/org/forester/datastructures/IntMatrix.java b/forester/java/src/org/forester/datastructures/IntMatrix.java
new file mode 100644
index 0000000..70415b9
--- /dev/null
+++ b/forester/java/src/org/forester/datastructures/IntMatrix.java
@@ -0,0 +1,81 @@
+
+package org.forester.datastructures;
+
+import java.util.List;
+
+import org.forester.util.ForesterUtil;
+
+/**
+ * Square matrix of int values with one optional String label per index.
+ */
+public final class IntMatrix {
+
+    private final int _data[][];
+    private final String _labels[];
+
+    /**
+     * Creates a size x size matrix of zeros whose labels are all unset (null).
+     */
+    public IntMatrix( final int size ) {
+        _data = new int[ size ][ size ];
+        _labels = new String[ size ];
+    }
+
+    /**
+     * Creates a matrix of zeros with one row and one column per label.
+     *
+     * @throws IllegalArgumentException if any label is null
+     */
+    public IntMatrix( final List<String> labels ) {
+        final int size = labels.size();
+        _data = new int[ size ][ size ];
+        _labels = new String[ size ];
+        for( int i = 0; i < size; ++i ) {
+            setLabel( i, labels.get( i ) );
+        }
+    }
+
+    final public int get( final int x, final int y ) {
+        return _data[ x ][ y ];
+    }
+
+    final public void set( final int x, final int y, final int value ) {
+        _data[ x ][ y ] = value;
+    }
+
+    final public String getLabel( final int x ) {
+        return _labels[ x ];
+    }
+
+    final public void setLabel( final int x, final String label ) {
+        if ( label == null ) {
+            throw new IllegalArgumentException( "matrix label must not be null" );
+        }
+        _labels[ x ] = label;
+    }
+
+    final public int size() {
+        return _labels.length;
+    }
+
+    /**
+     * Returns one line per row: the row label (omitted when unset) followed by the
+     * row's values, every field terminated by a tab.
+     */
+    @Override
+    public String toString() {
+        final StringBuilder sb = new StringBuilder();
+        for( int x = 0; x < size(); ++x ) {
+            if ( getLabel( x ) != null ) {
+                sb.append( getLabel( x ) );
+                sb.append( "\t" );
+            }
+            for( int y = 0; y < size(); ++y ) {
+                sb.append( get( x, y ) );
+                sb.append( "\t" );
+            }
+            sb.append( ForesterUtil.LINE_SEPARATOR );
+        }
+        return sb.toString();
+    }
+}
diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java
index 5f0dd30..48fb802 100644
--- a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java
+++ b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java
@@ -201,7 +201,7 @@ public class PhylogenyMethods {
         return nodes;
     }
 
-    public boolean isAreOrthologous( final PhylogenyNode node1, final PhylogenyNode node2 ) {
+    public static boolean isAreOrthologous( final PhylogenyNode node1, final PhylogenyNode node2 ) {
         return !obtainLCA( node1, node2 ).isDuplication();
     }
 
diff --git a/forester/java/src/org/forester/sdi/RIO.java b/forester/java/src/org/forester/sdi/RIO.java
index dcee251..47a88b8 100644
--- a/forester/java/src/org/forester/sdi/RIO.java
+++ b/forester/java/src/org/forester/sdi/RIO.java
@@ -32,8 +32,11 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
 
+import org.forester.datastructures.IntMatrix;
 import org.forester.evoinference.matrix.distance.DistanceMatrix;
 import org.forester.io.parsers.PhylogenyParser;
 import org.forester.io.parsers.SymmetricalDistanceMatrixParser;
@@ -74,6 +77,64 @@ public final class RIO {
         reset();
     }
 
+    /**
+     * Counts, for every pair of labels, the number of gene trees in which
+     * PhylogenyMethods.isAreOrthologous holds for the two nodes carrying them.
+     * <p>
+     * Labels are taken from the external nodes of the first tree (sequence name,
+     * else sequence symbol, else node name) and looked up in every tree.
+     *
+     * @param gene_trees the gene trees to compare
+     * @return a matrix of counts indexed by label; an empty matrix if gene_trees is empty
+     * @throws IllegalArgumentException if a node has no usable label or a label is not unique
+     */
+    public IntMatrix calculateOrthologTable( Phylogeny[] gene_trees ) {
+        if ( gene_trees.length < 1 ) {
+            // No trees, nothing to count; gene_trees[ 0 ] below would be out of bounds.
+            return new IntMatrix( 0 );
+        }
+        List<String> labels = new ArrayList<String>();
+        Set<String> labels_set = new HashSet<String>();
+        String label;
+        for( PhylogenyNode n : gene_trees[ 0 ].getExternalNodes() ) {
+            if ( n.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getName() ) ) {
+                label = n.getNodeData().getSequence().getName();
+            }
+            else if ( n.getNodeData().isHasSequence()
+                    && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getSymbol() ) ) {
+                label = n.getNodeData().getSequence().getSymbol();
+            }
+            else if ( !ForesterUtil.isEmpty( n.getName() ) ) {
+                label = n.getName();
+            }
+            else {
+                throw new IllegalArgumentException( "node " + n + " has no appropriate label" );
+            }
+            if ( labels_set.contains( label ) ) {
+                throw new IllegalArgumentException( "label " + label + " is not unique" );
+            }
+            labels_set.add( label );
+            labels.add( label );
+        }
+        IntMatrix m = new IntMatrix( labels );
+        PhylogenyNode[] nodes = new PhylogenyNode[ m.size() ];
+        for( Phylogeny gt : gene_trees ) {
+            // Resolve each label once per tree instead of once per matrix cell.
+            // NOTE(review): assumes Phylogeny.getNode( String ) finds nodes by these labels - confirm.
+            for( int i = 0; i < nodes.length; ++i ) {
+                nodes[ i ] = gt.getNode( m.getLabel( i ) );
+            }
+            for( int x = 0; x < m.size(); ++x ) {
+                for( int y = 0; y < m.size(); ++y ) {
+                    if ( PhylogenyMethods.isAreOrthologous( nodes[ x ], nodes[ y ] ) ) {
+                        m.set( x, y, m.get( x, y ) + 1 );
+                    }
+                }
+            }
+        }
+        return m;
+    }
+
     /**
      * Returns the numbers of trees analyzed.
      * 
@@ -268,13 +329,17 @@ public final class RIO {
         _sn_hash_maps.put( query, new HashMap( _seq_names.size() ) );
         // Go through all gene trees in the file.
         final Phylogeny[] gene_trees = factory.create( gene_trees_file, p );
+        Phylogeny[] assigned_trees = new Phylogeny[ gene_trees.length ];
+        int c = 0;
         for( final Phylogeny gt : gene_trees ) {
             bs++;
             // Removes from gene_tree all species not found in species_tree.
             PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gt );
-            inferOrthologsHelper( gt, species_tree, query );
+            assigned_trees[ c++ ] = inferOrthologsHelper( gt, species_tree, query );
             // System.out.println( bs );
         }
+        IntMatrix m = calculateOrthologTable( assigned_trees );
+        System.out.println( m.toString() );
         setBootstraps( bs );
         if ( RIO.TIME ) {
             _time = ( System.currentTimeMillis() - _time );
@@ -297,7 +362,7 @@ public final class RIO {
 
     // Helper method which performs the actual ortholog inference for
     // the external node with seqname query.
-    private void inferOrthologsHelper( final Phylogeny gene_tree, final Phylogeny species_tree, final String query )
+    private Phylogeny inferOrthologsHelper( final Phylogeny gene_tree, final Phylogeny species_tree, final String query )
             throws SDIException {
         Phylogeny assigned_tree = null;
         List nodes = null;
@@ -331,6 +396,7 @@ public final class RIO {
         updateHash( _sn_hash_maps, query, subtree_neighbors );
         ultra_paralogs = PhylogenyMethods.getUltraParalogousNodes( query_node );
         updateHash( _up_hash_maps, query, ultra_paralogs );
+        return assigned_tree;
     }
 
     /**
-- 
1.7.10.2