From be3291132f4c794f3abfd8e8ff7b1bbcefc49198 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Mon, 26 Nov 2012 22:30:58 +0000 Subject: [PATCH] "rio" work --- .../java/src/org/forester/application/rio.java | 227 +-------- forester/java/src/org/forester/sdi/RIO.java | 487 +++++--------------- forester/java/src/org/forester/sdi/Tuplet.java | 169 ------- 3 files changed, 112 insertions(+), 771 deletions(-) delete mode 100644 forester/java/src/org/forester/sdi/Tuplet.java diff --git a/forester/java/src/org/forester/application/rio.java b/forester/java/src/org/forester/application/rio.java index de4ed75..3815c6d 100644 --- a/forester/java/src/org/forester/application/rio.java +++ b/forester/java/src/org/forester/application/rio.java @@ -29,10 +29,8 @@ package org.forester.application; import java.io.File; import java.io.FileWriter; -import java.io.IOException; import java.io.PrintWriter; import java.util.ArrayList; -import java.util.HashMap; import java.util.Vector; import org.forester.io.parsers.phyloxml.PhyloXmlParser; @@ -42,10 +40,7 @@ import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PreorderTreeIterator; -import org.forester.sdi.DistanceCalculator; import org.forester.sdi.RIO; -import org.forester.sdi.SDIException; -import org.forester.sdi.SDIR; import org.forester.util.ForesterUtil; public class rio { @@ -134,138 +129,24 @@ public class rio { System.exit( -1 ); } - // Uses DistanceCalculator to calculate distances. 
- private final static StringBuffer getDistances( final File tree_file_for_dist_val, - final File outfile, - final Phylogeny species_tree, - final String seq_name, - final ArrayList al_ortholog_names_for_dc, - final HashMap ortholog_hashmap, - final HashMap super_ortholog_hashmap, - final int warn_more_than_one_ortho, - final int warn_no_orthos, - final double warn_one_ortho, - final int bootstraps, - final double t_orthologs_dc ) throws IOException, SDIException { - Phylogeny consensus_tree = null; - Phylogeny - // to be a consensus tree. - assigned_cons_tree = null; - final SDIR sdiunrooted = new SDIR(); - final ArrayList al_ortholog_nodes = new ArrayList(); - double m = 0.0; - double sd = 0.0; - double d = 0.0; - int n = 0; - final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - consensus_tree = factory.create( tree_file_for_dist_val, new PhyloXmlParser() )[ 0 ]; - PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, consensus_tree ); - assigned_cons_tree = sdiunrooted.infer( consensus_tree, - species_tree, - rio.MINIMIZE_COST, - rio.MINIMIZE_DUPS, - rio.MINIMIZE_HEIGHT, - true, - 1 )[ 0 ]; - final DistanceCalculator dc = new DistanceCalculator(); - final StringBuffer sb = new StringBuffer(); - sb.append( "Given the threshold for distance calculations (" + ForesterUtil.roundToInt( t_orthologs_dc ) - + "): " ); - // No orthologs. - if ( al_ortholog_names_for_dc.size() == 0 ) { - dc.setTree( assigned_cons_tree ); - // Remark. Calculation of mean and sd _does_ include the node - // with seq_name. - m = dc.getMean(); - sd = dc.getStandardDeviation(); - d = dc.getDistanceToRoot( seq_name ); - n = dc.getN(); - sb.append( "No sequence is considered orthologous to query." 
- + "\ndistance of query to root = " + ForesterUtil.FORMATTER_06.format( d ) - + "\nmean of distances (for all sequences) to root = " + ForesterUtil.FORMATTER_06.format( m ) - + "\nsd of distances (for all sequences) to root = " + ForesterUtil.FORMATTER_06.format( sd ) - + "\nn (sum of sequences in alignment plus query) = " + n ); - if ( !( ( ( m - ( warn_no_orthos * sd ) ) < d ) && ( ( m + ( warn_no_orthos * sd ) ) > d ) ) ) { - sb.append( "\nWARNING: distance of query to root is outside of mean+/-" + warn_no_orthos + "*sd!" ); - } - } - // One ortholog. - else if ( al_ortholog_names_for_dc.size() == 1 ) { - final String name_of_ortholog = al_ortholog_names_for_dc.get( 0 ); - al_ortholog_nodes.add( assigned_cons_tree.getNode( name_of_ortholog ) ); - al_ortholog_nodes.add( assigned_cons_tree.getNode( seq_name ) ); - dc.setTreeAndExtNodes( assigned_cons_tree, al_ortholog_nodes ); - // Remark. Calculation of mean _does_ include the node - // with seq_name. - d = dc.getDistanceToLCA( seq_name ); - final double d_o = dc.getDistanceToLCA( name_of_ortholog ); - sb.append( "One sequence is considered orthologous to query." + "\nLCA is LCA of query and its ortholog." - + "\ndistance of query to LCA = " + ForesterUtil.FORMATTER_06.format( d ) - + "\ndistance of ortholog to LCA = " + ForesterUtil.FORMATTER_06.format( d_o ) ); - if ( ( d_o > 0.0 ) - && ( d > 0.0 ) - && ( ( ( d_o >= d ) && ( ( d_o / d ) > warn_one_ortho ) ) || ( ( d_o < d ) && ( ( d / d_o ) > warn_one_ortho ) ) ) ) { - sb.append( "\nWARNING: Ratio of distances to LCA is greater than " + warn_one_ortho + "!" ); - } - else if ( ( ( d_o == 0.0 ) || ( d == 0.0 ) ) && ( ( d_o != 0.0 ) || ( d != 0.0 ) ) ) { - sb.append( "\nWARNING: Ratio could not be calculated, " + " one distance is 0.0!" ); - } - } - // More than one ortholog. 
- else { - for( int i = 0; i < al_ortholog_names_for_dc.size(); ++i ) { - al_ortholog_nodes.add( assigned_cons_tree.getNodeViaSequenceName( al_ortholog_names_for_dc.get( i ) ) ); - } - al_ortholog_nodes.add( assigned_cons_tree.getNodesViaSequenceName( seq_name ).get( 0 ) ); - dc.setTreeAndExtNodes( assigned_cons_tree, al_ortholog_nodes ); - // Remark. Calculation of mean and sd _does_ include the node - // with seq_name. - m = dc.getMean(); - sd = dc.getStandardDeviation(); - d = dc.getDistanceToLCA( seq_name ); - n = dc.getN(); - sb.append( "More than one sequence is considered orthologous to query." - + "\nLCA is LCA of query and its orthologs." - + "\ndistance of query to LCA = " - + ForesterUtil.FORMATTER_06.format( d ) - + "\nmean of distances (for query and its orthologs) to LCA = " - + ForesterUtil.FORMATTER_06.format( m ) - + "\nsd of distances (for query and its orthologs) to LCA = " - + ForesterUtil.FORMATTER_06.format( sd ) - + "\nn (sum of orthologs plus query) = " + n ); - if ( !( ( ( m - ( warn_more_than_one_ortho * sd ) ) < d ) && ( ( m + ( warn_more_than_one_ortho * sd ) ) > d ) ) ) { - sb.append( "\n!WARNING: distance of query to LCA is outside of mean+/-" + warn_more_than_one_ortho - + "*sd!" 
); - } - } - return sb; - } - public static void main( final String[] args ) { ForesterUtil.printProgramInformation( PRG_NAME, PRG_VERSION, PRG_DATE, E_MAIL, WWW ); File species_tree_file = null; File multiple_trees_file = null; File outfile = null; - File distance_matrix_file = null; - File tree_file_for_dist_val = null; - File tree_file_for_avg_bs = null; String seq_name = ""; String arg = ""; boolean output_ultraparalogs = false; ArrayList orthologs_al_for_dc = null; double t_orthologs = 0.0; - double t_sn = 0.0; double t_orthologs_dc = 0.0; + double threshold_ultra_paralogs = 0.0; double[] bs_mean_sd = null; int sort = 13; Phylogeny species_tree = null; RIO rio_instance = null; PrintWriter out = null; long time = 0; - int warn_no_orthos = WARN_NO_ORTHOS_DEFAULT; - int warn_more_than_one_ortho = WARN_MORE_THAN_ONE_ORTHO_DEFAULT; - double warn_one_ortho = WARN_ONE_ORTHO_DEFAULT; - double threshold_ultra_paralogs = THRESHOLD_ULTRA_PARALOGS_DEFAULT; if ( args.length < 2 ) { printHelp(); System.exit( 0 ); @@ -296,15 +177,6 @@ public class rio { case 'O': outfile = new File( arg ); break; - case 'D': - distance_matrix_file = new File( arg ); - break; - case 'T': - tree_file_for_dist_val = new File( arg ); - break; - case 't': - tree_file_for_avg_bs = new File( arg ); - break; case 'p': output_ultraparalogs = true; break; @@ -317,24 +189,9 @@ public class rio { case 'L': t_orthologs = Double.parseDouble( arg ); break; - case 'B': - t_sn = Double.parseDouble( arg ); - break; - case 'U': - t_orthologs_dc = Double.parseDouble( arg ); - break; case 'v': threshold_ultra_paralogs = Double.parseDouble( arg ); break; - case 'X': - warn_more_than_one_ortho = Integer.parseInt( arg ); - break; - case 'Y': - warn_no_orthos = Integer.parseInt( arg ); - break; - case 'Z': - warn_one_ortho = Double.parseDouble( arg ); - break; default: errorInCommandLine(); } @@ -347,10 +204,7 @@ public class rio { || ( outfile == null ) ) { errorInCommandLine(); } - if ( ( sort < 0 ) || ( sort > 17 
) ) { - errorInCommandLine(); - } - if ( ( sort > 2 ) && ( distance_matrix_file == null ) ) { + if ( ( sort < 0 ) || ( sort > 2 ) ) { errorInCommandLine(); } if ( VERBOSE ) { @@ -358,30 +212,12 @@ public class rio { System.out.println( "Seq name: " + seq_name ); System.out.println( "Species tree file: " + species_tree_file ); System.out.println( "Outfile: " + outfile ); - if ( distance_matrix_file != null ) { - System.out.println( "Distance matrix file: " + distance_matrix_file ); - } - if ( tree_file_for_dist_val != null ) { - if ( tree_file_for_avg_bs == null ) { - System.out.println( "Phy to read dists and calc mean support from: " + tree_file_for_dist_val ); - } - else { - System.out.println( "Phylogeny to read dist values from: " + tree_file_for_dist_val ); - } - } - if ( tree_file_for_avg_bs != null ) { - System.out.println( "Phylogeny to calc mean bootstrap from: " + tree_file_for_avg_bs ); - } System.out.println( "Sort: " + sort ); System.out.println( "Threshold orthologs: " + t_orthologs ); - System.out.println( "Threshold subtree neighborings: " + t_sn ); System.out.println( "Threshold orthologs for distance calc.: " + t_orthologs_dc ); if ( output_ultraparalogs ) { System.out.println( "Threshold ultra paralogs: " + threshold_ultra_paralogs ); } - System.out.println( "More than one ortholog sd diff: " + warn_more_than_one_ortho ); - System.out.println( "No orthologs sd diff: " + warn_no_orthos ); - System.out.println( "One ortholog factor : " + warn_one_ortho + "\n" ); } if ( TIME && VERBOSE ) { time = System.currentTimeMillis(); @@ -405,51 +241,15 @@ public class rio { rio_instance = new RIO(); final StringBuffer output = new StringBuffer(); try { - if ( distance_matrix_file != null ) { - rio_instance.readDistanceMatrix( distance_matrix_file ); - } rio_instance.inferOrthologs( multiple_trees_file, species_tree.copy(), seq_name ); - output.append( rio_instance.inferredOrthologsToString( seq_name, sort, t_orthologs, t_sn ) ); - if ( tree_file_for_dist_val 
!= null ) { - orthologs_al_for_dc = rio_instance.inferredOrthologsToArrayList( seq_name, t_orthologs_dc ); - final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - if ( tree_file_for_avg_bs != null ) { - final Phylogeny p = factory.create( tree_file_for_avg_bs, new PhyloXmlParser() )[ 0 ]; - bs_mean_sd = calculateMeanBoostrapValue( p ); - } - else { - final Phylogeny p = factory.create( tree_file_for_dist_val, new PhyloXmlParser() )[ 0 ]; - bs_mean_sd = calculateMeanBoostrapValue( p ); - } - if ( ( bs_mean_sd != null ) && ( bs_mean_sd.length == 2 ) ) { - final double bs_mean = bs_mean_sd[ 0 ]; - final double bs_sd = bs_mean_sd[ 1 ]; - output.append( "\n\nMean bootstrap value of consensus tree (sd): " - + ForesterUtil.roundToInt( ( bs_mean * 100.0 ) / rio_instance.getBootstraps() ) + "% (+/-" - + ForesterUtil.roundToInt( ( bs_sd * 100.0 ) / rio_instance.getBootstraps() ) + "%)\n" ); - } - output.append( "\n\nDistance values:\n" ); - output.append( getDistances( tree_file_for_dist_val, - outfile, - species_tree, - seq_name, - orthologs_al_for_dc, - rio_instance.getInferredOrthologs( seq_name ), - rio_instance.getInferredSuperOrthologs( seq_name ), - warn_more_than_one_ortho, - warn_no_orthos, - warn_one_ortho, - rio_instance.getBootstraps(), - t_orthologs_dc ) ); - } + output.append( rio_instance.inferredOrthologsToString( seq_name, sort, t_orthologs ) ); if ( output_ultraparalogs ) { output.append( "\n\nUltra paralogs:\n" ); - output.append( rio_instance - .inferredUltraParalogsToString( seq_name, sort > 2, threshold_ultra_paralogs ) ); + output.append( rio_instance.inferredUltraParalogsToString( seq_name, threshold_ultra_paralogs ) ); } output.append( "\n\nSort priority: " + RIO.getOrder( sort ) ); output.append( "\nExt nodes : " + rio_instance.getExtNodesOfAnalyzedGeneTrees() ); - output.append( "\nSamples : " + rio_instance.getBootstraps() + "\n" ); + output.append( "\nSamples : " + rio_instance.getNumberOfSamples() + "\n" ); out = new 
PrintWriter( new FileWriter( outfile ), true ); } catch ( final Exception e ) { @@ -473,25 +273,8 @@ public class rio { System.out.println( "N= (String) Query sequence name (mandatory)" ); System.out.println( "S= (String) Species tree file (mandatory)" ); System.out.println( "O= (String) Output file name -- overwritten without warning! (mandatory)" ); - System.out.println( "D= (String) Distance matrix file for pairwise distances" ); - System.out.println( "T= (String) Phylogeny file for distances of query to LCA" ); - System.out.println( " of orthologs and for mean bootstrap value (if t= is not used)," ); - System.out.println( " must be binary )" ); - System.out.println( "t= (String) Phylogeny file for mean bootstrap value (if this option is used," ); - System.out.println( " the mean bootstrap value is not calculated from the tree read in" ); - System.out.println( " with T=), not necessary binary" ); - System.out.println( "p To output ultra paralogs" ); System.out.println( "P= (int) Sort priority" ); System.out.println( "L= (double) Threshold orthologs for output" ); - System.out.println( "U= (double) Threshold orthologs for distance calculation" ); - System.out.println( "X= (int) More than one ortholog: " ); - System.out.println( " numbers of sd the dist. 
to LCA has to differ from mean to generate a warning" ); - System.out.println( "Y= (int) No orthologs:" ); - System.out.println( " Numbers of sd the dist to root has to differ from mean to generate a warning" ); - System.out.println( "Z= (double) One ortholog:" ); - System.out.println( " threshold for factor between the two distances to their LCA (larger/smaller)" ); - System.out.println( " to generate a warning" ); - System.out.println(); System.out.println( " Sort priority (\"P=\"):" ); System.out.println( RIO.getOrderHelp().toString() ); System.out.println(); diff --git a/forester/java/src/org/forester/sdi/RIO.java b/forester/java/src/org/forester/sdi/RIO.java index 0258db7..78e927a 100644 --- a/forester/java/src/org/forester/sdi/RIO.java +++ b/forester/java/src/org/forester/sdi/RIO.java @@ -37,9 +37,7 @@ import java.util.List; import java.util.Set; import org.forester.datastructures.IntMatrix; -import org.forester.evoinference.matrix.distance.DistanceMatrix; import org.forester.io.parsers.PhylogenyParser; -import org.forester.io.parsers.SymmetricalDistanceMatrixParser; import org.forester.io.parsers.nhx.NHXParser; import org.forester.io.parsers.util.ParserUtils; import org.forester.phylogeny.Phylogeny; @@ -62,11 +60,8 @@ public final class RIO { private HashMap> _o_hash_maps; private HashMap> _so_hash_maps; private HashMap> _up_hash_maps; - private HashMap> _sn_hash_maps; // HashMap of HashMaps - private DistanceMatrix _m; - private HashMap _l; private List _seq_names; - private int _bootstraps; + private int _samples; private int _ext_nodes_; private long _time; @@ -121,13 +116,9 @@ public final class RIO { return m; } - /** - * Returns the numbers of trees analyzed. - * - * @return the numbers of trees analyzed - */ - public final int getBootstraps() { - return _bootstraps; + + public final int getNumberOfSamples() { + return _samples; } // Helper method for inferredOrthologsToString. 
@@ -138,37 +129,7 @@ public final class RIO { return 0.0; } final int i = h.get( name ); - return ( ( i * 100.0 ) / getBootstraps() ); - } - - /** - * Returns the distance to a sequences/taxa after a distance list file has - * been read in with readDistanceList(File). Throws an exception if name is - * not found or if no list has been read in. - * - * @param name - * a sequence name - */ - public final double getDistance( String name ) { - double distance = 0.0; - name = name.trim(); - if ( _l == null ) { - throw new RuntimeException( "Distance list has probably not been read in (successfully)." ); - } - if ( _l.get( name ) == null ) { - throw new IllegalArgumentException( name + " not found." ); - } - distance = ( _l.get( name ) ).doubleValue(); - return distance; - } - - public final double getDistance( final String name1, final String name2 ) { - try { - return _m.getValue( _m.getIndex( name1 ), _m.getIndex( name2 ) ); - } - catch ( final Exception e ) { - return 1; - } + return ( ( i * 100.0 ) / getNumberOfSamples() ); } /** @@ -200,13 +161,6 @@ public final class RIO { return _o_hash_maps.get( seq_name ); } - private final HashMap getInferredSubtreeNeighbors( final String seq_name ) { - if ( _sn_hash_maps == null ) { - return null; - } - return _sn_hash_maps.get( seq_name ); - } - /** * Returns a HashMap containing the inferred "super orthologs" of the * external gene tree node with the sequence name seq_name. Sequence names @@ -308,11 +262,9 @@ public final class RIO { _o_hash_maps = new HashMap>(); _so_hash_maps = new HashMap>(); _up_hash_maps = new HashMap>(); - _sn_hash_maps = new HashMap>(); _o_hash_maps.put( query, new HashMap( _seq_names.size() ) ); _so_hash_maps.put( query, new HashMap( _seq_names.size() ) ); _up_hash_maps.put( query, new HashMap( _seq_names.size() ) ); - _sn_hash_maps.put( query, new HashMap( _seq_names.size() ) ); // Go through all gene trees in the file. 
final Phylogeny[] gene_trees = factory.create( gene_trees_file, p ); final Phylogeny[] assigned_trees = new Phylogeny[ gene_trees.length ]; @@ -326,7 +278,7 @@ public final class RIO { } final IntMatrix m = calculateOrthologTable( assigned_trees ); System.out.println( m.toString() ); - setBootstraps( bs ); + setNumberOfSamples( bs ); if ( RIO.TIME ) { _time = ( System.currentTimeMillis() - _time ); } @@ -356,7 +308,6 @@ public final class RIO { List orthologs = null; List super_orthologs = null; List ultra_paralogs = null; - List subtree_neighbors = null; assigned_tree = sdiunrooted.infer( gene_tree, species_tree, RIO.ROOT_BY_MINIMIZING_MAPPING_COST, @@ -377,8 +328,6 @@ public final class RIO { updateHash( _o_hash_maps, query, orthologs ); super_orthologs = PhylogenyMethods.getSuperOrthologousNodes( query_node ); updateHash( _so_hash_maps, query, super_orthologs ); - subtree_neighbors = getSubtreeNeighbors( query_node, 2 ); - updateHash( _sn_hash_maps, query, subtree_neighbors ); ultra_paralogs = PhylogenyMethods.getUltraParalogousNodes( query_node ); updateHash( _up_hash_maps, query, ultra_paralogs ); return assigned_tree; @@ -486,27 +435,20 @@ public final class RIO { * @see #inferOrthologs(File,Phylogeny) * @see #getOrder(int) */ - public StringBuffer inferredOrthologsToString( final String query_name, - int sort, - double threshold_orthologs, - double threshold_subtreeneighborings ) { + public StringBuffer inferredOrthologsToString( final String query_name, int sort, double threshold_orthologs ) { HashMap o_hashmap = null; HashMap s_hashmap = null; - HashMap n_hashmap = null; String name = ""; - double o = 0.0, // Orthologs. - s = 0.0, // Super orthologs. - sn = 0.0, // Subtree neighbors. 
- value1 = 0.0, value2 = 0.0, value3 = 0.0, value4 = 0.0, d = 0.0; - final ArrayList nv = new ArrayList(); - if ( ( _o_hash_maps == null ) || ( _so_hash_maps == null ) || ( _sn_hash_maps == null ) ) { - throw new RuntimeException( "Orthologs have not been calculated (successfully)" ); + double o = 0.0; // Orthologs. + double s = 0.0; // Super orthologs. + double value1 = 0.0; + double value2 = 0.0; + final ArrayList nv = new ArrayList(); + if ( ( _o_hash_maps == null ) || ( _so_hash_maps == null ) ) { + throw new RuntimeException( "orthologs have not been calculated (successfully)" ); } - if ( ( sort < 0 ) || ( sort > 17 ) ) { - sort = 12; - } - if ( ( sort > 2 ) && ( _m == null ) && ( _l == null ) ) { - throw new RuntimeException( "Distance list or matrix have not been read in (successfully)" ); + if ( ( sort < 0 ) || ( sort > 2 ) ) { + sort = 1; } if ( threshold_orthologs < 0.0 ) { threshold_orthologs = 0.0; @@ -514,16 +456,9 @@ public final class RIO { else if ( threshold_orthologs > 100.0 ) { threshold_orthologs = 100.0; } - if ( threshold_subtreeneighborings < 0.0 ) { - threshold_subtreeneighborings = 0.0; - } - else if ( threshold_subtreeneighborings > 100.0 ) { - threshold_subtreeneighborings = 100.0; - } o_hashmap = getInferredOrthologs( query_name ); s_hashmap = getInferredSuperOrthologs( query_name ); - n_hashmap = getInferredSubtreeNeighbors( query_name ); - if ( ( o_hashmap == null ) || ( s_hashmap == null ) || ( n_hashmap == null ) ) { + if ( ( o_hashmap == null ) || ( s_hashmap == null ) ) { throw new RuntimeException( "Orthologs for " + query_name + " were not established" ); } final StringBuffer orthologs = new StringBuffer(); @@ -537,92 +472,33 @@ public final class RIO { if ( o < threshold_orthologs ) { continue I; } - sn = getBootstrapValueFromHash( n_hashmap, name ); - if ( sn < threshold_subtreeneighborings ) { - continue I; - } s = getBootstrapValueFromHash( s_hashmap, name ); - if ( sort >= 3 ) { - if ( _m != null ) { - d = getDistance( 
query_name, name ); - } - else { - d = getDistance( name ); - } - } switch ( sort ) { case 0: - nv.add( new Tuplet( name, o, 5 ) ); + nv.add( new ResultLine( name, o, 5 ) ); break; case 1: - nv.add( new Tuplet( name, o, s, 5 ) ); + nv.add( new ResultLine( name, o, s, 5 ) ); break; case 2: - nv.add( new Tuplet( name, s, o, 5 ) ); - break; - case 3: - nv.add( new Tuplet( name, o, d, 1 ) ); - break; - case 4: - nv.add( new Tuplet( name, d, o, 0 ) ); - break; - case 5: - nv.add( new Tuplet( name, o, s, d, 2 ) ); - break; - case 6: - nv.add( new Tuplet( name, o, d, s, 1 ) ); - break; - case 7: - nv.add( new Tuplet( name, s, o, d, 2 ) ); - break; - case 8: - nv.add( new Tuplet( name, s, d, o, 1 ) ); - break; - case 9: - nv.add( new Tuplet( name, d, o, s, 0 ) ); - break; - case 10: - nv.add( new Tuplet( name, d, s, o, 0 ) ); - break; - case 11: - nv.add( new Tuplet( name, o, sn, d, 2 ) ); - break; - case 12: - nv.add( new Tuplet( name, o, sn, s, d, 3 ) ); - break; - case 13: - nv.add( new Tuplet( name, o, s, sn, d, 3 ) ); - break; - case 14: - nv.add( new Tuplet( name, sn, o, s, d, 3 ) ); - break; - case 15: - nv.add( new Tuplet( name, sn, d, o, s, 1 ) ); - break; - case 16: - nv.add( new Tuplet( name, o, d, sn, s, 1 ) ); - break; - case 17: - nv.add( new Tuplet( name, o, sn, d, s, 2 ) ); + nv.add( new ResultLine( name, s, o, 5 ) ); break; default: - nv.add( new Tuplet( name, o, 5 ) ); + nv.add( new ResultLine( name, o, 5 ) ); } } // End of I for loop. 
if ( ( nv != null ) && ( nv.size() > 0 ) ) { orthologs.append( "[seq name]\t\t[ortho]\t[st-n]\t[sup-o]\t[dist]" + ForesterUtil.LINE_SEPARATOR ); - final Tuplet[] nv_array = new Tuplet[ nv.size() ]; + final ResultLine[] nv_array = new ResultLine[ nv.size() ]; for( int j = 0; j < nv.size(); ++j ) { nv_array[ j ] = nv.get( j ); } Arrays.sort( nv_array ); - for( final Tuplet element : nv_array ) { + for( final ResultLine element : nv_array ) { name = element.getKey(); value1 = element.getValue1(); value2 = element.getValue2(); - value3 = element.getValue3(); - value4 = element.getValue4(); - orthologs.append( addNameAndValues( name, value1, value2, value3, value4, sort ) ); + orthologs.append( addNameAndValues( name, value1, value2, sort ) ); } } } @@ -649,14 +525,14 @@ public final class RIO { * @return String containing the inferred orthologs, String containing "-" * if no orthologs have been found null in case of error */ - public String inferredUltraParalogsToString( final String query_name, - final boolean return_dists, - double threshold_ultra_paralogs ) { + public String inferredUltraParalogsToString( final String query_name, double threshold_ultra_paralogs ) { HashMap sp_hashmap = null; String name = "", ultra_paralogs = ""; int sort = 0; - double sp = 0.0, value1 = 0.0, value2 = 0.0, d = 0.0; - final List nv = new ArrayList(); + double sp = 0.0; + double value1 = 0.0; + double value2 = 0.0; + final List nv = new ArrayList(); if ( threshold_ultra_paralogs < 1.0 ) { threshold_ultra_paralogs = 1.0; } @@ -666,9 +542,6 @@ public final class RIO { if ( _up_hash_maps == null ) { throw new RuntimeException( "Ultra paralogs have not been calculated (successfully)." ); } - if ( return_dists && ( _m == null ) && ( _l == null ) ) { - throw new RuntimeException( "Distance list or matrix have not been read in (successfully)." 
); - } sp_hashmap = getInferredUltraParalogs( query_name ); if ( sp_hashmap == null ) { throw new RuntimeException( "Ultra paralogs for " + query_name + " were not established" ); @@ -683,36 +556,20 @@ public final class RIO { if ( sp < threshold_ultra_paralogs ) { continue I; } - if ( return_dists ) { - if ( _m != null ) { - d = getDistance( query_name, name ); - } - else { - d = getDistance( name ); - } - nv.add( new Tuplet( name, sp, d, 1 ) ); - } - else { - nv.add( new Tuplet( name, sp, 5 ) ); - } + nv.add( new ResultLine( name, sp, 5 ) ); } // End of I for loop. if ( ( nv != null ) && ( nv.size() > 0 ) ) { - final Tuplet[] nv_array = new Tuplet[ nv.size() ]; + final ResultLine[] nv_array = new ResultLine[ nv.size() ]; for( int j = 0; j < nv.size(); ++j ) { nv_array[ j ] = nv.get( j ); } Arrays.sort( nv_array ); - if ( return_dists ) { - sort = 91; - } - else { - sort = 90; - } - for( final Tuplet element : nv_array ) { + sort = 90; + for( final ResultLine element : nv_array ) { name = element.getKey(); value1 = element.getValue1(); value2 = element.getValue2(); - ultra_paralogs += addNameAndValues( name, value1, value2, 0.0, 0.0, sort ); + ultra_paralogs += addNameAndValues( name, value1, value2, sort ); } } } @@ -723,19 +580,6 @@ public final class RIO { return ultra_paralogs; } - public final void readDistanceMatrix( final File matrix_file ) throws IOException { - DistanceMatrix[] matrices = null; - final SymmetricalDistanceMatrixParser parser = SymmetricalDistanceMatrixParser.createInstance(); - matrices = parser.parse( matrix_file ); - if ( ( matrices == null ) || ( matrices.length == 0 ) ) { - throw new IOException( "failed to parse distance matrix from [" + matrix_file + "]" ); - } - if ( matrices.length > 1 ) { - throw new IOException( "[" + matrix_file + "] contains more than once distance matrix" ); - } - _m = matrices[ 0 ]; - } - /** * Brings this into the same state as immediately after construction. 
*/ @@ -744,23 +588,17 @@ public final class RIO { _so_hash_maps = null; _up_hash_maps = null; _seq_names = null; - _m = null; - _l = null; - _bootstraps = 1; + _samples = 1; _ext_nodes_ = 0; _time = 0; } - /** - * Sets the numbers of trees analyzed. - * @param the - * numbers of trees analyzed - */ - private void setBootstraps( int i ) { + + private void setNumberOfSamples( int i ) { if ( i < 1 ) { i = 1; } - _bootstraps = i; + _samples = i; } /** @@ -807,8 +645,6 @@ public final class RIO { private final static String addNameAndValues( final String name, final double value1, final double value2, - final double value3, - final double value4, final int sort ) { final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.#####" ); df.setDecimalSeparatorAlwaysShown( false ); @@ -826,110 +662,14 @@ public final class RIO { case 0: line += addToLine( value1, df ); line += "-\t"; - line += "-\t"; - line += "-\t"; break; case 1: line += addToLine( value1, df ); - line += "-\t"; line += addToLine( value2, df ); - line += "-\t"; break; case 2: line += addToLine( value2, df ); - line += "-\t"; line += addToLine( value1, df ); - line += "-\t"; - break; - case 3: - line += addToLine( value1, df ); - line += "-\t"; - line += "-\t"; - line += addToLine( value2, df ); - break; - case 4: - line += addToLine( value2, df ); - line += "-\t"; - line += "-\t"; - line += addToLine( value1, df ); - break; - case 5: - line += addToLine( value1, df ); - line += "-\t"; - line += addToLine( value2, df ); - line += addToLine( value3, df ); - break; - case 6: - line += addToLine( value1, df ); - line += "-\t"; - line += addToLine( value3, df ); - line += addToLine( value2, df ); - break; - case 7: - line += addToLine( value2, df ); - line += "-\t"; - line += addToLine( value1, df ); - line += addToLine( value3, df ); - break; - case 8: - line += addToLine( value3, df ); - line += "-\t"; - line += addToLine( value1, df ); - line += addToLine( value2, df ); - break; - case 9: - line += 
addToLine( value2, df ); - line += "-\t"; - line += addToLine( value3, df ); - line += addToLine( value1, df ); - break; - case 10: - line += addToLine( value3, df ); - line += "-\t"; - line += addToLine( value2, df ); - line += addToLine( value1, df ); - break; - case 11: - line += addToLine( value1, df ); - line += addToLine( value2, df ); - line += "-\t"; - line += addToLine( value3, df ); - break; - case 12: - line += addToLine( value1, df ); - line += addToLine( value2, df ); - line += addToLine( value3, df ); - line += addToLine( value4, df ); - break; - case 13: - line += addToLine( value1, df ); - line += addToLine( value3, df ); - line += addToLine( value2, df ); - line += addToLine( value4, df ); - break; - case 14: - line += addToLine( value2, df ); - line += addToLine( value1, df ); - line += addToLine( value3, df ); - line += addToLine( value4, df ); - break; - case 15: - line += addToLine( value3, df ); - line += addToLine( value1, df ); - line += addToLine( value4, df ); - line += addToLine( value2, df ); - break; - case 16: - line += addToLine( value1, df ); - line += addToLine( value3, df ); - line += addToLine( value4, df ); - line += addToLine( value2, df ); - break; - case 17: - line += addToLine( value1, df ); - line += addToLine( value2, df ); - line += addToLine( value4, df ); - line += addToLine( value3, df ); break; case 90: line += addToLine( value1, df ); @@ -947,7 +687,7 @@ public final class RIO { // Helper for addNameAndValues. 
private final static String addToLine( final double value, final java.text.DecimalFormat df ) { String s = ""; - if ( value != Tuplet.DEFAULT ) { + if ( value != ResultLine.DEFAULT ) { s = df.format( value ) + "\t"; } else { @@ -996,51 +736,6 @@ public final class RIO { case 2: order = "super orthologies > orthologies"; break; - case 3: - order = "orthologies > distance to query"; - break; - case 4: - order = "distance to query > orthologies"; - break; - case 5: - order = "orthologies > super orthologies > distance to query"; - break; - case 6: - order = "orthologies > distance to query > super orthologies"; - break; - case 7: - order = "super orthologies > orthologies > distance to query"; - break; - case 8: - order = "super orthologies > distance to query > orthologies"; - break; - case 9: - order = "distance to query > orthologies > super orthologies"; - break; - case 10: - order = "distance to query > super orthologies > orthologies"; - break; - case 11: - order = "orthologies > subtree neighbors > distance to query"; - break; - case 12: - order = "orthologies > subtree neighbors > super orthologies > distance to query"; - break; - case 13: - order = "orthologies > super orthologies > subtree neighbors > distance to query"; - break; - case 14: - order = "subtree neighbors > orthologies > super orthologies > distance to query"; - break; - case 15: - order = "subtree neighbors > distance to query > orthologies > super orthologies"; - break; - case 16: - order = "orthologies > distance to query > subtree neighbors > super orthologies"; - break; - case 17: - order = "orthologies > subtree neighbors > distance to query > super orthologies"; - break; default: order = "orthologies"; break; @@ -1053,48 +748,80 @@ public final class RIO { sb.append( " 0: orthologies" + ForesterUtil.LINE_SEPARATOR ); sb.append( " 1: orthologies > super orthologies" + ForesterUtil.LINE_SEPARATOR ); sb.append( " 2: super orthologies > orthologies" + ForesterUtil.LINE_SEPARATOR ); - 
/**
 * One row of a sortable result table: a name (the key) together with up to
 * two numeric values.
 * <p>
 * Rows are ordered by the first value, then by the second value, and finally
 * by key. A value equal to {@link #DEFAULT} means "no value in this column";
 * a column is skipped during comparison when either row lacks a value for it.
 * <p>
 * Each column carries a sign. With the default sign (+1) the row holding the
 * smaller value compares as greater, so sorting yields descending values.
 * A column whose sign has been inverted (-1) sorts in ascending order
 * instead.
 * <p>
 * Note: {@code equals} is not overridden, so the ordering is not guaranteed
 * to be consistent with {@code equals}.
 */
class ResultLine implements Comparable<ResultLine> {

    /** Sentinel marking a column that holds no value. */
    public static final int DEFAULT = -999;
    private final String    _key;
    private final double    _value1;
    private final double    _value2;
    // Per-column sort sign: index 0 belongs to value1, index 1 to value2.
    private final int[]     _p      = { +1, +1 };

    /**
     * Creates an empty row: empty key, both values set to {@link #DEFAULT}.
     */
    ResultLine() {
        _key = "";
        _value1 = ResultLine.DEFAULT;
        _value2 = ResultLine.DEFAULT;
    }

    /**
     * Creates a row with two values.
     *
     * @param name   the key of this row
     * @param value1 the first (primary) sort value
     * @param value2 the second (secondary) sort value
     * @param c      zero-based index of the column whose sort direction is
     *               inverted; any index that does not address a column
     *               (negative, or 2 and above) leaves both directions at
     *               their default
     */
    ResultLine( final String name, final double value1, final double value2, final int c ) {
        _key = name;
        _value1 = value1;
        _value2 = value2;
        // Bound by the real number of columns: the previous "c <= 2" guard let
        // c == 2 through and indexed past the end of the two-element sign array.
        if ( ( c >= 0 ) && ( c < _p.length ) ) {
            _p[ c ] = -1;
        }
    }

    /**
     * Creates a row with a single value; the second value is {@link #DEFAULT}.
     *
     * @param name   the key of this row
     * @param value1 the only sort value
     * @param c      pass 0 to invert the sort direction of the value; every
     *               other number keeps the default direction
     */
    ResultLine( final String name, final double value1, final int c ) {
        _key = name;
        _value1 = value1;
        _value2 = ResultLine.DEFAULT;
        if ( c == 0 ) {
            _p[ 0 ] = -1;
        }
    }

    /**
     * Compares by value1, then value2 (each honouring its column sign and
     * skipped when either side is {@link #DEFAULT}), then by key.
     *
     * @param n the row to compare against
     * @return a negative, zero or positive number as defined by
     *         {@link Comparable#compareTo(Object)}
     */
    @Override
    public int compareTo( final ResultLine n ) {
        if ( ( getValue1() != ResultLine.DEFAULT ) && ( n.getValue1() != ResultLine.DEFAULT ) ) {
            if ( getValue1() < n.getValue1() ) {
                return _p[ 0 ];
            }
            if ( getValue1() > n.getValue1() ) {
                return ( -_p[ 0 ] );
            }
        }
        if ( ( getValue2() != ResultLine.DEFAULT ) && ( n.getValue2() != ResultLine.DEFAULT ) ) {
            if ( getValue2() < n.getValue2() ) {
                return _p[ 1 ];
            }
            if ( getValue2() > n.getValue2() ) {
                return ( -_p[ 1 ] );
            }
        }
        // All comparable columns tie: fall back to the key for a stable order.
        return ( getKey().compareTo( n.getKey() ) );
    }

    /** @return the key (name) of this row */
    String getKey() {
        return _key;
    }

    /** @return the first value, or {@link #DEFAULT} if absent */
    double getValue1() {
        return _value1;
    }

    /** @return the second value, or {@link #DEFAULT} if absent */
    double getValue2() {
        return _value2;
    }
}
evolutionary biology research and applications. -// -// Copyright (C) 2008-2009 Christian M. Zmasek -// Copyright (C) 2008-2009 Burnham Institute for Medical Research -// Copyright (C) 2000-2001 Washington University School of Medicine -// and Howard Hughes Medical Institute -// All rights reserved -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA -// -// Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester - -package org.forester.sdi; - -class Tuplet implements Comparable { - - public static final int DEFAULT = -999; - private final String _key; - private final double _value1; - private final double _value2; - private final double _value3; - private final double _value4; - private int[] _p; // Since - - Tuplet() { - setSigns(); - _key = ""; - _value1 = Tuplet.DEFAULT; - _value2 = Tuplet.DEFAULT; - _value3 = Tuplet.DEFAULT; - _value4 = Tuplet.DEFAULT; - } - - // distance - // needs to be - // sorted in - // different - // direction than other values, and it is not - // known which value will be the distance. 
- Tuplet( final String name, - final double value1, - final double value2, - final double value3, - final double value4, - final int c ) { - setSigns(); - _key = name; - _value1 = value1; - _value2 = value2; - _value3 = value3; - _value4 = value4; - if ( ( c >= 0 ) && ( c <= 3 ) ) { - _p[ c ] = -1; - } - } - - Tuplet( final String name, final double value1, final double value2, final double value3, final int c ) { - setSigns(); - _key = name; - _value1 = value1; - _value2 = value2; - _value3 = value3; - _value4 = Tuplet.DEFAULT; - if ( ( c >= 0 ) && ( c <= 2 ) ) { - _p[ c ] = -1; - } - } - - Tuplet( final String name, final double value1, final double value2, final int c ) { - setSigns(); - _key = name; - _value1 = value1; - _value2 = value2; - _value3 = Tuplet.DEFAULT; - _value4 = Tuplet.DEFAULT; - if ( ( c >= 0 ) && ( c <= 1 ) ) { - _p[ c ] = -1; - } - } - - Tuplet( final String name, final double value1, final int c ) { - setSigns(); - _key = name; - _value1 = value1; - _value2 = Tuplet.DEFAULT; - _value3 = Tuplet.DEFAULT; - _value4 = Tuplet.DEFAULT; - if ( c == 0 ) { - _p[ 0 ] = -1; - } - } - - @Override - public int compareTo( final Tuplet n ) { - if ( ( getValue1() != Tuplet.DEFAULT ) && ( n.getValue1() != Tuplet.DEFAULT ) ) { - if ( getValue1() < n.getValue1() ) { - return _p[ 0 ]; - } - if ( getValue1() > n.getValue1() ) { - return ( -_p[ 0 ] ); - } - } - if ( ( getValue2() != Tuplet.DEFAULT ) && ( n.getValue2() != Tuplet.DEFAULT ) ) { - if ( getValue2() < n.getValue2() ) { - return _p[ 1 ]; - } - if ( getValue2() > n.getValue2() ) { - return ( -_p[ 1 ] ); - } - } - if ( ( getValue3() != Tuplet.DEFAULT ) && ( n.getValue3() != Tuplet.DEFAULT ) ) { - if ( getValue3() < n.getValue3() ) { - return _p[ 2 ]; - } - if ( getValue3() > n.getValue3() ) { - return ( -_p[ 2 ] ); - } - } - if ( ( getValue4() != Tuplet.DEFAULT ) && ( n.getValue4() != Tuplet.DEFAULT ) ) { - if ( getValue4() < n.getValue4() ) { - return _p[ 3 ]; - } - if ( getValue4() > n.getValue4() ) { - 
return ( -_p[ 3 ] ); - } - } - return ( getKey().compareTo( n.getKey() ) ); - } - - String getKey() { - return _key; - } - - double getValue1() { - return _value1; - } - - double getValue2() { - return _value2; - } - - double getValue3() { - return _value3; - } - - double getValue4() { - return _value4; - } - - private void setSigns() { - _p = new int[ 4 ]; - _p[ 0 ] = _p[ 1 ] = _p[ 2 ] = _p[ 3 ] = +1; - } -} \ No newline at end of file -- 1.7.10.2