package org.forester.sdi;
import java.io.File;
-import java.io.FileWriter;
+import java.io.FileNotFoundException;
import java.io.IOException;
-import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Collections;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
+import java.util.Set;
-import org.forester.evoinference.matrix.distance.DistanceMatrix;
+import org.forester.datastructures.IntMatrix;
import org.forester.io.parsers.PhylogenyParser;
-import org.forester.io.parsers.SymmetricalDistanceMatrixParser;
import org.forester.io.parsers.nhx.NHXParser;
import org.forester.io.parsers.util.ParserUtils;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.util.ForesterUtil;
-/*
- * @author Christian M. Zmasek
- */
public final class RIO {
- private final static boolean ROOT_BY_MINIMIZING_MAPPING_COST = false;
- private final static boolean ROOT_BY_MINIMIZING_SUM_OF_DUPS = true;
- private final static boolean ROOT_BY_MINIMIZING_TREE_HEIGHT = true;
- private final static boolean TIME = false;
- private HashMap<String, HashMap<String, Integer>> _o_hash_maps;
- private HashMap<String, HashMap<String, Integer>> _so_hash_maps;
- private HashMap<String, HashMap<String, Integer>> _up_hash_maps;
- private HashMap<String, HashMap<String, Integer>> _sn_hash_maps; // HashMap of HashMaps
- private DistanceMatrix _m;
- private HashMap<String, Double> _l;
+ private final static boolean ROOT_BY_MINIMIZING_SUM_OF_DUPS = true;
+ private final static boolean ROOT_BY_MINIMIZING_TREE_HEIGHT = true;
+ private Phylogeny[] _analyzed_gene_trees;
+ private HashMap<String, HashMap<String, Integer>> _o_maps;
+ private HashMap<String, HashMap<String, Integer>> _so_maps;
+ private HashMap<String, HashMap<String, Integer>> _up_maps;
private List<String> _seq_names;
- private int _bootstraps;
- private int _ext_nodes_;
- private long _time;
+ private int _samples;
+ private int _ext_nodes;
/**
* Default constructor.
+ * @throws SDIException
+ * @throws IOException
+ * @throws RIOException
*/
- public RIO() {
- reset();
- }
-
- /**
- * Returns the numbers of trees analyzed.
- *
- * @return the numbers of trees analyzed
- */
- public final int getBootstraps() {
- return _bootstraps;
- }
-
- // Helper method for inferredOrthologsToString.
- // inferredOrthologsToArrayList,
- // and inferredUltraParalogsToString.
- private final double getBootstrapValueFromHash( final HashMap<String, Integer> h, final String name ) {
- if ( !h.containsKey( name ) ) {
- return 0.0;
+ public RIO( final File gene_trees_file, final Phylogeny species_tree, final String query ) throws IOException,
+ SDIException, RIOException {
+ if ( ForesterUtil.isEmpty( query ) ) {
+ throw new IllegalArgumentException( "query is empty" );
}
- final int i = h.get( name );
- return ( i * 100.0 / getBootstraps() );
+ init();
+ inferOrthologs( gene_trees_file, species_tree, query );
}
- /**
- * Returns the distance to a sequences/taxa after a distance list file has
- * been read in with readDistanceList(File). Throws an exception if name is
- * not found or if no list has been read in.
- *
- * @param name
- * a sequence name
- */
- public final double getDistance( String name ) {
- double distance = 0.0;
- name = name.trim();
- if ( _l == null ) {
- throw new RuntimeException( "Distance list has probably not been read in (successfully)." );
- }
- if ( _l.get( name ) == null ) {
- throw new IllegalArgumentException( name + " not found." );
- }
- distance = ( _l.get( name ) ).doubleValue();
- return distance;
+ public RIO( final File gene_trees_file, final Phylogeny species_tree ) throws IOException, SDIException,
+ RIOException {
+ init();
+ inferOrthologs( gene_trees_file, species_tree, null );
}
- public final double getDistance( final String name1, final String name2 ) {
- try {
- return _m.getValue( _m.getIndex( name1 ), _m.getIndex( name2 ) );
- }
- catch ( final Exception e ) {
- return 1;
- }
+ public final Phylogeny[] getAnalyzedGeneTrees() {
+ return _analyzed_gene_trees;
}
/**
* @return number of ext nodes in gene trees analyzed (after stripping)
*/
public final int getExtNodesOfAnalyzedGeneTrees() {
- return _ext_nodes_;
- }
-
- /**
- * Returns a HashMap containing the inferred orthologs of the external gene
- * tree node with the sequence name seq_name. Sequence names are the keys
- * (String), numbers of observations are the values (Int). Orthologs are to
- * be inferred by method "inferOrthologs". Throws an exception if seq_name
- * is not found.
- *
- * @param seq_name
- * sequence name of a external node of the gene trees
- * @return HashMap containing the inferred orthologs
- * (name(String)->value(Int))
- */
- public final HashMap<String, Integer> getInferredOrthologs( final String seq_name ) {
- if ( _o_hash_maps == null ) {
- return null;
- }
- return _o_hash_maps.get( seq_name );
- }
-
- private final HashMap<String, Integer> getInferredSubtreeNeighbors( final String seq_name ) {
- if ( _sn_hash_maps == null ) {
- return null;
- }
- return _sn_hash_maps.get( seq_name );
- }
-
- /**
- * Returns a HashMap containing the inferred "super orthologs" of the
- * external gene tree node with the sequence name seq_name. Sequence names
- * are the keys (String), numbers of observations are the values (Int).
- * Super orthologs are to be inferred by method "inferOrthologs". Throws an
- * exception if seq_name is not found.
- *
- * @param seq_name
- * sequence name of a external node of the gene trees
- * @return HashMap containing the inferred super orthologs
- * (name(String)->value(Int))
- */
- public final HashMap<String, Integer> getInferredSuperOrthologs( final String seq_name ) {
- if ( _so_hash_maps == null ) {
- return null;
- }
- return _so_hash_maps.get( seq_name );
+ return _ext_nodes;
}
/**
* (name(String)->value(Int))
*/
public final HashMap<String, Integer> getInferredUltraParalogs( final String seq_name ) {
- if ( _up_hash_maps == null ) {
+ if ( _up_maps == null ) {
return null;
}
- return _up_hash_maps.get( seq_name );
- }
-
- /**
- * Returns the time (in ms) needed to run "inferOrthologs". Final variable
- * TIME needs to be set to true.
- *
- * @return time (in ms) needed to run method "inferOrthologs"
- */
- public long getTime() {
- return _time;
- }
-
- /**
- * Infers the orthologs (as well the "super orthologs", the "subtree
- * neighbors", and the "ultra paralogs") for each external node of the gene
- * Trees in multiple tree File gene_trees_file (=output of PHYLIP NEIGHBOR,
- * for example). Tallies how many times each sequence is (super-)
- * orthologous towards the query. Tallies how many times each sequence is
- * ultra paralogous towards the query. Tallies how many times each sequence
- * is a subtree neighbor of the query. Gene duplications are inferred using
- * SDI. Modifies its argument species_tree. Is a little faster than
- * "inferOrthologs(File,Phylogeny)" since orthologs are only inferred for
- * query.
- * <p>
- * To obtain the results use the methods listed below.
- *
- * @param gene_trees_file
- * a File containing gene Trees in NH format, which is the result
- * of performing a bootstrap analysis in PHYLIP
- * @param species_tree
- * a species Phylogeny, which has species names in its species
- * fields
- * @param query
- * the sequence name of the squence whose orthologs are to be
- * inferred
- * @throws SDIException
- */
- public void inferOrthologs( final File gene_trees_file, final Phylogeny species_tree, final String query )
- throws IOException, SDIException {
- int bs = 0;
- if ( RIO.TIME ) {
- _time = System.currentTimeMillis();
- }
- // Read in first tree to get its sequence names
- // and strip species_tree.
- final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
- final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( gene_trees_file, true );
- if ( p instanceof NHXParser ) {
- final NHXParser nhx = ( NHXParser ) p;
- nhx.setReplaceUnderscores( false );
- nhx.setIgnoreQuotes( true );
- nhx.setTaxonomyExtraction( PhylogenyMethods.TAXONOMY_EXTRACTION.YES );
- }
- final Phylogeny gene_tree = factory.create( gene_trees_file, p )[ 0 ];
- System.out.println( "species " + species_tree.toString() );
- // Removes from species_tree all species not found in gene_tree.
- PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( gene_tree, species_tree );
- // System.out.println( "gene " + gene_tree.toString() );
- // System.out.println( "species " + species_tree.toString() );
- // Removes from gene_tree all species not found in species_tree.
- // Archaeopteryx.createApplication( gene_tree );
- // Archaeopteryx.createApplication( species_tree );
- // try {
- // Thread.sleep( 40000 );
- // }
- // catch ( InterruptedException e ) {
- // // TODO Auto-generated catch block
- // e.printStackTrace();
- // }
- PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gene_tree );
- _seq_names = getAllExternalSequenceNames( gene_tree );
- if ( ( _seq_names == null ) || ( _seq_names.size() < 1 ) ) {
- throw new IOException( "could not get sequence names" );
- }
- _o_hash_maps = new HashMap<String, HashMap<String, Integer>>();
- _so_hash_maps = new HashMap<String, HashMap<String, Integer>>();
- _up_hash_maps = new HashMap<String, HashMap<String, Integer>>();
- _sn_hash_maps = new HashMap<String, HashMap<String, Integer>>();
- _o_hash_maps.put( query, new HashMap<String, Integer>( _seq_names.size() ) );
- _so_hash_maps.put( query, new HashMap<String, Integer>( _seq_names.size() ) );
- _up_hash_maps.put( query, new HashMap<String, Integer>( _seq_names.size() ) );
- _sn_hash_maps.put( query, new HashMap<String, Integer>( _seq_names.size() ) );
- // Go through all gene trees in the file.
- final Phylogeny[] gene_trees = factory.create( gene_trees_file, p );
- for( final Phylogeny gt : gene_trees ) {
- bs++;
- // Removes from gene_tree all species not found in species_tree.
- PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gt );
- inferOrthologsHelper( gt, species_tree, query );
- // System.out.println( bs );
- }
- setBootstraps( bs );
- if ( RIO.TIME ) {
- _time = ( System.currentTimeMillis() - _time );
- }
- }
-
- public List<PhylogenyNode> getNodesViaSequenceName( final Phylogeny phy, final String seq_name ) {
- final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
- for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
- final PhylogenyNode n = iter.next();
- if ( n.getNodeData().isHasSequence() && n.getNodeData().getSequence().getName().equals( seq_name ) ) {
- nodes.add( n );
- }
- if ( !n.getNodeData().isHasSequence() && n.getName().equals( seq_name ) ) {
- nodes.add( n );
- }
- }
- return nodes;
- }
-
- // Helper method which performs the actual ortholog inference for
- // the external node with seqname query.
- private void inferOrthologsHelper( final Phylogeny gene_tree, final Phylogeny species_tree, final String query )
- throws SDIException {
- Phylogeny assigned_tree = null;
- List<PhylogenyNode> nodes = null;
- final SDIR sdiunrooted = new SDIR();
- List<PhylogenyNode> orthologs = null;
- List<PhylogenyNode> super_orthologs = null;
- List<PhylogenyNode> ultra_paralogs = null;
- List<PhylogenyNode> subtree_neighbors = null;
- assigned_tree = sdiunrooted.infer( gene_tree,
- species_tree,
- RIO.ROOT_BY_MINIMIZING_MAPPING_COST,
- RIO.ROOT_BY_MINIMIZING_SUM_OF_DUPS,
- RIO.ROOT_BY_MINIMIZING_TREE_HEIGHT,
- true,
- 1 )[ 0 ];
- setExtNodesOfAnalyzedGeneTrees( assigned_tree.getNumberOfExternalNodes() );
- nodes = getNodesViaSequenceName( assigned_tree, query );
- if ( nodes.size() > 1 ) {
- throw new IllegalArgumentException( "node named [" + query + "] not unique" );
- }
- else if ( nodes.isEmpty() ) {
- throw new IllegalArgumentException( "no node containing a sequence named [" + query + "] found" );
- }
- final PhylogenyNode query_node = nodes.get( 0 );
- final PhylogenyMethods methods = PhylogenyMethods.getInstance();
- orthologs = methods.getOrthologousNodes( assigned_tree, query_node );
- updateHash( _o_hash_maps, query, orthologs );
- super_orthologs = PhylogenyMethods.getSuperOrthologousNodes( query_node );
- updateHash( _so_hash_maps, query, super_orthologs );
- subtree_neighbors = getSubtreeNeighbors( query_node, 2 );
- updateHash( _sn_hash_maps, query, subtree_neighbors );
- ultra_paralogs = PhylogenyMethods.getUltraParalogousNodes( query_node );
- updateHash( _up_hash_maps, query, ultra_paralogs );
+ return _up_maps.get( seq_name );
}
- /**
- * Returns an ArrayList containg the names of orthologs of the PhylogenyNode
- * with seq name seq_name.
- *
- * @param seq_name
- * sequence name of a external node of the gene trees
- * @param threshold_orthologs
- * the minimal number of observations for a a sequence to be
- * reported as orthologous as percentage (0.0-100.0%)
- * @return ArrayList containg the names of orthologs of the PhylogenyNode
- * with seq name seq_name
- */
- public ArrayList<String> inferredOrthologsToArrayList( final String seq_name, double threshold_orthologs ) {
- HashMap<String, Integer> o_hashmap = null;
- String name = null;
- double o = 0.0;
- final ArrayList<String> arraylist = new ArrayList<String>();
- if ( _o_hash_maps == null ) {
- throw new RuntimeException( "Orthologs have not been calculated (successfully)." );
- }
- if ( threshold_orthologs < 0.0 ) {
- threshold_orthologs = 0.0;
- }
- else if ( threshold_orthologs > 100.0 ) {
- threshold_orthologs = 100.0;
- }
- o_hashmap = getInferredOrthologs( seq_name );
- if ( o_hashmap == null ) {
- throw new RuntimeException( "Orthologs for " + seq_name + " were not established." );
- }
- if ( _seq_names.size() > 0 ) {
- I: for( int i = 0; i < _seq_names.size(); ++i ) {
- name = _seq_names.get( i );
- if ( name.equals( seq_name ) ) {
- continue I;
- }
- o = getBootstrapValueFromHash( o_hashmap, name );
- if ( o < threshold_orthologs ) {
- continue I;
- }
- arraylist.add( name );
- }
- }
- return arraylist;
+ public final int getNumberOfSamples() {
+ return _samples;
}
/**
* <li>0 : Ortholog
* <li>1 : Ortholog, Super ortholog
* <li>2 : Super ortholog, Ortholog
- * <li>3 : Ortholog, Distance
- * <li>4 : Distance, Ortholog
- * <li>5 : Ortholog, Super ortholog, Distance
- * <li>6 : Ortholog, Distance, Super ortholog
- * <li>7 : Super ortholog, Ortholog, Distance
- * <li>8 : Super ortholog, Distance, Ortholog
- * <li>9 : Distance, Ortholog, Super ortholog
- * <li>10 : Distance, Super ortholog, Ortholog
- * <li>11 : Ortholog, Subtree neighbor, Distance
- * <li>12 : Ortholog, Subtree neighbor, Super ortholog, Distance (default)
- * <li>13 : Ortholog, Super ortholog, Subtree neighbor, Distance
- * <li>14 : Subtree neighbor, Ortholog, Super ortholog, Distance
- * <li>15 : Subtree neighbor, Distance, Ortholog, Super ortholog
- * <li>16 : Ortholog, Distance, Subtree neighbor, Super ortholog
- * <li>17 : Ortholog, Subtree neighbor, Distance, Super ortholog
* </ul>
* <p>
* Returns "-" if no putative orthologs have been found (given
* reported as orthologous, in percents (0.0-100.0%)
* @return String containing the inferred orthologs, String containing "-"
* if no orthologs have been found null in case of error
- * @see #inferOrthologs(File,Phylogeny,String)
- * @see #inferOrthologs(Phylogeny[],Phylogeny)
- * @see #inferOrthologs(File,Phylogeny)
- * @see #getOrder(int)
*/
- public StringBuffer inferredOrthologsToString( final String query_name,
- int sort,
- double threshold_orthologs,
- double threshold_subtreeneighborings ) {
+ public final StringBuffer inferredOrthologsToString( final String query_name, int sort, double threshold_orthologs ) {
HashMap<String, Integer> o_hashmap = null;
HashMap<String, Integer> s_hashmap = null;
- HashMap<String, Integer> n_hashmap = null;
String name = "";
- double o = 0.0, // Orthologs.
- s = 0.0, // Super orthologs.
- sn = 0.0, // Subtree neighbors.
- value1 = 0.0, value2 = 0.0, value3 = 0.0, value4 = 0.0, d = 0.0;
- final ArrayList<Tuplet> nv = new ArrayList<Tuplet>();
- if ( ( _o_hash_maps == null ) || ( _so_hash_maps == null ) || ( _sn_hash_maps == null ) ) {
- //TODO ~~~ throw new RuntimeException( "Orthologs have not been calculated (successfully)" );
- }
- if ( ( sort < 0 ) || ( sort > 17 ) ) {
- sort = 12;
+ double o = 0.0; // Orthologs.
+ double s = 0.0; // Super orthologs.
+ double value1 = 0.0;
+ double value2 = 0.0;
+ final ArrayList<ResultLine> nv = new ArrayList<ResultLine>();
+ if ( ( _o_maps == null ) || ( _so_maps == null ) ) {
+ throw new RuntimeException( "orthologs have not been calculated (successfully)" );
}
- if ( ( sort > 2 ) && ( _m == null ) && ( _l == null ) ) {
- throw new RuntimeException( "Distance list or matrix have not been read in (successfully)" );
+ if ( ( sort < 0 ) || ( sort > 2 ) ) {
+ sort = 1;
}
if ( threshold_orthologs < 0.0 ) {
threshold_orthologs = 0.0;
else if ( threshold_orthologs > 100.0 ) {
threshold_orthologs = 100.0;
}
- if ( threshold_subtreeneighborings < 0.0 ) {
- threshold_subtreeneighborings = 0.0;
- }
- else if ( threshold_subtreeneighborings > 100.0 ) {
- threshold_subtreeneighborings = 100.0;
- }
o_hashmap = getInferredOrthologs( query_name );
s_hashmap = getInferredSuperOrthologs( query_name );
- n_hashmap = getInferredSubtreeNeighbors( query_name );
- if ( ( o_hashmap == null ) || ( s_hashmap == null ) || ( n_hashmap == null ) ) {
- //TODO ~~~ throw new RuntimeException( "Orthologs for " + query_name + " were not established" );
+ if ( ( o_hashmap == null ) || ( s_hashmap == null ) ) {
+ throw new RuntimeException( "Orthologs for " + query_name + " were not established" );
}
final StringBuffer orthologs = new StringBuffer();
if ( _seq_names.size() > 0 ) {
if ( o < threshold_orthologs ) {
continue I;
}
- sn = getBootstrapValueFromHash( n_hashmap, name );
- if ( sn < threshold_subtreeneighborings ) {
- continue I;
- }
s = getBootstrapValueFromHash( s_hashmap, name );
- if ( sort >= 3 ) {
- if ( _m != null ) {
- d = getDistance( query_name, name );
- }
- else {
- d = getDistance( name );
- }
- }
switch ( sort ) {
case 0:
- nv.add( new Tuplet( name, o, 5 ) );
+ nv.add( new ResultLine( name, o, 5 ) );
break;
case 1:
- nv.add( new Tuplet( name, o, s, 5 ) );
+ nv.add( new ResultLine( name, o, s, 5 ) );
break;
case 2:
- nv.add( new Tuplet( name, s, o, 5 ) );
- break;
- case 3:
- nv.add( new Tuplet( name, o, d, 1 ) );
- break;
- case 4:
- nv.add( new Tuplet( name, d, o, 0 ) );
- break;
- case 5:
- nv.add( new Tuplet( name, o, s, d, 2 ) );
- break;
- case 6:
- nv.add( new Tuplet( name, o, d, s, 1 ) );
- break;
- case 7:
- nv.add( new Tuplet( name, s, o, d, 2 ) );
- break;
- case 8:
- nv.add( new Tuplet( name, s, d, o, 1 ) );
- break;
- case 9:
- nv.add( new Tuplet( name, d, o, s, 0 ) );
- break;
- case 10:
- nv.add( new Tuplet( name, d, s, o, 0 ) );
- break;
- case 11:
- nv.add( new Tuplet( name, o, sn, d, 2 ) );
- break;
- case 12:
- nv.add( new Tuplet( name, o, sn, s, d, 3 ) );
- break;
- case 13:
- nv.add( new Tuplet( name, o, s, sn, d, 3 ) );
- break;
- case 14:
- nv.add( new Tuplet( name, sn, o, s, d, 3 ) );
- break;
- case 15:
- nv.add( new Tuplet( name, sn, d, o, s, 1 ) );
- break;
- case 16:
- nv.add( new Tuplet( name, o, d, sn, s, 1 ) );
- break;
- case 17:
- nv.add( new Tuplet( name, o, sn, d, s, 2 ) );
+ nv.add( new ResultLine( name, s, o, 5 ) );
break;
default:
- nv.add( new Tuplet( name, o, 5 ) );
+ nv.add( new ResultLine( name, o, 5 ) );
}
} // End of I for loop.
if ( ( nv != null ) && ( nv.size() > 0 ) ) {
- orthologs.append( "[seq name]\t\t[ortho]\t[st-n]\t[sup-o]\t[dist]" + ForesterUtil.LINE_SEPARATOR );
- final Tuplet[] nv_array = new Tuplet[ nv.size() ];
+ orthologs.append( "seq name\t\tortho\ts-ortho" + ForesterUtil.LINE_SEPARATOR );
+ final ResultLine[] nv_array = new ResultLine[ nv.size() ];
for( int j = 0; j < nv.size(); ++j ) {
nv_array[ j ] = nv.get( j );
}
Arrays.sort( nv_array );
- for( int i = 0; i < nv_array.length; ++i ) {
- name = nv_array[ i ].getKey();
- value1 = nv_array[ i ].getValue1();
- value2 = nv_array[ i ].getValue2();
- value3 = nv_array[ i ].getValue3();
- value4 = nv_array[ i ].getValue4();
- orthologs.append( addNameAndValues( name, value1, value2, value3, value4, sort ) );
+ for( final ResultLine element : nv_array ) {
+ name = element.getKey();
+ value1 = element.getValue1();
+ value2 = element.getValue2();
+ orthologs.append( addNameAndValues( name, value1, value2, sort ) );
}
}
}
orthologs.append( "-" );
}
return orthologs;
- } // inferredOrthologsToString( String, int, double )
-
- // Helper method for inferredOrthologTableToFile.
- // Returns individual rows for the table as String.
- private String inferredOrthologsToTableHelper( final String name2,
- final List<String> names,
- final int j,
- final boolean super_orthologs ) {
- HashMap<String, Integer> hashmap = null;
- String name = null, orthologs = new String( "" );
- int value = 0;
- if ( !super_orthologs ) {
- hashmap = getInferredOrthologs( name2 );
- }
- else {
- hashmap = getInferredSuperOrthologs( name2 );
- }
- if ( hashmap == null ) {
- throw new RuntimeException( "Unexpected failure in method inferredOrthologsToTableHelper" );
- }
- for( int i = 0; i < names.size(); ++i ) {
- name = names.get( i );
- if ( !hashmap.containsKey( name ) ) {
- value = 0;
- }
- else {
- value = hashmap.get( name );
- }
- if ( i == j ) {
- // Sanity check.
- if ( value != 0 ) {
- throw new RuntimeException( "Failed sanity check in method inferredOrthologsToTableHelper: value not 0." );
- }
- orthologs += ( " " + "\t" );
- }
- else {
- orthologs += ( value + "\t" );
- }
- }
- return orthologs;
- }
-
- /**
- * Writes the orthologs for each external node of the gene trees to outfile
- * in the form of a table. Orthologs are to be inferred by method
- * "inferOrthologs". Overwrites without asking! (Last modified: 12/07/00)
- *
- * @param outfile
- * the File to write to
- */
- public void inferredOrthologTableToFile( final File outfile ) throws IOException {
- if ( _o_hash_maps == null ) {
- return;
- }
- inferredOrthologTableToFile( outfile, false );
- }
-
- // Helper for inferredOrthologTableToFile(File).
- // (Last modified: 11/28/00)
- private void inferredOrthologTableToFile( final File outfile, final boolean super_orthologs ) throws IOException {
- String name = "", line = "";
- PrintWriter out = null;
- if ( _seq_names == null ) {
- throw new RuntimeException( "inferredOrthologTableToFile: seq_names_ is null." );
- }
- Collections.sort( _seq_names );
- out = new PrintWriter( new FileWriter( outfile ), true );
- line = "\t\t\t\t";
- for( int i = 0; i < _seq_names.size(); ++i ) {
- line += ( i + ")\t" );
- }
- line += "\n";
- out.println( line );
- for( int i = 0; i < _seq_names.size(); ++i ) {
- name = _seq_names.get( i );
- if ( name.length() < 8 ) {
- line = i + ")\t" + name + "\t\t\t";
- }
- else if ( name.length() < 16 ) {
- line = i + ")\t" + name + "\t\t";
- }
- else {
- line = i + ")\t" + name + "\t";
- }
- line += inferredOrthologsToTableHelper( name, _seq_names, i, super_orthologs );
- out.println( line );
- }
- out.close();
- }
-
- /**
- * Writes the "super orthologs" for each external nodes of the gene trees to
- * outfile in the form of a table. Super orthologs are to be inferred by
- * method "inferOrthologs". Overwrites without asking!
- *
- * @param outfile
- * the File to write to
- */
- public void inferredSuperOrthologTableToFile( final File outfile ) throws IOException {
- if ( _so_hash_maps == null ) {
- return;
- }
- inferredOrthologTableToFile( outfile, true );
}
/**
* @return String containing the inferred orthologs, String containing "-"
* if no orthologs have been found null in case of error
*/
- public String inferredUltraParalogsToString( final String query_name,
- final boolean return_dists,
- double threshold_ultra_paralogs ) {
+ public final String inferredUltraParalogsToString( final String query_name, double threshold_ultra_paralogs ) {
HashMap<String, Integer> sp_hashmap = null;
String name = "", ultra_paralogs = "";
int sort = 0;
- double sp = 0.0, value1 = 0.0, value2 = 0.0, d = 0.0;
- final List<Tuplet> nv = new ArrayList<Tuplet>();
+ double sp = 0.0;
+ double value1 = 0.0;
+ double value2 = 0.0;
+ final List<ResultLine> nv = new ArrayList<ResultLine>();
if ( threshold_ultra_paralogs < 1.0 ) {
threshold_ultra_paralogs = 1.0;
}
else if ( threshold_ultra_paralogs > 100.0 ) {
threshold_ultra_paralogs = 100.0;
}
- if ( _up_hash_maps == null ) {
+ if ( _up_maps == null ) {
throw new RuntimeException( "Ultra paralogs have not been calculated (successfully)." );
}
- if ( return_dists && ( _m == null ) && ( _l == null ) ) {
- throw new RuntimeException( "Distance list or matrix have not been read in (successfully)." );
- }
sp_hashmap = getInferredUltraParalogs( query_name );
if ( sp_hashmap == null ) {
throw new RuntimeException( "Ultra paralogs for " + query_name + " were not established" );
if ( sp < threshold_ultra_paralogs ) {
continue I;
}
- if ( return_dists ) {
- if ( _m != null ) {
- d = getDistance( query_name, name );
- }
- else {
- d = getDistance( name );
- }
- nv.add( new Tuplet( name, sp, d, 1 ) );
- }
- else {
- nv.add( new Tuplet( name, sp, 5 ) );
- }
+ nv.add( new ResultLine( name, sp, 5 ) );
} // End of I for loop.
if ( ( nv != null ) && ( nv.size() > 0 ) ) {
- final Tuplet[] nv_array = new Tuplet[ nv.size() ];
+ final ResultLine[] nv_array = new ResultLine[ nv.size() ];
for( int j = 0; j < nv.size(); ++j ) {
nv_array[ j ] = nv.get( j );
}
Arrays.sort( nv_array );
- if ( return_dists ) {
- sort = 91;
- }
- else {
- sort = 90;
- }
- for( int i = 0; i < nv_array.length; ++i ) {
- name = nv_array[ i ].getKey();
- value1 = nv_array[ i ].getValue1();
- value2 = nv_array[ i ].getValue2();
- ultra_paralogs += addNameAndValues( name, value1, value2, 0.0, 0.0, sort );
+ sort = 90;
+ for( final ResultLine element : nv_array ) {
+ name = element.getKey();
+ value1 = element.getValue1();
+ value2 = element.getValue2();
+ ultra_paralogs += addNameAndValues( name, value1, value2, sort );
}
}
}
return ultra_paralogs;
}
- public final void readDistanceMatrix( final File matrix_file ) throws IOException {
- DistanceMatrix[] matrices = null;
- final SymmetricalDistanceMatrixParser parser = SymmetricalDistanceMatrixParser.createInstance();
- matrices = parser.parse( matrix_file );
- if ( ( matrices == null ) || ( matrices.length == 0 ) ) {
- throw new IOException( "failed to parse distance matrix from [" + matrix_file + "]" );
- }
- if ( matrices.length > 1 ) {
- throw new IOException( "[" + matrix_file + "] contains more than once distance matrix" );
+ // Helper method for inferredOrthologsToString.
+ // inferredOrthologsToArrayList,
+ // and inferredUltraParalogsToString.
+ private final double getBootstrapValueFromHash( final HashMap<String, Integer> h, final String name ) {
+ if ( !h.containsKey( name ) ) {
+ return 0.0;
}
- _m = matrices[ 0 ];
+ final int i = h.get( name );
+ return ( ( i * 100.0 ) / getNumberOfSamples() );
}
/**
- * Brings this into the same state as immediately after construction.
+ * Returns a HashMap containing the inferred orthologs of the external gene
+ * tree node with the sequence name seq_name. Sequence names are the keys
+ * (String), numbers of observations are the values (Int). Orthologs are to
+ * be inferred by method "inferOrthologs". Throws an exception if seq_name
+ * is not found.
+ *
+ * @param seq_name
+ * sequence name of a external node of the gene trees
+ * @return HashMap containing the inferred orthologs
+ * (name(String)->value(Int))
*/
- private final void reset() {
- _o_hash_maps = null;
- _so_hash_maps = null;
- _up_hash_maps = null;
- _seq_names = null;
- _m = null;
- _l = null;
- _bootstraps = 1;
- _ext_nodes_ = 0;
- _time = 0;
+ private final HashMap<String, Integer> getInferredOrthologs( final String seq_name ) {
+ if ( _o_maps == null ) {
+ return null;
+ }
+ return _o_maps.get( seq_name );
}
/**
- * Sets the numbers of trees analyzed.
- * @param the
- * numbers of trees analyzed
+ * Returns a HashMap containing the inferred "super orthologs" of the
+ * external gene tree node with the sequence name seq_name. Sequence names
+ * are the keys (String), numbers of observations are the values (Int).
+ * Super orthologs are to be inferred by method "inferOrthologs". Throws an
+ * exception if seq_name is not found.
+ *
+ * @param seq_name
+ * sequence name of a external node of the gene trees
+ * @return HashMap containing the inferred super orthologs
+ * (name(String)->value(Int))
*/
- private void setBootstraps( int i ) {
- if ( i < 1 ) {
- i = 1;
+ private final HashMap<String, Integer> getInferredSuperOrthologs( final String seq_name ) {
+ if ( _so_maps == null ) {
+ return null;
}
- _bootstraps = i;
+ return _so_maps.get( seq_name );
}
/**
- * Sets number of ext nodes in gene trees analyzed (after stripping).
- * @param the
- * number of ext nodes in gene trees analyzed (after stripping)
+ * Infers the orthologs (as well the "super orthologs", the "subtree
+ * neighbors", and the "ultra paralogs") for each external node of the gene
+ * Trees in multiple tree File gene_trees_file (=output of PHYLIP NEIGHBOR,
+ * for example). Tallies how many times each sequence is (super-)
+ * orthologous towards the query. Tallies how many times each sequence is
+ * ultra paralogous towards the query. Tallies how many times each sequence
+ * is a subtree neighbor of the query. Gene duplications are inferred using
+ * SDI. Modifies its argument species_tree. Is a little faster than
+ * "inferOrthologs(File,Phylogeny)" since orthologs are only inferred for
+ * query.
+ * <p>
+ * To obtain the results use the methods listed below.
+ *
+ * @param gene_trees_file
+ * a File containing gene Trees in NH format, which is the result
+ * of performing a bootstrap analysis in PHYLIP
+ * @param species_tree
+ * a species Phylogeny, which has species names in its species
+ * fields
+ * @param query
+ * the sequence name of the squence whose orthologs are to be
+ * inferred
+ * @throws SDIException
+ * @throws RIOException
+ * @throws IOException
+ * @throws FileNotFoundException
*/
- private void setExtNodesOfAnalyzedGeneTrees( int i ) {
+ private final void inferOrthologs( final File gene_trees_file, final Phylogeny species_tree, final String query )
+ throws SDIException, RIOException, FileNotFoundException, IOException {
+ // Read in first tree to get its sequence names
+ // and strip species_tree.
+ final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+ final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( gene_trees_file, true );
+ if ( p instanceof NHXParser ) {
+ final NHXParser nhx = ( NHXParser ) p;
+ nhx.setReplaceUnderscores( false );
+ nhx.setIgnoreQuotes( true );
+ nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES );
+ }
+ final Phylogeny[] gene_trees = factory.create( gene_trees_file, p );
+ // Removes from species_tree all species not found in gene_tree.
+ PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( gene_trees[ 0 ], species_tree );
+ if ( species_tree.isEmpty() ) {
+ throw new RIOException( "failed to establish species based mapping between gene and species trees" );
+ }
+ if ( !ForesterUtil.isEmpty( query ) ) {
+ PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gene_trees[ 0 ] );
+ if ( gene_trees[ 0 ].isEmpty() ) {
+ throw new RIOException( "failed to establish species based mapping between gene and species trees" );
+ }
+ _seq_names = getAllExternalSequenceNames( gene_trees[ 0 ] );
+ if ( ( _seq_names == null ) || ( _seq_names.size() < 1 ) ) {
+ throw new RIOException( "could not get sequence names" );
+ }
+ _o_maps = new HashMap<String, HashMap<String, Integer>>();
+ _so_maps = new HashMap<String, HashMap<String, Integer>>();
+ _up_maps = new HashMap<String, HashMap<String, Integer>>();
+ _o_maps.put( query, new HashMap<String, Integer>( _seq_names.size() ) );
+ _so_maps.put( query, new HashMap<String, Integer>( _seq_names.size() ) );
+ _up_maps.put( query, new HashMap<String, Integer>( _seq_names.size() ) );
+ }
+ _analyzed_gene_trees = new Phylogeny[ gene_trees.length ];
+ int c = 0;
+ for( final Phylogeny gt : gene_trees ) {
+ // Removes from gene_tree all species not found in species_tree.
+ PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gt );
+ if ( gt.isEmpty() ) {
+ throw new RIOException( "failed to establish species based mapping between gene and species trees" );
+ }
+ _analyzed_gene_trees[ c++ ] = inferOrthologsHelper( gt, species_tree, query );
+ }
+ setNumberOfSamples( gene_trees.length );
+ }
+
+ // Helper method which performs the actual ortholog inference for
+ // the external node with seqname query.
+ private final Phylogeny inferOrthologsHelper( final Phylogeny gene_tree,
+ final Phylogeny species_tree,
+ final String query ) throws SDIException, RIOException {
+ final SDIR sdiunrooted = new SDIR();
+ final Phylogeny assigned_tree = sdiunrooted.infer( gene_tree,
+ species_tree,
+ false,
+ RIO.ROOT_BY_MINIMIZING_SUM_OF_DUPS,
+ RIO.ROOT_BY_MINIMIZING_TREE_HEIGHT,
+ true,
+ 1 )[ 0 ];
+ setExtNodesOfAnalyzedGeneTrees( assigned_tree.getNumberOfExternalNodes() );
+ if ( !ForesterUtil.isEmpty( query ) ) {
+ final List<PhylogenyNode> nodes = getNodesViaSequenceName( assigned_tree, query );
+ if ( nodes.size() > 1 ) {
+ throw new RIOException( "node named [" + query + "] not unique" );
+ }
+ else if ( nodes.isEmpty() ) {
+ throw new RIOException( "no node containing a sequence named [" + query + "] found" );
+ }
+ final PhylogenyNode query_node = nodes.get( 0 );
+ updateCounts( _o_maps, query, PhylogenyMethods.getOrthologousNodes( assigned_tree, query_node ) );
+ updateCounts( _so_maps, query, PhylogenyMethods.getSuperOrthologousNodes( query_node ) );
+ updateCounts( _up_maps, query, PhylogenyMethods.getUltraParalogousNodes( query_node ) );
+ }
+ return assigned_tree;
+ }
+
+ private final void init() {
+ _o_maps = null;
+ _so_maps = null;
+ _up_maps = null;
+ _seq_names = null;
+ _samples = 1;
+ _ext_nodes = 0;
+ }
+
+ private final void setExtNodesOfAnalyzedGeneTrees( final int i ) {
+ _ext_nodes = i;
+ }
+
+ private final void setNumberOfSamples( int i ) {
if ( i < 1 ) {
- i = 0;
+ i = 1;
}
- _ext_nodes_ = i;
+ _samples = i;
}
// Helper for doInferOrthologs( Phylogeny, Phylogeny, String )
// and doInferOrthologs( Phylogeny, Phylogeny ).
- private void updateHash( final HashMap<String, HashMap<String, Integer>> counter_map,
- final String query_seq_name,
- final List<PhylogenyNode> nodes ) {
+ private final void updateCounts( final HashMap<String, HashMap<String, Integer>> counter_map,
+ final String query_seq_name,
+ final List<PhylogenyNode> nodes ) {
final HashMap<String, Integer> hash_map = counter_map.get( query_seq_name );
if ( hash_map == null ) {
- throw new RuntimeException( "Unexpected failure in method updateHash." );
+ throw new RuntimeException( "unexpected error in updateCounts" );
}
for( int j = 0; j < nodes.size(); ++j ) {
String seq_name;
}
}
+ public final static IntMatrix calculateOrthologTable( final Phylogeny[] analyzed_gene_trees ) throws RIOException {
+ final List<String> labels = new ArrayList<String>();
+ final Set<String> labels_set = new HashSet<String>();
+ String label;
+ for( final PhylogenyNode n : analyzed_gene_trees[ 0 ].getExternalNodes() ) {
+ if ( n.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getName() ) ) {
+ label = n.getNodeData().getSequence().getName();
+ }
+ else if ( n.getNodeData().isHasSequence()
+ && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getSymbol() ) ) {
+ label = n.getNodeData().getSequence().getSymbol();
+ }
+ else if ( !ForesterUtil.isEmpty( n.getName() ) ) {
+ label = n.getName();
+ }
+ else {
+ throw new IllegalArgumentException( "node " + n + " has no appropriate label" );
+ }
+ if ( labels_set.contains( label ) ) {
+ throw new IllegalArgumentException( "label " + label + " is not unique" );
+ }
+ labels_set.add( label );
+ labels.add( label );
+ }
+ final IntMatrix m = new IntMatrix( labels );
+ int counter = 0;
+ for( final Phylogeny gt : analyzed_gene_trees ) {
+ counter++;
+ PhylogenyMethods.preOrderReId( gt );
+ final HashMap<String, PhylogenyNode> map = PhylogenyMethods.createNameToExtNodeMap( gt );
+ for( int x = 0; x < m.size(); ++x ) {
+ final String mx = m.getLabel( x );
+ final PhylogenyNode nx = map.get( mx );
+ if ( nx == null ) {
+ throw new RIOException( "node \"" + mx + "\" not present in gene tree #" + counter );
+ }
+ String my;
+ PhylogenyNode ny;
+ for( int y = 0; y < m.size(); ++y ) {
+ my = m.getLabel( y );
+ ny = map.get( my );
+ if ( ny == null ) {
+ throw new RIOException( "node \"" + my + "\" not present in gene tree #" + counter );
+ }
+ if ( !PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( nx, ny ).isDuplication() ) {
+ m.inreaseByOne( x, y );
+ }
+ }
+ }
+ }
+ return m;
+ }
+
+ /**
+ * Returns the order in which ortholog (o), "super ortholog" (s) and
+ * distance (d) are returned and sorted (priority of sort always goes from
+ * left to right), given sort. For the meaning of sort
+ *
+ * @see #inferredOrthologsToString(String,int,double,double)
+ *
+ * @param sort
+ * determines order and sort priority
+ * @return String indicating the order
+ */
+ public final static String getOrder( final int sort ) {
+ String order = "";
+ switch ( sort ) {
+ case 0:
+ order = "orthologies";
+ break;
+ case 1:
+ order = "orthologies > super orthologies";
+ break;
+ case 2:
+ order = "super orthologies > orthologies";
+ break;
+ default:
+ order = "orthologies";
+ break;
+ }
+ return order;
+ }
+
+ public final static StringBuffer getOrderHelp() {
+ final StringBuffer sb = new StringBuffer();
+ sb.append( " 0: orthologies" + ForesterUtil.LINE_SEPARATOR );
+ sb.append( " 1: orthologies > super orthologies" + ForesterUtil.LINE_SEPARATOR );
+ sb.append( " 2: super orthologies > orthologies" + ForesterUtil.LINE_SEPARATOR );
+ return sb;
+ }
+
// Helper method for inferredOrthologsToString
// and inferredUltraParalogsToString.
private final static String addNameAndValues( final String name,
final double value1,
final double value2,
- final double value3,
- final double value4,
final int sort ) {
final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.#####" );
df.setDecimalSeparatorAlwaysShown( false );
case 0:
line += addToLine( value1, df );
line += "-\t";
- line += "-\t";
- line += "-\t";
break;
case 1:
line += addToLine( value1, df );
- line += "-\t";
line += addToLine( value2, df );
- line += "-\t";
break;
case 2:
line += addToLine( value2, df );
- line += "-\t";
- line += addToLine( value1, df );
- line += "-\t";
- break;
- case 3:
- line += addToLine( value1, df );
- line += "-\t";
- line += "-\t";
- line += addToLine( value2, df );
- break;
- case 4:
- line += addToLine( value2, df );
- line += "-\t";
- line += "-\t";
- line += addToLine( value1, df );
- break;
- case 5:
- line += addToLine( value1, df );
- line += "-\t";
- line += addToLine( value2, df );
- line += addToLine( value3, df );
- break;
- case 6:
- line += addToLine( value1, df );
- line += "-\t";
- line += addToLine( value3, df );
- line += addToLine( value2, df );
- break;
- case 7:
- line += addToLine( value2, df );
- line += "-\t";
- line += addToLine( value1, df );
- line += addToLine( value3, df );
- break;
- case 8:
- line += addToLine( value3, df );
- line += "-\t";
- line += addToLine( value1, df );
- line += addToLine( value2, df );
- break;
- case 9:
- line += addToLine( value2, df );
- line += "-\t";
- line += addToLine( value3, df );
- line += addToLine( value1, df );
- break;
- case 10:
- line += addToLine( value3, df );
- line += "-\t";
- line += addToLine( value2, df );
- line += addToLine( value1, df );
- break;
- case 11:
- line += addToLine( value1, df );
- line += addToLine( value2, df );
- line += "-\t";
- line += addToLine( value3, df );
- break;
- case 12:
line += addToLine( value1, df );
- line += addToLine( value2, df );
- line += addToLine( value3, df );
- line += addToLine( value4, df );
- break;
- case 13:
- line += addToLine( value1, df );
- line += addToLine( value3, df );
- line += addToLine( value2, df );
- line += addToLine( value4, df );
- break;
- case 14:
- line += addToLine( value2, df );
- line += addToLine( value1, df );
- line += addToLine( value3, df );
- line += addToLine( value4, df );
- break;
- case 15:
- line += addToLine( value3, df );
- line += addToLine( value1, df );
- line += addToLine( value4, df );
- line += addToLine( value2, df );
- break;
- case 16:
- line += addToLine( value1, df );
- line += addToLine( value3, df );
- line += addToLine( value4, df );
- line += addToLine( value2, df );
- break;
- case 17:
- line += addToLine( value1, df );
- line += addToLine( value2, df );
- line += addToLine( value4, df );
- line += addToLine( value3, df );
break;
case 90:
line += addToLine( value1, df );
// Helper for addNameAndValues.
private final static String addToLine( final double value, final java.text.DecimalFormat df ) {
String s = "";
- if ( value != Tuplet.DEFAULT ) {
+ if ( value != ResultLine.DEFAULT ) {
s = df.format( value ) + "\t";
}
else {
return s;
}
- private static List<String> getAllExternalSequenceNames( final Phylogeny phy ) {
+ private final static List<String> getAllExternalSequenceNames( final Phylogeny phy ) throws RIOException {
final List<String> names = new ArrayList<String>();
for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
final PhylogenyNode n = iter.next();
names.add( n.getName() );
}
else {
- throw new IllegalArgumentException( "node has no (sequence) name: " + n );
+ throw new RIOException( "node has no (sequence) name: " + n );
}
}
return names;
}
- /**
- * Returns the order in which ortholog (o), "super ortholog" (s) and
- * distance (d) are returned and sorted (priority of sort always goes from
- * left to right), given sort. For the meaning of sort
- *
- * @see #inferredOrthologsToString(String,int,double,double)
- *
- * @param sort
- * determines order and sort priority
- * @return String indicating the order
- */
- public final static String getOrder( final int sort ) {
- String order = "";
- switch ( sort ) {
- case 0:
- order = "orthologies";
- break;
- case 1:
- order = "orthologies > super orthologies";
- break;
- case 2:
- order = "super orthologies > orthologies";
- break;
- case 3:
- order = "orthologies > distance to query";
- break;
- case 4:
- order = "distance to query > orthologies";
- break;
- case 5:
- order = "orthologies > super orthologies > distance to query";
- break;
- case 6:
- order = "orthologies > distance to query > super orthologies";
- break;
- case 7:
- order = "super orthologies > orthologies > distance to query";
- break;
- case 8:
- order = "super orthologies > distance to query > orthologies";
- break;
- case 9:
- order = "distance to query > orthologies > super orthologies";
- break;
- case 10:
- order = "distance to query > super orthologies > orthologies";
- break;
- case 11:
- order = "orthologies > subtree neighbors > distance to query";
- break;
- case 12:
- order = "orthologies > subtree neighbors > super orthologies > distance to query";
- break;
- case 13:
- order = "orthologies > super orthologies > subtree neighbors > distance to query";
- break;
- case 14:
- order = "subtree neighbors > orthologies > super orthologies > distance to query";
- break;
- case 15:
- order = "subtree neighbors > distance to query > orthologies > super orthologies";
- break;
- case 16:
- order = "orthologies > distance to query > subtree neighbors > super orthologies";
- break;
- case 17:
- order = "orthologies > subtree neighbors > distance to query > super orthologies";
- break;
- default:
- order = "orthologies";
- break;
+ private final static List<PhylogenyNode> getNodesViaSequenceName( final Phylogeny phy, final String seq_name ) {
+ final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
+ for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
+ final PhylogenyNode n = iter.next();
+ if ( n.getNodeData().isHasSequence() && n.getNodeData().getSequence().getName().equals( seq_name ) ) {
+ nodes.add( n );
+ }
+ if ( !n.getNodeData().isHasSequence() && n.getName().equals( seq_name ) ) {
+ nodes.add( n );
+ }
}
- return order;
- }
-
- public final static StringBuffer getOrderHelp() {
- final StringBuffer sb = new StringBuffer();
- sb.append( " 0: orthologies" + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 1: orthologies > super orthologies" + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 2: super orthologies > orthologies" + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 3: orthologies > distance to query" + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 4: distance to query > orthologies" + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 5: orthologies > super orthologies > distance to query" + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 6: orthologies > distance to query > super orthologies" + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 7: super orthologies > orthologies > distance to query" + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 8: super orthologies > distance to query > orthologies" + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 9: distance to query > orthologies > super orthologies" + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 10: distance to query > super orthologies > orthologies" + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 11: orthologies > subtree neighbors > distance to query" + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 12: orthologies > subtree neighbors > super orthologies > distance to query"
- + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 13: orthologies > super orthologies > subtree neighbors > distance to query"
- + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 14: subtree neighbors > orthologies > super orthologies > distance to query"
- + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 15: subtree neighbors > distance to query > orthologies > super orthologies"
- + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 16: orthologies > distance to query > subtree neighbors > super orthologies"
- + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 17: orthologies > subtree neighbors > distance to query > super orthologies"
- + ForesterUtil.LINE_SEPARATOR );
- return sb;
+ return nodes;
}
- private final static List<PhylogenyNode> getSubtreeNeighbors( final PhylogenyNode query, final int level ) {
- PhylogenyNode node = query;
- if ( !node.isExternal() ) {
- return null;
+ private final class ResultLine implements Comparable<ResultLine> {
+
+ public static final int DEFAULT = -999;
+ private final String _key;
+ private final double _value1;
+ private final double _value2;
+ private int[] _p;
+
+ ResultLine( final String name, final double value1, final double value2, final int c ) {
+ setSigns();
+ _key = name;
+ _value1 = value1;
+ _value2 = value2;
+ if ( ( c >= 0 ) && ( c <= 2 ) ) {
+ _p[ c ] = -1;
+ }
}
- if ( !node.isRoot() ) {
- node = node.getParent();
+
+ ResultLine( final String name, final double value1, final int c ) {
+ setSigns();
+ _key = name;
+ _value1 = value1;
+ _value2 = ResultLine.DEFAULT;
+ if ( c == 0 ) {
+ _p[ 0 ] = -1;
+ }
}
- if ( level == 2 ) {
- if ( !node.isRoot() ) {
- node = node.getParent();
+
+ @Override
+ public int compareTo( final ResultLine n ) {
+ if ( ( getValue1() != ResultLine.DEFAULT ) && ( n.getValue1() != ResultLine.DEFAULT ) ) {
+ if ( getValue1() < n.getValue1() ) {
+ return _p[ 0 ];
+ }
+ if ( getValue1() > n.getValue1() ) {
+ return ( -_p[ 0 ] );
+ }
+ }
+ if ( ( getValue2() != ResultLine.DEFAULT ) && ( n.getValue2() != ResultLine.DEFAULT ) ) {
+ if ( getValue2() < n.getValue2() ) {
+ return _p[ 1 ];
+ }
+ if ( getValue2() > n.getValue2() ) {
+ return ( -_p[ 1 ] );
+ }
}
+ return ( getKey().compareTo( n.getKey() ) );
}
- else {
- throw new IllegalArgumentException( "currently only supporting level 2 subtree neighbors " );
+
+ String getKey() {
+ return _key;
}
- final List<PhylogenyNode> sn = node.getAllExternalDescendants();
- sn.remove( query );
- return sn;
- }
+
+ double getValue1() {
+ return _value1;
+ }
+
+ double getValue2() {
+ return _value2;
+ }
+
+ private void setSigns() {
+ _p = new int[ 2 ];
+ _p[ 0 ] = _p[ 1 ] = +1;
+ }
+ } // ResultLine
}