2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
7 // Copyright (C) 2000-2001 Washington University School of Medicine
8 // and Howard Hughes Medical Institute
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
25 // Contact: phylosoft @ gmail . com
26 // WWW: www.phylosoft.org/forester
28 package org.forester.sdi;
31 import java.io.IOException;
32 import java.util.ArrayList;
33 import java.util.Arrays;
34 import java.util.HashMap;
35 import java.util.HashSet;
36 import java.util.List;
39 import org.forester.datastructures.IntMatrix;
40 import org.forester.evoinference.matrix.distance.DistanceMatrix;
41 import org.forester.io.parsers.PhylogenyParser;
42 import org.forester.io.parsers.SymmetricalDistanceMatrixParser;
43 import org.forester.io.parsers.nhx.NHXParser;
44 import org.forester.io.parsers.util.ParserUtils;
45 import org.forester.phylogeny.Phylogeny;
46 import org.forester.phylogeny.PhylogenyMethods;
47 import org.forester.phylogeny.PhylogenyNode;
48 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
49 import org.forester.phylogeny.factories.PhylogenyFactory;
50 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
51 import org.forester.util.ForesterUtil;
54 * @author Christian M. Zmasek
56 public final class RIO {
// Flags passed to SDIR.infer(...) by inferOrthologsHelper.
58 private final static boolean ROOT_BY_MINIMIZING_MAPPING_COST = false;
59 private final static boolean ROOT_BY_MINIMIZING_SUM_OF_DUPS = true;
60 private final static boolean ROOT_BY_MINIMIZING_TREE_HEIGHT = true;
// According to the getTime() documentation this must be true for run times to be recorded.
61 private final static boolean TIME = false;
// Result tallies, keyed by query name. Each inner map goes from a sequence name to a
// count that updateHash increments: _o = orthologs, _so = super orthologs,
// _up = ultra paralogs, _sn = subtree neighbors.
62 private HashMap<String, HashMap<String, Integer>> _o_hash_maps;
63 private HashMap<String, HashMap<String, Integer>> _so_hash_maps;
64 private HashMap<String, HashMap<String, Integer>> _up_hash_maps;
65 private HashMap<String, HashMap<String, Integer>> _sn_hash_maps; // HashMap of HashMaps
// Distance matrix consulted by getDistance( String, String ).
66 private DistanceMatrix _m;
// Name -> distance list consulted by getDistance( String ).
67 private HashMap<String, Double> _l;
// External sequence names of the first gene tree; assigned in inferOrthologs.
68 private List<String> _seq_names;
69 private int _bootstraps;
70 private int _ext_nodes_;
74 * Default constructor.
// Tallies, over all gene trees, how often each pair of labelled nodes is reported as
// orthologous by PhylogenyMethods.isAreOrthologous, and stores the counts in an
// IntMatrix created from the label list.
// Labels are taken from the external nodes of gene_trees[ 0 ] only.
// NOTE(review): this listing omits some lines of the method (e.g. the declarations of
// "label" and "counter", and closing braces); the comments below describe only the
// statements that are visible.
80 public IntMatrix calculateOrthologTable( Phylogeny[] gene_trees ) {
81 List<String> labels = new ArrayList<String>();
// Remembers labels already seen so that duplicates can be rejected.
82 Set<String> labels_set = new HashSet<String>();
84 for( PhylogenyNode n : gene_trees[ 0 ].getExternalNodes() ) {
// Label preference: sequence name, then sequence symbol, then node name.
85 if ( n.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getName() ) ) {
86 label = n.getNodeData().getSequence().getName();
88 else if ( n.getNodeData().isHasSequence()
89 && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getSymbol() ) ) {
90 label = n.getNodeData().getSequence().getSymbol();
92 else if ( !ForesterUtil.isEmpty( n.getName() ) ) {
// A node without any usable label is an error.
96 throw new IllegalArgumentException( "node " + n + " has no appropriate label" );
// Labels must be unique.
98 if ( labels_set.contains( label ) ) {
99 throw new IllegalArgumentException( "label " + label + " is not unique" );
101 labels_set.add( label );
104 IntMatrix m = new IntMatrix( labels );
106 for( Phylogeny gt : gene_trees ) {
// NOTE(review): progress output written straight to stdout.
107 System.out.println( counter );
// Visit every ordered pair (x, y) of labels, including x == y.
109 for( int x = 0; x < m.size(); ++x ) {
110 PhylogenyNode nx = gt.getNode( m.getLabel( x ) );
111 for( int y = 0; y < m.size(); ++y ) {
112 PhylogenyNode ny = gt.getNode( m.getLabel( y ) );
113 if ( PhylogenyMethods.isAreOrthologous( nx, ny ) ) {
// One more tree in which the pair was found orthologous.
114 m.set( x, y, m.get( x, y ) + 1 );
115 //System.out.println( x + " " + y );
124 * Returns the number of trees analyzed.
126 * @return the number of trees analyzed
128 public final int getBootstraps() {
132 // Helper method for inferredOrthologsToString,
133 // inferredOrthologsToArrayList,
134 // and inferredUltraParalogsToString.
// Converts the count stored for name in h into a percentage of getBootstraps().
135 private final double getBootstrapValueFromHash( final HashMap<String, Integer> h, final String name ) {
// Names without an entry are handled separately (the branch body is not visible in this listing).
136 if ( !h.containsKey( name ) ) {
139 final int i = h.get( name );
// Multiplying by 100.0 first makes the division a floating-point division.
140 return ( i * 100.0 / getBootstraps() );
144 * Returns the distance to a sequence/taxon after a distance list file has
145 * been read in with readDistanceList(File). Throws an exception if name is
146 * not found or if no list has been read in.
151 public final double getDistance( String name ) {
152 double distance = 0.0;
// The condition guarding this throw is not visible in this listing; judging by the
// message it presumably checks that the distance list _l has been read in -- TODO confirm.
155 throw new RuntimeException( "Distance list has probably not been read in (successfully)." );
// Unknown names are rejected with an exception.
157 if ( _l.get( name ) == null ) {
158 throw new IllegalArgumentException( name + " not found." );
160 distance = ( _l.get( name ) ).doubleValue();
// Looks up the value stored for the pair (name1, name2) in the distance matrix _m.
164 public final double getDistance( final String name1, final String name2 ) {
// Both names are translated to matrix indices first.
166 return _m.getValue( _m.getIndex( name1 ), _m.getIndex( name2 ) );
// NOTE(review): catches every Exception; the handler body is not visible in this listing.
168 catch ( final Exception e ) {
174 * Returns the number of ext nodes in gene trees analyzed (after
177 * @return number of ext nodes in gene trees analyzed (after stripping)
179 public final int getExtNodesOfAnalyzedGeneTrees() {
184 * Returns a HashMap containing the inferred orthologs of the external gene
185 * tree node with the sequence name seq_name. Sequence names are the keys
186 * (String), numbers of observations are the values (Int). Orthologs are to
187 * be inferred by method "inferOrthologs". Throws an exception if seq_name
191 * sequence name of an external node of the gene trees
192 * @return HashMap containing the inferred orthologs
193 * (name(String)->value(Int))
195 public final HashMap<String, Integer> getInferredOrthologs( final String seq_name ) {
// Special-cases the state in which no ortholog maps exist yet (branch body not visible in this listing).
196 if ( _o_hash_maps == null ) {
// HashMap.get yields null when seq_name has no entry.
199 return _o_hash_maps.get( seq_name );
// Returns the subtree-neighbor tally (sequence name -> count) stored for seq_name.
202 private final HashMap<String, Integer> getInferredSubtreeNeighbors( final String seq_name ) {
// Special-cases the state in which no subtree-neighbor maps exist yet (branch body not visible in this listing).
203 if ( _sn_hash_maps == null ) {
// HashMap.get yields null when seq_name has no entry.
206 return _sn_hash_maps.get( seq_name );
210 * Returns a HashMap containing the inferred "super orthologs" of the
211 * external gene tree node with the sequence name seq_name. Sequence names
212 * are the keys (String), numbers of observations are the values (Int).
213 * Super orthologs are to be inferred by method "inferOrthologs". Throws an
214 * exception if seq_name is not found.
217 * sequence name of an external node of the gene trees
218 * @return HashMap containing the inferred super orthologs
219 * (name(String)->value(Int))
221 public final HashMap<String, Integer> getInferredSuperOrthologs( final String seq_name ) {
// Special-cases the state in which no super-ortholog maps exist yet (branch body not visible in this listing).
222 if ( _so_hash_maps == null ) {
// HashMap.get yields null when seq_name has no entry.
225 return _so_hash_maps.get( seq_name );
229 * Returns a HashMap containing the inferred "ultra paralogs" of the
230 * external gene tree node with the sequence name seq_name. Sequence names
231 * are the keys (String), numbers of observations are the values (Int).
232 * "ultra paralogs" are to be inferred by method "inferOrthologs". Throws an
233 * exception if seq_name is not found.
236 * sequence name of an external node of the gene trees
237 * @return HashMap containing the inferred ultra paralogs
238 * (name(String)->value(Int))
240 public final HashMap<String, Integer> getInferredUltraParalogs( final String seq_name ) {
// Special-cases the state in which no ultra-paralog maps exist yet (branch body not visible in this listing).
241 if ( _up_hash_maps == null ) {
// HashMap.get yields null when seq_name has no entry.
244 return _up_hash_maps.get( seq_name );
248 * Returns the time (in ms) needed to run "inferOrthologs". Final variable
249 * TIME needs to be set to true.
251 * @return time (in ms) needed to run method "inferOrthologs"
253 public long getTime() {
258 * Infers the orthologs (as well the "super orthologs", the "subtree
259 * neighbors", and the "ultra paralogs") for each external node of the gene
260 * Trees in multiple tree File gene_trees_file (=output of PHYLIP NEIGHBOR,
261 * for example). Tallies how many times each sequence is (super-)
262 * orthologous towards the query. Tallies how many times each sequence is
263 * ultra paralogous towards the query. Tallies how many times each sequence
264 * is a subtree neighbor of the query. Gene duplications are inferred using
265 * SDI. Modifies its argument species_tree. Is a little faster than
266 * "inferOrthologs(File,Phylogeny)" since orthologs are only inferred for
269 * To obtain the results use the methods listed below.
271 * @param gene_trees_file
272 * a File containing gene Trees in NH format, which is the result
273 * of performing a bootstrap analysis in PHYLIP
274 * @param species_tree
275 * a species Phylogeny, which has species names in its species
278 * the sequence name of the sequence whose orthologs are to be
280 * @throws SDIException
282 public void inferOrthologs( final File gene_trees_file, final Phylogeny species_tree, final String query )
283 throws IOException, SDIException {
// Start of the timing window; the matching subtraction is at the bottom of the method.
286 _time = System.currentTimeMillis();
288 // Read in first tree to get its sequence names
289 // and strip species_tree.
290 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
291 final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( gene_trees_file, true );
// An NHX parser gets parser-specific settings before any tree is read.
292 if ( p instanceof NHXParser ) {
293 final NHXParser nhx = ( NHXParser ) p;
294 nhx.setReplaceUnderscores( false );
295 nhx.setIgnoreQuotes( true );
296 nhx.setTaxonomyExtraction( PhylogenyMethods.TAXONOMY_EXTRACTION.YES );
// Only the first tree of the file is used at this point.
298 final Phylogeny gene_tree = factory.create( gene_trees_file, p )[ 0 ];
// NOTE(review): debug output written straight to stdout.
299 System.out.println( "species " + species_tree.toString() );
300 // Removes from species_tree all species not found in gene_tree.
301 PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( gene_tree, species_tree );
// Same call with the two trees swapped.
302 PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gene_tree );
303 _seq_names = getAllExternalSequenceNames( gene_tree );
304 if ( ( _seq_names == null ) || ( _seq_names.size() < 1 ) ) {
305 throw new IOException( "could not get sequence names" );
// Fresh result maps; only the query gets an (initially empty) tally in each of them.
307 _o_hash_maps = new HashMap<String, HashMap<String, Integer>>();
308 _so_hash_maps = new HashMap<String, HashMap<String, Integer>>();
309 _up_hash_maps = new HashMap<String, HashMap<String, Integer>>();
310 _sn_hash_maps = new HashMap<String, HashMap<String, Integer>>();
311 _o_hash_maps.put( query, new HashMap<String, Integer>( _seq_names.size() ) );
312 _so_hash_maps.put( query, new HashMap<String, Integer>( _seq_names.size() ) );
313 _up_hash_maps.put( query, new HashMap<String, Integer>( _seq_names.size() ) );
314 _sn_hash_maps.put( query, new HashMap<String, Integer>( _seq_names.size() ) );
315 // Go through all gene trees in the file.
// The file is handed to the factory a second time, now keeping every tree.
316 final Phylogeny[] gene_trees = factory.create( gene_trees_file, p );
// Collects the tree returned by inferOrthologsHelper for each gene tree.
317 Phylogeny[] assigned_trees = new Phylogeny[ gene_trees.length ];
319 for( final Phylogeny gt : gene_trees ) {
321 // Removes from gene_tree all species not found in species_tree.
322 PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gt );
323 assigned_trees[ c++ ] = inferOrthologsHelper( gt, species_tree, query );
324 // System.out.println( bs );
// Pairwise ortholog counts over the returned trees; the table is only printed to stdout.
326 IntMatrix m = calculateOrthologTable( assigned_trees );
327 System.out.println( m.toString() );
// Turns the start timestamp into an elapsed time in milliseconds.
330 _time = ( System.currentTimeMillis() - _time );
// Walks phy in preorder and examines nodes whose sequence name, or whose node name when
// the node has no sequence, equals seq_name.
// NOTE(review): the statements inside the two if-branches and the return are not visible
// in this listing; presumably matching nodes are added to the list -- TODO confirm.
334 public List<PhylogenyNode> getNodesViaSequenceName( final Phylogeny phy, final String seq_name ) {
335 final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
336 for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
337 final PhylogenyNode n = iter.next();
// Nodes carrying sequence data are matched on the sequence name ...
338 if ( n.getNodeData().isHasSequence() && n.getNodeData().getSequence().getName().equals( seq_name ) ) {
// ... nodes without sequence data are matched on the node name.
341 if ( !n.getNodeData().isHasSequence() && n.getName().equals( seq_name ) ) {
348 // Helper method which performs the actual ortholog inference for
349 // the external node with seqname query.
350 private Phylogeny inferOrthologsHelper( final Phylogeny gene_tree, final Phylogeny species_tree, final String query )
351 throws SDIException {
352 Phylogeny assigned_tree = null;
353 List<PhylogenyNode> nodes = null;
354 final SDIR sdiunrooted = new SDIR();
355 List<PhylogenyNode> orthologs = null;
356 List<PhylogenyNode> super_orthologs = null;
357 List<PhylogenyNode> ultra_paralogs = null;
358 List<PhylogenyNode> subtree_neighbors = null;
// Runs SDIR on the gene tree; some arguments of this call are not visible in this listing.
359 assigned_tree = sdiunrooted.infer( gene_tree,
361 RIO.ROOT_BY_MINIMIZING_MAPPING_COST,
362 RIO.ROOT_BY_MINIMIZING_SUM_OF_DUPS,
363 RIO.ROOT_BY_MINIMIZING_TREE_HEIGHT,
366 setExtNodesOfAnalyzedGeneTrees( assigned_tree.getNumberOfExternalNodes() );
// Exactly one node of the resulting tree must carry the query name.
367 nodes = getNodesViaSequenceName( assigned_tree, query );
368 if ( nodes.size() > 1 ) {
369 throw new IllegalArgumentException( "node named [" + query + "] not unique" );
371 else if ( nodes.isEmpty() ) {
372 throw new IllegalArgumentException( "no node containing a sequence named [" + query + "] found" );
374 final PhylogenyNode query_node = nodes.get( 0 );
375 final PhylogenyMethods methods = PhylogenyMethods.getInstance();
// Each relation found for the query in this tree is added to the corresponding tally.
376 orthologs = methods.getOrthologousNodes( assigned_tree, query_node );
377 updateHash( _o_hash_maps, query, orthologs );
378 super_orthologs = PhylogenyMethods.getSuperOrthologousNodes( query_node );
379 updateHash( _so_hash_maps, query, super_orthologs );
// Level 2 is passed to getSubtreeNeighbors.
380 subtree_neighbors = getSubtreeNeighbors( query_node, 2 );
381 updateHash( _sn_hash_maps, query, subtree_neighbors );
382 ultra_paralogs = PhylogenyMethods.getUltraParalogousNodes( query_node );
383 updateHash( _up_hash_maps, query, ultra_paralogs );
// The tree produced by SDIR is handed back to the caller.
384 return assigned_tree;
388 * Returns an ArrayList containing the names of orthologs of the PhylogenyNode
389 * with seq name seq_name.
392 * sequence name of an external node of the gene trees
393 * @param threshold_orthologs
394 * the minimal number of observations for a sequence to be
395 * reported as orthologous as percentage (0.0-100.0%)
396 * @return ArrayList containing the names of orthologs of the PhylogenyNode
397 * with seq name seq_name
399 public ArrayList<String> inferredOrthologsToArrayList( final String seq_name, double threshold_orthologs ) {
400 HashMap<String, Integer> o_hashmap = null;
403 final ArrayList<String> arraylist = new ArrayList<String>();
404 if ( _o_hash_maps == null ) {
405 throw new RuntimeException( "Orthologs have not been calculated (successfully)." );
// Clamp the threshold to the percentage range [0.0, 100.0].
407 if ( threshold_orthologs < 0.0 ) {
408 threshold_orthologs = 0.0;
410 else if ( threshold_orthologs > 100.0 ) {
411 threshold_orthologs = 100.0;
413 o_hashmap = getInferredOrthologs( seq_name );
414 if ( o_hashmap == null ) {
415 throw new RuntimeException( "Orthologs for " + seq_name + " were not established." );
417 if ( _seq_names.size() > 0 ) {
418 I: for( int i = 0; i < _seq_names.size(); ++i ) {
419 name = _seq_names.get( i );
// The query itself is special-cased (branch body not visible; presumably "continue I" -- TODO confirm).
420 if ( name.equals( seq_name ) ) {
// o is the ortholog tally for name, as a percentage of the analyzed trees.
423 o = getBootstrapValueFromHash( o_hashmap, name );
// Values below the threshold are special-cased (branch body not visible; presumably skipped -- TODO confirm).
424 if ( o < threshold_orthologs ) {
427 arraylist.add( name );
434 * Returns a String containing the names of orthologs of the PhylogenyNode
435 * with seq name query_name. The String also contains how many times a
436 * particular ortholog has been observed.
439 * The output order is (per line): Name, Ortholog, Subtree neighbor, Super
443 * The sort priority of this is determined by sort in the following manner:
446 * <li>1 : Ortholog, Super ortholog
447 * <li>2 : Super ortholog, Ortholog
448 * <li>3 : Ortholog, Distance
449 * <li>4 : Distance, Ortholog
450 * <li>5 : Ortholog, Super ortholog, Distance
451 * <li>6 : Ortholog, Distance, Super ortholog
452 * <li>7 : Super ortholog, Ortholog, Distance
453 * <li>8 : Super ortholog, Distance, Ortholog
454 * <li>9 : Distance, Ortholog, Super ortholog
455 * <li>10 : Distance, Super ortholog, Ortholog
456 * <li>11 : Ortholog, Subtree neighbor, Distance
457 * <li>12 : Ortholog, Subtree neighbor, Super ortholog, Distance (default)
458 * <li>13 : Ortholog, Super ortholog, Subtree neighbor, Distance
459 * <li>14 : Subtree neighbor, Ortholog, Super ortholog, Distance
460 * <li>15 : Subtree neighbor, Distance, Ortholog, Super ortholog
461 * <li>16 : Ortholog, Distance, Subtree neighbor, Super ortholog
462 * <li>17 : Ortholog, Subtree neighbor, Distance, Super ortholog
465 * Returns "-" if no putative orthologs have been found (given
466 * threshold_orthologs).
468 * Orthologs are to be inferred by method "inferOrthologs".
470 * (Last modified: 05/08/01)
473 * sequence name of an external node of the gene trees
475 * order and sort priority
476 * @param threshold_orthologs
477 * the minimal number of observations for a sequence to be
478 * reported as orthologous, in percents (0.0-100.0%)
479 * @param threshold_subtreeneighborings
480 * the minimal number of subtree-neighbor observations for a sequence to be
481 * reported, in percents (0.0-100.0%)
482 * @return String containing the inferred orthologs, String containing "-"
483 * if no orthologs have been found, null in case of error.
484 * @see #inferOrthologs(File,Phylogeny,String)
485 * @see #inferOrthologs(Phylogeny[],Phylogeny)
486 * @see #inferOrthologs(File,Phylogeny)
487 * @see #getOrder(int)
// NOTE(review): this listing omits some lines of the method (e.g. the "sort" parameter,
// the declaration of "name", the switch/case labels and most closing braces); the
// comments below describe only the statements that are visible.
489 public StringBuffer inferredOrthologsToString( final String query_name,
491 double threshold_orthologs,
492 double threshold_subtreeneighborings ) {
493 HashMap<String, Integer> o_hashmap = null;
494 HashMap<String, Integer> s_hashmap = null;
495 HashMap<String, Integer> n_hashmap = null;
497 double o = 0.0, // Orthologs.
498 s = 0.0, // Super orthologs.
499 sn = 0.0, // Subtree neighbors.
500 value1 = 0.0, value2 = 0.0, value3 = 0.0, value4 = 0.0, d = 0.0;
// One Tuplet per reported sequence; sorted further down.
501 final ArrayList<Tuplet> nv = new ArrayList<Tuplet>();
502 if ( ( _o_hash_maps == null ) || ( _so_hash_maps == null ) || ( _sn_hash_maps == null ) ) {
503 throw new RuntimeException( "Orthologs have not been calculated (successfully)" );
// Out-of-range sort codes are special-cased (branch body not visible; the method
// documentation names 12 as the default -- TODO confirm).
505 if ( ( sort < 0 ) || ( sort > 17 ) ) {
// Every sort code above 2 involves distances, so a matrix or a list must be available.
508 if ( ( sort > 2 ) && ( _m == null ) && ( _l == null ) ) {
509 throw new RuntimeException( "Distance list or matrix have not been read in (successfully)" );
// Clamp both thresholds to the percentage range [0.0, 100.0].
511 if ( threshold_orthologs < 0.0 ) {
512 threshold_orthologs = 0.0;
514 else if ( threshold_orthologs > 100.0 ) {
515 threshold_orthologs = 100.0;
517 if ( threshold_subtreeneighborings < 0.0 ) {
518 threshold_subtreeneighborings = 0.0;
520 else if ( threshold_subtreeneighborings > 100.0 ) {
521 threshold_subtreeneighborings = 100.0;
523 o_hashmap = getInferredOrthologs( query_name );
524 s_hashmap = getInferredSuperOrthologs( query_name );
525 n_hashmap = getInferredSubtreeNeighbors( query_name );
526 if ( ( o_hashmap == null ) || ( s_hashmap == null ) || ( n_hashmap == null ) ) {
527 throw new RuntimeException( "Orthologs for " + query_name + " were not established" );
529 final StringBuffer orthologs = new StringBuffer();
530 if ( _seq_names.size() > 0 ) {
531 I: for( int i = 0; i < _seq_names.size(); ++i ) {
532 name = _seq_names.get( i );
// The query itself is special-cased (branch body not visible; presumably "continue I" -- TODO confirm).
533 if ( name.equals( query_name ) ) {
// o, sn and s are tallies expressed as percentages of the analyzed trees.
536 o = getBootstrapValueFromHash( o_hashmap, name );
// Values below the ortholog threshold are special-cased (branch body not visible).
537 if ( o < threshold_orthologs ) {
540 sn = getBootstrapValueFromHash( n_hashmap, name );
// Values below the subtree-neighbor threshold are special-cased (branch body not visible).
541 if ( sn < threshold_subtreeneighborings ) {
544 s = getBootstrapValueFromHash( s_hashmap, name );
// The distance comes either from the matrix (two names) or from the list (one name);
// the condition choosing between the two is not visible in this listing.
547 d = getDistance( query_name, name );
550 d = getDistance( name );
// One Tuplet is built per sequence. The value order differs per statement and matches
// the sort orders listed in the method documentation (the selecting case labels are not
// visible in this listing).
555 nv.add( new Tuplet( name, o, 5 ) );
558 nv.add( new Tuplet( name, o, s, 5 ) );
561 nv.add( new Tuplet( name, s, o, 5 ) );
564 nv.add( new Tuplet( name, o, d, 1 ) );
567 nv.add( new Tuplet( name, d, o, 0 ) );
570 nv.add( new Tuplet( name, o, s, d, 2 ) );
573 nv.add( new Tuplet( name, o, d, s, 1 ) );
576 nv.add( new Tuplet( name, s, o, d, 2 ) );
579 nv.add( new Tuplet( name, s, d, o, 1 ) );
582 nv.add( new Tuplet( name, d, o, s, 0 ) );
585 nv.add( new Tuplet( name, d, s, o, 0 ) );
588 nv.add( new Tuplet( name, o, sn, d, 2 ) );
591 nv.add( new Tuplet( name, o, sn, s, d, 3 ) );
594 nv.add( new Tuplet( name, o, s, sn, d, 3 ) );
597 nv.add( new Tuplet( name, sn, o, s, d, 3 ) );
600 nv.add( new Tuplet( name, sn, d, o, s, 1 ) );
603 nv.add( new Tuplet( name, o, d, sn, s, 1 ) );
606 nv.add( new Tuplet( name, o, sn, d, s, 2 ) );
609 nv.add( new Tuplet( name, o, 5 ) );
611 } // End of I for loop.
// nv is a final local initialised above, so only the size check can matter here.
612 if ( ( nv != null ) && ( nv.size() > 0 ) ) {
// Header row of the tab-separated report.
613 orthologs.append( "[seq name]\t\t[ortho]\t[st-n]\t[sup-o]\t[dist]" + ForesterUtil.LINE_SEPARATOR );
// Copy into an array so that Arrays.sort can order the entries.
614 final Tuplet[] nv_array = new Tuplet[ nv.size() ];
615 for( int j = 0; j < nv.size(); ++j ) {
616 nv_array[ j ] = nv.get( j );
618 Arrays.sort( nv_array );
// One formatted line per sorted entry.
619 for( int i = 0; i < nv_array.length; ++i ) {
620 name = nv_array[ i ].getKey();
621 value1 = nv_array[ i ].getValue1();
622 value2 = nv_array[ i ].getValue2();
623 value3 = nv_array[ i ].getValue3();
624 value4 = nv_array[ i ].getValue4();
625 orthologs.append( addNameAndValues( name, value1, value2, value3, value4, sort ) );
629 // No orthologs found.
// orthologs is a final local initialised above, so only the length check can matter here.
630 if ( ( orthologs == null ) || ( orthologs.length() < 1 ) ) {
631 orthologs.append( "-" );
634 } // inferredOrthologsToString( String, int, double )
637 * Returns a String containing the names of orthologs of the PhylogenyNode
638 * with seq name query_name. The String also contains how many times a
639 * particular ortholog has been observed. Returns "-" if no putative
640 * orthologs have been found (given threshold_orthologs).
642 * Orthologs are to be inferred by method "inferOrthologs".
645 * sequence name of an external node of the gene trees
646 * @param return_dists
647 * @param threshold_ultra_paralogs
649 * @return String containing the inferred orthologs, String containing "-"
650 * if no orthologs have been found, null in case of error.
// NOTE(review): this listing omits some lines of the method (e.g. the declaration of
// "sort", the else branches and most closing braces); the comments below describe only
// the statements that are visible.
652 public String inferredUltraParalogsToString( final String query_name,
653 final boolean return_dists,
654 double threshold_ultra_paralogs ) {
655 HashMap<String, Integer> sp_hashmap = null;
656 String name = "", ultra_paralogs = "";
658 double sp = 0.0, value1 = 0.0, value2 = 0.0, d = 0.0;
// One Tuplet per reported sequence; sorted further down.
659 final List<Tuplet> nv = new ArrayList<Tuplet>();
// Clamp the threshold to [1.0, 100.0]; note that the lower bound here is 1.0, not 0.0.
660 if ( threshold_ultra_paralogs < 1.0 ) {
661 threshold_ultra_paralogs = 1.0;
663 else if ( threshold_ultra_paralogs > 100.0 ) {
664 threshold_ultra_paralogs = 100.0;
666 if ( _up_hash_maps == null ) {
667 throw new RuntimeException( "Ultra paralogs have not been calculated (successfully)." );
// Distances can only be reported if a matrix or a list is available.
669 if ( return_dists && ( _m == null ) && ( _l == null ) ) {
670 throw new RuntimeException( "Distance list or matrix have not been read in (successfully)." );
672 sp_hashmap = getInferredUltraParalogs( query_name );
673 if ( sp_hashmap == null ) {
674 throw new RuntimeException( "Ultra paralogs for " + query_name + " were not established" );
676 if ( _seq_names.size() > 0 ) {
677 I: for( int i = 0; i < _seq_names.size(); ++i ) {
678 name = _seq_names.get( i );
// The query itself is special-cased (branch body not visible; presumably "continue I" -- TODO confirm).
679 if ( name.equals( query_name ) ) {
// sp is the ultra-paralog tally for name, as a percentage of the analyzed trees.
682 sp = getBootstrapValueFromHash( sp_hashmap, name );
// Values below the threshold are special-cased (branch body not visible).
683 if ( sp < threshold_ultra_paralogs ) {
686 if ( return_dists ) {
// The distance comes either from the matrix (two names) or from the list (one name);
// the condition choosing between the two is not visible in this listing.
688 d = getDistance( query_name, name );
691 d = getDistance( name );
693 nv.add( new Tuplet( name, sp, d, 1 ) );
// Variant without a distance value.
696 nv.add( new Tuplet( name, sp, 5 ) );
698 } // End of I for loop.
// nv is a final local initialised above, so only the size check can matter here.
699 if ( ( nv != null ) && ( nv.size() > 0 ) ) {
// Copy into an array so that Arrays.sort can order the entries.
700 final Tuplet[] nv_array = new Tuplet[ nv.size() ];
701 for( int j = 0; j < nv.size(); ++j ) {
702 nv_array[ j ] = nv.get( j );
704 Arrays.sort( nv_array );
705 if ( return_dists ) {
// One formatted line per sorted entry; the third and fourth values are passed as 0.0.
711 for( int i = 0; i < nv_array.length; ++i ) {
712 name = nv_array[ i ].getKey();
713 value1 = nv_array[ i ].getValue1();
714 value2 = nv_array[ i ].getValue2();
715 ultra_paralogs += addNameAndValues( name, value1, value2, 0.0, 0.0, sort );
719 // No ultra paralogs found.
// ultra_paralogs starts as "" and is only appended to, so only the length check can matter here.
720 if ( ( ultra_paralogs == null ) || ( ultra_paralogs.length() < 1 ) ) {
721 ultra_paralogs = "-";
723 return ultra_paralogs;
// Parses matrix_file with a SymmetricalDistanceMatrixParser and requires the result to
// contain exactly one matrix; any other outcome is reported as an IOException.
// NOTE(review): the statements following the checks are not visible in this listing.
726 public final void readDistanceMatrix( final File matrix_file ) throws IOException {
727 DistanceMatrix[] matrices = null;
728 final SymmetricalDistanceMatrixParser parser = SymmetricalDistanceMatrixParser.createInstance();
729 matrices = parser.parse( matrix_file );
// No matrix at all.
730 if ( ( matrices == null ) || ( matrices.length == 0 ) ) {
731 throw new IOException( "failed to parse distance matrix from [" + matrix_file + "]" );
// More than one matrix.
733 if ( matrices.length > 1 ) {
734 throw new IOException( "[" + matrix_file + "] contains more than once distance matrix" );
740 * Brings this into the same state as immediately after construction.
// NOTE(review): only two of this method's statements are visible in this listing.
742 private final void reset() {
// Discard the super-ortholog and ultra-paralog tallies.
744 _so_hash_maps = null;
745 _up_hash_maps = null;
755 * Sets the number of trees analyzed.
757 * number of trees analyzed
759 private void setBootstraps( int i ) {
767 * Sets number of ext nodes in gene trees analyzed (after stripping).
769 * number of ext nodes in gene trees analyzed (after stripping)
771 private void setExtNodesOfAnalyzedGeneTrees( int i ) {
778 // Helper for inferOrthologsHelper( Phylogeny, Phylogeny, String ):
779 // adds one observation per node in nodes to the tally kept for query_seq_name.
780 private void updateHash( final HashMap<String, HashMap<String, Integer>> counter_map,
781 final String query_seq_name,
782 final List<PhylogenyNode> nodes ) {
// The tally for the query must already exist in counter_map.
783 final HashMap<String, Integer> hash_map = counter_map.get( query_seq_name );
784 if ( hash_map == null ) {
785 throw new RuntimeException( "Unexpected failure in method updateHash." );
787 for( int j = 0; j < nodes.size(); ++j ) {
// Key preference: non-empty sequence name, otherwise the node name.
789 if ( ( nodes.get( j ) ).getNodeData().isHasSequence()
790 && !ForesterUtil.isEmpty( ( nodes.get( j ) ).getNodeData().getSequence().getName() ) ) {
791 seq_name = ( nodes.get( j ) ).getNodeData().getSequence().getName();
794 seq_name = ( nodes.get( j ) ).getName();
// Increment an existing count ...
796 if ( hash_map.containsKey( seq_name ) ) {
797 hash_map.put( seq_name, hash_map.get( seq_name ) + 1 );
// ... or start counting at 1.
800 hash_map.put( seq_name, 1 );
805 // Helper method for inferredOrthologsToString
806 // and inferredUltraParalogsToString.
// Formats one report line: the name padded with tabs, then a selection of the values
// formatted through addToLine, then a line separator.
// NOTE(review): the remaining parameters, the declaration of "line" and the switch/case
// labels that choose which group of addToLine calls runs are not visible in this listing.
807 private final static String addNameAndValues( final String name,
// Up to five decimal places, no trailing decimal separator.
813 final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.#####" );
814 df.setDecimalSeparatorAlwaysShown( false );
// Shorter names get more tabs.
816 if ( name.length() < 8 ) {
817 line += ( name + "\t\t\t" );
819 else if ( name.length() < 16 ) {
820 line += ( name + "\t\t" );
823 line += ( name + "\t" );
// Groups of addToLine calls follow; each group appends the values in a different order.
827 line += addToLine( value1, df );
833 line += addToLine( value1, df );
835 line += addToLine( value2, df );
839 line += addToLine( value2, df );
841 line += addToLine( value1, df );
845 line += addToLine( value1, df );
848 line += addToLine( value2, df );
851 line += addToLine( value2, df );
854 line += addToLine( value1, df );
857 line += addToLine( value1, df );
859 line += addToLine( value2, df );
860 line += addToLine( value3, df );
863 line += addToLine( value1, df );
865 line += addToLine( value3, df );
866 line += addToLine( value2, df );
869 line += addToLine( value2, df );
871 line += addToLine( value1, df );
872 line += addToLine( value3, df );
875 line += addToLine( value3, df );
877 line += addToLine( value1, df );
878 line += addToLine( value2, df );
881 line += addToLine( value2, df );
883 line += addToLine( value3, df );
884 line += addToLine( value1, df );
887 line += addToLine( value3, df );
889 line += addToLine( value2, df );
890 line += addToLine( value1, df );
893 line += addToLine( value1, df );
894 line += addToLine( value2, df );
896 line += addToLine( value3, df );
899 line += addToLine( value1, df );
900 line += addToLine( value2, df );
901 line += addToLine( value3, df );
902 line += addToLine( value4, df );
905 line += addToLine( value1, df );
906 line += addToLine( value3, df );
907 line += addToLine( value2, df );
908 line += addToLine( value4, df );
911 line += addToLine( value2, df );
912 line += addToLine( value1, df );
913 line += addToLine( value3, df );
914 line += addToLine( value4, df );
917 line += addToLine( value3, df );
918 line += addToLine( value1, df );
919 line += addToLine( value4, df );
920 line += addToLine( value2, df );
923 line += addToLine( value1, df );
924 line += addToLine( value3, df );
925 line += addToLine( value4, df );
926 line += addToLine( value2, df );
929 line += addToLine( value1, df );
930 line += addToLine( value2, df );
931 line += addToLine( value4, df );
932 line += addToLine( value3, df );
935 line += addToLine( value1, df );
939 line += addToLine( value1, df );
940 line += addToLine( value2, df );
// Every report line ends with the platform line separator.
943 line += ForesterUtil.LINE_SEPARATOR;
947 // Helper for addNameAndValues.
948 private final static String addToLine( final double value, final java.text.DecimalFormat df ) {
// Values equal to Tuplet.DEFAULT are not formatted (the alternative branch is not visible in this listing).
950 if ( value != Tuplet.DEFAULT ) {
// Formatted value followed by a tab as column separator.
951 s = df.format( value ) + "\t";
// Collects one name per external node of phy: the sequence name when the node has a
// non-empty one, otherwise the node name. A node with neither is an error.
959 private static List<String> getAllExternalSequenceNames( final Phylogeny phy ) {
960 final List<String> names = new ArrayList<String>();
961 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
962 final PhylogenyNode n = iter.next();
// Preferred: the sequence name.
963 if ( n.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getName() ) ) {
964 names.add( n.getNodeData().getSequence().getName() );
// Fallback: the node name.
966 else if ( !ForesterUtil.isEmpty( n.getName() ) ) {
967 names.add( n.getName() );
970 throw new IllegalArgumentException( "node has no (sequence) name: " + n );
977 * Returns the order in which ortholog (o), "super ortholog" (s) and
978 * distance (d) are returned and sorted (priority of sort always goes from
979 * left to right), given sort. For the meaning of sort
981 * @see #inferredOrthologsToString(String,int,double,double)
984 * determines order and sort priority
985 * @return String indicating the order
// NOTE(review): the declaration of "order", the switch/case labels and the return are not
// visible in this listing. The descriptions appear in the same sequence as the numbered
// entries produced by getOrderHelp(), with "orthologies" repeated as the last assignment
// (presumably the default case -- TODO confirm).
987 public final static String getOrder( final int sort ) {
991 order = "orthologies";
994 order = "orthologies > super orthologies";
997 order = "super orthologies > orthologies";
1000 order = "orthologies > distance to query";
1003 order = "distance to query > orthologies";
1006 order = "orthologies > super orthologies > distance to query";
1009 order = "orthologies > distance to query > super orthologies";
1012 order = "super orthologies > orthologies > distance to query";
1015 order = "super orthologies > distance to query > orthologies";
1018 order = "distance to query > orthologies > super orthologies";
1021 order = "distance to query > super orthologies > orthologies";
1024 order = "orthologies > subtree neighbors > distance to query";
1027 order = "orthologies > subtree neighbors > super orthologies > distance to query";
1030 order = "orthologies > super orthologies > subtree neighbors > distance to query";
1033 order = "subtree neighbors > orthologies > super orthologies > distance to query";
1036 order = "subtree neighbors > distance to query > orthologies > super orthologies";
1039 order = "orthologies > distance to query > subtree neighbors > super orthologies";
1042 order = "orthologies > subtree neighbors > distance to query > super orthologies";
1045 order = "orthologies";
// Builds a help text with one line per sort code (0 to 17) and its sort order, each line
// terminated by ForesterUtil.LINE_SEPARATOR.
// NOTE(review): the return statement is not visible in this listing.
1051 public final static StringBuffer getOrderHelp() {
1052 final StringBuffer sb = new StringBuffer();
1053 sb.append( " 0: orthologies" + ForesterUtil.LINE_SEPARATOR );
1054 sb.append( " 1: orthologies > super orthologies" + ForesterUtil.LINE_SEPARATOR );
1055 sb.append( " 2: super orthologies > orthologies" + ForesterUtil.LINE_SEPARATOR );
1056 sb.append( " 3: orthologies > distance to query" + ForesterUtil.LINE_SEPARATOR );
1057 sb.append( " 4: distance to query > orthologies" + ForesterUtil.LINE_SEPARATOR );
1058 sb.append( " 5: orthologies > super orthologies > distance to query" + ForesterUtil.LINE_SEPARATOR );
1059 sb.append( " 6: orthologies > distance to query > super orthologies" + ForesterUtil.LINE_SEPARATOR );
1060 sb.append( " 7: super orthologies > orthologies > distance to query" + ForesterUtil.LINE_SEPARATOR );
1061 sb.append( " 8: super orthologies > distance to query > orthologies" + ForesterUtil.LINE_SEPARATOR );
1062 sb.append( " 9: distance to query > orthologies > super orthologies" + ForesterUtil.LINE_SEPARATOR );
1063 sb.append( " 10: distance to query > super orthologies > orthologies" + ForesterUtil.LINE_SEPARATOR );
1064 sb.append( " 11: orthologies > subtree neighbors > distance to query" + ForesterUtil.LINE_SEPARATOR );
1065 sb.append( " 12: orthologies > subtree neighbors > super orthologies > distance to query"
1066 + ForesterUtil.LINE_SEPARATOR );
1067 sb.append( " 13: orthologies > super orthologies > subtree neighbors > distance to query"
1068 + ForesterUtil.LINE_SEPARATOR );
1069 sb.append( " 14: subtree neighbors > orthologies > super orthologies > distance to query"
1070 + ForesterUtil.LINE_SEPARATOR );
1071 sb.append( " 15: subtree neighbors > distance to query > orthologies > super orthologies"
1072 + ForesterUtil.LINE_SEPARATOR );
1073 sb.append( " 16: orthologies > distance to query > subtree neighbors > super orthologies"
1074 + ForesterUtil.LINE_SEPARATOR );
1075 sb.append( " 17: orthologies > subtree neighbors > distance to query > super orthologies"
1076 + ForesterUtil.LINE_SEPARATOR );
1080 private final static List<PhylogenyNode> getSubtreeNeighbors( final PhylogenyNode query, final int level ) {
1081 PhylogenyNode node = query;
1082 if ( !node.isExternal() ) {
1085 if ( !node.isRoot() ) {
1086 node = node.getParent();
1089 if ( !node.isRoot() ) {
1090 node = node.getParent();
1094 throw new IllegalArgumentException( "currently only supporting level 2 subtree neighbors " );
1096 final List<PhylogenyNode> sn = node.getAllExternalDescendants();