- /**
- * Returns the time (in ms) needed to run "inferOrthologs". Final variable
- * TIME needs to be set to true.
- *
- * @return time (in ms) needed to run method "inferOrthologs"
- */
- public long getTime() {
- return _time;
- }
-
- /**
- * Infers the orthologs (as well the "super orthologs", the "subtree
- * neighbors", and the "ultra paralogs") for each external node of the gene
- * Trees in multiple tree File gene_trees_file (=output of PHYLIP NEIGHBOR,
- * for example). Tallies how many times each sequence is (super-)
- * orthologous towards the query. Tallies how many times each sequence is
- * ultra paralogous towards the query. Tallies how many times each sequence
- * is a subtree neighbor of the query. Gene duplications are inferred using
- * SDI. Modifies its argument species_tree. Is a little faster than
- * "inferOrthologs(File,Phylogeny)" since orthologs are only inferred for
- * query.
- * <p>
- * To obtain the results use the methods listed below.
- *
- * @param gene_trees_file
- * a File containing gene Trees in NH format, which is the result
- * of performing a bootstrap analysis in PHYLIP
- * @param species_tree
- * a species Phylogeny, which has species names in its species
- * fields
- * @param query
- * the sequence name of the squence whose orthologs are to be
- * inferred
- * @throws SDIException
- */
- public void inferOrthologs( final File gene_trees_file, final Phylogeny species_tree, final String query )
- throws IOException, SDIException {
- int bs = 0;
- if ( RIO.TIME ) {
- _time = System.currentTimeMillis();
- }
- // Read in first tree to get its sequence names
- // and strip species_tree.
- final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
- final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( gene_trees_file, true );
- if ( p instanceof NHXParser ) {
- final NHXParser nhx = ( NHXParser ) p;
- nhx.setReplaceUnderscores( false );
- nhx.setIgnoreQuotes( true );
- nhx.setTaxonomyExtraction( PhylogenyMethods.TAXONOMY_EXTRACTION.YES );
- }
- final Phylogeny gene_tree = factory.create( gene_trees_file, p )[ 0 ];
- System.out.println( "species " + species_tree.toString() );
- // Removes from species_tree all species not found in gene_tree.
- PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( gene_tree, species_tree );
- PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gene_tree );
- _seq_names = getAllExternalSequenceNames( gene_tree );
- if ( ( _seq_names == null ) || ( _seq_names.size() < 1 ) ) {
- throw new IOException( "could not get sequence names" );
- }
- _o_hash_maps = new HashMap<String, HashMap<String, Integer>>();
- _so_hash_maps = new HashMap<String, HashMap<String, Integer>>();
- _up_hash_maps = new HashMap<String, HashMap<String, Integer>>();
- _o_hash_maps.put( query, new HashMap<String, Integer>( _seq_names.size() ) );
- _so_hash_maps.put( query, new HashMap<String, Integer>( _seq_names.size() ) );
- _up_hash_maps.put( query, new HashMap<String, Integer>( _seq_names.size() ) );
- // Go through all gene trees in the file.
- final Phylogeny[] gene_trees = factory.create( gene_trees_file, p );
- final Phylogeny[] assigned_trees = new Phylogeny[ gene_trees.length ];
- int c = 0;
- for( final Phylogeny gt : gene_trees ) {
- bs++;
- // Removes from gene_tree all species not found in species_tree.
- PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gt );
- assigned_trees[ c++ ] = inferOrthologsHelper( gt, species_tree, query );
- // System.out.println( bs );
- }
- final IntMatrix m = calculateOrthologTable( assigned_trees );
- System.out.println( m.toString() );
- setNumberOfSamples( bs );
- if ( RIO.TIME ) {
- _time = ( System.currentTimeMillis() - _time );
- }
- }
-
- public List<PhylogenyNode> getNodesViaSequenceName( final Phylogeny phy, final String seq_name ) {
- final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
- for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
- final PhylogenyNode n = iter.next();
- if ( n.getNodeData().isHasSequence() && n.getNodeData().getSequence().getName().equals( seq_name ) ) {
- nodes.add( n );
- }
- if ( !n.getNodeData().isHasSequence() && n.getName().equals( seq_name ) ) {
- nodes.add( n );
- }
- }
- return nodes;
- }
-
- // Helper method which performs the actual ortholog inference for
- // the external node with seqname query.
- private Phylogeny inferOrthologsHelper( final Phylogeny gene_tree, final Phylogeny species_tree, final String query )
- throws SDIException {
- Phylogeny assigned_tree = null;
- List<PhylogenyNode> nodes = null;
- final SDIR sdiunrooted = new SDIR();
- List<PhylogenyNode> orthologs = null;
- List<PhylogenyNode> super_orthologs = null;
- List<PhylogenyNode> ultra_paralogs = null;
- assigned_tree = sdiunrooted.infer( gene_tree,
- species_tree,
- RIO.ROOT_BY_MINIMIZING_MAPPING_COST,
- RIO.ROOT_BY_MINIMIZING_SUM_OF_DUPS,
- RIO.ROOT_BY_MINIMIZING_TREE_HEIGHT,
- true,
- 1 )[ 0 ];
- setExtNodesOfAnalyzedGeneTrees( assigned_tree.getNumberOfExternalNodes() );
- nodes = getNodesViaSequenceName( assigned_tree, query );
- if ( nodes.size() > 1 ) {
- throw new IllegalArgumentException( "node named [" + query + "] not unique" );
- }
- else if ( nodes.isEmpty() ) {
- throw new IllegalArgumentException( "no node containing a sequence named [" + query + "] found" );
- }
- final PhylogenyNode query_node = nodes.get( 0 );
- orthologs = PhylogenyMethods.getOrthologousNodes( assigned_tree, query_node );
- updateHash( _o_hash_maps, query, orthologs );
- super_orthologs = PhylogenyMethods.getSuperOrthologousNodes( query_node );
- updateHash( _so_hash_maps, query, super_orthologs );
- ultra_paralogs = PhylogenyMethods.getUltraParalogousNodes( query_node );
- updateHash( _up_hash_maps, query, ultra_paralogs );
- return assigned_tree;
- }
-
- /**
- * Returns an ArrayList containing the names of orthologs of the PhylogenyNode
- * with seq name seq_name.
- *
- * @param seq_name
- * sequence name of an external node of the gene trees
- * @param threshold_orthologs
- * the minimal number of observations for a sequence to be
- * reported as orthologous as percentage (0.0-100.0%)
- * @return ArrayList containing the names of orthologs of the PhylogenyNode
- * with seq name seq_name
- */
- public ArrayList<String> inferredOrthologsToArrayList( final String seq_name, double threshold_orthologs ) {
- HashMap<String, Integer> o_hashmap = null;
- String name = null;
- double o = 0.0;
- final ArrayList<String> arraylist = new ArrayList<String>();
- if ( _o_hash_maps == null ) {
- throw new RuntimeException( "Orthologs have not been calculated (successfully)." );
- }
- if ( threshold_orthologs < 0.0 ) {
- threshold_orthologs = 0.0;
- }
- else if ( threshold_orthologs > 100.0 ) {
- threshold_orthologs = 100.0;
- }
- o_hashmap = getInferredOrthologs( seq_name );
- if ( o_hashmap == null ) {
- throw new RuntimeException( "Orthologs for " + seq_name + " were not established." );
- }
- if ( _seq_names.size() > 0 ) {
- I: for( int i = 0; i < _seq_names.size(); ++i ) {
- name = _seq_names.get( i );
- if ( name.equals( seq_name ) ) {
- continue I;
- }
- o = getBootstrapValueFromHash( o_hashmap, name );
- if ( o < threshold_orthologs ) {
- continue I;
- }
- arraylist.add( name );
- }
- }
- return arraylist;