private HashMap<String, HashMap<String, Integer>> _so_maps;
private HashMap<String, HashMap<String, Integer>> _up_maps;
private List<String> _seq_names;
+ private List<PhylogenyNode> _removed_gene_tree_nodes;
private int _samples;
private int _ext_nodes;
* Default constructor.
* @throws SDIException
* @throws IOException
- * @throws RioException
+ * @throws RIOException
*/
public RIO( final File gene_trees_file, final Phylogeny species_tree, final String query ) throws IOException,
- SDIException, RioException {
+ SDIException, RIOException {
if ( ForesterUtil.isEmpty( query ) ) {
throw new IllegalArgumentException( "query is empty" );
}
}
// Constructor variant without a query sequence: runs init() and then
// inferOrthologs() with a null query.
public RIO( final File gene_trees_file, final Phylogeny species_tree ) throws IOException, SDIException,
- RioException {
+ RIOException {
init();
// null query: inferOrthologs() guards its query-specific steps with
// !ForesterUtil.isEmpty( query ), so they are presumably skipped here
// -- TODO confirm ForesterUtil.isEmpty() treats null as empty.
inferOrthologs( gene_trees_file, species_tree, null );
}
* the sequence name of the sequence whose orthologs are to be
* inferred
* @throws SDIException
- * @throws RioException
+ * @throws RIOException
* @throws IOException
* @throws FileNotFoundException
*/
private final void inferOrthologs( final File gene_trees_file, final Phylogeny species_tree, final String query )
- throws SDIException, RioException, FileNotFoundException, IOException {
+ throws SDIException, RIOException, FileNotFoundException, IOException {
// Read in first tree to get its sequence names
// and strip species_tree.
final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
}
final Phylogeny[] gene_trees = factory.create( gene_trees_file, p );
// Removes from species_tree all species not found in gene_tree.
- PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( gene_trees[ 0 ], species_tree );
+ // Assign the instance field rather than declaring a same-named local:
+ // a local here would shadow the field and getRemovedGeneTreeNodes()
+ // would keep returning the never-assigned field.
+ _removed_gene_tree_nodes = PhylogenyMethods
+ .taxonomyBasedDeletionOfExternalNodes( gene_trees[ 0 ], species_tree );
if ( species_tree.isEmpty() ) {
- throw new RioException( "failed to establish species based mapping between gene and species trees" );
+ throw new RIOException( "failed to establish species based mapping between gene and species trees" );
}
if ( !ForesterUtil.isEmpty( query ) ) {
PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gene_trees[ 0 ] );
if ( gene_trees[ 0 ].isEmpty() ) {
- throw new RioException( "failed to establish species based mapping between gene and species trees" );
+ throw new RIOException( "failed to establish species based mapping between gene and species trees" );
}
_seq_names = getAllExternalSequenceNames( gene_trees[ 0 ] );
if ( ( _seq_names == null ) || ( _seq_names.size() < 1 ) ) {
- throw new RioException( "could not get sequence names" );
+ throw new RIOException( "could not get sequence names" );
}
_o_maps = new HashMap<String, HashMap<String, Integer>>();
_so_maps = new HashMap<String, HashMap<String, Integer>>();
}
_analyzed_gene_trees = new Phylogeny[ gene_trees.length ];
int c = 0;
+ int gene_tree_ext_nodes = 0;
for( final Phylogeny gt : gene_trees ) {
// Removes from gene_tree all species not found in species_tree.
PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gt );
if ( gt.isEmpty() ) {
- throw new RioException( "failed to establish species based mapping between gene and species trees" );
+ throw new RIOException( "failed to establish species based mapping between gene and species trees" );
}
- _analyzed_gene_trees[ c++ ] = inferOrthologsHelper( gt, species_tree, query );
+ if ( c == 0 ) {
+ gene_tree_ext_nodes = gt.getNumberOfExternalNodes();
+ }
+ else if ( gene_tree_ext_nodes != gt.getNumberOfExternalNodes() ) {
+ throw new RIOException( "(cleaned up) gene tree #" + ( c + 1 )
+ + " has a different number of external nodes (" + gt.getNumberOfExternalNodes()
+ + ") than those gene trees preceding it (" + gene_tree_ext_nodes + ")" );
+ }
+ _analyzed_gene_trees[ c++ ] = performOrthologInference( gt, species_tree, query );
}
setNumberOfSamples( gene_trees.length );
}
- // Helper method which performs the actual ortholog inference for
- // the external node with seqname query.
- private final Phylogeny inferOrthologsHelper( final Phylogeny gene_tree,
- final Phylogeny species_tree,
- final String query ) throws SDIException, RioException {
+ private final Phylogeny performOrthologInference( final Phylogeny gene_tree,
+ final Phylogeny species_tree,
+ final String query ) throws SDIException, RIOException {
final SDIR sdiunrooted = new SDIR();
final Phylogeny assigned_tree = sdiunrooted.infer( gene_tree,
species_tree,
if ( !ForesterUtil.isEmpty( query ) ) {
final List<PhylogenyNode> nodes = getNodesViaSequenceName( assigned_tree, query );
if ( nodes.size() > 1 ) {
- throw new RioException( "node named [" + query + "] not unique" );
+ throw new RIOException( "node named [" + query + "] not unique" );
}
else if ( nodes.isEmpty() ) {
- throw new RioException( "no node containing a sequence named [" + query + "] found" );
+ throw new RIOException( "no node containing a sequence named [" + query + "] found" );
}
final PhylogenyNode query_node = nodes.get( 0 );
updateCounts( _o_maps, query, PhylogenyMethods.getOrthologousNodes( assigned_tree, query_node ) );
}
}
- public final static IntMatrix calculateOrthologTable( final Phylogeny[] analyzed_gene_trees ) throws RioException {
+ public final static IntMatrix calculateOrthologTable( final Phylogeny[] analyzed_gene_trees ) throws RIOException {
final List<String> labels = new ArrayList<String>();
final Set<String> labels_set = new HashSet<String>();
String label;
final String mx = m.getLabel( x );
final PhylogenyNode nx = map.get( mx );
if ( nx == null ) {
- throw new RioException( "node \"" + mx + "\" not present in gene tree #" + counter );
+ throw new RIOException( "node \"" + mx + "\" not present in gene tree #" + counter );
}
+ String my;
+ PhylogenyNode ny;
for( int y = 0; y < m.size(); ++y ) {
- final String my = m.getLabel( y );
- final PhylogenyNode ny = map.get( my );
+ my = m.getLabel( y );
+ ny = map.get( my );
if ( ny == null ) {
- throw new RioException( "node \"" + my + "\" not present in gene tree #" + counter );
+ throw new RIOException( "node \"" + my + "\" not present in gene tree #" + counter );
}
if ( !PhylogenyMethods.calculateLCAonTreeWithIdsInPreOrder( nx, ny ).isDuplication() ) {
m.inreaseByOne( x, y );
return s;
}
- private final static List<String> getAllExternalSequenceNames( final Phylogeny phy ) throws RioException {
+ private final static List<String> getAllExternalSequenceNames( final Phylogeny phy ) throws RIOException {
final List<String> names = new ArrayList<String>();
for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
final PhylogenyNode n = iter.next();
names.add( n.getName() );
}
else {
- throw new RioException( "node has no (sequence) name: " + n );
+ throw new RIOException( "node has no (sequence) name: " + n );
}
}
return names;
return nodes;
}
+ /**
+ * Returns the list currently held in the {@code _removed_gene_tree_nodes} field.
+ *
+ * @return the field's value as-is (no copy is made); {@code null} if the
+ * field has not been assigned
+ */
+ public final List<PhylogenyNode> getRemovedGeneTreeNodes() {
+ return this._removed_gene_tree_nodes;
+ }
+
private final class ResultLine implements Comparable<ResultLine> {
public static final int DEFAULT = -999;