//
public final static String PRG_NAME = "rio";
- public final static String PRG_VERSION = "5.000";
- public final static String PRG_DATE = "170411";
+ public final static String PRG_VERSION = "5.900";
+ public final static String PRG_DATE = "170420";
final static private String E_MAIL = "phyloxml@gmail.com";
final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester";
final static private String HELP_OPTION_1 = "help";
ForesterUtil.fatalError( e.getLocalizedMessage() );
}
if ( perform_id_mapping ) {
- System.out.println( "Id mappings in-dir :\t" + id_mapping_dir );
+ try {
+ System.out.println( "Id mappings in-dir :\t" + id_mapping_dir.getCanonicalPath() );
+ }
+ catch ( IOException e ) {
+ ForesterUtil.fatalError( e.getLocalizedMessage() );
+ }
System.out.println( "Id mappings suffix :\t" + id_mapping_suffix );
}
if ( use_dir ) {
log.print( "\t" );
log.print( "0.95 O GROUPS" );
log.print( "\t" );
+ if ( true ) { //TODO
+ log.print( "BEST TREE DUP" );
+ log.print( "\t" );
+ }
log.print( "MEDIAN DUP" );
log.print( "\t" );
log.print( "MEAN DUP" );
+ "=<suffix> : suffix for gene trees when operating on gene tree directories (default: "
+ GENE_TREES_SUFFIX_DEFAULT + ")" );
System.out.println( " -" + MAPPINGS_DIR_OPTION + "=<dir> : directory for id mapping files" );
- System.out.println( " -" + MAPPINGS_SUFFIX_OPTION + "=<suffix> : suffix for id mapping files (default: "
+ System.out.println( " -" + MAPPINGS_SUFFIX_OPTION + "=<suffix> : suffix for id mapping files (default: "
+ MAPPINGS_SUFFIX_DEFAULT + ")" );
System.out.println();
System.out.println( " Formats" );
System.out.println( " rio gene_trees.nh species.xml outtable.tsv log.txt" );
System.out.println( " rio -c=0.9 -f=10 -l=100 -r=none gene_trees.xml species.xml outtable.tsv log.txt" );
System.out.println( " rio -g=.xml gene_trees_dir species.xml out_dir log.tsv" );
- System.out.println( " rio -g=.xml -m=mappings -ms=.nim gene_trees_dir species.xml out_dir log.tsv" );
+ System.out.println( " rio -g=.mlt -m=id_maps_dir -ms=.nim -c=0.8 gene_trees_dir species.xml out_dir log.tsv" );
+ System.out.println( " rio -m=id_maps_dir -c=0.8 gene_trees_dir species.xml out_dir log.tsv" );
System.out.println();
System.exit( -1 );
}
new_node.setParent( sibling_parent );
sibling.setParent( new_node );
sibling_parent.setChildNode( sibling_index, new_node );
- final double new_dist = sibling.getDistanceToParent() == PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ? PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT
- : sibling.getDistanceToParent() / 2;
+ final double new_dist = sibling.getDistanceToParent() == PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT
+ ? PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT : sibling.getDistanceToParent() / 2;
new_node.setDistanceToParent( new_dist );
sibling.setDistanceToParent( new_dist );
externalNodesHaveChanged();
else {
double max = -Double.MAX_VALUE;
for( int i = 0; i < n.getNumberOfDescendants(); ++i ) {
- final double l = calculateSubtreeHeight( n.getChildNode( i ), take_collapse_into_account );
+ final double l = calculateSubtreeHeight( n.getChildNode( i ), take_collapse_into_account );
if ( l > max ) {
max = l;
}
}
- return max + ( n.getDistanceToParent() > 0 ? n.getDistanceToParent() : 0);
+ return max + ( n.getDistanceToParent() > 0 ? n.getDistanceToParent() : 0 );
}
}
if ( p.getNumberOfDescendants() == 2 ) {
final int pi = p.getChildNodeIndex();
if ( removed_node.isFirstChildNode() ) {
- p.getChildNode( 1 ).setDistanceToParent( PhylogenyMethods.addPhylogenyDistances( p
- .getDistanceToParent(), p.getChildNode( 1 ).getDistanceToParent() ) );
+ p.getChildNode( 1 )
+ .setDistanceToParent( PhylogenyMethods.addPhylogenyDistances( p.getDistanceToParent(),
+ p.getChildNode( 1 )
+ .getDistanceToParent() ) );
pp.setChildNode( pi, p.getChildNode( 1 ) );
}
else {
- p.getChildNode( 0 ).setDistanceToParent( PhylogenyMethods.addPhylogenyDistances( p
- .getDistanceToParent(), p.getChildNode( 0 ).getDistanceToParent() ) );
+ p.getChildNode( 0 )
+ .setDistanceToParent( PhylogenyMethods.addPhylogenyDistances( p.getDistanceToParent(),
+ p.getChildNode( 0 )
+ .getDistanceToParent() ) );
pp.setChildNode( pi, p.getChildNode( 0 ) );
}
}
*/
public List<PhylogenyNode> getExternalNodes() {
if ( _external_nodes_set == null ) {
- _external_nodes_set = new ArrayList<PhylogenyNode>();
+ _external_nodes_set = new ArrayList<>();
for( final PhylogenyNodeIterator it = iteratorPostorder(); it.hasNext(); ) {
final PhylogenyNode n = it.next();
if ( n.isExternal() ) {
return _external_nodes_set;
}
-
/**
* Returns the first external PhylogenyNode.
*/
*
* @return the height for rooted, tree-shaped phylogenies
*/
- public double calculateHeight(final boolean take_collapse_into_account) {
+ public double calculateHeight( final boolean take_collapse_into_account ) {
if ( isEmpty() ) {
return 0.0;
}
if ( isEmpty() ) {
return null;
}
- final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
+ final List<PhylogenyNode> nodes = new ArrayList<>();
for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) {
final PhylogenyNode n = iter.next();
if ( n.getName().equals( name ) ) {
if ( isEmpty() ) {
return null;
}
- final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
+ final List<PhylogenyNode> nodes = new ArrayList<>();
for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) {
final PhylogenyNode n = iter.next();
if ( n.getNodeData().isHasSequence() && n.getNodeData().getSequence().getName().equals( seq_name ) ) {
if ( isEmpty() ) {
return null;
}
- final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
+ final List<PhylogenyNode> nodes = new ArrayList<>();
for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) {
final PhylogenyNode n = iter.next();
if ( n.getNodeData().isHasSequence() && n.getNodeData().getSequence().getSymbol().equals( seq_name ) ) {
if ( isEmpty() ) {
return null;
}
- final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
+ final List<PhylogenyNode> nodes = new ArrayList<>();
for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) {
final PhylogenyNode n = iter.next();
if ( n.getNodeData().isHasSequence() && n.getNodeData().getSequence().getGeneName().equals( seq_name ) ) {
if ( isEmpty() ) {
return null;
}
- final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
+ final List<PhylogenyNode> nodes = new ArrayList<>();
for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) {
final PhylogenyNode n = iter.next();
if ( n.getNodeData().isHasTaxonomy()
if ( isEmpty() ) {
return null;
}
- final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
+ final List<PhylogenyNode> nodes = new ArrayList<>();
for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) {
final PhylogenyNode n = iter.next();
if ( PhylogenyMethods.getSpecies( n ).equals( specname ) ) {
public List<PhylogenyNode> getParalogousNodes( final PhylogenyNode n, final String[] taxonomyCodeRange ) {
PhylogenyNode node = n;
PhylogenyNode prev = null;
- final List<PhylogenyNode> v = new ArrayList<PhylogenyNode>();
- final Map<PhylogenyNode, List<String>> map = new HashMap<PhylogenyNode, List<String>>();
+ final List<PhylogenyNode> v = new ArrayList<>();
+ final Map<PhylogenyNode, List<String>> map = new HashMap<>();
getTaxonomyMap( getRoot(), map );
if ( !node.isExternal() || isEmpty() ) {
return null;
taxIdList = map.get( node );
if ( node.isDuplication() && isContains( taxIdList, taxonomyCodeRangeList ) ) {
if ( node.getChildNode1() == prev ) {
- v.addAll( getNodeByTaxonomyID( searchNodeSpeciesId, node.getChildNode2()
- .getAllExternalDescendants() ) );
+ v.addAll( getNodeByTaxonomyID( searchNodeSpeciesId,
+ node.getChildNode2().getAllExternalDescendants() ) );
}
else {
- v.addAll( getNodeByTaxonomyID( searchNodeSpeciesId, node.getChildNode1()
- .getAllExternalDescendants() ) );
+ v.addAll( getNodeByTaxonomyID( searchNodeSpeciesId,
+ node.getChildNode1().getAllExternalDescendants() ) );
}
}
}
public Collection<SequenceRelation.SEQUENCE_RELATION_TYPE> getRelevantSequenceRelationTypes() {
if ( _relevant_sequence_relation_types == null ) {
- _relevant_sequence_relation_types = new Vector<SEQUENCE_RELATION_TYPE>();
+ _relevant_sequence_relation_types = new Vector<>();
}
return _relevant_sequence_relation_types;
}
return true;
}
+    /**
+     * Checks whether every internal node of this phylogeny has exactly two
+     * descendants, with the single exception of the root, which is allowed
+     * to have either two or three.
+     *
+     * @return false if this phylogeny is empty or if any internal node
+     *         breaks the rule above; true otherwise
+     */
+    public boolean isCompletelyBinaryAllow3ChildrenAtRoot() {
+        if ( isEmpty() ) {
+            return false;
+        }
+        final PhylogenyNodeIterator nodes = iteratorPreorder();
+        while ( nodes.hasNext() ) {
+            final PhylogenyNode current = nodes.next();
+            if ( !current.isInternal() ) {
+                // Only internal nodes are subject to the descendant count rule.
+                continue;
+            }
+            final int children = current.getNumberOfDescendants();
+            final boolean acceptable = ( children == 2 ) || ( current.isRoot() && ( children == 3 ) );
+            if ( !acceptable ) {
+                return false;
+            }
+        }
+        return true;
+    }
+
/**
* Checks whether a Phylogeny object is deleted (or empty).
*
}
else {
node.setDistanceToParent( ( c.getDistanceToParent() >= 0.0 ? c.getDistanceToParent() : 0.0 )
- + ( node.getDistanceToParent() >= 0.0 ? node.getDistanceToParent() : 0.0 ) );
+ + ( node.getDistanceToParent() >= 0.0 ? node.getDistanceToParent() : 0.0 ) );
}
if ( c.getBranchDataDirectly() != null ) {
node.setBranchData( ( BranchData ) c.getBranchDataDirectly().copy() );
* @return List node with the same taxonomy identifier
*/
private List<PhylogenyNode> getNodeByTaxonomyID( final String taxonomyID, final List<PhylogenyNode> nodes ) {
- final List<PhylogenyNode> retour = new ArrayList<PhylogenyNode>();
+ final List<PhylogenyNode> retour = new ArrayList<>();
for( final PhylogenyNode node : nodes ) {
if ( taxonomyID.equals( PhylogenyMethods.getTaxonomyIdentifier( node ) ) ) {
retour.add( node );
* @return species contains in all leaf under the param node
*/
private List<String> getSubNodeTaxonomy( final PhylogenyNode node ) {
- final List<String> taxonomyList = new ArrayList<String>();
+ final List<String> taxonomyList = new ArrayList<>();
final List<PhylogenyNode> childs = node.getAllExternalDescendants();
String speciesId = null;
for( final PhylogenyNode phylogenyNode : childs ) {
package org.forester.rio;
import java.io.File;
-import java.io.FilenameFilter;
import java.io.IOException;
import java.math.RoundingMode;
import java.util.ArrayList;
import java.util.SortedSet;
import java.util.TreeSet;
+import javax.swing.JOptionPane;
+
+import org.forester.archaeopteryx.AptxUtil;
import org.forester.datastructures.IntMatrix;
import org.forester.io.parsers.IteratingPhylogenyParser;
import org.forester.io.parsers.PhylogenyParser;
import org.forester.io.parsers.nexus.NexusPhylogeniesParser;
import org.forester.io.parsers.nhx.NHXParser;
import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
+import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.io.parsers.phyloxml.PhyloXmlParser;
import org.forester.io.parsers.util.ParserUtils;
import org.forester.io.writers.PhylogenyWriter;
import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyMethods;
import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.PhylogenyMethods.DESCENDANT_SORT_PRIORITY;
import org.forester.phylogeny.data.Sequence;
+import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
+import org.forester.phylogeny.factories.PhylogenyFactory;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.rio.RIO.REROOTING;
+import org.forester.sdi.GSDIR;
import org.forester.sdi.SDIException;
+import org.forester.sdi.SDIutil;
import org.forester.sdi.SDIutil.ALGORITHM;
import org.forester.util.BasicDescriptiveStatistics;
import org.forester.util.BasicTable;
else {
m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true );
}
+ ////////////////////////////////////////////
+ ////////////////////////////////////////////
+ //TODO
+ final boolean perform_gsdir_on_best_tree = true;
+ final File best_trees_dir = new File( "best_trees" );
+ final String best_trees_suffix = ".xml";
+ final GSDIR gsdir_for_best_tree;
+ if ( perform_gsdir_on_best_tree ) {
+ final Phylogeny best_tree = obtainTree( best_trees_dir, gene_trees_file.getName(), best_trees_suffix );
+ final Phylogeny species_tree = SDIutil
+ .parseSpeciesTree( best_tree, species_tree_file, false, true, TAXONOMY_EXTRACTION.NO );
+ PhylogenyMethods.deleteInternalNodesWithOnlyOneDescendent( species_tree );
+ best_tree.setRooted( true );
+ species_tree.setRooted( true );
+ if ( !best_tree.isCompletelyBinaryAllow3ChildrenAtRoot() ) {
+ throw new IOException( "gene tree matching to ["
+ + ForesterUtil.removeFileExtension( gene_trees_file.getName() )
+ + "] is not completely binary" );
+ }
+ final PhylogenyNodeIterator it = best_tree.iteratorExternalForward();
+ while ( it.hasNext() ) {
+ final PhylogenyNode n = it.next();
+ final String name = n.getName().trim();
+ if ( !ForesterUtil.isEmpty( name ) ) {
+ try {
+ ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE );
+ }
+ catch ( final PhyloXmlDataFormatException e ) {
+ // Ignore.
+ }
+ }
+ }
+ gsdir_for_best_tree = new GSDIR( best_tree, species_tree, true, true, true );
+ final Phylogeny result_gene_tree = gsdir_for_best_tree.getMinDuplicationsSumGeneTree();
+ System.out.println( gsdir_for_best_tree.getMinDuplicationsSum() );
+ result_gene_tree.setRerootable( false );
+ PhylogenyMethods.orderAppearance( result_gene_tree.getRoot(),
+ true,
+ true,
+ DESCENDANT_SORT_PRIORITY.NODE_NAME );
+ writeTree( result_gene_tree, new File( gene_trees_file.getName() + "____.xml" ), null, id_map );
+ }
+ else {
+ gsdir_for_best_tree = null;
+ }
+ ////////////////////////////////////////////
+ ////////////////////////////////////////////
final BasicDescriptiveStatistics stats = rio.getDuplicationsStatistics();
if ( perform_id_mapping ) {
writeOrthologyTable( orthology_outtable, stats.getN(), m, !use_gene_trees_dir, id_map, true );
final String prefix,
final String suffix )
throws IOException {
- if ( !dir.exists() ) {
- throw new IOException( "[" + dir + "] does not exist" );
- }
- if ( !dir.isDirectory() ) {
- throw new IOException( "[" + dir + "] is not a directory" );
- }
- final File mapping_files[] = dir.listFiles( new FilenameFilter() {
-
- @Override
- public boolean accept( final File dir, final String name ) {
- return ( name.endsWith( suffix ) );
- }
- } );
- if ( mapping_files.length == 1 ) {
- throw new IOException( "no files ending with \"" + suffix + "\" found in [" + dir + "]" );
- }
- String my_prefix = ForesterUtil.removeFileExtension( prefix );
- boolean done = false;
- boolean more_than_one = false;
- File the_one = null;
- do {
- int matches = 0;
- for( File file : mapping_files ) {
- if ( file.getName().startsWith( my_prefix ) ) {
- matches++;
- if ( matches > 1 ) {
- the_one = null;
- break;
- }
- the_one = file;
- }
- }
- if ( matches > 1 ) {
- more_than_one = true;
- done = true;
- }
- if ( matches == 1 ) {
- done = true;
- }
- else {
- if ( my_prefix.length() <= 1 ) {
- throw new IOException( "no file matching \"" + ForesterUtil.removeFileExtension( prefix )
- + "\" and ending with \"" + suffix + "\" found in [" + dir + "]" );
- }
- my_prefix = my_prefix.substring( 0, my_prefix.length() - 1 );
- }
- } while ( !done );
- if ( more_than_one ) {
- throw new IOException( "multiple files matching \"" + ForesterUtil.removeFileExtension( prefix )
- + "\" and ending with \"" + suffix + "\" found in [" + dir + "]" );
- }
- else if ( the_one != null ) {
- }
- else {
- throw new IOException( "no file matching \"" + ForesterUtil.removeFileExtension( prefix )
- + "\" and ending with \"" + suffix + "\" found in [" + dir + "]" );
- }
+ final File the_one = ForesterUtil.getMatchingFile( dir, prefix, suffix );
final BasicTable<String> t = BasicTableParser.parse( the_one, '\t' );
return t.getColumnsAsMap( 0, 1 );
}
+
+    /**
+     * Reads the first phylogeny from the file in {@code dir} that is selected
+     * by {@code ForesterUtil.getMatchingFile( dir, prefix, suffix )}.
+     * The file is parsed with the parser returned by
+     * {@code PhyloXmlParser.createPhyloXmlParserXsdValidating()}.
+     *
+     * @param dir    the directory to look in
+     * @param prefix the name whose leading part the file name has to match
+     * @param suffix the ending the file name must have
+     * @return the first phylogeny found in the matching file
+     * @throws IOException if no single matching file can be determined, if
+     *                     reading fails, or if the file yields no phylogeny
+     */
+    private final static Phylogeny obtainTree( final File dir, final String prefix, final String suffix )
+            throws IOException {
+        final File the_one = ForesterUtil.getMatchingFile( dir, prefix, suffix );
+        final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+        final Phylogeny[] trees = factory.create( the_one, PhyloXmlParser.createPhyloXmlParserXsdValidating() );
+        // Report a file without any phylogeny as an I/O problem instead of
+        // letting an ArrayIndexOutOfBoundsException escape from "[ 0 ]".
+        if ( ( trees == null ) || ( trees.length < 1 ) ) {
+            throw new IOException( "no phylogeny found in [" + the_one + "]" );
+        }
+        return trees[ 0 ];
+    }
}
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
+import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
private ForesterUtil() {
}
+
+    /**
+     * Finds the single file in a directory whose name ends with
+     * {@code suffix} and starts with {@code prefix} (file extension removed).
+     * <p>
+     * If no file name starts with the full prefix, the prefix is shortened
+     * by one character at a time from its end and the search is repeated,
+     * until exactly one file matches, several files match, or the prefix
+     * cannot be shortened any further.
+     *
+     * @param dir    the directory to search
+     * @param prefix the name to match; its file extension is removed first
+     * @param suffix the ending candidate file names must have
+     * @return the one matching file, never null
+     * @throws IOException if {@code dir} does not exist, is not a directory
+     *                     or cannot be listed, if no file has the suffix,
+     *                     or if zero or more than one file matches the
+     *                     prefix
+     */
+    public final static File getMatchingFile( final File dir, final String prefix, final String suffix )
+            throws IOException {
+        if ( !dir.exists() ) {
+            throw new IOException( "[" + dir + "] does not exist" );
+        }
+        if ( !dir.isDirectory() ) {
+            throw new IOException( "[" + dir + "] is not a directory" );
+        }
+        final File mapping_files[] = dir.listFiles( new FilenameFilter() {
+
+            @Override
+            public boolean accept( final File dir, final String name ) {
+                return ( name.endsWith( suffix ) );
+            }
+        } );
+        // listFiles() yields null when the directory cannot be read.
+        if ( mapping_files == null ) {
+            throw new IOException( "could not list files in [" + dir + "]" );
+        }
+        // An empty result (not a result of size one) means nothing has the suffix.
+        if ( mapping_files.length == 0 ) {
+            throw new IOException( "no files ending with \"" + suffix + "\" found in [" + dir + "]" );
+        }
+        final String full_prefix = removeFileExtension( prefix );
+        String my_prefix = full_prefix;
+        while ( true ) {
+            int matches = 0;
+            File the_one = null;
+            for( final File file : mapping_files ) {
+                if ( file.getName().startsWith( my_prefix ) ) {
+                    ++matches;
+                    the_one = file;
+                }
+            }
+            if ( matches == 1 ) {
+                return the_one;
+            }
+            if ( matches > 1 ) {
+                // Ambiguity is reported as such, regardless of prefix length.
+                throw new IOException( "multiple files matching \"" + full_prefix + "\" and ending with \"" + suffix
+                        + "\" found in [" + dir + "]" );
+            }
+            if ( my_prefix.length() <= 1 ) {
+                throw new IOException( "no file matching \"" + full_prefix + "\" and ending with \"" + suffix
+                        + "\" found in [" + dir + "]" );
+            }
+            // Nothing matched: retry with the prefix shortened by one character.
+            my_prefix = my_prefix.substring( 0, my_prefix.length() - 1 );
+        }
+    }
}