From: cmzmasek Date: Sat, 22 Apr 2017 01:19:32 +0000 (-0700) Subject: in pprogress... X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=975cabe8898d1fa574380f5800c2d69d8f632b0d;p=jalview.git in pprogress... --- diff --git a/forester/java/src/org/forester/application/rio.java b/forester/java/src/org/forester/application/rio.java index 926441c..aa50639 100644 --- a/forester/java/src/org/forester/application/rio.java +++ b/forester/java/src/org/forester/application/rio.java @@ -42,8 +42,8 @@ public class rio { // public final static String PRG_NAME = "rio"; - public final static String PRG_VERSION = "5.000"; - public final static String PRG_DATE = "170411"; + public final static String PRG_VERSION = "5.900"; + public final static String PRG_DATE = "170420"; final static private String E_MAIL = "phyloxml@gmail.com"; final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"; final static private String HELP_OPTION_1 = "help"; @@ -344,7 +344,12 @@ public class rio { ForesterUtil.fatalError( e.getLocalizedMessage() ); } if ( perform_id_mapping ) { - System.out.println( "Id mappings in-dir :\t" + id_mapping_dir ); + try { + System.out.println( "Id mappings in-dir :\t" + id_mapping_dir.getCanonicalPath() ); + } + catch ( IOException e ) { + ForesterUtil.fatalError( e.getLocalizedMessage() ); + } System.out.println( "Id mappings suffix :\t" + id_mapping_suffix ); } if ( use_dir ) { @@ -515,6 +520,10 @@ public class rio { log.print( "\t" ); log.print( "0.95 O GROUPS" ); log.print( "\t" ); + if ( true ) { //TODO + log.print( "BEST TREE DUP" ); + log.print( "\t" ); + } log.print( "MEDIAN DUP" ); log.print( "\t" ); log.print( "MEAN DUP" ); @@ -650,7 +659,7 @@ public class rio { + "= : suffix for gene trees when operating on gene tree directories (default: " + GENE_TREES_SUFFIX_DEFAULT + ")" ); System.out.println( " -" + MAPPINGS_DIR_OPTION + "= : directory for id mapping files" ); - System.out.println( " -" + MAPPINGS_SUFFIX_OPTION + "= : suffix for id mapping files (default: " + System.out.println( " -" + MAPPINGS_SUFFIX_OPTION + "= : suffix for id mapping files (default: " + MAPPINGS_SUFFIX_DEFAULT + ")" ); System.out.println(); System.out.println( " Formats" ); @@ -667,7 +676,8 @@ public class rio { System.out.println( " rio gene_trees.nh species.xml outtable.tsv log.txt" ); System.out.println( " rio -c=0.9 -f=10 -l=100 -r=none gene_trees.xml species.xml outtable.tsv log.txt" ); System.out.println( " rio -g=.xml gene_trees_dir species.xml out_dir log.tsv" ); - System.out.println( " rio -g=.xml -m=mappings -ms=.nim gene_trees_dir species.xml out_dir log.tsv" ); + System.out.println( " rio -g=.mlt -m=id_maps_dir -ms=.nim -c=0.8 gene_trees_dir species.xml out_dir log.tsv" ); + System.out.println( " rio -m=id_maps_dir -c=0.8 gene_trees_dir species.xml out_dir log.tsv" ); System.out.println(); System.exit( -1 ); } diff --git a/forester/java/src/org/forester/phylogeny/Phylogeny.java b/forester/java/src/org/forester/phylogeny/Phylogeny.java index 4aefb5c..379a933 100644 --- a/forester/java/src/org/forester/phylogeny/Phylogeny.java +++ b/forester/java/src/org/forester/phylogeny/Phylogeny.java @@ -115,8 +115,8 @@ public class Phylogeny { new_node.setParent( sibling_parent ); sibling.setParent( new_node ); sibling_parent.setChildNode( sibling_index, new_node ); - final double new_dist = sibling.getDistanceToParent() == PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ? PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT - : sibling.getDistanceToParent() / 2; + final double new_dist = sibling.getDistanceToParent() == PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT + ? PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT : sibling.getDistanceToParent() / 2; new_node.setDistanceToParent( new_dist ); sibling.setDistanceToParent( new_dist ); externalNodesHaveChanged(); @@ -137,12 +137,12 @@ public class Phylogeny { else { double max = -Double.MAX_VALUE; for( int i = 0; i < n.getNumberOfDescendants(); ++i ) { - final double l = calculateSubtreeHeight( n.getChildNode( i ), take_collapse_into_account ); + final double l = calculateSubtreeHeight( n.getChildNode( i ), take_collapse_into_account ); if ( l > max ) { max = l; } } - return max + ( n.getDistanceToParent() > 0 ? n.getDistanceToParent() : 0); + return max + ( n.getDistanceToParent() > 0 ? n.getDistanceToParent() : 0 ); } } @@ -259,13 +259,17 @@ public class Phylogeny { if ( p.getNumberOfDescendants() == 2 ) { final int pi = p.getChildNodeIndex(); if ( removed_node.isFirstChildNode() ) { - p.getChildNode( 1 ).setDistanceToParent( PhylogenyMethods.addPhylogenyDistances( p - .getDistanceToParent(), p.getChildNode( 1 ).getDistanceToParent() ) ); + p.getChildNode( 1 ) + .setDistanceToParent( PhylogenyMethods.addPhylogenyDistances( p.getDistanceToParent(), + p.getChildNode( 1 ) + .getDistanceToParent() ) ); pp.setChildNode( pi, p.getChildNode( 1 ) ); } else { - p.getChildNode( 0 ).setDistanceToParent( PhylogenyMethods.addPhylogenyDistances( p - .getDistanceToParent(), p.getChildNode( 0 ).getDistanceToParent() ) ); + p.getChildNode( 0 ) + .setDistanceToParent( PhylogenyMethods.addPhylogenyDistances( p.getDistanceToParent(), + p.getChildNode( 0 ) + .getDistanceToParent() ) ); pp.setChildNode( pi, p.getChildNode( 0 ) ); } } @@ -320,7 +324,7 @@ public class Phylogeny { */ public List getExternalNodes() { if ( _external_nodes_set == null ) { - _external_nodes_set = new ArrayList(); + _external_nodes_set = new ArrayList<>(); for( final PhylogenyNodeIterator it = iteratorPostorder(); it.hasNext(); ) { final PhylogenyNode n = it.next(); if ( n.isExternal() ) { @@ -331,7 +335,6 @@ public class Phylogeny { return _external_nodes_set; } - /** * Returns the first external PhylogenyNode. */ @@ -352,7 +355,7 @@ public class Phylogeny { * * @return the height for rooted, tree-shaped phylogenies */ - public double calculateHeight(final boolean take_collapse_into_account) { + public double calculateHeight( final boolean take_collapse_into_account ) { if ( isEmpty() ) { return 0.0; } @@ -435,7 +438,7 @@ public class Phylogeny { if ( isEmpty() ) { return null; } - final List nodes = new ArrayList(); + final List nodes = new ArrayList<>(); for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) { final PhylogenyNode n = iter.next(); if ( n.getName().equals( name ) ) { @@ -449,7 +452,7 @@ public class Phylogeny { if ( isEmpty() ) { return null; } - final List nodes = new ArrayList(); + final List nodes = new ArrayList<>(); for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) { final PhylogenyNode n = iter.next(); if ( n.getNodeData().isHasSequence() && n.getNodeData().getSequence().getName().equals( seq_name ) ) { @@ -463,7 +466,7 @@ public class Phylogeny { if ( isEmpty() ) { return null; } - final List nodes = new ArrayList(); + final List nodes = new ArrayList<>(); for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) { final PhylogenyNode n = iter.next(); if ( n.getNodeData().isHasSequence() && n.getNodeData().getSequence().getSymbol().equals( seq_name ) ) { @@ -477,7 +480,7 @@ public class Phylogeny { if ( isEmpty() ) { return null; } - final List nodes = new ArrayList(); + final List nodes = new ArrayList<>(); for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) { final PhylogenyNode n = iter.next(); if ( n.getNodeData().isHasSequence() && n.getNodeData().getSequence().getGeneName().equals( seq_name ) ) { @@ -491,7 +494,7 @@ public class Phylogeny { if ( isEmpty() ) { return null; } - final List nodes = new ArrayList(); + final List nodes = new ArrayList<>(); for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) { final PhylogenyNode n = iter.next(); if ( n.getNodeData().isHasTaxonomy() @@ -516,7 +519,7 @@ public class Phylogeny { if ( isEmpty() ) { return null; } - final List nodes = new ArrayList(); + final List nodes = new ArrayList<>(); for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) { final PhylogenyNode n = iter.next(); if ( PhylogenyMethods.getSpecies( n ).equals( specname ) ) { @@ -615,8 +618,8 @@ public class Phylogeny { public List getParalogousNodes( final PhylogenyNode n, final String[] taxonomyCodeRange ) { PhylogenyNode node = n; PhylogenyNode prev = null; - final List v = new ArrayList(); - final Map> map = new HashMap>(); + final List v = new ArrayList<>(); + final Map> map = new HashMap<>(); getTaxonomyMap( getRoot(), map ); if ( !node.isExternal() || isEmpty() ) { return null; @@ -633,12 +636,12 @@ public class Phylogeny { taxIdList = map.get( node ); if ( node.isDuplication() && isContains( taxIdList, taxonomyCodeRangeList ) ) { if ( node.getChildNode1() == prev ) { - v.addAll( getNodeByTaxonomyID( searchNodeSpeciesId, node.getChildNode2() - .getAllExternalDescendants() ) ); + v.addAll( getNodeByTaxonomyID( searchNodeSpeciesId, + node.getChildNode2().getAllExternalDescendants() ) ); } else { - v.addAll( getNodeByTaxonomyID( searchNodeSpeciesId, node.getChildNode1() - .getAllExternalDescendants() ) ); + v.addAll( getNodeByTaxonomyID( searchNodeSpeciesId, + node.getChildNode1().getAllExternalDescendants() ) ); } } } @@ -647,7 +650,7 @@ public class Phylogeny { public Collection getRelevantSequenceRelationTypes() { if ( _relevant_sequence_relation_types == null ) { - _relevant_sequence_relation_types = new Vector(); + _relevant_sequence_relation_types = new Vector<>(); } return _relevant_sequence_relation_types; } @@ -702,6 +705,27 @@ public class Phylogeny { return true; } + public boolean isCompletelyBinaryAllow3ChildrenAtRoot() { + if ( isEmpty() ) { + return false; + } + for( final PhylogenyNodeIterator iter = iteratorPreorder(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + if ( node.isRoot() ) { + if ( node.isInternal() + && ( ( node.getNumberOfDescendants() != 2 ) && ( node.getNumberOfDescendants() != 3 ) ) ) { + return false; + } + } + else { + if ( node.isInternal() && ( node.getNumberOfDescendants() != 2 ) ) { + return false; + } + } + } + return true; + } + /** * Checks whether a Phylogeny object is deleted (or empty). * @@ -971,7 +995,7 @@ public class Phylogeny { } else { node.setDistanceToParent( ( c.getDistanceToParent() >= 0.0 ? c.getDistanceToParent() : 0.0 ) - + ( node.getDistanceToParent() >= 0.0 ? node.getDistanceToParent() : 0.0 ) ); + + ( node.getDistanceToParent() >= 0.0 ? node.getDistanceToParent() : 0.0 ) ); } if ( c.getBranchDataDirectly() != null ) { node.setBranchData( ( BranchData ) c.getBranchDataDirectly().copy() ); @@ -1164,7 +1188,7 @@ public class Phylogeny { * @return List node with the same taxonomy identifier */ private List getNodeByTaxonomyID( final String taxonomyID, final List nodes ) { - final List retour = new ArrayList(); + final List retour = new ArrayList<>(); for( final PhylogenyNode node : nodes ) { if ( taxonomyID.equals( PhylogenyMethods.getTaxonomyIdentifier( node ) ) ) { retour.add( node ); @@ -1182,7 +1206,7 @@ public class Phylogeny { * @return species contains in all leaf under the param node */ private List getSubNodeTaxonomy( final PhylogenyNode node ) { - final List taxonomyList = new ArrayList(); + final List taxonomyList = new ArrayList<>(); final List childs = node.getAllExternalDescendants(); String speciesId = null; for( final PhylogenyNode phylogenyNode : childs ) { diff --git a/forester/java/src/org/forester/rio/RIOUtil.java b/forester/java/src/org/forester/rio/RIOUtil.java index 8266047..03d2796 100644 --- a/forester/java/src/org/forester/rio/RIOUtil.java +++ b/forester/java/src/org/forester/rio/RIOUtil.java @@ -2,7 +2,6 @@ package org.forester.rio; import java.io.File; -import java.io.FilenameFilter; import java.io.IOException; import java.math.RoundingMode; import java.util.ArrayList; @@ -12,21 +11,31 @@ import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeSet; +import javax.swing.JOptionPane; + +import org.forester.archaeopteryx.AptxUtil; import org.forester.datastructures.IntMatrix; import org.forester.io.parsers.IteratingPhylogenyParser; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.nexus.NexusPhylogeniesParser; import org.forester.io.parsers.nhx.NHXParser; import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION; +import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlParser; import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; +import org.forester.phylogeny.PhylogenyMethods.DESCENDANT_SORT_PRIORITY; import org.forester.phylogeny.data.Sequence; +import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; +import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.rio.RIO.REROOTING; +import org.forester.sdi.GSDIR; import org.forester.sdi.SDIException; +import org.forester.sdi.SDIutil; import org.forester.sdi.SDIutil.ALGORITHM; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.BasicTable; @@ -122,6 +131,53 @@ public final class RIOUtil { else { m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true ); } + //////////////////////////////////////////// + //////////////////////////////////////////// + //TODO + final boolean perform_gsdir_on_best_tree = true; + final File best_trees_dir = new File( "best_trees" ); + final String best_trees_suffix = ".xml"; + final GSDIR gsdir_for_best_tree; + if ( perform_gsdir_on_best_tree ) { + final Phylogeny best_tree = obtainTree( best_trees_dir, gene_trees_file.getName(), best_trees_suffix ); + final Phylogeny species_tree = SDIutil + .parseSpeciesTree( best_tree, species_tree_file, false, true, TAXONOMY_EXTRACTION.NO ); + PhylogenyMethods.deleteInternalNodesWithOnlyOneDescendent( species_tree ); + best_tree.setRooted( true ); + species_tree.setRooted( true ); + if ( !best_tree.isCompletelyBinaryAllow3ChildrenAtRoot() ) { + throw new IOException( "gene tree matching to [" + + ForesterUtil.removeFileExtension( gene_trees_file.getName() ) + + "] is not completely binary" ); + } + final PhylogenyNodeIterator it = best_tree.iteratorExternalForward(); + while ( it.hasNext() ) { + final PhylogenyNode n = it.next(); + final String name = n.getName().trim(); + if ( !ForesterUtil.isEmpty( name ) ) { + try { + ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE ); + } + catch ( final PhyloXmlDataFormatException e ) { + // Ignore. + } + } + } + gsdir_for_best_tree = new GSDIR( best_tree, species_tree, true, true, true ); + final Phylogeny result_gene_tree = gsdir_for_best_tree.getMinDuplicationsSumGeneTree(); + System.out.println( gsdir_for_best_tree.getMinDuplicationsSum() ); + result_gene_tree.setRerootable( false ); + PhylogenyMethods.orderAppearance( result_gene_tree.getRoot(), + true, + true, + DESCENDANT_SORT_PRIORITY.NODE_NAME ); + writeTree( result_gene_tree, new File( gene_trees_file.getName() + "____.xml" ), null, id_map ); + } + else { + gsdir_for_best_tree = null; + } + //////////////////////////////////////////// + //////////////////////////////////////////// final BasicDescriptiveStatistics stats = rio.getDuplicationsStatistics(); if ( perform_id_mapping ) { writeOrthologyTable( orthology_outtable, stats.getN(), m, !use_gene_trees_dir, id_map, true ); @@ -529,64 +585,15 @@ public final class RIOUtil { final String prefix, final String suffix ) throws IOException { - if ( !dir.exists() ) { - throw new IOException( "[" + dir + "] does not exist" ); - } - if ( !dir.isDirectory() ) { - throw new IOException( "[" + dir + "] is not a directory" ); - } - final File mapping_files[] = dir.listFiles( new FilenameFilter() { - - @Override - public boolean accept( final File dir, final String name ) { - return ( name.endsWith( suffix ) ); - } - } ); - if ( mapping_files.length == 1 ) { - throw new IOException( "no files ending with \"" + suffix + "\" found in [" + dir + "]" ); - } - String my_prefix = ForesterUtil.removeFileExtension( prefix ); - boolean done = false; - boolean more_than_one = false; - File the_one = null; - do { - int matches = 0; - for( File file : mapping_files ) { - if ( file.getName().startsWith( my_prefix ) ) { - matches++; - if ( matches > 1 ) { - the_one = null; - break; - } - the_one = file; - } - } - if ( matches > 1 ) { - more_than_one = true; - done = true; - } - if ( matches == 1 ) { - done = true; - } - else { - if ( my_prefix.length() <= 1 ) { - throw new IOException( "no file matching \"" + ForesterUtil.removeFileExtension( prefix ) - + "\" and ending with \"" + suffix + "\" found in [" + dir + "]" ); - } - my_prefix = my_prefix.substring( 0, my_prefix.length() - 1 ); - } - } while ( !done ); - if ( more_than_one ) { - throw new IOException( "multiple files matching \"" + ForesterUtil.removeFileExtension( prefix ) - + "\" and ending with \"" + suffix + "\" found in [" + dir + "]" ); - } - else if ( the_one != null ) { - } - else { - throw new IOException( "no file matching \"" + ForesterUtil.removeFileExtension( prefix ) - + "\" and ending with \"" + suffix + "\" found in [" + dir + "]" ); - } + final File the_one = ForesterUtil.getMatchingFile( dir, prefix, suffix ); final BasicTable t = BasicTableParser.parse( the_one, '\t' ); return t.getColumnsAsMap( 0, 1 ); } + + private final static Phylogeny obtainTree( final File dir, final String prefix, final String suffix ) + throws IOException { + final File the_one = ForesterUtil.getMatchingFile( dir, prefix, suffix ); + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + return factory.create( the_one, PhyloXmlParser.createPhyloXmlParserXsdValidating() )[ 0 ]; + } } diff --git a/forester/java/src/org/forester/util/ForesterUtil.java b/forester/java/src/org/forester/util/ForesterUtil.java index 8fb4c68..8f0744e 100644 --- a/forester/java/src/org/forester/util/ForesterUtil.java +++ b/forester/java/src/org/forester/util/ForesterUtil.java @@ -34,6 +34,7 @@ import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.FileReader; import java.io.FileWriter; +import java.io.FilenameFilter; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; @@ -1508,4 +1509,66 @@ public final class ForesterUtil { private ForesterUtil() { } + + public final static File getMatchingFile( final File dir, final String prefix, final String suffix ) + throws IOException { + if ( !dir.exists() ) { + throw new IOException( "[" + dir + "] does not exist" ); + } + if ( !dir.isDirectory() ) { + throw new IOException( "[" + dir + "] is not a directory" ); + } + final File mapping_files[] = dir.listFiles( new FilenameFilter() { + + @Override + public boolean accept( final File dir, final String name ) { + return ( name.endsWith( suffix ) ); + } + } ); + if ( mapping_files.length == 1 ) { + throw new IOException( "no files ending with \"" + suffix + "\" found in [" + dir + "]" ); + } + String my_prefix = removeFileExtension( prefix ); + boolean done = false; + boolean more_than_one = false; + File the_one = null; + do { + int matches = 0; + for( File file : mapping_files ) { + if ( file.getName().startsWith( my_prefix ) ) { + matches++; + if ( matches > 1 ) { + the_one = null; + break; + } + the_one = file; + } + } + if ( matches > 1 ) { + more_than_one = true; + done = true; + } + if ( matches == 1 ) { + done = true; + } + else { + if ( my_prefix.length() <= 1 ) { + throw new IOException( "no file matching \"" + removeFileExtension( prefix ) + + "\" and ending with \"" + suffix + "\" found in [" + dir + "]" ); + } + my_prefix = my_prefix.substring( 0, my_prefix.length() - 1 ); + } + } while ( !done ); + if ( more_than_one ) { + throw new IOException( "multiple files matching \"" + removeFileExtension( prefix ) + + "\" and ending with \"" + suffix + "\" found in [" + dir + "]" ); + } + else if ( the_one != null ) { + } + else { + throw new IOException( "no file matching \"" + removeFileExtension( prefix ) + "\" and ending with \"" + + suffix + "\" found in [" + dir + "]" ); + } + return the_one; + } }