From c0aa43dc8998eb9497855ca17c5b866625107703 Mon Sep 17 00:00:00 2001 From: cmzmasek Date: Wed, 10 May 2017 16:57:31 -0700 Subject: [PATCH] in progress... --- .../src/org/forester/archaeopteryx/TreePanel.java | 10 - .../org/forester/phylogeny/PhylogenyMethods.java | 70 ++-- .../surfacing/MinimalDomainomeCalculator.java | 245 +++++++++++++ .../surfacing/MinimalDomainomeCalculatorOLD.java | 378 ++++++++++++++++++++ forester/java/src/org/forester/test/Test.java | 78 ++++ 5 files changed, 736 insertions(+), 45 deletions(-) create mode 100644 forester/java/src/org/forester/surfacing/MinimalDomainomeCalculatorOLD.java diff --git a/forester/java/src/org/forester/archaeopteryx/TreePanel.java b/forester/java/src/org/forester/archaeopteryx/TreePanel.java index 9bc89cc..75ef09b 100644 --- a/forester/java/src/org/forester/archaeopteryx/TreePanel.java +++ b/forester/java/src/org/forester/archaeopteryx/TreePanel.java @@ -1297,16 +1297,6 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee return _last_drag_point_y; } - final private short getMaxBranchesToLeaf( final PhylogenyNode node ) { - if ( !_nodeid_dist_to_leaf.containsKey( node.getId() ) ) { - final short m = PhylogenyMethods.calculateMaxBranchesToLeaf( node ); - _nodeid_dist_to_leaf.put( node.getId(), m ); - return m; - } - else { - return _nodeid_dist_to_leaf.get( node.getId() ); - } - } final private double getMaxDistanceToRoot() { if ( _max_distance_to_root < 0 ) { diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java index 150177b..9f73532 100644 --- a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java +++ b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java @@ -151,6 +151,30 @@ public class PhylogenyMethods { } /** + * For external nodes the level is 0. + * + * @param node + * @return + */ + public static int calculateLevel( final PhylogenyNode node ) { + if ( node.isExternal() ) { + return 0; + } + int level = 0; + for( PhylogenyNode ext : node.getAllExternalDescendants() ) { + int counter = 0; + while ( ext != node ) { + ext = ext.getParent(); + ++counter; + } + if ( counter > level ) { + level = counter; + } + } + return level; + } + + /** * Calculates the distance between PhylogenyNodes node1 and node2. * * @@ -237,28 +261,7 @@ public class PhylogenyMethods { return node1; } - public static short calculateMaxBranchesToLeaf( final PhylogenyNode node ) { - if ( node.isExternal() ) { - return 0; - } - short max = 0; - for( PhylogenyNode d : node.getAllExternalDescendants() ) { - short steps = 0; - while ( d != node ) { - if ( d.isCollapse() ) { - steps = 0; - } - else { - steps++; - } - d = d.getParent(); - } - if ( max < steps ) { - max = steps; - } - } - return max; - } + public static int calculateMaxDepth( final Phylogeny phy ) { int max = 0; @@ -271,7 +274,7 @@ public class PhylogenyMethods { } return max; } - + public static String[] obtainPresentRanksSorted( final Phylogeny phy ) { final Set present_ranks = new HashSet(); for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) { @@ -284,12 +287,12 @@ public class PhylogenyMethods { } } } - final String ordered_ranks[] = new String[present_ranks.size() + 1]; + final String ordered_ranks[] = new String[ present_ranks.size() + 1 ]; int c = 0; for( final String rank : TaxonomyUtil.RANKS ) { - if ( present_ranks.contains( rank ) ) { - ordered_ranks[ c++ ] = rank; - } + if ( present_ranks.contains( rank ) ) { + ordered_ranks[ c++ ] = rank; + } } ordered_ranks[ c ] = "off"; return ordered_ranks; @@ -1543,7 +1546,8 @@ public class PhylogenyMethods { return nodes_to_delete; } - final static public void transferInternalNamesToConfidenceValues( final Phylogeny phy, final String confidence_type ) { + final static public void transferInternalNamesToConfidenceValues( final Phylogeny phy, + final String confidence_type ) { final PhylogenyNodeIterator it = phy.iteratorPostorder(); while ( it.hasNext() ) { final PhylogenyNode n = it.next(); @@ -2058,7 +2062,7 @@ public class PhylogenyMethods { } public final static void collapseToDepth( final Phylogeny phy, final int depth ) { - if ( phy.getNumberOfExternalNodes() < 3 ) { + if ( phy.getNumberOfExternalNodes() < 3 ) { return; } collapseToDepthHelper( phy.getRoot(), 0, depth ); @@ -2086,8 +2090,6 @@ public class PhylogenyMethods { } } - - public final static void collapseToRank( final Phylogeny phy, final int rank ) { if ( phy.getNumberOfExternalNodes() < 3 ) { return; @@ -2112,7 +2114,6 @@ public class PhylogenyMethods { else { if ( TaxonomyUtil.RANK_TO_INT.get( current_rank ) >= target_rank ) { n.setCollapse( true ); - final PhylogenyNodeIterator it = new PreorderTreeIterator( n ); while ( it.hasNext() ) { it.next().setCollapse( true ); @@ -2127,7 +2128,7 @@ public class PhylogenyMethods { collapseToRankHelper( desc, target_rank ); } } - + public final static PhylogenyNode getFirstExternalNode( final PhylogenyNode node ) { PhylogenyNode n = node; while ( n.isInternal() ) { @@ -2135,7 +2136,7 @@ public class PhylogenyMethods { } return n; } - + public final static PhylogenyNode getLastExternalNode( final PhylogenyNode node ) { PhylogenyNode n = node; while ( n.isInternal() ) { @@ -2153,5 +2154,4 @@ public class PhylogenyMethods { } return false; } - } diff --git a/forester/java/src/org/forester/surfacing/MinimalDomainomeCalculator.java b/forester/java/src/org/forester/surfacing/MinimalDomainomeCalculator.java index ab82419..8c25cd3 100644 --- a/forester/java/src/org/forester/surfacing/MinimalDomainomeCalculator.java +++ b/forester/java/src/org/forester/surfacing/MinimalDomainomeCalculator.java @@ -19,6 +19,7 @@ import java.util.TreeSet; import org.forester.application.surfacing; import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.protein.Domain; @@ -236,6 +237,250 @@ public final class MinimalDomainomeCalculator { } } + public final static void calcNEW( final boolean use_domain_architectures, + final Phylogeny tre, + final int level, + final SortedMap> protein_lists_per_species, + final String separator, + final double ie_cutoff, + final String outfile_base, + final boolean write_protein_files ) + throws IOException { + final SortedMap> species_to_features_map = new TreeMap>(); + if ( protein_lists_per_species == null || tre == null ) { + throw new IllegalArgumentException( "argument is null" ); + } + if ( protein_lists_per_species.size() < 2 ) { + throw new IllegalArgumentException( "not enough genomes" ); + } + final String x; + if ( use_domain_architectures ) { + x = "DA"; + } + else { + x = "domain"; + } + final File outfile = new File( outfile_base + "_minimal_" + x + "ome.tsv" ); + final File outfile_table = new File( outfile_base + "_minimal_" + x + "ome_matrix.tsv" ); + SurfacingUtil.checkForOutputFileWriteability( outfile ); + SurfacingUtil.checkForOutputFileWriteability( outfile_table ); + final BufferedWriter out = new BufferedWriter( new FileWriter( outfile ) ); + final BufferedWriter out_table = new BufferedWriter( new FileWriter( outfile_table ) ); + out.write( "SPECIES\tCOMMON NAME\tCODE\tRANK\t#EXT NODES\tEXT NODE CODES\t#" + x + "\t" + x + "" ); + out.write( ForesterUtil.LINE_SEPARATOR ); + /////////// + ////////// + SortedMap> protein_lists_per_quasi_species = null; + if ( level >= 1 ) { + protein_lists_per_quasi_species = makeProteinListsPerQuasiSpecies( tre, level, protein_lists_per_species ); + } + ///////// + /////////// + for( final PhylogenyNodeIterator iter = tre.iteratorPostorder(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + final String species_name = node.getNodeData().isHasTaxonomy() + ? node.getNodeData().getTaxonomy().getScientificName() : node.getName(); + final String common = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy().getCommonName() + : ""; + final String tcode = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy().getTaxonomyCode() + : ""; + final String rank = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy().getRank() : ""; + out.write( species_name ); + if ( !ForesterUtil.isEmpty( common ) ) { + out.write( "\t" + common ); + } + else { + out.write( "\t" ); + } + if ( !ForesterUtil.isEmpty( tcode ) ) { + out.write( "\t" + tcode ); + } + else { + out.write( "\t" ); + } + if ( !ForesterUtil.isEmpty( rank ) ) { + out.write( "\t" + rank ); + } + else { + out.write( "\t" ); + } + final List external_descs = node.getAllExternalDescendants(); + if ( node.isInternal() ) { + out.write( "\t" + external_descs.size() + "\t" ); + } + else { + out.write( "\t\t" ); + } + final List> features_per_genome_list = new ArrayList>(); + boolean first = true; + for( final PhylogenyNode external_desc : external_descs ) { + final String code = external_desc.getNodeData().getTaxonomy().getTaxonomyCode(); + if ( node.isInternal() ) { + if ( first ) { + first = false; + } + else { + out.write( ", " ); + } + out.write( code ); + } + final List proteins_per_species = protein_lists_per_species.get( new BasicSpecies( code ) ); + final int node_level = PhylogenyMethods.calculateLevel( node ); + final List proteins_per_quasi_species = protein_lists_per_species + .get( new BasicSpecies( code ) ); + if ( proteins_per_species != null ) { + final SortedSet features_per_genome = new TreeSet(); + for( final Protein protein : proteins_per_species ) { + if ( use_domain_architectures ) { + final String da = protein.toDomainArchitectureString( separator, ie_cutoff ); + features_per_genome.add( da ); + } + else { + List domains = protein.getProteinDomains(); + for( final Domain domain : domains ) { + if ( ( ie_cutoff <= -1 ) || ( domain.getPerDomainEvalue() <= ie_cutoff ) ) { + features_per_genome.add( domain.getDomainId() ); + } + } + } + } + if ( features_per_genome.size() > 0 ) { + features_per_genome_list.add( features_per_genome ); + } + } + } // for( final PhylogenyNode external_desc : external_descs ) + if ( features_per_genome_list.size() > 0 ) { + SortedSet intersection = calcIntersection( features_per_genome_list ); + out.write( "\t" + intersection.size() + "\t" ); + first = true; + for( final String s : intersection ) { + if ( first ) { + first = false; + } + else { + out.write( ", " ); + } + out.write( s ); + } + out.write( ForesterUtil.LINE_SEPARATOR ); + species_to_features_map.put( species_name, intersection ); + } + } + final SortedSet all_species_names = new TreeSet(); + final SortedSet all_features = new TreeSet(); + for( final Entry> e : species_to_features_map.entrySet() ) { + all_species_names.add( e.getKey() ); + for( final String f : e.getValue() ) { + all_features.add( f ); + } + } + out_table.write( '\t' ); + boolean first = true; + for( final String species_name : all_species_names ) { + if ( first ) { + first = false; + } + else { + out_table.write( '\t' ); + } + out_table.write( species_name ); + } + out_table.write( ForesterUtil.LINE_SEPARATOR ); + for( final String das : all_features ) { + out_table.write( das ); + out_table.write( '\t' ); + first = true; + for( final String species_name : all_species_names ) { + if ( first ) { + first = false; + } + else { + out_table.write( '\t' ); + } + if ( species_to_features_map.get( species_name ).contains( das ) ) { + out_table.write( '1' ); + } + else { + out_table.write( '0' ); + } + } + out_table.write( ForesterUtil.LINE_SEPARATOR ); + } + out.flush(); + out.close(); + out_table.flush(); + out_table.close(); + ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote minimal DAome data to : " + outfile ); + ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote minimal DAome data to (as table): " + outfile_table ); + if ( write_protein_files ) { + final String protdirname; + final String a; + final String b; + if ( use_domain_architectures ) { + a = "_DA"; + b = "domain architectures (DAs)"; + protdirname = "_DAS"; + } + else { + a = "_domain"; + b = "domains"; + protdirname = "_DOMAINS"; + } + final File prot_dir = new File( outfile_base + protdirname ); + final boolean success = prot_dir.mkdir(); + if ( !success ) { + throw new IOException( "failed to create dir " + prot_dir ); + } + int total = 0; + final String dir = outfile_base + protdirname + "/"; + for( final String feat : all_features ) { + final File extract_outfile = new File( dir + feat + a + surfacing.SEQ_EXTRACT_SUFFIX ); + SurfacingUtil.checkForOutputFileWriteability( extract_outfile ); + final Writer proteins_file_writer = new BufferedWriter( new FileWriter( extract_outfile ) ); + final int counter = extractProteinFeatures( use_domain_architectures, + protein_lists_per_species, + feat, + proteins_file_writer, + ie_cutoff, + separator ); + if ( counter < 1 ) { + ForesterUtil.printWarningMessage( "surfacing", feat + " not present (in " + b + " extraction)" ); + } + total += counter; + proteins_file_writer.close(); + } + ForesterUtil.programMessage( "surfacing", + "Wrote " + total + " individual " + b + " from a total of " + + all_features.size() + " into: " + dir ); + } + } + + private final static SortedMap> makeProteinListsPerQuasiSpecies( final Phylogeny tre, + final int level, + final SortedMap> protein_lists_per_species ) { + final SortedMap> protein_lists_per_quasi_species = new TreeMap>(); + for( final PhylogenyNodeIterator iter = tre.iteratorPostorder(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + final int node_level = PhylogenyMethods.calculateLevel( node ); + if ( node_level == level ) { + final List external_descs = node.getAllExternalDescendants(); + final List protein_list_per_quasi_species = new ArrayList(); + for( final PhylogenyNode external_desc : external_descs ) { + final String code = external_desc.getNodeData().getTaxonomy().getTaxonomyCode(); + final List proteins_per_species = protein_lists_per_species + .get( new BasicSpecies( code ) ); + for( Protein protein : proteins_per_species ) { + protein_list_per_quasi_species.add( protein ); + } + } + final String species_name = node.getNodeData().isHasTaxonomy() + ? node.getNodeData().getTaxonomy().getScientificName() : node.getName(); + protein_lists_per_quasi_species.put( species_name, protein_list_per_quasi_species ); + } + } + return protein_lists_per_quasi_species; + } + private final static SortedSet calcIntersection( final List> features_per_genome_list ) { final Set first = features_per_genome_list.get( 0 ); final SortedSet my_first = new TreeSet(); diff --git a/forester/java/src/org/forester/surfacing/MinimalDomainomeCalculatorOLD.java b/forester/java/src/org/forester/surfacing/MinimalDomainomeCalculatorOLD.java new file mode 100644 index 0000000..d306c0f --- /dev/null +++ b/forester/java/src/org/forester/surfacing/MinimalDomainomeCalculatorOLD.java @@ -0,0 +1,378 @@ + +package org.forester.surfacing; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.Writer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Map.Entry; +import java.util.Set; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; + +import org.forester.application.surfacing; +import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyNode; +import org.forester.phylogeny.iterators.PhylogenyNodeIterator; +import org.forester.protein.Domain; +import org.forester.protein.Protein; +import org.forester.species.BasicSpecies; +import org.forester.species.Species; +import org.forester.surfacing.SurfacingUtil.DomainComparator; +import org.forester.util.ForesterUtil; + +public final class MinimalDomainomeCalculatorOLD { + + public final static void calc( final boolean use_domain_architectures, + final Phylogeny tre, + final SortedMap> protein_lists_per_species, + final String separator, + final double ie_cutoff, + final String outfile_base, + final boolean write_protein_files ) + throws IOException { + final SortedMap> species_to_features_map = new TreeMap>(); + if ( protein_lists_per_species == null || tre == null ) { + throw new IllegalArgumentException( "argument is null" ); + } + if ( protein_lists_per_species.size() < 2 ) { + throw new IllegalArgumentException( "not enough genomes" ); + } + final String x; + if ( use_domain_architectures ) { + x = "DA"; + } + else { + x = "domain"; + } + final File outfile = new File( outfile_base + "_minimal_" + x + "ome.tsv" ); + final File outfile_table = new File( outfile_base + "_minimal_" + x + "ome_matrix.tsv" ); + SurfacingUtil.checkForOutputFileWriteability( outfile ); + SurfacingUtil.checkForOutputFileWriteability( outfile_table ); + final BufferedWriter out = new BufferedWriter( new FileWriter( outfile ) ); + final BufferedWriter out_table = new BufferedWriter( new FileWriter( outfile_table ) ); + out.write( "SPECIES\tCOMMON NAME\tCODE\tRANK\t#EXT NODES\tEXT NODE CODES\t#" + x + "\t" + x + "" ); + out.write( ForesterUtil.LINE_SEPARATOR ); + for( final PhylogenyNodeIterator iter = tre.iteratorPostorder(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + final String species_name = node.getNodeData().isHasTaxonomy() + ? node.getNodeData().getTaxonomy().getScientificName() : node.getName(); + final String common = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy().getCommonName() + : ""; + final String tcode = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy().getTaxonomyCode() + : ""; + final String rank = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy().getRank() : ""; + out.write( species_name ); + if ( !ForesterUtil.isEmpty( common ) ) { + out.write( "\t" + common ); + } + else { + out.write( "\t" ); + } + if ( !ForesterUtil.isEmpty( tcode ) ) { + out.write( "\t" + tcode ); + } + else { + out.write( "\t" ); + } + if ( !ForesterUtil.isEmpty( rank ) ) { + out.write( "\t" + rank ); + } + else { + out.write( "\t" ); + } + final List external_descs = node.getAllExternalDescendants(); + if ( node.isInternal() ) { + out.write( "\t" + external_descs.size() + "\t" ); + } + else { + out.write( "\t\t" ); + } + final List> features_per_genome_list = new ArrayList>(); + boolean first = true; + for( final PhylogenyNode external_desc : external_descs ) { + final String code = external_desc.getNodeData().getTaxonomy().getTaxonomyCode(); + if ( node.isInternal() ) { + if ( first ) { + first = false; + } + else { + out.write( ", " ); + } + out.write( code ); + } + final List proteins_per_species = protein_lists_per_species.get( new BasicSpecies( code ) ); + if ( proteins_per_species != null ) { + final SortedSet features_per_genome = new TreeSet(); + for( final Protein protein : proteins_per_species ) { + if ( use_domain_architectures ) { + final String da = protein.toDomainArchitectureString( separator, ie_cutoff ); + features_per_genome.add( da ); + } + else { + List domains = protein.getProteinDomains(); + for( final Domain domain : domains ) { + if ( ( ie_cutoff <= -1 ) || ( domain.getPerDomainEvalue() <= ie_cutoff ) ) { + features_per_genome.add( domain.getDomainId() ); + } + } + } + } + if ( features_per_genome.size() > 0 ) { + features_per_genome_list.add( features_per_genome ); + } + } + } + if ( features_per_genome_list.size() > 0 ) { + SortedSet intersection = calcIntersection( features_per_genome_list ); + out.write( "\t" + intersection.size() + "\t" ); + first = true; + for( final String s : intersection ) { + if ( first ) { + first = false; + } + else { + out.write( ", " ); + } + out.write( s ); + } + out.write( ForesterUtil.LINE_SEPARATOR ); + species_to_features_map.put( species_name, intersection ); + } + } + final SortedSet all_species_names = new TreeSet(); + final SortedSet all_features = new TreeSet(); + for( final Entry> e : species_to_features_map.entrySet() ) { + all_species_names.add( e.getKey() ); + for( final String f : e.getValue() ) { + all_features.add( f ); + } + } + out_table.write( '\t' ); + boolean first = true; + for( final String species_name : all_species_names ) { + if ( first ) { + first = false; + } + else { + out_table.write( '\t' ); + } + out_table.write( species_name ); + } + out_table.write( ForesterUtil.LINE_SEPARATOR ); + for( final String das : all_features ) { + out_table.write( das ); + out_table.write( '\t' ); + first = true; + for( final String species_name : all_species_names ) { + if ( first ) { + first = false; + } + else { + out_table.write( '\t' ); + } + if ( species_to_features_map.get( species_name ).contains( das ) ) { + out_table.write( '1' ); + } + else { + out_table.write( '0' ); + } + } + out_table.write( ForesterUtil.LINE_SEPARATOR ); + } + out.flush(); + out.close(); + out_table.flush(); + out_table.close(); + ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote minimal DAome data to : " + outfile ); + ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote minimal DAome data to (as table): " + outfile_table ); + if ( write_protein_files ) { + final String protdirname; + final String a; + final String b; + if ( use_domain_architectures ) { + a = "_DA"; + b = "domain architectures (DAs)"; + protdirname = "_DAS"; + } + else { + a = "_domain"; + b = "domains"; + protdirname = "_DOMAINS"; + } + final File prot_dir = new File( outfile_base + protdirname ); + final boolean success = prot_dir.mkdir(); + if ( !success ) { + throw new IOException( "failed to create dir " + prot_dir ); + } + int total = 0; + final String dir = outfile_base + protdirname + "/"; + for( final String feat : all_features ) { + final File extract_outfile = new File( dir + feat + a + surfacing.SEQ_EXTRACT_SUFFIX ); + SurfacingUtil.checkForOutputFileWriteability( extract_outfile ); + final Writer proteins_file_writer = new BufferedWriter( new FileWriter( extract_outfile ) ); + final int counter = extractProteinFeatures( use_domain_architectures, + protein_lists_per_species, + feat, + proteins_file_writer, + ie_cutoff, + separator ); + if ( counter < 1 ) { + ForesterUtil.printWarningMessage( "surfacing", feat + " not present (in " + b + " extraction)" ); + } + total += counter; + proteins_file_writer.close(); + } + ForesterUtil.programMessage( "surfacing", + "Wrote " + total + " individual " + b + " from a total of " + + all_features.size() + " into: " + dir ); + } + } + + private final static SortedSet calcIntersection( final List> features_per_genome_list ) { + final Set first = features_per_genome_list.get( 0 ); + final SortedSet my_first = new TreeSet(); + for( final String s : first ) { + my_first.add( s ); + } + for( int i = 1; i < features_per_genome_list.size(); ++i ) { + my_first.retainAll( features_per_genome_list.get( i ) ); + } + return my_first; + } + + private final static int extractProteinFeatures( final boolean use_domain_architectures, + final SortedMap> protein_lists_per_species, + final String domain_id, + final Writer out, + final double ie_cutoff, + final String domain_separator ) + throws IOException { + int counter = 0; + final String separator_for_output = "\t"; + for( final Species species : protein_lists_per_species.keySet() ) { + final List proteins_per_species = protein_lists_per_species.get( species ); + for( final Protein protein : proteins_per_species ) { + if ( use_domain_architectures ) { + if ( domain_id.equals( protein.toDomainArchitectureString( domain_separator, ie_cutoff ) ) ) { + int from = Integer.MAX_VALUE; + int to = -1; + for( final Domain d : protein.getProteinDomains() ) { + if ( ( ie_cutoff <= -1 ) || ( d.getPerDomainEvalue() <= ie_cutoff ) ) { + if ( d.getFrom() < from ) { + from = d.getFrom(); + } + if ( d.getTo() > to ) { + to = d.getTo(); + } + } + } + out.write( protein.getSpecies().getSpeciesId() ); + out.write( separator_for_output ); + out.write( protein.getProteinId().getId() ); + out.write( separator_for_output ); + out.write( domain_id ); + out.write( separator_for_output ); + out.write( "/" ); + out.write( from + "-" + to ); + out.write( "/" ); + out.write( SurfacingConstants.NL ); + ++counter; + } + } + else { + final List domains = protein.getProteinDomains( domain_id ); + if ( domains.size() > 0 ) { + out.write( protein.getSpecies().getSpeciesId() ); + out.write( separator_for_output ); + out.write( protein.getProteinId().getId() ); + out.write( separator_for_output ); + out.write( domain_id ); + out.write( separator_for_output ); + for( final Domain domain : domains ) { + if ( ( ie_cutoff < 0 ) || ( domain.getPerDomainEvalue() <= ie_cutoff ) ) { + out.write( "/" ); + out.write( domain.getFrom() + "-" + domain.getTo() ); + } + } + out.write( "/" ); + out.write( separator_for_output ); + final List domain_list = new ArrayList(); + for( final Domain domain : protein.getProteinDomains() ) { + if ( ( ie_cutoff < 0 ) || ( domain.getPerDomainEvalue() <= ie_cutoff ) ) { + domain_list.add( domain ); + } + } + final Domain domain_ary[] = new Domain[ domain_list.size() ]; + for( int i = 0; i < domain_list.size(); ++i ) { + domain_ary[ i ] = domain_list.get( i ); + } + Arrays.sort( domain_ary, new DomainComparator( true ) ); + out.write( "{" ); + boolean first = true; + for( final Domain domain : domain_ary ) { + if ( first ) { + first = false; + } + else { + out.write( "," ); + } + out.write( domain.getDomainId().toString() ); + out.write( ":" + domain.getFrom() + "-" + domain.getTo() ); + out.write( ":" + domain.getPerDomainEvalue() ); + } + out.write( "}" ); + if ( !( ForesterUtil.isEmpty( protein.getDescription() ) + || protein.getDescription().equals( SurfacingConstants.NONE ) ) ) { + out.write( protein.getDescription() ); + } + out.write( separator_for_output ); + if ( !( ForesterUtil.isEmpty( protein.getAccession() ) + || protein.getAccession().equals( SurfacingConstants.NONE ) ) ) { + out.write( protein.getAccession() ); + } + out.write( SurfacingConstants.NL ); + ++counter; + } + } + } + } + out.flush(); + return counter; + } + + public static void main( final String[] args ) { + Set a = new HashSet(); + Set b = new HashSet(); + Set c = new HashSet(); + Set d = new HashSet(); + a.add( "x" ); + a.add( "b" ); + a.add( "c" ); + b.add( "a" ); + b.add( "b" ); + b.add( "c" ); + c.add( "a" ); + c.add( "b" ); + c.add( "c" ); + c.add( "c" ); + c.add( "f" ); + d.add( "a" ); + d.add( "c" ); + d.add( "d" ); + List> domains_per_genome_list = new ArrayList>(); + domains_per_genome_list.add( a ); + domains_per_genome_list.add( b ); + domains_per_genome_list.add( c ); + domains_per_genome_list.add( d ); + Set x = calcIntersection( domains_per_genome_list ); + System.out.println( x ); + } +} diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index de674db..949d9dd 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -551,6 +551,15 @@ public final class Test { System.out.println( "failed." ); failed++; } + System.out.print( "Phylogeny methods:" ); + if ( Test.testPhylogenyMethods() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } System.out.print( "Postorder Iterator: " ); if ( Test.testPostOrderIterator() ) { System.out.println( "OK." ); @@ -13444,6 +13453,75 @@ public final class Test { } return true; } + + private static boolean testPhylogenyMethods() { + try { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny t0 = factory.create( "((((A,B)ab,C)abc,D)abcd,E)r", new NHXParser() )[ 0 ]; + + if ( PhylogenyMethods.calculateLevel( t0.getNode( "A" ) ) != 0 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t0.getNode( "B" ) ) != 0 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t0.getNode( "ab" ) ) != 1 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t0.getNode( "C" ) ) != 0 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t0.getNode( "abc" ) ) != 2 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t0.getNode( "D" ) ) != 0 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t0.getNode( "abcd" ) ) != 3 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t0.getNode( "E" ) ) != 0 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t0.getNode( "r" ) ) != 4 ) { + return false; + } + final Phylogeny t1 = factory.create( "((((A,B)ab,C)abc,D)abcd,E,((((((X)1)2)3)4)5)6)r", new NHXParser() )[ 0 ]; + if ( PhylogenyMethods.calculateLevel( t1.getNode( "r" ) ) != 7 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t1.getNode( "X" ) ) != 0 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t1.getNode( "6" ) ) != 6 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t1.getNode( "5" ) ) != 5 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t1.getNode( "4" ) ) != 4 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t1.getNode( "3" ) ) != 3 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t1.getNode( "2" ) ) != 2 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t1.getNode( "1" ) ) != 1 ) { + return false; + } + if ( PhylogenyMethods.calculateLevel( t1.getNode( "abcd" ) ) != 3 ) { + return false; + } + + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } private static boolean testUniprotEntryRetrieval() { try { -- 1.7.10.2