From 94fc8a7809432b86c41a2ae5f3ab4977cd5d5599 Mon Sep 17 00:00:00 2001
From: "cmzmasek@gmail.com"
Date: Sat, 19 Nov 2011 03:38:18 +0000
Subject: [PATCH] added infraorder

---
 .../src/org/forester/application/surfacing.java   |  19 +-
 .../io/parsers/HmmscanPerDomainTableParser.java   |  49 ++++-
 .../org/forester/io/parsers/util/ParserUtils.java | 191 ++++++++++----------
 .../src/org/forester/surfacing/BasicProtein.java  |   2 +
 .../src/org/forester/surfacing/SurfacingUtil.java |   8 +-
 5 files changed, 167 insertions(+), 102 deletions(-)

diff --git a/forester/java/src/org/forester/application/surfacing.java b/forester/java/src/org/forester/application/surfacing.java
index 12274f7..e8ae4bf 100644
--- a/forester/java/src/org/forester/application/surfacing.java
+++ b/forester/java/src/org/forester/application/surfacing.java
@@ -1778,12 +1778,14 @@ public class surfacing {
                                                           input_file_properties[ i ][ 1 ],
                                                           filter,
                                                           filter_type,
-                                                          ind_score_cutoff );
+                                                          ind_score_cutoff,
+                                                          true );
             }
             else {
                 parser = new HmmscanPerDomainTableParser( new File( input_file_properties[ i ][ 0 ] ),
                                                           input_file_properties[ i ][ 1 ],
-                                                          ind_score_cutoff );
+                                                          ind_score_cutoff,
+                                                          true );
             }
             if ( e_value_max >= 0.0 ) {
                 parser.setEValueMaximum( e_value_max );
@@ -2388,12 +2390,21 @@ public class surfacing {
             final PhylogenyNode n = it.next();
             if ( ForesterUtil.isEmpty( n.getName() ) ) {
                 if ( n.getNodeData().isHasTaxonomy()
+                        && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
+                    n.setName( n.getNodeData().getTaxonomy().getTaxonomyCode() );
+                }
+                else if ( n.getNodeData().isHasTaxonomy()
                         && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
                     n.setName( n.getNodeData().getTaxonomy().getScientificName() );
                 }
+                else if ( n.getNodeData().isHasTaxonomy()
+                        && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getCommonName() ) ) {
+                    n.setName( n.getNodeData().getTaxonomy().getCommonName() );
+                }
                 else {
-                    ForesterUtil.fatalError( surfacing.PRG_NAME,
-                                             "node without both name and scientific taxonomy name found" );
+                    ForesterUtil
+                            .fatalError( surfacing.PRG_NAME,
+                                         "node with no name, scientific name, common name, or taxonomy code present" );
                 }
             }
         }
diff --git a/forester/java/src/org/forester/io/parsers/HmmscanPerDomainTableParser.java b/forester/java/src/org/forester/io/parsers/HmmscanPerDomainTableParser.java
index 186ecc8..7d31a11 100644
--- a/forester/java/src/org/forester/io/parsers/HmmscanPerDomainTableParser.java
+++ b/forester/java/src/org/forester/io/parsers/HmmscanPerDomainTableParser.java
@@ -91,6 +91,7 @@ public final class HmmscanPerDomainTableParser {
     private int                            _domains_ignored_due_to_virus_like_id;
     private Map                            _domains_ignored_due_to_virus_like_id_counts_map;
     private final INDIVIDUAL_SCORE_CUTOFF  _ind_cutoff;
+    private final boolean                  _allow_proteins_with_same_name;
 
     public HmmscanPerDomainTableParser( final File input_file,
                                         final String species,
@@ -100,6 +101,20 @@ public final class HmmscanPerDomainTableParser {
         _filter = null;
         _filter_type = FilterType.NONE;
         _ind_cutoff = individual_cutoff_applies_to;
+        _allow_proteins_with_same_name = false;
         init();
     }
+
+    public HmmscanPerDomainTableParser( final File input_file,
+                                        final String species,
+                                        final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to,
+                                        final boolean allow_proteins_with_same_name ) {
+        _input_file = input_file;
+        _species = species;
+        _filter = null;
+        _filter_type = FilterType.NONE;
+        _ind_cutoff = individual_cutoff_applies_to;
+        _allow_proteins_with_same_name = allow_proteins_with_same_name;
+        init();
+    }
 
@@ -113,9 +128,29 @@ public final class HmmscanPerDomainTableParser {
         _filter = filter;
         _filter_type = filter_type;
         _ind_cutoff = individual_cutoff_applies_to;
+        _allow_proteins_with_same_name = false;
         init();
     }
 
+    public HmmscanPerDomainTableParser( final File input_file,
+                                        final String species,
+                                        final Set filter,
+                                        final FilterType filter_type,
+                                        final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to,
+                                        final boolean allow_proteins_with_same_name ) {
+        _input_file = input_file;
+        _species = species;
+        _filter = filter;
+        _filter_type = filter_type;
+        _ind_cutoff = individual_cutoff_applies_to;
+        _allow_proteins_with_same_name = allow_proteins_with_same_name;
+        init();
+    }
+
+    public boolean isAllowProteinsWithSameName() {
+        return _allow_proteins_with_same_name;
+    }
+
     private void actuallyAddProtein( final List proteins, final Protein current_protein ) {
         final List l = current_protein.getProteinDomains();
         for( final Domain d : l ) {
@@ -356,12 +391,14 @@ public final class HmmscanPerDomainTableParser {
             final int env_to = parseInt( tokens[ 20 ], line_number, "env to" );
             ++_domains_encountered;
             if ( !query.equals( prev_query ) || ( qlen != prev_qlen ) ) {
-                if ( query.equals( prev_query ) ) {
-                    throw new IOException( "more than one protein named [" + query + "]" + " lengths: " + qlen + ", "
-                            + prev_qlen );
-                }
-                if ( prev_queries.contains( query ) ) {
-                    throw new IOException( "more than one protein named [" + query + "]" );
+                if ( !isAllowProteinsWithSameName() ) {
+                    if ( query.equals( prev_query ) ) {
+                        throw new IOException( "more than one protein named [" + query + "]" + " lengths: " + qlen
+                                + ", " + prev_qlen );
+                    }
+                    if ( prev_queries.contains( query ) ) {
+                        throw new IOException( "more than one protein named [" + query + "]" );
+                    }
                 }
                 prev_query = query;
                 prev_qlen = qlen;
diff --git a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java
index b8dd42e..b6191ee 100644
--- a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java
+++ b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java
@@ -42,48 +42,51 @@ import org.forester.io.parsers.nexus.NexusPhylogeniesParser;
 import org.forester.io.parsers.nhx.NHXParser;
 import org.forester.io.parsers.phyloxml.PhyloXmlParser;
 import org.forester.io.parsers.tol.TolParser;
+import org.forester.phylogeny.Phylogeny;
 import org.forester.phylogeny.PhylogenyMethods;
 import org.forester.util.ForesterConstants;
 import org.forester.util.ForesterUtil;
 
 public final class ParserUtils {
 
-    final public static PhylogenyParser createParserDependingOnUrlContents( final URL url,
-                                                                            final boolean phyloxml_validate_against_xsd )
+    final public static PhylogenyParser createParserDependingFileContents( final File file,
+                                                                           final boolean phyloxml_validate_against_xsd )
             throws FileNotFoundException, IOException {
-        final String lc_filename = url.getFile().toString().toLowerCase();
-        PhylogenyParser parser = createParserDependingOnSuffix( lc_filename, phyloxml_validate_against_xsd );
-        if ( ( parser != null ) && lc_filename.endsWith( ".zip" ) ) {
-            if ( parser instanceof PhyloXmlParser ) {
-                ( ( PhyloXmlParser ) parser ).setZippedInputstream( true );
-            }
-            else if ( parser instanceof TolParser ) {
-                ( ( TolParser ) parser ).setZippedInputstream( true );
-            }
-        }
-        if ( parser == null ) {
-            final String first_line = ForesterUtil.getFirstLine( url ).trim().toLowerCase();
-            if ( first_line.startsWith( "<" ) ) {
-                parser = new PhyloXmlParser();
-                if ( phyloxml_validate_against_xsd ) {
-                    final ClassLoader cl = PhyloXmlParser.class.getClassLoader();
-                    final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE );
-                    if ( xsd_url != null ) {
-                        ( ( PhyloXmlParser ) parser ).setValidateAgainstSchema( xsd_url.toString() );
-                    }
-                    else {
+        PhylogenyParser parser = null;
+        final String first_line = ForesterUtil.getFirstLine( file ).trim().toLowerCase();
+        if ( first_line.startsWith( "<" ) ) {
+            parser = new PhyloXmlParser();
+            if ( phyloxml_validate_against_xsd ) {
+                final ClassLoader cl = PhyloXmlParser.class.getClassLoader();
+                final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE );
+                if ( xsd_url != null ) {
+                    ( ( PhyloXmlParser ) parser ).setValidateAgainstSchema( xsd_url.toString() );
+                }
+                else {
+                    if ( ForesterConstants.RELEASE ) {
                         throw new RuntimeException( "failed to get URL for phyloXML XSD from jar file from ["
                                 + ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE + "]" );
                     }
                 }
            }
-            else if ( ( first_line.startsWith( "nexus" ) ) || ( first_line.startsWith( "#nexus" ) )
-                    || ( first_line.startsWith( "# nexus" ) ) || ( first_line.startsWith( "begin" ) ) ) {
-                parser = new NexusPhylogeniesParser();
-            }
-            else {
-                parser = new NHXParser();
-            }
+        }
+        else if ( ( first_line.startsWith( "nexus" ) ) || ( first_line.startsWith( "#nexus" ) )
+                || ( first_line.startsWith( "# nexus" ) ) || ( first_line.startsWith( "begin" ) ) ) {
+            parser = new NexusPhylogeniesParser();
+        }
+        else {
+            parser = new NHXParser();
+        }
+        return parser;
+    }
+
+    final public static PhylogenyParser createParserDependingOnFileType( final File file,
+                                                                         final boolean phyloxml_validate_against_xsd )
+            throws FileNotFoundException, IOException {
+        PhylogenyParser parser = null;
+        parser = ParserUtils.createParserDependingOnSuffix( file.getName(), phyloxml_validate_against_xsd );
+        if ( parser == null ) {
+            parser = createParserDependingFileContents( file, phyloxml_validate_against_xsd );
        }
         return parser;
     }
@@ -127,46 +130,78 @@ public final class ParserUtils {
         return parser;
     }
 
-    final public static PhylogenyParser createParserDependingOnFileType( final File file,
-                                                                         final boolean phyloxml_validate_against_xsd )
+    final public static PhylogenyParser createParserDependingOnUrlContents( final URL url,
+                                                                            final boolean phyloxml_validate_against_xsd )
             throws FileNotFoundException, IOException {
-        PhylogenyParser parser = null;
-        parser = ParserUtils.createParserDependingOnSuffix( file.getName(), phyloxml_validate_against_xsd );
-        if ( parser == null ) {
-            parser = createParserDependingFileContents( file, phyloxml_validate_against_xsd );
+        final String lc_filename = url.getFile().toString().toLowerCase();
+        PhylogenyParser parser = createParserDependingOnSuffix( lc_filename, phyloxml_validate_against_xsd );
+        if ( ( parser != null ) && lc_filename.endsWith( ".zip" ) ) {
+            if ( parser instanceof PhyloXmlParser ) {
+                ( ( PhyloXmlParser ) parser ).setZippedInputstream( true );
+            }
+            else if ( parser instanceof TolParser ) {
+                ( ( TolParser ) parser ).setZippedInputstream( true );
+            }
         }
-        return parser;
-    }
-
-    final public static PhylogenyParser createParserDependingFileContents( final File file,
-                                                                           final boolean phyloxml_validate_against_xsd )
-            throws FileNotFoundException, IOException {
-        PhylogenyParser parser = null;
-        final String first_line = ForesterUtil.getFirstLine( file ).trim().toLowerCase();
-        if ( first_line.startsWith( "<" ) ) {
-            parser = new PhyloXmlParser();
-            if ( phyloxml_validate_against_xsd ) {
-                final ClassLoader cl = PhyloXmlParser.class.getClassLoader();
-                final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE );
-                if ( xsd_url != null ) {
-                    ( ( PhyloXmlParser ) parser ).setValidateAgainstSchema( xsd_url.toString() );
-                }
-                else {
-                    if ( ForesterConstants.RELEASE ) {
+        if ( parser == null ) {
+            final String first_line = ForesterUtil.getFirstLine( url ).trim().toLowerCase();
+            if ( first_line.startsWith( "<" ) ) {
+                parser = new PhyloXmlParser();
+                if ( phyloxml_validate_against_xsd ) {
+                    final ClassLoader cl = PhyloXmlParser.class.getClassLoader();
+                    final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE );
+                    if ( xsd_url != null ) {
+                        ( ( PhyloXmlParser ) parser ).setValidateAgainstSchema( xsd_url.toString() );
+                    }
+                    else {
                         throw new RuntimeException( "failed to get URL for phyloXML XSD from jar file from ["
                                 + ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE + "]" );
                     }
                 }
             }
+            else if ( ( first_line.startsWith( "nexus" ) ) || ( first_line.startsWith( "#nexus" ) )
+                    || ( first_line.startsWith( "# nexus" ) ) || ( first_line.startsWith( "begin" ) ) ) {
+                parser = new NexusPhylogeniesParser();
+            }
+            else {
+                parser = new NHXParser();
+            }
         }
-        else if ( ( first_line.startsWith( "nexus" ) ) || ( first_line.startsWith( "#nexus" ) )
-                || ( first_line.startsWith( "# nexus" ) ) || ( first_line.startsWith( "begin" ) ) ) {
-            parser = new NexusPhylogeniesParser();
+        return parser;
+    }
+
+    public static BufferedReader createReader( final Object source ) throws IOException, FileNotFoundException {
+        BufferedReader reader = null;
+        if ( ( source instanceof File ) || ( source instanceof String ) ) {
+            File f = null;
+            if ( source instanceof File ) {
+                f = ( File ) source;
+            }
+            else {
+                f = new File( ( String ) source );
+            }
+            if ( !f.exists() ) {
+                throw new IOException( "[" + f.getAbsolutePath() + "] does not exist" );
+            }
+            else if ( !f.isFile() ) {
+                throw new IOException( "[" + f.getAbsolutePath() + "] is not a file" );
+            }
+            else if ( !f.canRead() ) {
+                throw new IOException( "[" + f.getAbsolutePath() + "] is not a readable" );
+            }
+            reader = new BufferedReader( new FileReader( f ) );
+        }
+        else if ( source instanceof InputStream ) {
+            reader = new BufferedReader( new InputStreamReader( ( InputStream ) source ) );
+        }
+        else if ( ( source instanceof StringBuffer ) || ( source instanceof StringBuilder ) ) {
+            reader = new BufferedReader( new StringReader( source.toString() ) );
         }
         else {
-            parser = new NHXParser();
+            throw new IllegalArgumentException( "attempt to parse object of type [" + source.getClass()
+                    + "] (can only parse objects of type File/String, InputStream, StringBuffer, or StringBuilder)" );
         }
-        return parser;
+        return reader;
     }
 
     /**
@@ -221,37 +256,11 @@ public final class ParserUtils {
         return null;
     }
 
-    public static BufferedReader createReader( final Object source ) throws IOException, FileNotFoundException {
-        BufferedReader reader = null;
-        if ( ( source instanceof File ) || ( source instanceof String ) ) {
-            File f = null;
-            if ( source instanceof File ) {
-                f = ( File ) source;
-            }
-            else {
-                f = new File( ( String ) source );
-            }
-            if ( !f.exists() ) {
-                throw new IOException( "[" + f.getAbsolutePath() + "] does not exist" );
-            }
-            else if ( !f.isFile() ) {
-                throw new IOException( "[" + f.getAbsolutePath() + "] is not a file" );
-            }
-            else if ( !f.canRead() ) {
-                throw new IOException( "[" + f.getAbsolutePath() + "] is not a readable" );
-            }
-            reader = new BufferedReader( new FileReader( f ) );
-        }
-        else if ( source instanceof InputStream ) {
-            reader = new BufferedReader( new InputStreamReader( ( InputStream ) source ) );
-        }
-        else if ( ( source instanceof StringBuffer ) || ( source instanceof StringBuilder ) ) {
-            reader = new BufferedReader( new StringReader( source.toString() ) );
-        }
-        else {
-            throw new IllegalArgumentException( "attempt to parse object of type [" + source.getClass()
-                    + "] (can only parse objects of type File/String, InputStream, StringBuffer, or StringBuilder)" );
-        }
-        return reader;
+    public final static Phylogeny[] readPhylogenies( final File file ) throws FileNotFoundException, IOException {
+        return PhylogenyMethods.readPhylogenies( ParserUtils.createParserDependingOnFileType( file, true ), file );
+    }
+
+    public final static Phylogeny[] readPhylogenies( final String file_name ) throws FileNotFoundException, IOException {
+        return readPhylogenies( new File( file_name ) );
     }
 }
diff --git a/forester/java/src/org/forester/surfacing/BasicProtein.java b/forester/java/src/org/forester/surfacing/BasicProtein.java
index 93949d1..e7c4171 100644
--- a/forester/java/src/org/forester/surfacing/BasicProtein.java
+++ b/forester/java/src/org/forester/surfacing/BasicProtein.java
@@ -31,6 +31,8 @@ import java.util.List;
 import java.util.SortedSet;
 import java.util.TreeSet;
 
+// Note: when implementing any "equals" method need to keep in mind that
+// proteins could have the same name and/or id!
 public class BasicProtein implements Protein {
 
     private final ProteinId _id;
diff --git a/forester/java/src/org/forester/surfacing/SurfacingUtil.java b/forester/java/src/org/forester/surfacing/SurfacingUtil.java
index eef4ff9..409a5d1 100644
--- a/forester/java/src/org/forester/surfacing/SurfacingUtil.java
+++ b/forester/java/src/org/forester/surfacing/SurfacingUtil.java
@@ -893,11 +893,17 @@ public final class SurfacingUtil {
             final PhylogenyNode n = it.next();
             if ( ForesterUtil.isEmpty( n.getName() )
                     && ( !n.getNodeData().isHasTaxonomy() || ForesterUtil.isEmpty( n.getNodeData().getTaxonomy()
-                            .getScientificName() ) ) ) {
+                            .getScientificName() ) )
+                    && ( !n.getNodeData().isHasTaxonomy() || ForesterUtil.isEmpty( n.getNodeData().getTaxonomy()
+                            .getCommonName() ) ) ) {
                 if ( n.getParent() != null ) {
                     names.append( " " );
                     names.append( n.getParent().getName() );
                 }
+                final List l = n.getAllExternalDescendants();
+                for( final Object object : l ) {
+                    System.out.println( l.toString() );
+                }
                 ++c;
             }
         }
-- 
1.7.10.2
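
Illustrative usage sketch for the new four-argument HmmscanPerDomainTableParser constructor (not part of the diff above). The input file name, the species label, the INDIVIDUAL_SCORE_CUTOFF.NONE constant, and the parse() call are assumptions based on the existing parser API rather than on this commit:

    import java.io.File;
    import java.io.IOException;
    import java.util.List;
    import org.forester.io.parsers.HmmscanPerDomainTableParser;
    import org.forester.io.parsers.HmmscanPerDomainTableParser.INDIVIDUAL_SCORE_CUTOFF;

    public class AllowSameNameExample {

        public static void main( final String[] args ) throws IOException {
            // With the last constructor argument set to true, rows sharing a query
            // (protein) name no longer trigger the "more than one protein named [...]"
            // IOException raised during parsing of the per-domain table.
            final HmmscanPerDomainTableParser parser =
                    new HmmscanPerDomainTableParser( new File( "hmmscan_output.domtbl" ), // hypothetical input file
                                                     "NEMVE",                             // hypothetical species label
                                                     INDIVIDUAL_SCORE_CUTOFF.NONE,        // assumed enum constant
                                                     true );                              // allow proteins with the same name
            parser.setEValueMaximum( 1e-5 );
            final List proteins = parser.parse(); // parse() assumed from the existing parser API
            System.out.println( "proteins read: " + proteins.size() );
        }
    }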
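
The two readPhylogenies() convenience methods added to ParserUtils can be exercised as follows (a minimal sketch; the file name is hypothetical). The parser is chosen by file suffix first and, when that is inconclusive, createParserDependingFileContents() falls back to inspecting the first line of the file (phyloXML, Nexus, or NHX/Newick):

    import java.io.File;
    import java.io.IOException;
    import org.forester.io.parsers.util.ParserUtils;
    import org.forester.phylogeny.Phylogeny;

    public class ReadTreesExample {

        public static void main( final String[] args ) throws IOException {
            // Reads one or more trees; format detection is handled by
            // createParserDependingOnFileType() / createParserDependingFileContents().
            final Phylogeny[] phylogenies = ParserUtils.readPhylogenies( new File( "example_tree.xml" ) );
            System.out.println( "phylogenies read: " + phylogenies.length );
        }
    }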
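
The relocated ParserUtils.createReader() accepts a File, a String (taken as a file name), an InputStream, a StringBuffer, or a StringBuilder; any other type raises an IllegalArgumentException. A small sketch reading an in-memory Newick string (illustrative only):

    import java.io.BufferedReader;
    import java.io.IOException;
    import org.forester.io.parsers.util.ParserUtils;

    public class CreateReaderExample {

        public static void main( final String[] args ) throws IOException {
            // A StringBuilder source is wrapped in a StringReader internally,
            // so no file needs to exist on disk.
            final BufferedReader reader = ParserUtils.createReader( new StringBuilder( "((A,B),C);" ) );
            System.out.println( reader.readLine() );
        }
    }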