added infraorder
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Sat, 19 Nov 2011 03:38:18 +0000 (03:38 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Sat, 19 Nov 2011 03:38:18 +0000 (03:38 +0000)
forester/java/src/org/forester/application/surfacing.java
forester/java/src/org/forester/io/parsers/HmmscanPerDomainTableParser.java
forester/java/src/org/forester/io/parsers/util/ParserUtils.java
forester/java/src/org/forester/surfacing/BasicProtein.java
forester/java/src/org/forester/surfacing/SurfacingUtil.java

index 12274f7..e8ae4bf 100644 (file)
@@ -1778,12 +1778,14 @@ public class surfacing {
                                                           input_file_properties[ i ][ 1 ],
                                                           filter,
                                                           filter_type,
-                                                          ind_score_cutoff );
+                                                          ind_score_cutoff,
+                                                          true );
             }
             else {
                 parser = new HmmscanPerDomainTableParser( new File( input_file_properties[ i ][ 0 ] ),
                                                           input_file_properties[ i ][ 1 ],
-                                                          ind_score_cutoff );
+                                                          ind_score_cutoff,
+                                                          true );
             }
             if ( e_value_max >= 0.0 ) {
                 parser.setEValueMaximum( e_value_max );
@@ -2388,12 +2390,21 @@ public class surfacing {
             final PhylogenyNode n = it.next();
             if ( ForesterUtil.isEmpty( n.getName() ) ) {
                 if ( n.getNodeData().isHasTaxonomy()
+                        && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
+                    n.setName( n.getNodeData().getTaxonomy().getTaxonomyCode() );
+                }
+                else if ( n.getNodeData().isHasTaxonomy()
                         && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
                     n.setName( n.getNodeData().getTaxonomy().getScientificName() );
                 }
+                else if ( n.getNodeData().isHasTaxonomy()
+                        && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getCommonName() ) ) {
+                    n.setName( n.getNodeData().getTaxonomy().getCommonName() );
+                }
                 else {
-                    ForesterUtil.fatalError( surfacing.PRG_NAME,
-                                             "node without both name and scientific taxonomy name found" );
+                    ForesterUtil
+                            .fatalError( surfacing.PRG_NAME,
+                                         "node with no name, scientific name, common name, or taxonomy code present" );
                 }
             }
         }
index 186ecc8..7d31a11 100644 (file)
@@ -91,6 +91,7 @@ public final class HmmscanPerDomainTableParser {
     private int                           _domains_ignored_due_to_virus_like_id;
     private Map<String, Integer>          _domains_ignored_due_to_virus_like_id_counts_map;
     private final INDIVIDUAL_SCORE_CUTOFF _ind_cutoff;
+    private final boolean                 _allow_proteins_with_same_name;
 
     public HmmscanPerDomainTableParser( final File input_file,
                                         final String species,
@@ -100,6 +101,20 @@ public final class HmmscanPerDomainTableParser {
         _filter = null;
         _filter_type = FilterType.NONE;
         _ind_cutoff = individual_cutoff_applies_to;
+        _allow_proteins_with_same_name = false;
+        init();
+    }
+
+    public HmmscanPerDomainTableParser( final File input_file,
+                                        final String species,
+                                        final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to,
+                                        final boolean allow_proteins_with_same_name ) {
+        _input_file = input_file;
+        _species = species;
+        _filter = null;
+        _filter_type = FilterType.NONE;
+        _ind_cutoff = individual_cutoff_applies_to;
+        _allow_proteins_with_same_name = allow_proteins_with_same_name;
         init();
     }
 
@@ -113,9 +128,29 @@ public final class HmmscanPerDomainTableParser {
         _filter = filter;
         _filter_type = filter_type;
         _ind_cutoff = individual_cutoff_applies_to;
+        _allow_proteins_with_same_name = false;
         init();
     }
 
+    public HmmscanPerDomainTableParser( final File input_file,
+                                        final String species,
+                                        final Set<DomainId> filter,
+                                        final FilterType filter_type,
+                                        final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to,
+                                        final boolean allow_proteins_with_same_name ) {
+        _input_file = input_file;
+        _species = species;
+        _filter = filter;
+        _filter_type = filter_type;
+        _ind_cutoff = individual_cutoff_applies_to;
+        _allow_proteins_with_same_name = allow_proteins_with_same_name;
+        init();
+    }
+
+    public boolean isAllowProteinsWithSameName() {
+        return _allow_proteins_with_same_name;
+    }
+
     private void actuallyAddProtein( final List<Protein> proteins, final Protein current_protein ) {
         final List<Domain> l = current_protein.getProteinDomains();
         for( final Domain d : l ) {
@@ -356,12 +391,14 @@ public final class HmmscanPerDomainTableParser {
             final int env_to = parseInt( tokens[ 20 ], line_number, "env to" );
             ++_domains_encountered;
             if ( !query.equals( prev_query ) || ( qlen != prev_qlen ) ) {
-                if ( query.equals( prev_query ) ) {
-                    throw new IOException( "more than one protein named [" + query + "]" + " lengths: " + qlen + ", "
-                            + prev_qlen );
-                }
-                if ( prev_queries.contains( query ) ) {
-                    throw new IOException( "more than one protein named [" + query + "]" );
+                if ( !isAllowProteinsWithSameName() ) {
+                    if ( query.equals( prev_query ) ) {
+                        throw new IOException( "more than one protein named [" + query + "]" + " lengths: " + qlen
+                                + ", " + prev_qlen );
+                    }
+                    if ( prev_queries.contains( query ) ) {
+                        throw new IOException( "more than one protein named [" + query + "]" );
+                    }
                 }
                 prev_query = query;
                 prev_qlen = qlen;
index b8dd42e..b6191ee 100644 (file)
@@ -42,48 +42,51 @@ import org.forester.io.parsers.nexus.NexusPhylogeniesParser;
 import org.forester.io.parsers.nhx.NHXParser;
 import org.forester.io.parsers.phyloxml.PhyloXmlParser;
 import org.forester.io.parsers.tol.TolParser;
+import org.forester.phylogeny.Phylogeny;
 import org.forester.phylogeny.PhylogenyMethods;
 import org.forester.util.ForesterConstants;
 import org.forester.util.ForesterUtil;
 
 public final class ParserUtils {
 
-    final public static PhylogenyParser createParserDependingOnUrlContents( final URL url,
-                                                                            final boolean phyloxml_validate_against_xsd )
+    final public static PhylogenyParser createParserDependingFileContents( final File file,
+                                                                           final boolean phyloxml_validate_against_xsd )
             throws FileNotFoundException, IOException {
-        final String lc_filename = url.getFile().toString().toLowerCase();
-        PhylogenyParser parser = createParserDependingOnSuffix( lc_filename, phyloxml_validate_against_xsd );
-        if ( ( parser != null ) && lc_filename.endsWith( ".zip" ) ) {
-            if ( parser instanceof PhyloXmlParser ) {
-                ( ( PhyloXmlParser ) parser ).setZippedInputstream( true );
-            }
-            else if ( parser instanceof TolParser ) {
-                ( ( TolParser ) parser ).setZippedInputstream( true );
-            }
-        }
-        if ( parser == null ) {
-            final String first_line = ForesterUtil.getFirstLine( url ).trim().toLowerCase();
-            if ( first_line.startsWith( "<" ) ) {
-                parser = new PhyloXmlParser();
-                if ( phyloxml_validate_against_xsd ) {
-                    final ClassLoader cl = PhyloXmlParser.class.getClassLoader();
-                    final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE );
-                    if ( xsd_url != null ) {
-                        ( ( PhyloXmlParser ) parser ).setValidateAgainstSchema( xsd_url.toString() );
-                    }
-                    else {
+        PhylogenyParser parser = null;
+        final String first_line = ForesterUtil.getFirstLine( file ).trim().toLowerCase();
+        if ( first_line.startsWith( "<" ) ) {
+            parser = new PhyloXmlParser();
+            if ( phyloxml_validate_against_xsd ) {
+                final ClassLoader cl = PhyloXmlParser.class.getClassLoader();
+                final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE );
+                if ( xsd_url != null ) {
+                    ( ( PhyloXmlParser ) parser ).setValidateAgainstSchema( xsd_url.toString() );
+                }
+                else {
+                    if ( ForesterConstants.RELEASE ) {
                         throw new RuntimeException( "failed to get URL for phyloXML XSD from jar file from ["
                                 + ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE + "]" );
                     }
                 }
             }
-            else if ( ( first_line.startsWith( "nexus" ) ) || ( first_line.startsWith( "#nexus" ) )
-                    || ( first_line.startsWith( "# nexus" ) ) || ( first_line.startsWith( "begin" ) ) ) {
-                parser = new NexusPhylogeniesParser();
-            }
-            else {
-                parser = new NHXParser();
-            }
+        }
+        else if ( ( first_line.startsWith( "nexus" ) ) || ( first_line.startsWith( "#nexus" ) )
+                || ( first_line.startsWith( "# nexus" ) ) || ( first_line.startsWith( "begin" ) ) ) {
+            parser = new NexusPhylogeniesParser();
+        }
+        else {
+            parser = new NHXParser();
+        }
+        return parser;
+    }
+
+    final public static PhylogenyParser createParserDependingOnFileType( final File file,
+                                                                         final boolean phyloxml_validate_against_xsd )
+            throws FileNotFoundException, IOException {
+        PhylogenyParser parser = null;
+        parser = ParserUtils.createParserDependingOnSuffix( file.getName(), phyloxml_validate_against_xsd );
+        if ( parser == null ) {
+            parser = createParserDependingFileContents( file, phyloxml_validate_against_xsd );
         }
         return parser;
     }
@@ -127,46 +130,78 @@ public final class ParserUtils {
         return parser;
     }
 
-    final public static PhylogenyParser createParserDependingOnFileType( final File file,
-                                                                         final boolean phyloxml_validate_against_xsd )
+    final public static PhylogenyParser createParserDependingOnUrlContents( final URL url,
+                                                                            final boolean phyloxml_validate_against_xsd )
             throws FileNotFoundException, IOException {
-        PhylogenyParser parser = null;
-        parser = ParserUtils.createParserDependingOnSuffix( file.getName(), phyloxml_validate_against_xsd );
-        if ( parser == null ) {
-            parser = createParserDependingFileContents( file, phyloxml_validate_against_xsd );
+        final String lc_filename = url.getFile().toString().toLowerCase();
+        PhylogenyParser parser = createParserDependingOnSuffix( lc_filename, phyloxml_validate_against_xsd );
+        if ( ( parser != null ) && lc_filename.endsWith( ".zip" ) ) {
+            if ( parser instanceof PhyloXmlParser ) {
+                ( ( PhyloXmlParser ) parser ).setZippedInputstream( true );
+            }
+            else if ( parser instanceof TolParser ) {
+                ( ( TolParser ) parser ).setZippedInputstream( true );
+            }
         }
-        return parser;
-    }
-
-    final public static PhylogenyParser createParserDependingFileContents( final File file,
-                                                                           final boolean phyloxml_validate_against_xsd )
-            throws FileNotFoundException, IOException {
-        PhylogenyParser parser = null;
-        final String first_line = ForesterUtil.getFirstLine( file ).trim().toLowerCase();
-        if ( first_line.startsWith( "<" ) ) {
-            parser = new PhyloXmlParser();
-            if ( phyloxml_validate_against_xsd ) {
-                final ClassLoader cl = PhyloXmlParser.class.getClassLoader();
-                final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE );
-                if ( xsd_url != null ) {
-                    ( ( PhyloXmlParser ) parser ).setValidateAgainstSchema( xsd_url.toString() );
-                }
-                else {
-                    if ( ForesterConstants.RELEASE ) {
+        if ( parser == null ) {
+            final String first_line = ForesterUtil.getFirstLine( url ).trim().toLowerCase();
+            if ( first_line.startsWith( "<" ) ) {
+                parser = new PhyloXmlParser();
+                if ( phyloxml_validate_against_xsd ) {
+                    final ClassLoader cl = PhyloXmlParser.class.getClassLoader();
+                    final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE );
+                    if ( xsd_url != null ) {
+                        ( ( PhyloXmlParser ) parser ).setValidateAgainstSchema( xsd_url.toString() );
+                    }
+                    else {
                         throw new RuntimeException( "failed to get URL for phyloXML XSD from jar file from ["
                                 + ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE + "]" );
                     }
                 }
             }
+            else if ( ( first_line.startsWith( "nexus" ) ) || ( first_line.startsWith( "#nexus" ) )
+                    || ( first_line.startsWith( "# nexus" ) ) || ( first_line.startsWith( "begin" ) ) ) {
+                parser = new NexusPhylogeniesParser();
+            }
+            else {
+                parser = new NHXParser();
+            }
         }
-        else if ( ( first_line.startsWith( "nexus" ) ) || ( first_line.startsWith( "#nexus" ) )
-                || ( first_line.startsWith( "# nexus" ) ) || ( first_line.startsWith( "begin" ) ) ) {
-            parser = new NexusPhylogeniesParser();
+        return parser;
+    }
+
+    public static BufferedReader createReader( final Object source ) throws IOException, FileNotFoundException {
+        BufferedReader reader = null;
+        if ( ( source instanceof File ) || ( source instanceof String ) ) {
+            File f = null;
+            if ( source instanceof File ) {
+                f = ( File ) source;
+            }
+            else {
+                f = new File( ( String ) source );
+            }
+            if ( !f.exists() ) {
+                throw new IOException( "[" + f.getAbsolutePath() + "] does not exist" );
+            }
+            else if ( !f.isFile() ) {
+                throw new IOException( "[" + f.getAbsolutePath() + "] is not a file" );
+            }
+            else if ( !f.canRead() ) {
+                throw new IOException( "[" + f.getAbsolutePath() + "] is not a readable" );
+            }
+            reader = new BufferedReader( new FileReader( f ) );
+        }
+        else if ( source instanceof InputStream ) {
+            reader = new BufferedReader( new InputStreamReader( ( InputStream ) source ) );
+        }
+        else if ( ( source instanceof StringBuffer ) || ( source instanceof StringBuilder ) ) {
+            reader = new BufferedReader( new StringReader( source.toString() ) );
         }
         else {
-            parser = new NHXParser();
+            throw new IllegalArgumentException( "attempt to parse object of type [" + source.getClass()
+                    + "] (can only parse objects of type File/String, InputStream, StringBuffer, or StringBuilder)" );
         }
-        return parser;
+        return reader;
     }
 
     /**
@@ -221,37 +256,11 @@ public final class ParserUtils {
         return null;
     }
 
-    public static BufferedReader createReader( final Object source ) throws IOException, FileNotFoundException {
-        BufferedReader reader = null;
-        if ( ( source instanceof File ) || ( source instanceof String ) ) {
-            File f = null;
-            if ( source instanceof File ) {
-                f = ( File ) source;
-            }
-            else {
-                f = new File( ( String ) source );
-            }
-            if ( !f.exists() ) {
-                throw new IOException( "[" + f.getAbsolutePath() + "] does not exist" );
-            }
-            else if ( !f.isFile() ) {
-                throw new IOException( "[" + f.getAbsolutePath() + "] is not a file" );
-            }
-            else if ( !f.canRead() ) {
-                throw new IOException( "[" + f.getAbsolutePath() + "] is not a readable" );
-            }
-            reader = new BufferedReader( new FileReader( f ) );
-        }
-        else if ( source instanceof InputStream ) {
-            reader = new BufferedReader( new InputStreamReader( ( InputStream ) source ) );
-        }
-        else if ( ( source instanceof StringBuffer ) || ( source instanceof StringBuilder ) ) {
-            reader = new BufferedReader( new StringReader( source.toString() ) );
-        }
-        else {
-            throw new IllegalArgumentException( "attempt to parse object of type [" + source.getClass()
-                    + "] (can only parse objects of type File/String, InputStream, StringBuffer, or StringBuilder)" );
-        }
-        return reader;
+    public final static Phylogeny[] readPhylogenies( final File file ) throws FileNotFoundException, IOException {
+        return PhylogenyMethods.readPhylogenies( ParserUtils.createParserDependingOnFileType( file, true ), file );
+    }
+
+    public final static Phylogeny[] readPhylogenies( final String file_name ) throws FileNotFoundException, IOException {
+        return readPhylogenies( new File( file_name ) );
     }
 }
index 93949d1..e7c4171 100644 (file)
@@ -31,6 +31,8 @@ import java.util.List;
 import java.util.SortedSet;
 import java.util.TreeSet;
 
+// Note: when implementing any "equals" method need to keep in mind that
+// proteins could have the same name and/or id!
 public class BasicProtein implements Protein {
 
     private final ProteinId    _id;
index eef4ff9..409a5d1 100644 (file)
@@ -893,11 +893,17 @@ public final class SurfacingUtil {
             final PhylogenyNode n = it.next();
             if ( ForesterUtil.isEmpty( n.getName() )
                     && ( !n.getNodeData().isHasTaxonomy() || ForesterUtil.isEmpty( n.getNodeData().getTaxonomy()
-                            .getScientificName() ) ) ) {
+                            .getScientificName() ) )
+                    && ( !n.getNodeData().isHasTaxonomy() || ForesterUtil.isEmpty( n.getNodeData().getTaxonomy()
+                            .getCommonName() ) ) ) {
                 if ( n.getParent() != null ) {
                     names.append( " " );
                     names.append( n.getParent().getName() );
                 }
+                final List l = n.getAllExternalDescendants();
+                for( final Object object : l ) {
+                    System.out.println( l.toString() );
+                }
                 ++c;
             }
         }