private int _domains_ignored_due_to_virus_like_id;
private Map<String, Integer> _domains_ignored_due_to_virus_like_id_counts_map;
private final INDIVIDUAL_SCORE_CUTOFF _ind_cutoff;
+ private final boolean _allow_proteins_with_same_name;
public HmmscanPerDomainTableParser( final File input_file,
final String species,
_filter = null;
_filter_type = FilterType.NONE;
_ind_cutoff = individual_cutoff_applies_to;
+ _allow_proteins_with_same_name = false;
+ init();
+ }
+
+ public HmmscanPerDomainTableParser( final File input_file,
+ final String species,
+ final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to,
+ final boolean allow_proteins_with_same_name ) {
+ _input_file = input_file;
+ _species = species;
+ _filter = null;
+ _filter_type = FilterType.NONE;
+ _ind_cutoff = individual_cutoff_applies_to;
+ _allow_proteins_with_same_name = allow_proteins_with_same_name;
init();
}
_filter = filter;
_filter_type = filter_type;
_ind_cutoff = individual_cutoff_applies_to;
+ _allow_proteins_with_same_name = false;
init();
}
+ public HmmscanPerDomainTableParser( final File input_file,
+ final String species,
+ final Set<DomainId> filter,
+ final FilterType filter_type,
+ final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to,
+ final boolean allow_proteins_with_same_name ) {
+ _input_file = input_file;
+ _species = species;
+ _filter = filter;
+ _filter_type = filter_type;
+ _ind_cutoff = individual_cutoff_applies_to;
+ _allow_proteins_with_same_name = allow_proteins_with_same_name;
+ init();
+ }
+
+ public boolean isAllowProteinsWithSameName() {
+ return _allow_proteins_with_same_name;
+ }
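A minimal usage sketch of the new flag, assuming the parser's existing parse() method and a NONE constant of INDIVIDUAL_SCORE_CUTOFF (the input path and species label are placeholders, not part of this change):

    // Sketch only: tolerate repeated query names in the hmmscan per-domain table.
    // "scan.hmmscan" and "NEMVE" are placeholder values.
    final HmmscanPerDomainTableParser hmmscan_parser =
            new HmmscanPerDomainTableParser( new File( "scan.hmmscan" ),
                                             "NEMVE",
                                             INDIVIDUAL_SCORE_CUTOFF.NONE,
                                             true ); // allow proteins with the same name
    final List<Protein> proteins = hmmscan_parser.parse(); // no longer throws on duplicate query names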
+
private void actuallyAddProtein( final List<Protein> proteins, final Protein current_protein ) {
final List<Domain> l = current_protein.getProteinDomains();
for( final Domain d : l ) {
final int env_to = parseInt( tokens[ 20 ], line_number, "env to" );
++_domains_encountered;
if ( !query.equals( prev_query ) || ( qlen != prev_qlen ) ) {
- if ( query.equals( prev_query ) ) {
- throw new IOException( "more than one protein named [" + query + "]" + " lengths: " + qlen + ", "
- + prev_qlen );
- }
- if ( prev_queries.contains( query ) ) {
- throw new IOException( "more than one protein named [" + query + "]" );
+ if ( !isAllowProteinsWithSameName() ) {
+ if ( query.equals( prev_query ) ) {
+ throw new IOException( "more than one protein named [" + query + "]" + " lengths: " + qlen
+ + ", " + prev_qlen );
+ }
+ if ( prev_queries.contains( query ) ) {
+ throw new IOException( "more than one protein named [" + query + "]" );
+ }
}
prev_query = query;
prev_qlen = qlen;
import org.forester.io.parsers.nhx.NHXParser;
import org.forester.io.parsers.phyloxml.PhyloXmlParser;
import org.forester.io.parsers.tol.TolParser;
+import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyMethods;
import org.forester.util.ForesterConstants;
import org.forester.util.ForesterUtil;
public final class ParserUtils {
- final public static PhylogenyParser createParserDependingOnUrlContents( final URL url,
- final boolean phyloxml_validate_against_xsd )
+ final public static PhylogenyParser createParserDependingFileContents( final File file,
+ final boolean phyloxml_validate_against_xsd )
throws FileNotFoundException, IOException {
- final String lc_filename = url.getFile().toString().toLowerCase();
- PhylogenyParser parser = createParserDependingOnSuffix( lc_filename, phyloxml_validate_against_xsd );
- if ( ( parser != null ) && lc_filename.endsWith( ".zip" ) ) {
- if ( parser instanceof PhyloXmlParser ) {
- ( ( PhyloXmlParser ) parser ).setZippedInputstream( true );
- }
- else if ( parser instanceof TolParser ) {
- ( ( TolParser ) parser ).setZippedInputstream( true );
- }
- }
- if ( parser == null ) {
- final String first_line = ForesterUtil.getFirstLine( url ).trim().toLowerCase();
- if ( first_line.startsWith( "<" ) ) {
- parser = new PhyloXmlParser();
- if ( phyloxml_validate_against_xsd ) {
- final ClassLoader cl = PhyloXmlParser.class.getClassLoader();
- final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE );
- if ( xsd_url != null ) {
- ( ( PhyloXmlParser ) parser ).setValidateAgainstSchema( xsd_url.toString() );
- }
- else {
+ PhylogenyParser parser = null;
+ final String first_line = ForesterUtil.getFirstLine( file ).trim().toLowerCase();
+ if ( first_line.startsWith( "<" ) ) {
+ parser = new PhyloXmlParser();
+ if ( phyloxml_validate_against_xsd ) {
+ final ClassLoader cl = PhyloXmlParser.class.getClassLoader();
+ final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE );
+ if ( xsd_url != null ) {
+ ( ( PhyloXmlParser ) parser ).setValidateAgainstSchema( xsd_url.toString() );
+ }
+ else {
+ if ( ForesterConstants.RELEASE ) {
throw new RuntimeException( "failed to get URL for phyloXML XSD from jar file from ["
+ ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE + "]" );
}
}
}
- else if ( ( first_line.startsWith( "nexus" ) ) || ( first_line.startsWith( "#nexus" ) )
- || ( first_line.startsWith( "# nexus" ) ) || ( first_line.startsWith( "begin" ) ) ) {
- parser = new NexusPhylogeniesParser();
- }
- else {
- parser = new NHXParser();
- }
+ }
+ else if ( ( first_line.startsWith( "nexus" ) ) || ( first_line.startsWith( "#nexus" ) )
+ || ( first_line.startsWith( "# nexus" ) ) || ( first_line.startsWith( "begin" ) ) ) {
+ parser = new NexusPhylogeniesParser();
+ }
+ else {
+ parser = new NHXParser();
+ }
+ return parser;
+ }
+
+ final public static PhylogenyParser createParserDependingOnFileType( final File file,
+ final boolean phyloxml_validate_against_xsd )
+ throws FileNotFoundException, IOException {
+ PhylogenyParser parser = null;
+ parser = ParserUtils.createParserDependingOnSuffix( file.getName(), phyloxml_validate_against_xsd );
+ if ( parser == null ) {
+ parser = createParserDependingFileContents( file, phyloxml_validate_against_xsd );
}
return parser;
}
return parser;
}
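With this reorganization, createParserDependingOnFileType tries the filename suffix first and only falls back to sniffing the first line of the file. A rough caller-side sketch (the file name is a placeholder):

    // Sketch only: suffix is consulted first; file contents only if the suffix is inconclusive.
    final File f = new File( "unknown_tree_file" ); // placeholder, no recognizable extension
    final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( f, true );
    // A first line starting with '<' yields a PhyloXmlParser, a NEXUS header a
    // NexusPhylogeniesParser, anything else an NHXParser.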
- final public static PhylogenyParser createParserDependingOnFileType( final File file,
- final boolean phyloxml_validate_against_xsd )
+ final public static PhylogenyParser createParserDependingOnUrlContents( final URL url,
+ final boolean phyloxml_validate_against_xsd )
throws FileNotFoundException, IOException {
- PhylogenyParser parser = null;
- parser = ParserUtils.createParserDependingOnSuffix( file.getName(), phyloxml_validate_against_xsd );
- if ( parser == null ) {
- parser = createParserDependingFileContents( file, phyloxml_validate_against_xsd );
+ final String lc_filename = url.getFile().toString().toLowerCase();
+ PhylogenyParser parser = createParserDependingOnSuffix( lc_filename, phyloxml_validate_against_xsd );
+ if ( ( parser != null ) && lc_filename.endsWith( ".zip" ) ) {
+ if ( parser instanceof PhyloXmlParser ) {
+ ( ( PhyloXmlParser ) parser ).setZippedInputstream( true );
+ }
+ else if ( parser instanceof TolParser ) {
+ ( ( TolParser ) parser ).setZippedInputstream( true );
+ }
}
- return parser;
- }
-
- final public static PhylogenyParser createParserDependingFileContents( final File file,
- final boolean phyloxml_validate_against_xsd )
- throws FileNotFoundException, IOException {
- PhylogenyParser parser = null;
- final String first_line = ForesterUtil.getFirstLine( file ).trim().toLowerCase();
- if ( first_line.startsWith( "<" ) ) {
- parser = new PhyloXmlParser();
- if ( phyloxml_validate_against_xsd ) {
- final ClassLoader cl = PhyloXmlParser.class.getClassLoader();
- final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE );
- if ( xsd_url != null ) {
- ( ( PhyloXmlParser ) parser ).setValidateAgainstSchema( xsd_url.toString() );
- }
- else {
- if ( ForesterConstants.RELEASE ) {
+ if ( parser == null ) {
+ final String first_line = ForesterUtil.getFirstLine( url ).trim().toLowerCase();
+ if ( first_line.startsWith( "<" ) ) {
+ parser = new PhyloXmlParser();
+ if ( phyloxml_validate_against_xsd ) {
+ final ClassLoader cl = PhyloXmlParser.class.getClassLoader();
+ final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE );
+ if ( xsd_url != null ) {
+ ( ( PhyloXmlParser ) parser ).setValidateAgainstSchema( xsd_url.toString() );
+ }
+ else {
throw new RuntimeException( "failed to get URL for phyloXML XSD from jar file from ["
+ ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE + "]" );
}
}
}
+ else if ( ( first_line.startsWith( "nexus" ) ) || ( first_line.startsWith( "#nexus" ) )
+ || ( first_line.startsWith( "# nexus" ) ) || ( first_line.startsWith( "begin" ) ) ) {
+ parser = new NexusPhylogeniesParser();
+ }
+ else {
+ parser = new NHXParser();
+ }
}
- else if ( ( first_line.startsWith( "nexus" ) ) || ( first_line.startsWith( "#nexus" ) )
- || ( first_line.startsWith( "# nexus" ) ) || ( first_line.startsWith( "begin" ) ) ) {
- parser = new NexusPhylogeniesParser();
+ return parser;
+ }
+
+ public static BufferedReader createReader( final Object source ) throws IOException, FileNotFoundException {
+ BufferedReader reader = null;
+ if ( ( source instanceof File ) || ( source instanceof String ) ) {
+ File f = null;
+ if ( source instanceof File ) {
+ f = ( File ) source;
+ }
+ else {
+ f = new File( ( String ) source );
+ }
+ if ( !f.exists() ) {
+ throw new IOException( "[" + f.getAbsolutePath() + "] does not exist" );
+ }
+ else if ( !f.isFile() ) {
+ throw new IOException( "[" + f.getAbsolutePath() + "] is not a file" );
+ }
+ else if ( !f.canRead() ) {
+ throw new IOException( "[" + f.getAbsolutePath() + "] is not a readable" );
+ }
+ reader = new BufferedReader( new FileReader( f ) );
+ }
+ else if ( source instanceof InputStream ) {
+ reader = new BufferedReader( new InputStreamReader( ( InputStream ) source ) );
+ }
+ else if ( ( source instanceof StringBuffer ) || ( source instanceof StringBuilder ) ) {
+ reader = new BufferedReader( new StringReader( source.toString() ) );
}
else {
- parser = new NHXParser();
+ throw new IllegalArgumentException( "attempt to parse object of type [" + source.getClass()
+ + "] (can only parse objects of type File/String, InputStream, StringBuffer, or StringBuilder)" );
}
- return parser;
+ return reader;
}
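The relocated createReader accepts a File, a String path, an InputStream, or a StringBuffer/StringBuilder; a brief sketch (paths and tree text are illustrative):

    // Sketch only: the accepted source types.
    final BufferedReader from_file = ParserUtils.createReader( new File( "example.nhx" ) ); // placeholder path
    final BufferedReader from_path = ParserUtils.createReader( "example.nhx" );             // a String is treated as a path
    final BufferedReader from_text = ParserUtils.createReader( new StringBuilder( "((a,b),c);" ) );
    // Any other source type triggers the IllegalArgumentException above.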
/**
return null;
}
- public static BufferedReader createReader( final Object source ) throws IOException, FileNotFoundException {
- BufferedReader reader = null;
- if ( ( source instanceof File ) || ( source instanceof String ) ) {
- File f = null;
- if ( source instanceof File ) {
- f = ( File ) source;
- }
- else {
- f = new File( ( String ) source );
- }
- if ( !f.exists() ) {
- throw new IOException( "[" + f.getAbsolutePath() + "] does not exist" );
- }
- else if ( !f.isFile() ) {
- throw new IOException( "[" + f.getAbsolutePath() + "] is not a file" );
- }
- else if ( !f.canRead() ) {
- throw new IOException( "[" + f.getAbsolutePath() + "] is not a readable" );
- }
- reader = new BufferedReader( new FileReader( f ) );
- }
- else if ( source instanceof InputStream ) {
- reader = new BufferedReader( new InputStreamReader( ( InputStream ) source ) );
- }
- else if ( ( source instanceof StringBuffer ) || ( source instanceof StringBuilder ) ) {
- reader = new BufferedReader( new StringReader( source.toString() ) );
- }
- else {
- throw new IllegalArgumentException( "attempt to parse object of type [" + source.getClass()
- + "] (can only parse objects of type File/String, InputStream, StringBuffer, or StringBuilder)" );
- }
- return reader;
+ public final static Phylogeny[] readPhylogenies( final File file ) throws FileNotFoundException, IOException {
+ return PhylogenyMethods.readPhylogenies( ParserUtils.createParserDependingOnFileType( file, true ), file );
+ }
+
+ public final static Phylogeny[] readPhylogenies( final String file_name ) throws FileNotFoundException, IOException {
+ return readPhylogenies( new File( file_name ) );
}
}
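With the new convenience methods, reading phylogenies from a file collapses to a single call; parser selection (suffix first, then content sniffing) happens internally. A sketch (the path is a placeholder):

    // Sketch only: "example.xml" is a placeholder path.
    final Phylogeny[] trees = ParserUtils.readPhylogenies( "example.xml" );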