(no commit message)

[jalview.git] / forester / java / src / org / forester / io / parsers / HmmPfamOutputParser.java
diff --git a/forester/java/src/org/forester/io/parsers/HmmPfamOutputParser.java b/forester/java/src/org/forester/io/parsers/HmmPfamOutputParser.java

index 800606c..96729a4 100644 (file)
--- a/forester/java/src/org/forester/io/parsers/HmmPfamOutputParser.java
+++ b/forester/java/src/org/forester/io/parsers/HmmPfamOutputParser.java
@@ -22,7 +22,7 @@
  // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
  //
  // Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/forester
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
  
  package org.forester.io.parsers;
  
@@ -40,12 +40,10 @@ import java.util.SortedSet;
  import java.util.TreeMap;
  import java.util.TreeSet;
  
+import org.forester.protein.BasicDomain;
  import org.forester.protein.BasicProtein;
  import org.forester.protein.Domain;
-import org.forester.protein.DomainId;
  import org.forester.protein.Protein;
-import org.forester.surfacing.BasicDomain;
-import org.forester.surfacing.SurfacingUtil;
  import org.forester.util.ForesterUtil;
  
  public final class HmmPfamOutputParser {
@@ -62,11 +60,10 @@ public final class HmmPfamOutputParser {
      private static final ReturnType RETURN_TYPE_DEFAULT         = ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN;
      private static final boolean    IGNORE_DUFS_DEFAULT         = false;
      private static final int        MAX_ALLOWED_OVERLAP_DEFAULT = -1;
-    private final Set<DomainId>     _filter;
+    private final Set<String>       _filter;
      private final FilterType        _filter_type;
      private final File              _input_file;
      private final String            _species;
-    private final String            _model_type;
      private double                  _e_value_maximum;
      private Map<String, String>     _individual_domain_score_cutoffs;
      private boolean                 _ignore_dufs;
@@ -85,7 +82,7 @@ public final class HmmPfamOutputParser {
      private int                     _domains_ignored_due_to_e_value;
      private int                     _domains_ignored_due_to_individual_score_cutoff;
      private int                     _domains_stored;
-    private SortedSet<DomainId>     _domains_stored_set;
+    private SortedSet<String>       _domains_stored_set;
      private long                    _time;
      private int                     _domains_ignored_due_to_negative_domain_filter;
      private Map<String, Integer>    _domains_ignored_due_to_negative_domain_filter_counts_map;
@@ -95,7 +92,6 @@ public final class HmmPfamOutputParser {
      public HmmPfamOutputParser( final File input_file, final String species, final String model_type ) {
          _input_file = input_file;
          _species = species;
-        _model_type = model_type;
          _filter = null;
          _filter_type = FilterType.NONE;
          init();
@@ -103,12 +99,10 @@ public final class HmmPfamOutputParser {
  
      public HmmPfamOutputParser( final File input_file,
                                  final String species,
-                                final String model_type,
-                                final Set<DomainId> filter,
+                                final Set<String> filter,
                                  final FilterType filter_type ) {
          _input_file = input_file;
          _species = species;
-        _model_type = model_type;
          _filter = filter;
          _filter_type = filter_type;
          init();
@@ -125,7 +119,7 @@ public final class HmmPfamOutputParser {
  
      private void addProtein( final List<Protein> proteins, final Protein current_protein ) {
          if ( ( getFilterType() == FilterType.POSITIVE_PROTEIN ) || ( getFilterType() == FilterType.NEGATIVE_PROTEIN ) ) {
-            final Set<DomainId> domain_ids_in_protein = new HashSet<DomainId>();
+            final Set<String> domain_ids_in_protein = new HashSet<String>();
              for( final Domain d : current_protein.getProteinDomains() ) {
                  domain_ids_in_protein.add( d.getDomainId() );
              }
@@ -192,7 +186,7 @@ public final class HmmPfamOutputParser {
          return _domains_stored;
      }
  
-    public SortedSet<DomainId> getDomainsStoredSet() {
+    public SortedSet<String> getDomainsStoredSet() {
          return _domains_stored_set;
      }
  
@@ -200,7 +194,7 @@ public final class HmmPfamOutputParser {
          return _e_value_maximum;
      }
  
-    private Set<DomainId> getFilter() {
+    private Set<String> getFilter() {
          return _filter;
      }
  
@@ -220,10 +214,6 @@ public final class HmmPfamOutputParser {
          return _max_allowed_overlap;
      }
  
-    private String getModelType() {
-        return _model_type;
-    }
-
      public int getProteinsEncountered() {
          return _proteins_encountered;
      }
@@ -262,7 +252,7 @@ public final class HmmPfamOutputParser {
      }
  
      private void intitCounts() {
-        setDomainsStoredSet( new TreeSet<DomainId>() );
+        setDomainsStoredSet( new TreeSet<String>() );
          setDomainsEncountered( 0 );
          setProteinsEncountered( 0 );
          setProteinsIgnoredDueToFilter( 0 );
@@ -343,8 +333,8 @@ public final class HmmPfamOutputParser {
                      }
                      else if ( isVerbose() ) {
                          ForesterUtil.printWarningMessage( getClass().getName(), "query \"" + query
-                                + "\" is not unique [line " + line_number + "] in ["
-                                + getInputFile().getCanonicalPath() + "]" );
+                                                          + "\" is not unique [line " + line_number + "] in ["
+                                                          + getInputFile().getCanonicalPath() + "]" );
                      }
                  }
                  else {
@@ -409,9 +399,9 @@ public final class HmmPfamOutputParser {
                      if ( ( getMaxAllowedOverlap() != HmmPfamOutputParser.MAX_ALLOWED_OVERLAP_DEFAULT )
                              || isIgnoreEngulfedDomains() ) {
                          final int domains_count = current_protein.getNumberOfProteinDomains();
-                        current_protein = SurfacingUtil.removeOverlappingDomains( getMaxAllowedOverlap(),
-                                                                                  isIgnoreEngulfedDomains(),
-                                                                                  current_protein );
+                        current_protein = ForesterUtil.removeOverlappingDomains( getMaxAllowedOverlap(),
+                                                                                 isIgnoreEngulfedDomains(),
+                                                                                 current_protein );
                          final int domains_removed = domains_count - current_protein.getNumberOfProteinDomains();
                          _domains_stored -= domains_removed;
                          _domains_ignored_due_to_overlap += domains_removed;
@@ -438,38 +428,36 @@ public final class HmmPfamOutputParser {
                  int to = -1;
                  double e_value = -1;
                  double score = -1;
-                boolean is_complete_hmm_match = false;
-                boolean is_complete_query_match = false;
                  try {
                      from = Integer.valueOf( from_str ).intValue();
                  }
                  catch ( final NumberFormatException e ) {
                      throw new IOException( "could not parse seq-f from \"" + line + "\" [line " + line_number
-                            + "] in [" + getInputFile().getCanonicalPath() + "]" );
+                                           + "] in [" + getInputFile().getCanonicalPath() + "]" );
                  }
                  try {
                      to = Integer.valueOf( to_str ).intValue();
                  }
                  catch ( final NumberFormatException e ) {
                      throw new IOException( "could not parse seq-t from \"" + line + "\" [line " + line_number
-                            + "] in [" + getInputFile().getCanonicalPath() + "]" );
+                                           + "] in [" + getInputFile().getCanonicalPath() + "]" );
                  }
                  try {
                      score = Double.valueOf( score_str ).doubleValue();
                  }
                  catch ( final NumberFormatException e ) {
                      throw new IOException( "could not parse score from \"" + line + "\" [line " + line_number
-                            + "] in [" + getInputFile().getCanonicalPath() + "]" );
+                                           + "] in [" + getInputFile().getCanonicalPath() + "]" );
                  }
                  try {
                      e_value = Double.valueOf( e_value_str ).doubleValue();
                  }
                  catch ( final NumberFormatException e ) {
                      throw new IOException( "could not parse E-value from \"" + line + "\" [line " + line_number
-                            + "] in [" + getInputFile().getCanonicalPath() + "]" );
+                                           + "] in [" + getInputFile().getCanonicalPath() + "]" );
                  }
                  if ( hmm_match_str.equals( "[]" ) ) {
-                    is_complete_hmm_match = true;
+                    // "[]" denotes a complete HMM match; the is_complete_hmm_match flag was removed, so nothing is recorded here
                  }
                  else if ( !( hmm_match_str.equals( ".]" ) || hmm_match_str.equals( "[." ) || hmm_match_str
                          .equals( ".." ) ) ) {
@@ -477,7 +465,7 @@ public final class HmmPfamOutputParser {
                              + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
                  }
                  if ( query_match_str.equals( ".." ) ) {
-                    is_complete_query_match = true;
+                    // ".." denotes a complete query match; the is_complete_query_match flag was removed, so nothing is recorded here
                  }
                  else if ( !( query_match_str.equals( ".]" ) || query_match_str.equals( "[." ) || query_match_str
                          .equals( "[]" ) ) ) {
@@ -498,14 +486,14 @@ public final class HmmPfamOutputParser {
                  }
                  catch ( final NumberFormatException e ) {
                      throw new IOException( "could not parse domain number from \"" + line + "\" [line " + line_number
-                            + "] in [" + getInputFile().getCanonicalPath() + "]" );
+                                           + "] in [" + getInputFile().getCanonicalPath() + "]" );
                  }
                  try {
                      total = Integer.valueOf( ( total_str ) ).intValue();
                  }
                  catch ( final NumberFormatException e ) {
                      throw new IOException( "could not parse domain count from \"" + line + "\" [line " + line_number
-                            + "] in [" + getInputFile().getCanonicalPath() + "]" );
+                                           + "] in [" + getInputFile().getCanonicalPath() + "]" );
                  }
                  ++_domains_encountered;
                  boolean failed_cutoff = false;
@@ -518,7 +506,7 @@ public final class HmmPfamOutputParser {
                      }
                      else {
                          throw new IOException( "could not find a score cutoff value for domain id \"" + id
-                                + "\" [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
+                                               + "\" [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
                      }
                  }
                  final String uc_id = id.toUpperCase();
@@ -539,8 +527,7 @@ public final class HmmPfamOutputParser {
                      ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToVirusLikeIdCountsMap(), id );
                      ++_domains_ignored_due_to_virus_like_id;
                  }
-                else if ( ( getFilterType() == FilterType.NEGATIVE_DOMAIN )
-                        && getFilter().contains( new DomainId( id ) ) ) {
+                else if ( ( getFilterType() == FilterType.NEGATIVE_DOMAIN ) && getFilter().contains( id ) ) {
                      ++_domains_ignored_due_to_negative_domain_filter;
                      ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToNegativeDomainFilterCountsMap(), id );
                  }
@@ -608,7 +595,7 @@ public final class HmmPfamOutputParser {
          _domains_stored = domains_stored;
      }
  
-    private void setDomainsStoredSet( final SortedSet<DomainId> _storeddomains_stored ) {
+    private void setDomainsStoredSet( final SortedSet<String> _storeddomains_stored ) {
          _domains_stored_set = _storeddomains_stored;
      }
  
@@ -626,8 +613,8 @@ public final class HmmPfamOutputParser {
      /**
       * To ignore domains which are completely engulfed by domains (individual
       * ones or stretches of overlapping ones) with better support values.
-     * 
-     * 
+     *
+     *
       * @param ignore_engulfed_domains
       */
      public void setIgnoreEngulfedDomains( final boolean ignore_engulfed_domains ) {
@@ -641,7 +628,7 @@ public final class HmmPfamOutputParser {
      /**
       * Sets the individual domain score cutoff values (for example, gathering
       * thresholds from Pfam). Domain ids are the keys, cutoffs the values.
-     * 
+     *
       * @param individual_domain_score_cutoffs
       */
      public void setIndividualDomainScoreCutoffs( final Map<String, String> individual_domain_score_cutoffs ) {