in progress
[jalview.git] / forester / java / src / org / forester / io / parsers / HmmPfamOutputParser.java
index 42f94ee..eda7e55 100644 (file)
@@ -6,7 +6,7 @@
 // Copyright (C) 2008-2009 Christian M. Zmasek
 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
 // All rights reserved
-// 
+//
 // This library is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
 // License as published by the Free Software Foundation; either
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 // Lesser General Public License for more details.
-// 
+//
 // You should have received a copy of the GNU Lesser General Public
 // License along with this library; if not, write to the Free Software
 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 //
 // Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/forester
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
 
 package org.forester.io.parsers;
 
@@ -40,12 +40,10 @@ import java.util.SortedSet;
 import java.util.TreeMap;
 import java.util.TreeSet;
 
-import org.forester.surfacing.BasicDomain;
-import org.forester.surfacing.BasicProtein;
-import org.forester.surfacing.Domain;
-import org.forester.surfacing.DomainId;
-import org.forester.surfacing.Protein;
-import org.forester.surfacing.SurfacingUtil;
+import org.forester.protein.BasicDomain;
+import org.forester.protein.BasicProtein;
+import org.forester.protein.Domain;
+import org.forester.protein.Protein;
 import org.forester.util.ForesterUtil;
 
 public final class HmmPfamOutputParser {
@@ -62,7 +60,7 @@ public final class HmmPfamOutputParser {
     private static final ReturnType RETURN_TYPE_DEFAULT         = ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN;
     private static final boolean    IGNORE_DUFS_DEFAULT         = false;
     private static final int        MAX_ALLOWED_OVERLAP_DEFAULT = -1;
-    private final Set<DomainId>     _filter;
+    private final Set<String>       _filter;
     private final FilterType        _filter_type;
     private final File              _input_file;
     private final String            _species;
@@ -85,7 +83,7 @@ public final class HmmPfamOutputParser {
     private int                     _domains_ignored_due_to_e_value;
     private int                     _domains_ignored_due_to_individual_score_cutoff;
     private int                     _domains_stored;
-    private SortedSet<DomainId>     _domains_stored_set;
+    private SortedSet<String>       _domains_stored_set;
     private long                    _time;
     private int                     _domains_ignored_due_to_negative_domain_filter;
     private Map<String, Integer>    _domains_ignored_due_to_negative_domain_filter_counts_map;
@@ -104,7 +102,7 @@ public final class HmmPfamOutputParser {
     public HmmPfamOutputParser( final File input_file,
                                 final String species,
                                 final String model_type,
-                                final Set<DomainId> filter,
+                                final Set<String> filter,
                                 final FilterType filter_type ) {
         _input_file = input_file;
         _species = species;
@@ -125,7 +123,7 @@ public final class HmmPfamOutputParser {
 
     private void addProtein( final List<Protein> proteins, final Protein current_protein ) {
         if ( ( getFilterType() == FilterType.POSITIVE_PROTEIN ) || ( getFilterType() == FilterType.NEGATIVE_PROTEIN ) ) {
-            final Set<DomainId> domain_ids_in_protein = new HashSet<DomainId>();
+            final Set<String> domain_ids_in_protein = new HashSet<String>();
             for( final Domain d : current_protein.getProteinDomains() ) {
                 domain_ids_in_protein.add( d.getDomainId() );
             }
@@ -192,7 +190,7 @@ public final class HmmPfamOutputParser {
         return _domains_stored;
     }
 
-    public SortedSet<DomainId> getDomainsStoredSet() {
+    public SortedSet<String> getDomainsStoredSet() {
         return _domains_stored_set;
     }
 
@@ -200,7 +198,7 @@ public final class HmmPfamOutputParser {
         return _e_value_maximum;
     }
 
-    private Set<DomainId> getFilter() {
+    private Set<String> getFilter() {
         return _filter;
     }
 
@@ -262,7 +260,7 @@ public final class HmmPfamOutputParser {
     }
 
     private void intitCounts() {
-        setDomainsStoredSet( new TreeSet<DomainId>() );
+        setDomainsStoredSet( new TreeSet<String>() );
         setDomainsEncountered( 0 );
         setProteinsEncountered( 0 );
         setProteinsIgnoredDueToFilter( 0 );
@@ -355,7 +353,7 @@ public final class HmmPfamOutputParser {
                             + getInputFile().getCanonicalPath() + "]" );
                 }
                 if ( getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ) {
-                    current_protein = new BasicProtein( query, getSpecies() );
+                    current_protein = new BasicProtein( query, getSpecies(), 0 );
                 }
                 else {
                     throw new IllegalArgumentException( "unknown return type" );
@@ -376,7 +374,7 @@ public final class HmmPfamOutputParser {
                 if ( was_not_unique ) {
                     if ( getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ) {
                         current_protein = new BasicProtein( current_protein.getProteinId() + " "
-                                + line.substring( 13 ).trim(), getSpecies() );
+                                + line.substring( 13 ).trim(), getSpecies(), 0 );
                     }
                 }
                 else {
@@ -409,9 +407,9 @@ public final class HmmPfamOutputParser {
                     if ( ( getMaxAllowedOverlap() != HmmPfamOutputParser.MAX_ALLOWED_OVERLAP_DEFAULT )
                             || isIgnoreEngulfedDomains() ) {
                         final int domains_count = current_protein.getNumberOfProteinDomains();
-                        current_protein = SurfacingUtil.removeOverlappingDomains( getMaxAllowedOverlap(),
-                                                                                  isIgnoreEngulfedDomains(),
-                                                                                  current_protein );
+                        current_protein = ForesterUtil.removeOverlappingDomains( getMaxAllowedOverlap(),
+                                                                                 isIgnoreEngulfedDomains(),
+                                                                                 current_protein );
                         final int domains_removed = domains_count - current_protein.getNumberOfProteinDomains();
                         _domains_stored -= domains_removed;
                         _domains_ignored_due_to_overlap += domains_removed;
@@ -539,8 +537,7 @@ public final class HmmPfamOutputParser {
                     ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToVirusLikeIdCountsMap(), id );
                     ++_domains_ignored_due_to_virus_like_id;
                 }
-                else if ( ( getFilterType() == FilterType.NEGATIVE_DOMAIN )
-                        && getFilter().contains( new DomainId( id ) ) ) {
+                else if ( ( getFilterType() == FilterType.NEGATIVE_DOMAIN ) && getFilter().contains( id ) ) {
                     ++_domains_ignored_due_to_negative_domain_filter;
                     ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToNegativeDomainFilterCountsMap(), id );
                 }
@@ -608,7 +605,7 @@ public final class HmmPfamOutputParser {
         _domains_stored = domains_stored;
     }
 
-    private void setDomainsStoredSet( final SortedSet<DomainId> _storeddomains_stored ) {
+    private void setDomainsStoredSet( final SortedSet<String> _storeddomains_stored ) {
         _domains_stored_set = _storeddomains_stored;
     }