X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fio%2Fparsers%2FHmmPfamOutputParser.java;h=534eadcf208687a2432bfab536a89c0de634542c;hb=e22f363c7273fc888ef911ad1f20d520a6bf030e;hp=15d164a724dbbb8a3ee7f6e9d463661187acf9fa;hpb=eee996a6476a1e3d84c07f8f690dcde3ff4b2ef5;p=jalview.git

diff --git a/forester/java/src/org/forester/io/parsers/HmmPfamOutputParser.java b/forester/java/src/org/forester/io/parsers/HmmPfamOutputParser.java
index 15d164a..534eadc 100644
--- a/forester/java/src/org/forester/io/parsers/HmmPfamOutputParser.java
+++ b/forester/java/src/org/forester/io/parsers/HmmPfamOutputParser.java
@@ -22,7 +22,7 @@
 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 //
 // Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/forester
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
 
 package org.forester.io.parsers;
 
@@ -40,12 +40,10 @@ import java.util.SortedSet;
 import java.util.TreeMap;
 import java.util.TreeSet;
 
-import org.forester.surfacing.BasicDomain;
-import org.forester.surfacing.BasicProtein;
-import org.forester.surfacing.Domain;
-import org.forester.surfacing.DomainId;
-import org.forester.surfacing.Protein;
-import org.forester.surfacing.SurfacingUtil;
+import org.forester.protein.BasicDomain;
+import org.forester.protein.BasicProtein;
+import org.forester.protein.Domain;
+import org.forester.protein.Protein;
 import org.forester.util.ForesterUtil;
 
 public final class HmmPfamOutputParser {
@@ -62,11 +60,10 @@ public final class HmmPfamOutputParser {
     private static final ReturnType RETURN_TYPE_DEFAULT = ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN;
     private static final boolean IGNORE_DUFS_DEFAULT = false;
     private static final int MAX_ALLOWED_OVERLAP_DEFAULT = -1;
-    private final Set<DomainId> _filter;
+    private final Set<String> _filter;
     private final FilterType _filter_type;
     private final File _input_file;
     private final String _species;
-    private final String _model_type;
     private double _e_value_maximum;
     private Map _individual_domain_score_cutoffs;
     private boolean _ignore_dufs;
@@ -85,7 +82,7 @@ public final class HmmPfamOutputParser {
     private int _domains_ignored_due_to_e_value;
     private int _domains_ignored_due_to_individual_score_cutoff;
     private int _domains_stored;
-    private SortedSet<DomainId> _domains_stored_set;
+    private SortedSet<String> _domains_stored_set;
     private long _time;
     private int _domains_ignored_due_to_negative_domain_filter;
     private Map _domains_ignored_due_to_negative_domain_filter_counts_map;
@@ -95,7 +92,6 @@ public final class HmmPfamOutputParser {
     public HmmPfamOutputParser( final File input_file, final String species, final String model_type ) {
         _input_file = input_file;
         _species = species;
-        _model_type = model_type;
         _filter = null;
         _filter_type = FilterType.NONE;
         init();
@@ -103,12 +99,10 @@ public final class HmmPfamOutputParser {
 
     public HmmPfamOutputParser( final File input_file,
                                 final String species,
-                                final String model_type,
-                                final Set<DomainId> filter,
+                                final Set<String> filter,
                                 final FilterType filter_type ) {
         _input_file = input_file;
         _species = species;
-        _model_type = model_type;
         _filter = filter;
         _filter_type = filter_type;
         init();
@@ -125,7 +119,7 @@ public final class HmmPfamOutputParser {
 
     private void addProtein( final List proteins, final Protein current_protein ) {
         if ( ( getFilterType() == FilterType.POSITIVE_PROTEIN ) || ( getFilterType() == FilterType.NEGATIVE_PROTEIN ) ) {
-            final Set<DomainId> domain_ids_in_protein = new HashSet<DomainId>();
+            final Set<String> domain_ids_in_protein = new HashSet<String>();
             for( final Domain d : current_protein.getProteinDomains() ) {
                 domain_ids_in_protein.add( d.getDomainId() );
             }
@@ -192,7 +186,7 @@ public final class HmmPfamOutputParser {
         return _domains_stored;
     }
 
-    public SortedSet<DomainId> getDomainsStoredSet() {
+    public SortedSet<String> getDomainsStoredSet() {
         return _domains_stored_set;
     }
 
@@ -200,7 +194,7 @@ public final class HmmPfamOutputParser {
         return _e_value_maximum;
     }
 
-    private Set<DomainId> getFilter() {
+    private Set<String> getFilter() {
         return _filter;
     }
 
@@ -220,10 +214,6 @@ public final class HmmPfamOutputParser {
         return _max_allowed_overlap;
     }
 
-    private String getModelType() {
-        return _model_type;
-    }
-
     public int getProteinsEncountered() {
         return _proteins_encountered;
     }
@@ -262,7 +252,7 @@ public final class HmmPfamOutputParser {
     }
 
     private void intitCounts() {
-        setDomainsStoredSet( new TreeSet<DomainId>() );
+        setDomainsStoredSet( new TreeSet<String>() );
         setDomainsEncountered( 0 );
         setProteinsEncountered( 0 );
         setProteinsIgnoredDueToFilter( 0 );
@@ -355,7 +345,7 @@ public final class HmmPfamOutputParser {
                                     + getInputFile().getCanonicalPath() + "]" );
                 }
                 if ( getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ) {
-                    current_protein = new BasicProtein( query, getSpecies() );
+                    current_protein = new BasicProtein( query, getSpecies(), 0 );
                 }
                 else {
                     throw new IllegalArgumentException( "unknown return type" );
@@ -376,7 +366,7 @@ public final class HmmPfamOutputParser {
                 if ( was_not_unique ) {
                     if ( getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ) {
                         current_protein = new BasicProtein( current_protein.getProteinId() + " "
-                                    + line.substring( 13 ).trim(), getSpecies() );
+                                    + line.substring( 13 ).trim(), getSpecies(), 0 );
                     }
                 }
                 else {
@@ -409,9 +399,9 @@ public final class HmmPfamOutputParser {
                    if ( ( getMaxAllowedOverlap() != HmmPfamOutputParser.MAX_ALLOWED_OVERLAP_DEFAULT )
                             || isIgnoreEngulfedDomains() ) {
                         final int domains_count = current_protein.getNumberOfProteinDomains();
-                        current_protein = SurfacingUtil.removeOverlappingDomains( getMaxAllowedOverlap(),
-                                                                                  isIgnoreEngulfedDomains(),
-                                                                                  current_protein );
+                        current_protein = ForesterUtil.removeOverlappingDomains( getMaxAllowedOverlap(),
+                                                                                 isIgnoreEngulfedDomains(),
+                                                                                 current_protein );
                         final int domains_removed = domains_count - current_protein.getNumberOfProteinDomains();
                         _domains_stored -= domains_removed;
                         _domains_ignored_due_to_overlap += domains_removed;
@@ -438,8 +428,6 @@ public final class HmmPfamOutputParser {
                         int to = -1;
                         double e_value = -1;
                         double score = -1;
-                        boolean is_complete_hmm_match = false;
-                        boolean is_complete_query_match = false;
                         try {
                             from = Integer.valueOf( from_str ).intValue();
                         }
@@ -469,7 +457,7 @@ public final class HmmPfamOutputParser {
                                     + "] in [" + getInputFile().getCanonicalPath() + "]" );
                         }
                         if ( hmm_match_str.equals( "[]" ) ) {
-                            is_complete_hmm_match = true;
+                            //is_complete_hmm_match = true;
                         }
                         else if ( !( hmm_match_str.equals( ".]" ) || hmm_match_str.equals( "[." ) || hmm_match_str
                                 .equals( ".." ) ) ) {
@@ -477,7 +465,7 @@ public final class HmmPfamOutputParser {
                                     + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
                         }
                         if ( query_match_str.equals( ".." ) ) {
-                            is_complete_query_match = true;
+                            // is_complete_query_match = true;
                         }
                         else if ( !( query_match_str.equals( ".]" ) || query_match_str.equals( "[." ) || query_match_str
                                 .equals( "[]" ) ) ) {
@@ -539,8 +527,7 @@ public final class HmmPfamOutputParser {
                             ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToVirusLikeIdCountsMap(), id );
                             ++_domains_ignored_due_to_virus_like_id;
                         }
-                        else if ( ( getFilterType() == FilterType.NEGATIVE_DOMAIN )
-                                && getFilter().contains( new DomainId( id ) ) ) {
+                        else if ( ( getFilterType() == FilterType.NEGATIVE_DOMAIN ) && getFilter().contains( id ) ) {
                             ++_domains_ignored_due_to_negative_domain_filter;
                             ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToNegativeDomainFilterCountsMap(), id );
                         }
@@ -608,7 +595,7 @@ public final class HmmPfamOutputParser {
         _domains_stored = domains_stored;
     }
 
-    private void setDomainsStoredSet( final SortedSet<DomainId> _storeddomains_stored ) {
+    private void setDomainsStoredSet( final SortedSet<String> _storeddomains_stored ) {
         _domains_stored_set = _storeddomains_stored;
     }