X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fio%2Fparsers%2FHmmPfamOutputParser.java;h=96729a43c7d1c17dc1ee8056c53d8606430e6fc5;hb=665e671efec73fcb36a9aac45f119330f290fa81;hp=090a05263ba37f01a0965698d476153bfe3733e1;hpb=656be28debec520e0e35a8b311114398a40ea366;p=jalview.git diff --git a/forester/java/src/org/forester/io/parsers/HmmPfamOutputParser.java b/forester/java/src/org/forester/io/parsers/HmmPfamOutputParser.java index 090a052..96729a4 100644 --- a/forester/java/src/org/forester/io/parsers/HmmPfamOutputParser.java +++ b/forester/java/src/org/forester/io/parsers/HmmPfamOutputParser.java @@ -43,9 +43,7 @@ import java.util.TreeSet; import org.forester.protein.BasicDomain; import org.forester.protein.BasicProtein; import org.forester.protein.Domain; -import org.forester.protein.DomainId; import org.forester.protein.Protein; -import org.forester.surfacing.SurfacingUtil; import org.forester.util.ForesterUtil; public final class HmmPfamOutputParser { @@ -62,11 +60,10 @@ public final class HmmPfamOutputParser { private static final ReturnType RETURN_TYPE_DEFAULT = ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN; private static final boolean IGNORE_DUFS_DEFAULT = false; private static final int MAX_ALLOWED_OVERLAP_DEFAULT = -1; - private final Set _filter; + private final Set _filter; private final FilterType _filter_type; private final File _input_file; private final String _species; - private final String _model_type; private double _e_value_maximum; private Map _individual_domain_score_cutoffs; private boolean _ignore_dufs; @@ -85,7 +82,7 @@ public final class HmmPfamOutputParser { private int _domains_ignored_due_to_e_value; private int _domains_ignored_due_to_individual_score_cutoff; private int _domains_stored; - private SortedSet _domains_stored_set; + private SortedSet _domains_stored_set; private long _time; private int _domains_ignored_due_to_negative_domain_filter; private Map _domains_ignored_due_to_negative_domain_filter_counts_map; @@ -95,7 +92,6 @@ public final class HmmPfamOutputParser { public HmmPfamOutputParser( final File input_file, final String species, final String model_type ) { _input_file = input_file; _species = species; - _model_type = model_type; _filter = null; _filter_type = FilterType.NONE; init(); @@ -103,12 +99,10 @@ public final class HmmPfamOutputParser { public HmmPfamOutputParser( final File input_file, final String species, - final String model_type, - final Set filter, + final Set filter, final FilterType filter_type ) { _input_file = input_file; _species = species; - _model_type = model_type; _filter = filter; _filter_type = filter_type; init(); @@ -125,7 +119,7 @@ public final class HmmPfamOutputParser { private void addProtein( final List proteins, final Protein current_protein ) { if ( ( getFilterType() == FilterType.POSITIVE_PROTEIN ) || ( getFilterType() == FilterType.NEGATIVE_PROTEIN ) ) { - final Set domain_ids_in_protein = new HashSet(); + final Set domain_ids_in_protein = new HashSet(); for( final Domain d : current_protein.getProteinDomains() ) { domain_ids_in_protein.add( d.getDomainId() ); } @@ -192,7 +186,7 @@ public final class HmmPfamOutputParser { return _domains_stored; } - public SortedSet getDomainsStoredSet() { + public SortedSet getDomainsStoredSet() { return _domains_stored_set; } @@ -200,7 +194,7 @@ public final class HmmPfamOutputParser { return _e_value_maximum; } - private Set getFilter() { + private Set getFilter() { return _filter; } @@ -220,10 +214,6 @@ public final class HmmPfamOutputParser { return _max_allowed_overlap; } - private String getModelType() { - return _model_type; - } - public int getProteinsEncountered() { return _proteins_encountered; } @@ -262,7 +252,7 @@ public final class HmmPfamOutputParser { } private void intitCounts() { - setDomainsStoredSet( new TreeSet() ); + setDomainsStoredSet( new TreeSet() ); setDomainsEncountered( 0 ); setProteinsEncountered( 0 ); setProteinsIgnoredDueToFilter( 0 ); @@ -343,8 +333,8 @@ public final class HmmPfamOutputParser { } else if ( isVerbose() ) { ForesterUtil.printWarningMessage( getClass().getName(), "query \"" + query - + "\" is not unique [line " + line_number + "] in [" - + getInputFile().getCanonicalPath() + "]" ); + + "\" is not unique [line " + line_number + "] in [" + + getInputFile().getCanonicalPath() + "]" ); } } else { @@ -409,9 +399,9 @@ public final class HmmPfamOutputParser { if ( ( getMaxAllowedOverlap() != HmmPfamOutputParser.MAX_ALLOWED_OVERLAP_DEFAULT ) || isIgnoreEngulfedDomains() ) { final int domains_count = current_protein.getNumberOfProteinDomains(); - current_protein = SurfacingUtil.removeOverlappingDomains( getMaxAllowedOverlap(), - isIgnoreEngulfedDomains(), - current_protein ); + current_protein = ForesterUtil.removeOverlappingDomains( getMaxAllowedOverlap(), + isIgnoreEngulfedDomains(), + current_protein ); final int domains_removed = domains_count - current_protein.getNumberOfProteinDomains(); _domains_stored -= domains_removed; _domains_ignored_due_to_overlap += domains_removed; @@ -438,38 +428,36 @@ public final class HmmPfamOutputParser { int to = -1; double e_value = -1; double score = -1; - boolean is_complete_hmm_match = false; - boolean is_complete_query_match = false; try { from = Integer.valueOf( from_str ).intValue(); } catch ( final NumberFormatException e ) { throw new IOException( "could not parse seq-f from \"" + line + "\" [line " + line_number - + "] in [" + getInputFile().getCanonicalPath() + "]" ); + + "] in [" + getInputFile().getCanonicalPath() + "]" ); } try { to = Integer.valueOf( to_str ).intValue(); } catch ( final NumberFormatException e ) { throw new IOException( "could not parse seq-t from \"" + line + "\" [line " + line_number - + "] in [" + getInputFile().getCanonicalPath() + "]" ); + + "] in [" + getInputFile().getCanonicalPath() + "]" ); } try { score = Double.valueOf( score_str ).doubleValue(); } catch ( final NumberFormatException e ) { throw new IOException( "could not parse score from \"" + line + "\" [line " + line_number - + "] in [" + getInputFile().getCanonicalPath() + "]" ); + + "] in [" + getInputFile().getCanonicalPath() + "]" ); } try { e_value = Double.valueOf( e_value_str ).doubleValue(); } catch ( final NumberFormatException e ) { throw new IOException( "could not parse E-value from \"" + line + "\" [line " + line_number - + "] in [" + getInputFile().getCanonicalPath() + "]" ); + + "] in [" + getInputFile().getCanonicalPath() + "]" ); } if ( hmm_match_str.equals( "[]" ) ) { - is_complete_hmm_match = true; + //is_complete_hmm_match = true; } else if ( !( hmm_match_str.equals( ".]" ) || hmm_match_str.equals( "[." ) || hmm_match_str .equals( ".." ) ) ) { @@ -477,7 +465,7 @@ public final class HmmPfamOutputParser { + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" ); } if ( query_match_str.equals( ".." ) ) { - is_complete_query_match = true; + // is_complete_query_match = true; } else if ( !( query_match_str.equals( ".]" ) || query_match_str.equals( "[." ) || query_match_str .equals( "[]" ) ) ) { @@ -498,14 +486,14 @@ public final class HmmPfamOutputParser { } catch ( final NumberFormatException e ) { throw new IOException( "could not parse domain number from \"" + line + "\" [line " + line_number - + "] in [" + getInputFile().getCanonicalPath() + "]" ); + + "] in [" + getInputFile().getCanonicalPath() + "]" ); } try { total = Integer.valueOf( ( total_str ) ).intValue(); } catch ( final NumberFormatException e ) { throw new IOException( "could not parse domain count from \"" + line + "\" [line " + line_number - + "] in [" + getInputFile().getCanonicalPath() + "]" ); + + "] in [" + getInputFile().getCanonicalPath() + "]" ); } ++_domains_encountered; boolean failed_cutoff = false; @@ -518,7 +506,7 @@ public final class HmmPfamOutputParser { } else { throw new IOException( "could not find a score cutoff value for domain id \"" + id - + "\" [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" ); + + "\" [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" ); } } final String uc_id = id.toUpperCase(); @@ -539,8 +527,7 @@ public final class HmmPfamOutputParser { ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToVirusLikeIdCountsMap(), id ); ++_domains_ignored_due_to_virus_like_id; } - else if ( ( getFilterType() == FilterType.NEGATIVE_DOMAIN ) - && getFilter().contains( new DomainId( id ) ) ) { + else if ( ( getFilterType() == FilterType.NEGATIVE_DOMAIN ) && getFilter().contains( id ) ) { ++_domains_ignored_due_to_negative_domain_filter; ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToNegativeDomainFilterCountsMap(), id ); } @@ -608,7 +595,7 @@ public final class HmmPfamOutputParser { _domains_stored = domains_stored; } - private void setDomainsStoredSet( final SortedSet _storeddomains_stored ) { + private void setDomainsStoredSet( final SortedSet _storeddomains_stored ) { _domains_stored_set = _storeddomains_stored; } @@ -626,8 +613,8 @@ public final class HmmPfamOutputParser { /** * To ignore domains which are completely engulfed by domains (individual * ones or stretches of overlapping ones) with better support values. - * - * + * + * * @param ignored_engulfed_domains */ public void setIgnoreEngulfedDomains( final boolean ignore_engulfed_domains ) { @@ -641,7 +628,7 @@ public final class HmmPfamOutputParser { /** * Sets the individual domain score cutoff values (for example, gathering * thresholds from Pfam). Domain ids are the keys, cutoffs the values. - * + * * @param individual_domain_score_cutoffs */ public void setIndividualDomainScoreCutoffs( final Map individual_domain_score_cutoffs ) {