X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fio%2Fparsers%2FHmmscanPerDomainTableParser.java;h=f063866e2e44d17870f873085c4ca91f659eabeb;hb=64731196184662d30d794bc339a5ecd567cd5e86;hp=44950097e8e7f82a37af49d2af73caf9fd9ece0d;hpb=87d34f343d0262cd0c009c6f1da058a5a217bc64;p=jalview.git diff --git a/forester/java/src/org/forester/io/parsers/HmmscanPerDomainTableParser.java b/forester/java/src/org/forester/io/parsers/HmmscanPerDomainTableParser.java index 4495009..f063866 100644 --- a/forester/java/src/org/forester/io/parsers/HmmscanPerDomainTableParser.java +++ b/forester/java/src/org/forester/io/parsers/HmmscanPerDomainTableParser.java @@ -45,7 +45,6 @@ import org.forester.protein.BasicDomain; import org.forester.protein.BasicProtein; import org.forester.protein.Domain; import org.forester.protein.Protein; -import org.forester.surfacing.SurfacingUtil; import org.forester.util.ForesterUtil; public final class HmmscanPerDomainTableParser { @@ -64,11 +63,13 @@ public final class HmmscanPerDomainTableParser { private static final boolean IGNORE_DUFS_DEFAULT = false; private static final int MAX_ALLOWED_OVERLAP_DEFAULT = -1; private static final boolean IGNORE_REPLACED_RRMS = false; + private static final boolean IGNORE_hGDE_amylase = true; //TODO eventually remove me, added 10/22/13 private final Set _filter; private final FilterType _filter_type; private final File _input_file; private final String _species; - private double _e_value_maximum; + private double _fs_e_value_maximum; + private double _i_e_value_maximum; private Map _individual_score_cutoffs; private boolean _ignore_dufs; private boolean _ignore_virus_like_ids; @@ -81,7 +82,8 @@ public final class HmmscanPerDomainTableParser { private int _domains_encountered; private int _domains_ignored_due_to_duf; private int _domains_ignored_due_to_overlap; - private int _domains_ignored_due_to_e_value; + private int _domains_ignored_due_to_fs_e_value; + private int _domains_ignored_due_to_i_e_value; private int _domains_ignored_due_to_individual_score_cutoff; private int _domains_stored; private SortedSet _domains_stored_set; @@ -164,9 +166,9 @@ public final class HmmscanPerDomainTableParser { if ( ( getMaxAllowedOverlap() != HmmscanPerDomainTableParser.MAX_ALLOWED_OVERLAP_DEFAULT ) || isIgnoreEngulfedDomains() ) { final int domains_count = current_protein.getNumberOfProteinDomains(); - current_protein = SurfacingUtil.removeOverlappingDomains( getMaxAllowedOverlap(), - isIgnoreEngulfedDomains(), - current_protein ); + current_protein = ForesterUtil.removeOverlappingDomains( getMaxAllowedOverlap(), + isIgnoreEngulfedDomains(), + current_protein ); final int domains_removed = domains_count - current_protein.getNumberOfProteinDomains(); _domains_stored -= domains_removed; _domains_ignored_due_to_overlap += domains_removed; @@ -207,8 +209,12 @@ public final class HmmscanPerDomainTableParser { return _domains_ignored_due_to_duf; } - public int getDomainsIgnoredDueToEval() { - return _domains_ignored_due_to_e_value; + public int getDomainsIgnoredDueToIEval() { + return _domains_ignored_due_to_i_e_value; + } + + public int getDomainsIgnoredDueToFsEval() { + return _domains_ignored_due_to_fs_e_value; } public int getDomainsIgnoredDueToIndividualScoreCutoff() { @@ -243,8 +249,12 @@ public final class HmmscanPerDomainTableParser { return _domains_stored_set; } - private double getEValueMaximum() { - return _e_value_maximum; + private double getFsEValueMaximum() { + return _fs_e_value_maximum; + } + + private double getIEValueMaximum() { + return _i_e_value_maximum; } private Set getFilter() { @@ -296,7 +306,8 @@ public final class HmmscanPerDomainTableParser { } private void init() { - _e_value_maximum = HmmscanPerDomainTableParser.E_VALUE_MAXIMUM_DEFAULT; + _fs_e_value_maximum = HmmscanPerDomainTableParser.E_VALUE_MAXIMUM_DEFAULT; + _i_e_value_maximum = HmmscanPerDomainTableParser.E_VALUE_MAXIMUM_DEFAULT; setIgnoreDufs( HmmscanPerDomainTableParser.IGNORE_DUFS_DEFAULT ); setReturnType( HmmscanPerDomainTableParser.RETURN_TYPE_DEFAULT ); _max_allowed_overlap = HmmscanPerDomainTableParser.MAX_ALLOWED_OVERLAP_DEFAULT; @@ -313,7 +324,8 @@ public final class HmmscanPerDomainTableParser { setProteinsIgnoredDueToFilter( 0 ); setDomainsIgnoredDueToNegativeFilter( 0 ); setDomainsIgnoredDueToDuf( 0 ); - setDomainsIgnoredDueToEval( 0 ); + setDomainsIgnoredDueToFsEval( 0 ); + setDomainsIgnoredDueToIEval( 0 ); setDomainsIgnoredDueToIndividualScoreCutoff( 0 ); setDomainsIgnoredDueToVirusLikeId( 0 ); setDomainsIgnoredDueToOverlap( 0 ); @@ -440,9 +452,13 @@ public final class HmmscanPerDomainTableParser { else if ( ali_from == ali_to ) { //Ignore } - else if ( ( getEValueMaximum() != HmmscanPerDomainTableParser.E_VALUE_MAXIMUM_DEFAULT ) - && ( fs_e_value > getEValueMaximum() ) ) { - ++_domains_ignored_due_to_e_value; + else if ( ( getFsEValueMaximum() != HmmscanPerDomainTableParser.E_VALUE_MAXIMUM_DEFAULT ) + && ( fs_e_value > getFsEValueMaximum() ) ) { + ++_domains_ignored_due_to_fs_e_value; + } + else if ( ( getIEValueMaximum() != HmmscanPerDomainTableParser.E_VALUE_MAXIMUM_DEFAULT ) + && ( i_e_value > getIEValueMaximum() ) ) { + ++_domains_ignored_due_to_i_e_value; } else if ( isIgnoreDufs() && uc_id.startsWith( "DUF" ) ) { ++_domains_ignored_due_to_duf; @@ -451,6 +467,8 @@ public final class HmmscanPerDomainTableParser { && ( uc_id.contains( "RRM_1" ) || uc_id.contains( "RRM_3" ) || uc_id.contains( "RRM_5" ) || uc_id .contains( "RRM_6" ) ) ) { } + else if ( IGNORE_hGDE_amylase && ( uc_id.equals( "hGDE_amylase" ) ) ) { + } else if ( isIgnoreVirusLikeIds() && ( uc_id.contains( VIR ) || uc_id.contains( PHAGE ) || uc_id.contains( RETRO ) || uc_id.contains( TRANSPOS ) || uc_id.startsWith( RV ) || uc_id.startsWith( GAG ) @@ -469,8 +487,6 @@ public final class HmmscanPerDomainTableParser { ali_to, ( short ) domain_number, ( short ) total_domains, - fs_e_value, - fs_score, i_e_value, domain_score ); current_protein.addProteinDomain( pd ); @@ -522,8 +538,12 @@ public final class HmmscanPerDomainTableParser { _domains_ignored_due_to_duf = domains_ignored_due_to_duf; } - private void setDomainsIgnoredDueToEval( final int domains_ignored_due_to_e_value ) { - _domains_ignored_due_to_e_value = domains_ignored_due_to_e_value; + private void setDomainsIgnoredDueToFsEval( final int domains_ignored_due_to_fs_e_value ) { + _domains_ignored_due_to_fs_e_value = domains_ignored_due_to_fs_e_value; + } + + private void setDomainsIgnoredDueToIEval( final int domains_ignored_due_to_i_e_value ) { + _domains_ignored_due_to_i_e_value = domains_ignored_due_to_i_e_value; } private void setDomainsIgnoredDueToIndividualScoreCutoff( final int domains_ignored_due_to_individual_score_cutoff ) { @@ -558,11 +578,18 @@ public final class HmmscanPerDomainTableParser { _domains_stored_set = _storeddomains_stored; } - public void setEValueMaximum( final double e_value_maximum ) { - if ( e_value_maximum < 0.0 ) { + public void setFsEValueMaximum( final double fs_e_value_maximum ) { + if ( fs_e_value_maximum < 0.0 ) { + throw new IllegalArgumentException( "attempt to set the maximum E-value to a negative value" ); + } + _fs_e_value_maximum = fs_e_value_maximum; + } + + public void setIEValueMaximum( final double i_e_value_maximum ) { + if ( i_e_value_maximum < 0.0 ) { throw new IllegalArgumentException( "attempt to set the maximum E-value to a negative value" ); } - _e_value_maximum = e_value_maximum; + _i_e_value_maximum = i_e_value_maximum; } public void setIgnoreDufs( final boolean ignore_dufs ) {