X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fio%2Fparsers%2FHmmscanPerDomainTableParser.java;h=44950097e8e7f82a37af49d2af73caf9fd9ece0d;hb=a648fae3c8d0402dbdafa379ff3d42bbea96633d;hp=7d31a11649f872508be0eaea7af3ebd3e872804f;hpb=94fc8a7809432b86c41a2ae5f3ab4977cd5d5599;p=jalview.git diff --git a/forester/java/src/org/forester/io/parsers/HmmscanPerDomainTableParser.java b/forester/java/src/org/forester/io/parsers/HmmscanPerDomainTableParser.java index 7d31a11..4495009 100644 --- a/forester/java/src/org/forester/io/parsers/HmmscanPerDomainTableParser.java +++ b/forester/java/src/org/forester/io/parsers/HmmscanPerDomainTableParser.java @@ -23,7 +23,7 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.parsers; @@ -41,11 +41,10 @@ import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; -import org.forester.surfacing.BasicDomain; -import org.forester.surfacing.BasicProtein; -import org.forester.surfacing.Domain; -import org.forester.surfacing.DomainId; -import org.forester.surfacing.Protein; +import org.forester.protein.BasicDomain; +import org.forester.protein.BasicProtein; +import org.forester.protein.Domain; +import org.forester.protein.Protein; import org.forester.surfacing.SurfacingUtil; import org.forester.util.ForesterUtil; @@ -64,7 +63,8 @@ public final class HmmscanPerDomainTableParser { private static final ReturnType RETURN_TYPE_DEFAULT = ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN; private static final boolean IGNORE_DUFS_DEFAULT = false; private static final int MAX_ALLOWED_OVERLAP_DEFAULT = -1; - private final Set _filter; + private static final boolean IGNORE_REPLACED_RRMS = false; + private final Set _filter; private final FilterType _filter_type; private final File _input_file; private final String _species; @@ -84,7 +84,7 @@ public final class HmmscanPerDomainTableParser { private int _domains_ignored_due_to_e_value; private int _domains_ignored_due_to_individual_score_cutoff; private int _domains_stored; - private SortedSet _domains_stored_set; + private SortedSet _domains_stored_set; private long _time; private int _domains_ignored_due_to_negative_domain_filter; private Map _domains_ignored_due_to_negative_domain_filter_counts_map; @@ -120,7 +120,7 @@ public final class HmmscanPerDomainTableParser { public HmmscanPerDomainTableParser( final File input_file, final String species, - final Set filter, + final Set filter, final FilterType filter_type, final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to ) { _input_file = input_file; @@ -134,7 +134,7 @@ public final class HmmscanPerDomainTableParser { public HmmscanPerDomainTableParser( final File input_file, final String species, - final Set filter, + final Set filter, final FilterType filter_type, final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to, final boolean allow_proteins_with_same_name ) { @@ -172,7 +172,7 @@ public final class HmmscanPerDomainTableParser { _domains_ignored_due_to_overlap += domains_removed; } if ( ( getFilterType() == FilterType.POSITIVE_PROTEIN ) || ( getFilterType() == FilterType.NEGATIVE_PROTEIN ) ) { - final Set domain_ids_in_protein = new HashSet(); + final Set domain_ids_in_protein = new HashSet(); for( final Domain d : current_protein.getProteinDomains() ) { domain_ids_in_protein.add( d.getDomainId() ); } @@ -239,7 +239,7 @@ public final class HmmscanPerDomainTableParser { return _domains_stored; } - public SortedSet getDomainsStoredSet() { + public SortedSet getDomainsStoredSet() { return _domains_stored_set; } @@ -247,7 +247,7 @@ public final class HmmscanPerDomainTableParser { return _e_value_maximum; } - private Set getFilter() { + private Set getFilter() { return _filter; } @@ -307,7 +307,7 @@ public final class HmmscanPerDomainTableParser { } private void intitCounts() { - setDomainsStoredSet( new TreeSet() ); + setDomainsStoredSet( new TreeSet() ); setDomainsEncountered( 0 ); setProteinsEncountered( 0 ); setProteinsIgnoredDueToFilter( 0 ); @@ -407,7 +407,7 @@ public final class HmmscanPerDomainTableParser { addProtein( proteins, current_protein ); } if ( getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ) { - current_protein = new BasicProtein( query, getSpecies() ); + current_protein = new BasicProtein( query, getSpecies(), qlen ); } else { throw new IllegalArgumentException( "unknown return type" ); @@ -447,6 +447,10 @@ public final class HmmscanPerDomainTableParser { else if ( isIgnoreDufs() && uc_id.startsWith( "DUF" ) ) { ++_domains_ignored_due_to_duf; } + else if ( IGNORE_REPLACED_RRMS + && ( uc_id.contains( "RRM_1" ) || uc_id.contains( "RRM_3" ) || uc_id.contains( "RRM_5" ) || uc_id + .contains( "RRM_6" ) ) ) { + } else if ( isIgnoreVirusLikeIds() && ( uc_id.contains( VIR ) || uc_id.contains( PHAGE ) || uc_id.contains( RETRO ) || uc_id.contains( TRANSPOS ) || uc_id.startsWith( RV ) || uc_id.startsWith( GAG ) @@ -454,8 +458,7 @@ public final class HmmscanPerDomainTableParser { ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToVirusLikeIdCountsMap(), target_id ); ++_domains_ignored_due_to_virus_like_id; } - else if ( ( getFilterType() == FilterType.NEGATIVE_DOMAIN ) - && getFilter().contains( new DomainId( target_id ) ) ) { + else if ( ( getFilterType() == FilterType.NEGATIVE_DOMAIN ) && getFilter().contains( target_id ) ) { ++_domains_ignored_due_to_negative_domain_filter; ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToNegativeDomainFilterCountsMap(), target_id ); } @@ -551,7 +554,7 @@ public final class HmmscanPerDomainTableParser { _domains_stored = domains_stored; } - private void setDomainsStoredSet( final SortedSet _storeddomains_stored ) { + private void setDomainsStoredSet( final SortedSet _storeddomains_stored ) { _domains_stored_set = _storeddomains_stored; }