import org.forester.protein.BasicDomain;
import org.forester.protein.BasicProtein;
import org.forester.protein.Domain;
-import org.forester.protein.DomainId;
import org.forester.protein.Protein;
-import org.forester.surfacing.SurfacingUtil;
import org.forester.util.ForesterUtil;
public final class HmmPfamOutputParser {
private static final ReturnType RETURN_TYPE_DEFAULT = ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN;
private static final boolean IGNORE_DUFS_DEFAULT = false;
private static final int MAX_ALLOWED_OVERLAP_DEFAULT = -1;
- private final Set<DomainId> _filter;
+ private final Set<String> _filter;
private final FilterType _filter_type;
private final File _input_file;
private final String _species;
- private final String _model_type;
private double _e_value_maximum;
private Map<String, String> _individual_domain_score_cutoffs;
private boolean _ignore_dufs;
private int _domains_ignored_due_to_e_value;
private int _domains_ignored_due_to_individual_score_cutoff;
private int _domains_stored;
- private SortedSet<DomainId> _domains_stored_set;
+ private SortedSet<String> _domains_stored_set;
private long _time;
private int _domains_ignored_due_to_negative_domain_filter;
private Map<String, Integer> _domains_ignored_due_to_negative_domain_filter_counts_map;
public HmmPfamOutputParser( final File input_file, final String species, final String model_type ) {
_input_file = input_file;
_species = species;
- _model_type = model_type;
_filter = null;
_filter_type = FilterType.NONE;
init();
public HmmPfamOutputParser( final File input_file,
final String species,
- final String model_type,
- final Set<DomainId> filter,
+ final Set<String> filter,
final FilterType filter_type ) {
_input_file = input_file;
_species = species;
- _model_type = model_type;
_filter = filter;
_filter_type = filter_type;
init();
private void addProtein( final List<Protein> proteins, final Protein current_protein ) {
if ( ( getFilterType() == FilterType.POSITIVE_PROTEIN ) || ( getFilterType() == FilterType.NEGATIVE_PROTEIN ) ) {
- final Set<DomainId> domain_ids_in_protein = new HashSet<DomainId>();
+ final Set<String> domain_ids_in_protein = new HashSet<String>();
for( final Domain d : current_protein.getProteinDomains() ) {
domain_ids_in_protein.add( d.getDomainId() );
}
return _domains_stored;
}
/**
 * Returns the sorted set held in {@code _domains_stored_set}.
 * <p>
 * This change switches the element type from {@code DomainId} to
 * {@code String}; the returned reference is the field itself, not a copy.
 *
 * @return the current value of {@code _domains_stored_set}
 */
- public SortedSet<DomainId> getDomainsStoredSet() {
+ public SortedSet<String> getDomainsStoredSet() {
return _domains_stored_set;
}
return _e_value_maximum;
}
/**
 * Returns the filter set supplied at construction time.
 * <p>
 * The constructor that takes no filter argument stores {@code null} here
 * (together with {@code FilterType.NONE}), so callers should consult the
 * filter type before dereferencing the result. This change switches the
 * element type from {@code DomainId} to {@code String}.
 *
 * @return the value of {@code _filter}; may be {@code null}
 */
- private Set<DomainId> getFilter() {
+ private Set<String> getFilter() {
return _filter;
}
return _max_allowed_overlap;
}
- private String getModelType() {
- return _model_type;
- }
-
/**
 * Returns the current value of the {@code _proteins_encountered} counter.
 *
 * @return the value of {@code _proteins_encountered}
 */
public int getProteinsEncountered() {
return _proteins_encountered;
}
}
private void intitCounts() {
- setDomainsStoredSet( new TreeSet<DomainId>() );
+ setDomainsStoredSet( new TreeSet<String>() );
setDomainsEncountered( 0 );
setProteinsEncountered( 0 );
setProteinsIgnoredDueToFilter( 0 );
if ( ( getMaxAllowedOverlap() != HmmPfamOutputParser.MAX_ALLOWED_OVERLAP_DEFAULT )
|| isIgnoreEngulfedDomains() ) {
final int domains_count = current_protein.getNumberOfProteinDomains();
- current_protein = SurfacingUtil.removeOverlappingDomains( getMaxAllowedOverlap(),
- isIgnoreEngulfedDomains(),
- current_protein );
+ current_protein = ForesterUtil.removeOverlappingDomains( getMaxAllowedOverlap(),
+ isIgnoreEngulfedDomains(),
+ current_protein );
final int domains_removed = domains_count - current_protein.getNumberOfProteinDomains();
_domains_stored -= domains_removed;
_domains_ignored_due_to_overlap += domains_removed;
int to = -1;
double e_value = -1;
double score = -1;
- boolean is_complete_hmm_match = false;
- boolean is_complete_query_match = false;
try {
from = Integer.valueOf( from_str ).intValue();
}
+ "] in [" + getInputFile().getCanonicalPath() + "]" );
}
if ( hmm_match_str.equals( "[]" ) ) {
- is_complete_hmm_match = true;
+ //is_complete_hmm_match = true;
}
else if ( !( hmm_match_str.equals( ".]" ) || hmm_match_str.equals( "[." ) || hmm_match_str
.equals( ".." ) ) ) {
+ line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
}
if ( query_match_str.equals( ".." ) ) {
- is_complete_query_match = true;
+ // is_complete_query_match = true;
}
else if ( !( query_match_str.equals( ".]" ) || query_match_str.equals( "[." ) || query_match_str
.equals( "[]" ) ) ) {
ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToVirusLikeIdCountsMap(), id );
++_domains_ignored_due_to_virus_like_id;
}
- else if ( ( getFilterType() == FilterType.NEGATIVE_DOMAIN )
- && getFilter().contains( new DomainId( id ) ) ) {
+ else if ( ( getFilterType() == FilterType.NEGATIVE_DOMAIN ) && getFilter().contains( id ) ) {
++_domains_ignored_due_to_negative_domain_filter;
ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToNegativeDomainFilterCountsMap(), id );
}
_domains_stored = domains_stored;
}
/**
 * Replaces the set held in {@code _domains_stored_set} with the given one.
 * <p>
 * The argument is stored as-is (no copy is made). This change switches the
 * element type from {@code DomainId} to {@code String}.
 *
 * @param _storeddomains_stored the sorted set to store
 */
- private void setDomainsStoredSet( final SortedSet<DomainId> _storeddomains_stored ) {
+ private void setDomainsStoredSet( final SortedSet<String> _storeddomains_stored ) {
_domains_stored_set = _storeddomains_stored;
}