import org.forester.protein.BasicProtein;
import org.forester.protein.Domain;
import org.forester.protein.Protein;
-import org.forester.surfacing.SurfacingUtil;
import org.forester.util.ForesterUtil;
public final class HmmPfamOutputParser {
private final FilterType _filter_type;
private final File _input_file;
private final String _species;
- private final String _model_type;
private double _e_value_maximum;
private Map<String, String> _individual_domain_score_cutoffs;
private boolean _ignore_dufs;
public HmmPfamOutputParser( final File input_file, final String species, final String model_type ) {
_input_file = input_file;
_species = species;
- _model_type = model_type;
_filter = null;
_filter_type = FilterType.NONE;
init();
public HmmPfamOutputParser( final File input_file,
final String species,
- final String model_type,
final Set<String> filter,
final FilterType filter_type ) {
_input_file = input_file;
_species = species;
- _model_type = model_type;
_filter = filter;
_filter_type = filter_type;
init();
return _max_allowed_overlap;
}
- private String getModelType() {
- return _model_type;
- }
-
/**
 * Returns the current value of the {@code _proteins_encountered} field.
 *
 * @return the value held in {@code _proteins_encountered}
 */
public int getProteinsEncountered() {
    return this._proteins_encountered;
}
}
else if ( isVerbose() ) {
ForesterUtil.printWarningMessage( getClass().getName(), "query \"" + query
- + "\" is not unique [line " + line_number + "] in ["
- + getInputFile().getCanonicalPath() + "]" );
+ + "\" is not unique [line " + line_number + "] in ["
+ + getInputFile().getCanonicalPath() + "]" );
}
}
else {
if ( ( getMaxAllowedOverlap() != HmmPfamOutputParser.MAX_ALLOWED_OVERLAP_DEFAULT )
|| isIgnoreEngulfedDomains() ) {
final int domains_count = current_protein.getNumberOfProteinDomains();
- current_protein = SurfacingUtil.removeOverlappingDomains( getMaxAllowedOverlap(),
- isIgnoreEngulfedDomains(),
- current_protein );
+ current_protein = ForesterUtil.removeOverlappingDomains( getMaxAllowedOverlap(),
+ isIgnoreEngulfedDomains(),
+ current_protein );
final int domains_removed = domains_count - current_protein.getNumberOfProteinDomains();
_domains_stored -= domains_removed;
_domains_ignored_due_to_overlap += domains_removed;
int to = -1;
double e_value = -1;
double score = -1;
- boolean is_complete_hmm_match = false;
- boolean is_complete_query_match = false;
try {
from = Integer.valueOf( from_str ).intValue();
}
catch ( final NumberFormatException e ) {
throw new IOException( "could not parse seq-f from \"" + line + "\" [line " + line_number
- + "] in [" + getInputFile().getCanonicalPath() + "]" );
+ + "] in [" + getInputFile().getCanonicalPath() + "]" );
}
try {
to = Integer.valueOf( to_str ).intValue();
}
catch ( final NumberFormatException e ) {
throw new IOException( "could not parse seq-t from \"" + line + "\" [line " + line_number
- + "] in [" + getInputFile().getCanonicalPath() + "]" );
+ + "] in [" + getInputFile().getCanonicalPath() + "]" );
}
try {
score = Double.valueOf( score_str ).doubleValue();
}
catch ( final NumberFormatException e ) {
throw new IOException( "could not parse score from \"" + line + "\" [line " + line_number
- + "] in [" + getInputFile().getCanonicalPath() + "]" );
+ + "] in [" + getInputFile().getCanonicalPath() + "]" );
}
try {
e_value = Double.valueOf( e_value_str ).doubleValue();
}
catch ( final NumberFormatException e ) {
throw new IOException( "could not parse E-value from \"" + line + "\" [line " + line_number
- + "] in [" + getInputFile().getCanonicalPath() + "]" );
+ + "] in [" + getInputFile().getCanonicalPath() + "]" );
}
if ( hmm_match_str.equals( "[]" ) ) {
- is_complete_hmm_match = true;
+ //is_complete_hmm_match = true;
}
else if ( !( hmm_match_str.equals( ".]" ) || hmm_match_str.equals( "[." ) || hmm_match_str
.equals( ".." ) ) ) {
+ line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
}
if ( query_match_str.equals( ".." ) ) {
- is_complete_query_match = true;
+ // is_complete_query_match = true;
}
else if ( !( query_match_str.equals( ".]" ) || query_match_str.equals( "[." ) || query_match_str
.equals( "[]" ) ) ) {
}
catch ( final NumberFormatException e ) {
throw new IOException( "could not parse domain number from \"" + line + "\" [line " + line_number
- + "] in [" + getInputFile().getCanonicalPath() + "]" );
+ + "] in [" + getInputFile().getCanonicalPath() + "]" );
}
try {
total = Integer.valueOf( ( total_str ) ).intValue();
}
catch ( final NumberFormatException e ) {
throw new IOException( "could not parse domain count from \"" + line + "\" [line " + line_number
- + "] in [" + getInputFile().getCanonicalPath() + "]" );
+ + "] in [" + getInputFile().getCanonicalPath() + "]" );
}
++_domains_encountered;
boolean failed_cutoff = false;
}
else {
throw new IOException( "could not find a score cutoff value for domain id \"" + id
- + "\" [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
+ + "\" [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
}
}
final String uc_id = id.toUpperCase();
/**
* To ignore domains which are completely engulfed by domains (individual
* ones or stretches of overlapping ones) with better support values.
- *
- *
+ *
+ *
* @param ignore_engulfed_domains whether engulfed domains are to be ignored
*/
public void setIgnoreEngulfedDomains( final boolean ignore_engulfed_domains ) {
/**
* Sets the individual domain score cutoff values (for example, gathering
* thresholds from Pfam). Domain ids are the keys, cutoffs the values.
- *
+ *
* @param individual_domain_score_cutoffs
*/
public void setIndividualDomainScoreCutoffs( final Map<String, String> individual_domain_score_cutoffs ) {