import org.forester.protein.BasicDomain;
import org.forester.protein.BasicProtein;
import org.forester.protein.Domain;
import org.forester.protein.BasicDomain;
import org.forester.protein.BasicProtein;
import org.forester.protein.Domain;
// NOTE(review): the three constants below appear twice in this view of the file, which looks
// like a diff/merge artifact. Java rejects duplicate field declarations, so confirm that the
// real file declares each constant only once.
// Default for the return-type setting (judging by the name; its use is not visible here).
private static final ReturnType RETURN_TYPE_DEFAULT = ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN;
// Presumably the initial value of _ignore_dufs -- TODO confirm against the initialization code.
private static final boolean IGNORE_DUFS_DEFAULT = false;
// Sentinel for the maximum allowed overlap: the parsing code compares getMaxAllowedOverlap()
// against this value and calls removeOverlappingDomains when they differ (or when
// isIgnoreEngulfedDomains() is true).
private static final int MAX_ALLOWED_OVERLAP_DEFAULT = -1;
private static final ReturnType RETURN_TYPE_DEFAULT = ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN;
private static final boolean IGNORE_DUFS_DEFAULT = false;
private static final int MAX_ALLOWED_OVERLAP_DEFAULT = -1;
// NOTE(review): every field in this run is declared twice in this view of the file, which
// looks like a diff/merge artifact. Duplicate field declarations do not compile, so confirm
// that the real file declares each field only once.
// Filter mode. addProtein branches on getFilterType() being POSITIVE_PROTEIN or
// NEGATIVE_PROTEIN (presumably the getter for this field -- confirm).
private final FilterType _filter_type;
// Assigned from the constructor's input_file argument.
private final File _input_file;
// Assigned from the constructor's species argument.
private final String _species;
private final FilterType _filter_type;
private final File _input_file;
private final String _species;
// Presumably the upper E-value bound for accepting a domain -- TODO confirm where it is applied.
private double _e_value_maximum;
// Per the Javadoc of setIndividualDomainScoreCutoffs: domain ids are the keys, score cutoffs
// (for example Pfam gathering thresholds) the values.
private Map<String, String> _individual_domain_score_cutoffs;
// Presumably whether DUF domains are skipped; usage is not visible in this chunk.
private boolean _ignore_dufs;
private double _e_value_maximum;
private Map<String, String> _individual_domain_score_cutoffs;
private boolean _ignore_dufs;
// Counters, named for the reason a domain was dropped; their update sites for the first two
// are not visible in this chunk.
private int _domains_ignored_due_to_e_value;
private int _domains_ignored_due_to_individual_score_cutoff;
// Decremented by the number of domains removed when overlapping domains are stripped
// from a protein.
private int _domains_stored;
private int _domains_ignored_due_to_e_value;
private int _domains_ignored_due_to_individual_score_cutoff;
private int _domains_stored;
// Purpose not visible in this chunk (presumably elapsed parse time -- confirm).
private long _time;
// Incremented together with an update of the per-domain-id counting map declared below.
private int _domains_ignored_due_to_negative_domain_filter;
// Counting map keyed by domain id; updated through ForesterUtil.increaseCountingMap
// (assuming getDomainsIgnoredDueToNegativeDomainFilterCountsMap() returns this field).
private Map<String, Integer> _domains_ignored_due_to_negative_domain_filter_counts_map;
private long _time;
private int _domains_ignored_due_to_negative_domain_filter;
private Map<String, Integer> _domains_ignored_due_to_negative_domain_filter_counts_map;
public HmmPfamOutputParser( final File input_file, final String species, final String model_type ) {
_input_file = input_file;
_species = species;
public HmmPfamOutputParser( final File input_file, final String species, final String model_type ) {
_input_file = input_file;
_species = species;
private void addProtein( final List<Protein> proteins, final Protein current_protein ) {
if ( ( getFilterType() == FilterType.POSITIVE_PROTEIN ) || ( getFilterType() == FilterType.NEGATIVE_PROTEIN ) ) {
private void addProtein( final List<Protein> proteins, final Protein current_protein ) {
if ( ( getFilterType() == FilterType.POSITIVE_PROTEIN ) || ( getFilterType() == FilterType.NEGATIVE_PROTEIN ) ) {
for( final Domain d : current_protein.getProteinDomains() ) {
domain_ids_in_protein.add( d.getDomainId() );
}
for( final Domain d : current_protein.getProteinDomains() ) {
domain_ids_in_protein.add( d.getDomainId() );
}
setDomainsEncountered( 0 );
setProteinsEncountered( 0 );
setProteinsIgnoredDueToFilter( 0 );
setDomainsEncountered( 0 );
setProteinsEncountered( 0 );
setProteinsIgnoredDueToFilter( 0 );
}
else if ( isVerbose() ) {
ForesterUtil.printWarningMessage( getClass().getName(), "query \"" + query
}
else if ( isVerbose() ) {
ForesterUtil.printWarningMessage( getClass().getName(), "query \"" + query
- + "\" is not unique [line " + line_number + "] in ["
- + getInputFile().getCanonicalPath() + "]" );
+ + "\" is not unique [line " + line_number + "] in ["
+ + getInputFile().getCanonicalPath() + "]" );
if ( ( getMaxAllowedOverlap() != HmmPfamOutputParser.MAX_ALLOWED_OVERLAP_DEFAULT )
|| isIgnoreEngulfedDomains() ) {
final int domains_count = current_protein.getNumberOfProteinDomains();
if ( ( getMaxAllowedOverlap() != HmmPfamOutputParser.MAX_ALLOWED_OVERLAP_DEFAULT )
|| isIgnoreEngulfedDomains() ) {
final int domains_count = current_protein.getNumberOfProteinDomains();
- current_protein = SurfacingUtil.removeOverlappingDomains( getMaxAllowedOverlap(),
- isIgnoreEngulfedDomains(),
- current_protein );
+ current_protein = ForesterUtil.removeOverlappingDomains( getMaxAllowedOverlap(),
+ isIgnoreEngulfedDomains(),
+ current_protein );
final int domains_removed = domains_count - current_protein.getNumberOfProteinDomains();
_domains_stored -= domains_removed;
_domains_ignored_due_to_overlap += domains_removed;
final int domains_removed = domains_count - current_protein.getNumberOfProteinDomains();
_domains_stored -= domains_removed;
_domains_ignored_due_to_overlap += domains_removed;
try {
from = Integer.valueOf( from_str ).intValue();
}
catch ( final NumberFormatException e ) {
throw new IOException( "could not parse seq-f from \"" + line + "\" [line " + line_number
try {
from = Integer.valueOf( from_str ).intValue();
}
catch ( final NumberFormatException e ) {
throw new IOException( "could not parse seq-f from \"" + line + "\" [line " + line_number
}
try {
to = Integer.valueOf( to_str ).intValue();
}
catch ( final NumberFormatException e ) {
throw new IOException( "could not parse seq-t from \"" + line + "\" [line " + line_number
}
try {
to = Integer.valueOf( to_str ).intValue();
}
catch ( final NumberFormatException e ) {
throw new IOException( "could not parse seq-t from \"" + line + "\" [line " + line_number
}
try {
score = Double.valueOf( score_str ).doubleValue();
}
catch ( final NumberFormatException e ) {
throw new IOException( "could not parse score from \"" + line + "\" [line " + line_number
}
try {
score = Double.valueOf( score_str ).doubleValue();
}
catch ( final NumberFormatException e ) {
throw new IOException( "could not parse score from \"" + line + "\" [line " + line_number
}
try {
e_value = Double.valueOf( e_value_str ).doubleValue();
}
catch ( final NumberFormatException e ) {
throw new IOException( "could not parse E-value from \"" + line + "\" [line " + line_number
}
try {
e_value = Double.valueOf( e_value_str ).doubleValue();
}
catch ( final NumberFormatException e ) {
throw new IOException( "could not parse E-value from \"" + line + "\" [line " + line_number
+ line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
}
if ( query_match_str.equals( ".." ) ) {
+ line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
}
if ( query_match_str.equals( ".." ) ) {
}
catch ( final NumberFormatException e ) {
throw new IOException( "could not parse domain number from \"" + line + "\" [line " + line_number
}
catch ( final NumberFormatException e ) {
throw new IOException( "could not parse domain number from \"" + line + "\" [line " + line_number
}
try {
total = Integer.valueOf( ( total_str ) ).intValue();
}
catch ( final NumberFormatException e ) {
throw new IOException( "could not parse domain count from \"" + line + "\" [line " + line_number
}
try {
total = Integer.valueOf( ( total_str ) ).intValue();
}
catch ( final NumberFormatException e ) {
throw new IOException( "could not parse domain count from \"" + line + "\" [line " + line_number
ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToVirusLikeIdCountsMap(), id );
++_domains_ignored_due_to_virus_like_id;
}
ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToVirusLikeIdCountsMap(), id );
++_domains_ignored_due_to_virus_like_id;
}
++_domains_ignored_due_to_negative_domain_filter;
ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToNegativeDomainFilterCountsMap(), id );
}
++_domains_ignored_due_to_negative_domain_filter;
ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToNegativeDomainFilterCountsMap(), id );
}
/**
* To ignore domains which are completely engulfed by domains (individual
* ones or stretches of overlapping ones) with better support values.
/**
* To ignore domains which are completely engulfed by domains (individual
* ones or stretches of overlapping ones) with better support values.
* @param ignore_engulfed_domains whether engulfed domains are ignored
*/
public void setIgnoreEngulfedDomains( final boolean ignore_engulfed_domains ) {
* @param ignore_engulfed_domains whether engulfed domains are ignored
*/
public void setIgnoreEngulfedDomains( final boolean ignore_engulfed_domains ) {
/**
* Sets the individual domain score cutoff values (for example, gathering
* thresholds from Pfam). Domain ids are the keys, cutoffs the values.
/**
* Sets the individual domain score cutoff values (for example, gathering
* thresholds from Pfam). Domain ids are the keys, cutoffs the values.
* @param individual_domain_score_cutoffs
*/
public void setIndividualDomainScoreCutoffs( final Map<String, String> individual_domain_score_cutoffs ) {
* @param individual_domain_score_cutoffs
*/
public void setIndividualDomainScoreCutoffs( final Map<String, String> individual_domain_score_cutoffs ) {