// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
-import org.forester.surfacing.BasicDomain;
-import org.forester.surfacing.BasicProtein;
-import org.forester.surfacing.Domain;
-import org.forester.surfacing.DomainId;
-import org.forester.surfacing.Protein;
-import org.forester.surfacing.SurfacingUtil;
+import org.forester.protein.BasicDomain;
+import org.forester.protein.BasicProtein;
+import org.forester.protein.Domain;
+import org.forester.protein.Protein;
// Default settings for this parser.
// NOTE(review): each of the three declarations below appears twice in this view; that looks
// like duplicated diff context rather than real source -- confirm against the actual file.
//
// Default return type. The parsing code later branches on
// getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN.
private static final ReturnType RETURN_TYPE_DEFAULT = ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN;
// presumably the initial value of _ignore_dufs -- TODO confirm against the initialisation code.
private static final boolean IGNORE_DUFS_DEFAULT = false;
// Sentinel value: overlapping-domain removal is only performed when getMaxAllowedOverlap()
// differs from this constant or when isIgnoreEngulfedDomains() is true.
private static final int MAX_ALLOWED_OVERLAP_DEFAULT = -1;
private static final ReturnType RETURN_TYPE_DEFAULT = ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN;
private static final boolean IGNORE_DUFS_DEFAULT = false;
private static final int MAX_ALLOWED_OVERLAP_DEFAULT = -1;
// Instance state of the parser.
// NOTE(review): every group of declarations below is repeated once in this view; that looks
// like duplicated diff context rather than real source -- confirm against the actual file.
//
// Filter mode; presumably what getFilterType() returns, which addProtein() compares against
// FilterType.POSITIVE_PROTEIN and FilterType.NEGATIVE_PROTEIN -- TODO confirm.
private final FilterType _filter_type;
// File to parse; set from the constructor's input_file argument.
private final File _input_file;
// Species label; set from the constructor's species argument.
private final String _species;
private final FilterType _filter_type;
private final File _input_file;
private final String _species;
// presumably the upper E-value bound above which a domain is ignored -- TODO confirm.
private double _e_value_maximum;
// presumably per-domain score cutoffs keyed by domain id -- TODO confirm key/value format.
private Map<String, String> _individual_domain_score_cutoffs;
// presumably whether DUF domains are skipped (see IGNORE_DUFS_DEFAULT) -- TODO confirm.
private boolean _ignore_dufs;
private double _e_value_maximum;
private Map<String, String> _individual_domain_score_cutoffs;
private boolean _ignore_dufs;
// Statistics counters. The two "ignored" counters are presumably incremented when a domain
// is rejected for the named reason -- TODO confirm; their updates are not visible here.
private int _domains_ignored_due_to_e_value;
private int _domains_ignored_due_to_individual_score_cutoff;
// Number of domains kept; decremented by the number of domains dropped during
// overlapping-domain removal.
private int _domains_stored;
private int _domains_ignored_due_to_e_value;
private int _domains_ignored_due_to_individual_score_cutoff;
private int _domains_stored;
// NOTE(review): unit and meaning of _time are not visible in this excerpt -- confirm.
private long _time;
// Incremented together with a ForesterUtil.increaseCountingMap(...) call on the map returned by
// getDomainsIgnoredDueToNegativeDomainFilterCountsMap(), keyed by the domain id.
private int _domains_ignored_due_to_negative_domain_filter;
// presumably the map returned by getDomainsIgnoredDueToNegativeDomainFilterCountsMap();
// it would then hold a per-id count of ignored domains -- TODO confirm.
private Map<String, Integer> _domains_ignored_due_to_negative_domain_filter_counts_map;
private long _time;
private int _domains_ignored_due_to_negative_domain_filter;
private Map<String, Integer> _domains_ignored_due_to_negative_domain_filter_counts_map;
public HmmPfamOutputParser( final File input_file, final String species, final String model_type ) {
_input_file = input_file;
_species = species;
public HmmPfamOutputParser( final File input_file, final String species, final String model_type ) {
_input_file = input_file;
_species = species;
private void addProtein( final List<Protein> proteins, final Protein current_protein ) {
if ( ( getFilterType() == FilterType.POSITIVE_PROTEIN ) || ( getFilterType() == FilterType.NEGATIVE_PROTEIN ) ) {
private void addProtein( final List<Protein> proteins, final Protein current_protein ) {
if ( ( getFilterType() == FilterType.POSITIVE_PROTEIN ) || ( getFilterType() == FilterType.NEGATIVE_PROTEIN ) ) {
for( final Domain d : current_protein.getProteinDomains() ) {
domain_ids_in_protein.add( d.getDomainId() );
}
for( final Domain d : current_protein.getProteinDomains() ) {
domain_ids_in_protein.add( d.getDomainId() );
}
setDomainsEncountered( 0 );
setProteinsEncountered( 0 );
setProteinsIgnoredDueToFilter( 0 );
setDomainsEncountered( 0 );
setProteinsEncountered( 0 );
setProteinsIgnoredDueToFilter( 0 );
+ getInputFile().getCanonicalPath() + "]" );
}
if ( getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ) {
+ getInputFile().getCanonicalPath() + "]" );
}
if ( getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ) {
if ( was_not_unique ) {
if ( getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ) {
current_protein = new BasicProtein( current_protein.getProteinId() + " "
if ( was_not_unique ) {
if ( getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ) {
current_protein = new BasicProtein( current_protein.getProteinId() + " "
if ( ( getMaxAllowedOverlap() != HmmPfamOutputParser.MAX_ALLOWED_OVERLAP_DEFAULT )
|| isIgnoreEngulfedDomains() ) {
final int domains_count = current_protein.getNumberOfProteinDomains();
if ( ( getMaxAllowedOverlap() != HmmPfamOutputParser.MAX_ALLOWED_OVERLAP_DEFAULT )
|| isIgnoreEngulfedDomains() ) {
final int domains_count = current_protein.getNumberOfProteinDomains();
- current_protein = SurfacingUtil.removeOverlappingDomains( getMaxAllowedOverlap(),
- isIgnoreEngulfedDomains(),
- current_protein );
+ current_protein = ForesterUtil.removeOverlappingDomains( getMaxAllowedOverlap(),
+ isIgnoreEngulfedDomains(),
+ current_protein );
final int domains_removed = domains_count - current_protein.getNumberOfProteinDomains();
_domains_stored -= domains_removed;
_domains_ignored_due_to_overlap += domains_removed;
final int domains_removed = domains_count - current_protein.getNumberOfProteinDomains();
_domains_stored -= domains_removed;
_domains_ignored_due_to_overlap += domains_removed;
+ line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
}
if ( query_match_str.equals( ".." ) ) {
+ line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
}
if ( query_match_str.equals( ".." ) ) {
ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToVirusLikeIdCountsMap(), id );
++_domains_ignored_due_to_virus_like_id;
}
ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToVirusLikeIdCountsMap(), id );
++_domains_ignored_due_to_virus_like_id;
}
++_domains_ignored_due_to_negative_domain_filter;
ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToNegativeDomainFilterCountsMap(), id );
}
++_domains_ignored_due_to_negative_domain_filter;
ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToNegativeDomainFilterCountsMap(), id );
}