// gitweb capture artifact: commit subject "clean up"
// [jalview.git] / forester / java / src / org / forester / io / parsers / HmmPfamOutputParser.java
// $Id:
//
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: www.phylosoft.org/forester

27 package org.forester.io.parsers;
28
29 import java.io.BufferedReader;
30 import java.io.File;
31 import java.io.FileReader;
32 import java.io.IOException;
33 import java.util.ArrayList;
34 import java.util.Date;
35 import java.util.HashSet;
36 import java.util.List;
37 import java.util.Map;
38 import java.util.Set;
39 import java.util.SortedSet;
40 import java.util.TreeMap;
41 import java.util.TreeSet;
42
43 import org.forester.surfacing.BasicDomain;
44 import org.forester.surfacing.BasicProtein;
45 import org.forester.surfacing.Domain;
46 import org.forester.surfacing.DomainId;
47 import org.forester.surfacing.Protein;
48 import org.forester.surfacing.SurfacingUtil;
49 import org.forester.util.ForesterUtil;
50
51 public final class HmmPfamOutputParser {
52
53     private static final String     RETRO                       = "RETRO";
54     private static final String     PHAGE                       = "PHAGE";
55     private static final String     VIR                         = "VIR";
56     private static final String     TRANSPOS                    = "TRANSPOS";
57     private static final String     RV                          = "RV";
58     private static final String     GAG                         = "GAG_";
59     private static final String     HCV                         = "HCV_";                                                    // New. Added on Jun 11, after 1st submission.
60     private static final String     HERPES                      = "Herpes_";                                                 // New. Added on Jun 11, after 1st submission.
61     private static final int        E_VALUE_MAXIMUM_DEFAULT     = -1;
62     private static final ReturnType RETURN_TYPE_DEFAULT         = ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN;
63     private static final boolean    IGNORE_DUFS_DEFAULT         = false;
64     private static final int        MAX_ALLOWED_OVERLAP_DEFAULT = -1;
65     private final Set<DomainId>     _filter;
66     private final FilterType        _filter_type;
67     private final File              _input_file;
68     private final String            _species;
69     private final String            _model_type;
70     private double                  _e_value_maximum;
71     private Map<String, String>     _individual_domain_score_cutoffs;
72     private boolean                 _ignore_dufs;
73     private boolean                 _ignore_virus_like_ids;
74     private boolean                 _allow_non_unique_query;
75     private boolean                 _verbose;
76     private int                     _max_allowed_overlap;
77     private boolean                 _ignore_engulfed_domains;
78     private ReturnType              _return_type;
79     private int                     _proteins_encountered;
80     private int                     _proteins_ignored_due_to_filter;
81     private int                     _proteins_stored;
82     private int                     _domains_encountered;
83     private int                     _domains_ignored_due_to_duf;
84     private int                     _domains_ignored_due_to_overlap;
85     private int                     _domains_ignored_due_to_e_value;
86     private int                     _domains_ignored_due_to_individual_score_cutoff;
87     private int                     _domains_stored;
88     private SortedSet<DomainId>     _domains_stored_set;
89     private long                    _time;
90     private int                     _domains_ignored_due_to_negative_domain_filter;
91     private Map<String, Integer>    _domains_ignored_due_to_negative_domain_filter_counts_map;
92     private int                     _domains_ignored_due_to_virus_like_id;
93     private Map<String, Integer>    _domains_ignored_due_to_virus_like_id_counts_map;
94
95     public HmmPfamOutputParser( final File input_file, final String species, final String model_type ) {
96         _input_file = input_file;
97         _species = species;
98         _model_type = model_type;
99         _filter = null;
100         _filter_type = FilterType.NONE;
101         init();
102     }
103
104     public HmmPfamOutputParser( final File input_file,
105                                 final String species,
106                                 final String model_type,
107                                 final Set<DomainId> filter,
108                                 final FilterType filter_type ) {
109         _input_file = input_file;
110         _species = species;
111         _model_type = model_type;
112         _filter = filter;
113         _filter_type = filter_type;
114         init();
115     }
116
117     private void actuallyAddProtein( final List<Protein> proteins, final Protein current_protein ) {
118         final List<Domain> l = current_protein.getProteinDomains();
119         for( final Domain d : l ) {
120             getDomainsStoredSet().add( d.getDomainId() );
121         }
122         proteins.add( current_protein );
123         ++_proteins_stored;
124     }
125
126     private void addProtein( final List<Protein> proteins, final Protein current_protein ) {
127         if ( ( getFilterType() == FilterType.POSITIVE_PROTEIN ) || ( getFilterType() == FilterType.NEGATIVE_PROTEIN ) ) {
128             final Set<DomainId> domain_ids_in_protein = new HashSet<DomainId>();
129             for( final Domain d : current_protein.getProteinDomains() ) {
130                 domain_ids_in_protein.add( d.getDomainId() );
131             }
132             domain_ids_in_protein.retainAll( getFilter() );
133             if ( getFilterType() == FilterType.POSITIVE_PROTEIN ) {
134                 if ( domain_ids_in_protein.size() > 0 ) {
135                     actuallyAddProtein( proteins, current_protein );
136                 }
137                 else {
138                     ++_proteins_ignored_due_to_filter;
139                 }
140             }
141             else {
142                 if ( domain_ids_in_protein.size() < 1 ) {
143                     actuallyAddProtein( proteins, current_protein );
144                 }
145                 else {
146                     ++_proteins_ignored_due_to_filter;
147                 }
148             }
149         }
150         else {
151             actuallyAddProtein( proteins, current_protein );
152         }
153     }
154
155     public int getDomainsEncountered() {
156         return _domains_encountered;
157     }
158
159     public int getDomainsIgnoredDueToDuf() {
160         return _domains_ignored_due_to_duf;
161     }
162
163     public int getDomainsIgnoredDueToEval() {
164         return _domains_ignored_due_to_e_value;
165     }
166
167     public int getDomainsIgnoredDueToIndividualScoreCutoff() {
168         return _domains_ignored_due_to_individual_score_cutoff;
169     }
170
171     public int getDomainsIgnoredDueToNegativeDomainFilter() {
172         return _domains_ignored_due_to_negative_domain_filter;
173     }
174
175     public Map<String, Integer> getDomainsIgnoredDueToNegativeDomainFilterCountsMap() {
176         return _domains_ignored_due_to_negative_domain_filter_counts_map;
177     }
178
179     public int getDomainsIgnoredDueToOverlap() {
180         return _domains_ignored_due_to_overlap;
181     }
182
183     public Map<String, Integer> getDomainsIgnoredDueToVirusLikeIdCountsMap() {
184         return _domains_ignored_due_to_virus_like_id_counts_map;
185     }
186
187     public int getDomainsIgnoredDueToVirusLikeIds() {
188         return _domains_ignored_due_to_virus_like_id;
189     }
190
191     public int getDomainsStored() {
192         return _domains_stored;
193     }
194
195     public SortedSet<DomainId> getDomainsStoredSet() {
196         return _domains_stored_set;
197     }
198
199     private double getEValueMaximum() {
200         return _e_value_maximum;
201     }
202
203     private Set<DomainId> getFilter() {
204         return _filter;
205     }
206
207     private FilterType getFilterType() {
208         return _filter_type;
209     }
210
211     private Map<String, String> getIndividualDomainScoreCutoffs() {
212         return _individual_domain_score_cutoffs;
213     }
214
215     private File getInputFile() {
216         return _input_file;
217     }
218
219     private int getMaxAllowedOverlap() {
220         return _max_allowed_overlap;
221     }
222
223     private String getModelType() {
224         return _model_type;
225     }
226
227     public int getProteinsEncountered() {
228         return _proteins_encountered;
229     }
230
231     public int getProteinsIgnoredDueToFilter() {
232         return _proteins_ignored_due_to_filter;
233     }
234
235     public int getProteinsStored() {
236         return _proteins_stored;
237     }
238
239     private ReturnType getReturnType() {
240         return _return_type;
241     }
242
243     private String getSpecies() {
244         return _species;
245     }
246
247     public long getTime() {
248         return _time;
249     }
250
251     private void init() {
252         _e_value_maximum = HmmPfamOutputParser.E_VALUE_MAXIMUM_DEFAULT;
253         setIgnoreDufs( HmmPfamOutputParser.IGNORE_DUFS_DEFAULT );
254         setReturnType( HmmPfamOutputParser.RETURN_TYPE_DEFAULT );
255         _max_allowed_overlap = HmmPfamOutputParser.MAX_ALLOWED_OVERLAP_DEFAULT;
256         setIndividualDomainScoreCutoffs( null );
257         setIgnoreEngulfedDomains( false );
258         setIgnoreVirusLikeIds( false );
259         setAllowNonUniqueQuery( false );
260         setVerbose( false );
261         intitCounts();
262     }
263
264     private void intitCounts() {
265         setDomainsStoredSet( new TreeSet<DomainId>() );
266         setDomainsEncountered( 0 );
267         setProteinsEncountered( 0 );
268         setProteinsIgnoredDueToFilter( 0 );
269         setDomainsIgnoredDueToNegativeFilter( 0 );
270         setDomainsIgnoredDueToDuf( 0 );
271         setDomainsIgnoredDueToEval( 0 );
272         setDomainsIgnoredDueToIndividualScoreCutoff( 0 );
273         setDomainsIgnoredDueToVirusLikeId( 0 );
274         setDomainsIgnoredDueToOverlap( 0 );
275         setDomainsStored( 0 );
276         setProteinsStored( 0 );
277         setTime( 0 );
278         setDomainsIgnoredDueToVirusLikeIdCountsMap( new TreeMap<String, Integer>() );
279         setDomainsIgnoredDueToNegativeDomainFilterCountsMap( new TreeMap<String, Integer>() );
280     }
281
282     private boolean isAllowNonUniqueQuery() {
283         return _allow_non_unique_query;
284     }
285
286     private boolean isIgnoreDufs() {
287         return _ignore_dufs;
288     }
289
290     private boolean isIgnoreEngulfedDomains() {
291         return _ignore_engulfed_domains;
292     }
293
294     private boolean isIgnoreVirusLikeIds() {
295         return _ignore_virus_like_ids;
296     }
297
298     private boolean isVerbose() {
299         return _verbose;
300     }
301
302     public List<Protein> parse() throws IOException {
303         intitCounts();
304         final Set<String> queries = new HashSet<String>();
305         final String error = ForesterUtil.isReadableFile( getInputFile() );
306         if ( !ForesterUtil.isEmpty( error ) ) {
307             throw new IOException( error );
308         }
309         final BufferedReader br = new BufferedReader( new FileReader( getInputFile() ) );
310         String line;
311         final List<Protein> proteins = new ArrayList<Protein>();
312         Protein current_protein = null;
313         int line_number = 0;
314         boolean saw_double_slash = true;
315         boolean can_parse_domains = false;
316         boolean saw_parsed_for_domains = false;
317         boolean saw_query_sequence = false;
318         boolean was_not_unique = false;
319         final long start_time = new Date().getTime();
320         while ( ( line = br.readLine() ) != null ) {
321             line_number++;
322             if ( line.length() < 1 ) {
323                 continue;
324             }
325             else if ( line.startsWith( "Query sequence:" ) ) {
326                 ++_proteins_encountered;
327                 if ( !saw_double_slash ) {
328                     throw new IOException( "unexpected format [line " + line_number + "] in ["
329                             + getInputFile().getCanonicalPath() + "]" );
330                 }
331                 saw_double_slash = false;
332                 saw_query_sequence = true;
333                 was_not_unique = false;
334                 final String query = line.substring( 16 ).trim();
335                 if ( ForesterUtil.isEmpty( query ) ) {
336                     throw new IOException( "query sequence cannot be empty [line " + line_number + "] in ["
337                             + getInputFile().getCanonicalPath() + "]" );
338                 }
339                 if ( queries.contains( query ) ) {
340                     if ( !isAllowNonUniqueQuery() ) {
341                         throw new IOException( "query \"" + query + "\" is not unique [line " + line_number + "] in ["
342                                 + getInputFile().getCanonicalPath() + "]" );
343                     }
344                     else if ( isVerbose() ) {
345                         ForesterUtil.printWarningMessage( getClass().getName(), "query \"" + query
346                                 + "\" is not unique [line " + line_number + "] in ["
347                                 + getInputFile().getCanonicalPath() + "]" );
348                     }
349                 }
350                 else {
351                     queries.add( query );
352                 }
353                 if ( current_protein != null ) {
354                     throw new IOException( "unexpected format [line " + line_number + "] in ["
355                             + getInputFile().getCanonicalPath() + "]" );
356                 }
357                 if ( getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ) {
358                     current_protein = new BasicProtein( query, getSpecies() );
359                 }
360                 else {
361                     throw new IllegalArgumentException( "unknown return type" );
362                 }
363             }
364             else if ( line.startsWith( "Accession:" ) ) {
365                 if ( !saw_query_sequence || ( current_protein == null ) ) {
366                     throw new IOException( "unexpected format [line " + line_number + "] in ["
367                             + getInputFile().getCanonicalPath() + "]" );
368                 }
369                 ( ( BasicProtein ) current_protein ).setAccession( line.substring( 11 ).trim() );
370             }
371             else if ( line.startsWith( "Description:" ) ) {
372                 if ( !saw_query_sequence || ( current_protein == null ) ) {
373                     throw new IOException( "unexpected format [line " + line_number + "] in ["
374                             + getInputFile().getCanonicalPath() + "]" );
375                 }
376                 if ( was_not_unique ) {
377                     if ( getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ) {
378                         current_protein = new BasicProtein( current_protein.getProteinId() + " "
379                                 + line.substring( 13 ).trim(), getSpecies() );
380                     }
381                 }
382                 else {
383                     ( ( BasicProtein ) current_protein ).setDescription( line.substring( 13 ).trim() );
384                 }
385             }
386             else if ( line.startsWith( "Parsed for domains:" ) ) {
387                 if ( !saw_query_sequence ) {
388                     throw new IOException( "unexpected format [line " + line_number + "] in ["
389                             + getInputFile().getCanonicalPath() + "]" );
390                 }
391                 saw_query_sequence = false;
392                 saw_parsed_for_domains = true;
393             }
394             else if ( saw_parsed_for_domains && line.startsWith( "--------" ) ) {
395                 can_parse_domains = true;
396                 saw_parsed_for_domains = false;
397             }
398             else if ( line.startsWith( "Alignments of top-scoring domains:" ) ) {
399                 if ( !can_parse_domains ) {
400                     throw new IOException( "unexpected format [line " + line_number + "] in ["
401                             + getInputFile().getCanonicalPath() + "]" );
402                 }
403                 can_parse_domains = false;
404             }
405             else if ( line.startsWith( "//" ) ) {
406                 can_parse_domains = false;
407                 saw_double_slash = true;
408                 if ( current_protein.getProteinDomains().size() > 0 ) {
409                     if ( ( getMaxAllowedOverlap() != HmmPfamOutputParser.MAX_ALLOWED_OVERLAP_DEFAULT )
410                             || isIgnoreEngulfedDomains() ) {
411                         final int domains_count = current_protein.getNumberOfProteinDomains();
412                         current_protein = SurfacingUtil.removeOverlappingDomains( getMaxAllowedOverlap(),
413                                                                                   isIgnoreEngulfedDomains(),
414                                                                                   current_protein );
415                         final int domains_removed = domains_count - current_protein.getNumberOfProteinDomains();
416                         _domains_stored -= domains_removed;
417                         _domains_ignored_due_to_overlap += domains_removed;
418                     }
419                     addProtein( proteins, current_protein );
420                 }
421                 current_protein = null;
422             }
423             else if ( can_parse_domains && ( line.indexOf( "[no hits above thresholds]" ) == -1 ) ) {
424                 final String[] s = line.split( "\\s+" );
425                 if ( s.length != 10 ) {
426                     throw new IOException( "unexpected format in hmmpfam output:  \"" + line + "\" [line "
427                             + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
428                 }
429                 final String id = s[ 0 ];
430                 final String domain_count_str = s[ 1 ];
431                 final String from_str = s[ 2 ];
432                 final String to_str = s[ 3 ];
433                 final String query_match_str = s[ 4 ];
434                 final String hmm_match_str = s[ 7 ];
435                 final String score_str = s[ 8 ];
436                 final String e_value_str = s[ 9 ];
437                 int from = -1;
438                 int to = -1;
439                 double e_value = -1;
440                 double score = -1;
441                 boolean is_complete_hmm_match = false;
442                 boolean is_complete_query_match = false;
443                 try {
444                     from = Integer.valueOf( from_str ).intValue();
445                 }
446                 catch ( final NumberFormatException e ) {
447                     throw new IOException( "could not parse seq-f from \"" + line + "\" [line " + line_number
448                             + "] in [" + getInputFile().getCanonicalPath() + "]" );
449                 }
450                 try {
451                     to = Integer.valueOf( to_str ).intValue();
452                 }
453                 catch ( final NumberFormatException e ) {
454                     throw new IOException( "could not parse seq-t from \"" + line + "\" [line " + line_number
455                             + "] in [" + getInputFile().getCanonicalPath() + "]" );
456                 }
457                 try {
458                     score = Double.valueOf( score_str ).doubleValue();
459                 }
460                 catch ( final NumberFormatException e ) {
461                     throw new IOException( "could not parse score from \"" + line + "\" [line " + line_number
462                             + "] in [" + getInputFile().getCanonicalPath() + "]" );
463                 }
464                 try {
465                     e_value = Double.valueOf( e_value_str ).doubleValue();
466                 }
467                 catch ( final NumberFormatException e ) {
468                     throw new IOException( "could not parse E-value from \"" + line + "\" [line " + line_number
469                             + "] in [" + getInputFile().getCanonicalPath() + "]" );
470                 }
471                 if ( hmm_match_str.equals( "[]" ) ) {
472                     is_complete_hmm_match = true;
473                 }
474                 else if ( !( hmm_match_str.equals( ".]" ) || hmm_match_str.equals( "[." ) || hmm_match_str
475                         .equals( ".." ) ) ) {
476                     throw new IOException( "unexpected format in hmmpfam output:  \"" + line + "\" [line "
477                             + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
478                 }
479                 if ( query_match_str.equals( ".." ) ) {
480                     is_complete_query_match = true;
481                 }
482                 else if ( !( query_match_str.equals( ".]" ) || query_match_str.equals( "[." ) || query_match_str
483                         .equals( "[]" ) ) ) {
484                     throw new IOException( "unexpected format in hmmpfam output:  \"" + line + "\" [line "
485                             + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
486                 }
487                 final String[] ct = domain_count_str.split( "/" );
488                 if ( ct.length != 2 ) {
489                     throw new IOException( "unexpected format in hmmpfam output:  \"" + line + "\" [line "
490                             + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
491                 }
492                 final String number_str = ct[ 0 ];
493                 final String total_str = ct[ 1 ];
494                 int number = -1;
495                 int total = -1;
496                 try {
497                     number = Integer.valueOf( ( number_str ) ).intValue();
498                 }
499                 catch ( final NumberFormatException e ) {
500                     throw new IOException( "could not parse domain number from \"" + line + "\" [line " + line_number
501                             + "] in [" + getInputFile().getCanonicalPath() + "]" );
502                 }
503                 try {
504                     total = Integer.valueOf( ( total_str ) ).intValue();
505                 }
506                 catch ( final NumberFormatException e ) {
507                     throw new IOException( "could not parse domain count from \"" + line + "\" [line " + line_number
508                             + "] in [" + getInputFile().getCanonicalPath() + "]" );
509                 }
510                 ++_domains_encountered;
511                 boolean failed_cutoff = false;
512                 if ( getIndividualDomainScoreCutoffs() != null ) {
513                     if ( getIndividualDomainScoreCutoffs().containsKey( id ) ) {
514                         final double cutoff = Double.parseDouble( getIndividualDomainScoreCutoffs().get( id ) );
515                         if ( score < cutoff ) {
516                             failed_cutoff = true;
517                         }
518                     }
519                     else {
520                         throw new IOException( "could not find a score cutoff value for domain id \"" + id
521                                 + "\" [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
522                     }
523                 }
524                 final String uc_id = id.toUpperCase();
525                 if ( failed_cutoff ) {
526                     ++_domains_ignored_due_to_individual_score_cutoff;
527                 }
528                 else if ( ( getEValueMaximum() != HmmPfamOutputParser.E_VALUE_MAXIMUM_DEFAULT )
529                         && ( e_value > getEValueMaximum() ) ) {
530                     ++_domains_ignored_due_to_e_value;
531                 }
532                 else if ( isIgnoreDufs() && uc_id.startsWith( "DUF" ) ) {
533                     ++_domains_ignored_due_to_duf;
534                 }
535                 else if ( isIgnoreVirusLikeIds()
536                         && ( uc_id.contains( VIR ) || uc_id.contains( PHAGE ) || uc_id.contains( RETRO )
537                                 || uc_id.contains( TRANSPOS ) || uc_id.startsWith( RV ) || uc_id.startsWith( GAG )
538                                 || uc_id.startsWith( HCV ) || uc_id.startsWith( HERPES ) ) ) {
539                     ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToVirusLikeIdCountsMap(), id );
540                     ++_domains_ignored_due_to_virus_like_id;
541                 }
542                 else if ( ( getFilterType() == FilterType.NEGATIVE_DOMAIN )
543                         && getFilter().contains( new DomainId( id ) ) ) {
544                     ++_domains_ignored_due_to_negative_domain_filter;
545                     ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToNegativeDomainFilterCountsMap(), id );
546                 }
547                 else {
548                     final BasicDomain pd = new BasicDomain( id,
549                                                             from,
550                                                             to,
551                                                             ( short ) number,
552                                                             ( short ) total,
553                                                             e_value,
554                                                             score );
555                     current_protein.addProteinDomain( pd );
556                     ++_domains_stored;
557                 }
558             }
559         } // while ( ( line = br.readLine() ) != null )
560         setTime( new Date().getTime() - start_time );
561         if ( !saw_double_slash ) {
562             throw new IOException( "file ends unexpectedly [line " + line_number + "]" );
563         }
564         return proteins;
565     }
566
567     public void setAllowNonUniqueQuery( final boolean allow_non_unique_query ) {
568         _allow_non_unique_query = allow_non_unique_query;
569     }
570
571     private void setDomainsEncountered( final int domains_encountered ) {
572         _domains_encountered = domains_encountered;
573     }
574
575     private void setDomainsIgnoredDueToDuf( final int domains_ignored_due_to_duf ) {
576         _domains_ignored_due_to_duf = domains_ignored_due_to_duf;
577     }
578
579     public void setDomainsIgnoredDueToEval( final int domains_ignored_due_to_e_value ) {
580         _domains_ignored_due_to_e_value = domains_ignored_due_to_e_value;
581     }
582
583     public void setDomainsIgnoredDueToIndividualScoreCutoff( final int domains_ignored_due_to_individual_score_cutoff ) {
584         _domains_ignored_due_to_individual_score_cutoff = domains_ignored_due_to_individual_score_cutoff;
585     }
586
587     private void setDomainsIgnoredDueToNegativeDomainFilterCountsMap( final Map<String, Integer> domains_ignored_due_to_negative_domain_filter_counts_map ) {
588         _domains_ignored_due_to_negative_domain_filter_counts_map = domains_ignored_due_to_negative_domain_filter_counts_map;
589     }
590
591     private void setDomainsIgnoredDueToNegativeFilter( final int domains_ignored_due_to_negative_domain_filter ) {
592         _domains_ignored_due_to_negative_domain_filter = domains_ignored_due_to_negative_domain_filter;
593     }
594
595     private void setDomainsIgnoredDueToOverlap( final int domains_ignored_due_to_overlap ) {
596         _domains_ignored_due_to_overlap = domains_ignored_due_to_overlap;
597     }
598
599     private void setDomainsIgnoredDueToVirusLikeId( final int i ) {
600         _domains_ignored_due_to_virus_like_id = i;
601     }
602
603     private void setDomainsIgnoredDueToVirusLikeIdCountsMap( final Map<String, Integer> domains_ignored_due_to_virus_like_id_counts_map ) {
604         _domains_ignored_due_to_virus_like_id_counts_map = domains_ignored_due_to_virus_like_id_counts_map;
605     }
606
607     private void setDomainsStored( final int domains_stored ) {
608         _domains_stored = domains_stored;
609     }
610
611     private void setDomainsStoredSet( final SortedSet<DomainId> _storeddomains_stored ) {
612         _domains_stored_set = _storeddomains_stored;
613     }
614
615     public void setEValueMaximum( final double e_value_maximum ) {
616         if ( e_value_maximum < 0.0 ) {
617             throw new IllegalArgumentException( "attempt to set the maximum E-value to a negative value" );
618         }
619         _e_value_maximum = e_value_maximum;
620     }
621
622     public void setIgnoreDufs( final boolean ignore_dufs ) {
623         _ignore_dufs = ignore_dufs;
624     }
625
626     /**
627      * To ignore domains which are completely engulfed by domains (individual
628      * ones or stretches of overlapping ones) with better support values.
629      * 
630      * 
631      * @param ignored_engulfed_domains
632      */
633     public void setIgnoreEngulfedDomains( final boolean ignore_engulfed_domains ) {
634         _ignore_engulfed_domains = ignore_engulfed_domains;
635     }
636
637     public void setIgnoreVirusLikeIds( final boolean ignore_virus_like_ids ) {
638         _ignore_virus_like_ids = ignore_virus_like_ids;
639     }
640
641     /**
642      * Sets the individual domain score cutoff values (for example, gathering
643      * thresholds from Pfam). Domain ids are the keys, cutoffs the values.
644      * 
645      * @param individual_domain_score_cutoffs
646      */
647     public void setIndividualDomainScoreCutoffs( final Map<String, String> individual_domain_score_cutoffs ) {
648         _individual_domain_score_cutoffs = individual_domain_score_cutoffs;
649     }
650
651     public void setMaxAllowedOverlap( final int max_allowed_overlap ) {
652         if ( max_allowed_overlap < 0 ) {
653             throw new IllegalArgumentException( "Attempt to set max allowed overlap to less than zero." );
654         }
655         _max_allowed_overlap = max_allowed_overlap;
656     }
657
658     private void setProteinsEncountered( final int proteins_encountered ) {
659         _proteins_encountered = proteins_encountered;
660     }
661
662     private void setProteinsIgnoredDueToFilter( final int proteins_ignored_due_to_filter ) {
663         _proteins_ignored_due_to_filter = proteins_ignored_due_to_filter;
664     }
665
666     private void setProteinsStored( final int proteins_stored ) {
667         _proteins_stored = proteins_stored;
668     }
669
670     public void setReturnType( final ReturnType return_type ) {
671         _return_type = return_type;
672     }
673
674     private void setTime( final long time ) {
675         _time = time;
676     }
677
678     public void setVerbose( final boolean verbose ) {
679         _verbose = verbose;
680     }
681
682     public static enum FilterType {
683         NONE, POSITIVE_PROTEIN, NEGATIVE_PROTEIN, NEGATIVE_DOMAIN
684     }
685
686     public static enum ReturnType {
687         UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN
688     }
689 }