// $Id:
//
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.io.parsers;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;

import org.forester.protein.BasicDomain;
import org.forester.protein.BasicProtein;
import org.forester.protein.Domain;
import org.forester.protein.Protein;
import org.forester.util.ForesterUtil;

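/**
 * Parser for the plain-text output of the HMMER 2 "hmmpfam" program. Each
 * query sequence in the output becomes one {@link Protein}, with one
 * {@link Domain} per accepted per-domain hit; hits can be discarded based on
 * a maximum E-value, per-domain score cutoffs, DUF status, virus-like model
 * names, overlap/engulfment, and positive/negative domain or protein filters.
 *
 * A minimal usage sketch (the file name, species label, and model type below
 * are illustrative assumptions, not taken from this source):
 *
 * <pre>
 * final HmmPfamOutputParser parser = new HmmPfamOutputParser( new File( "hmmpfam.out" ),
 *                                                             "NEMVE",
 *                                                             "pfam" );
 * parser.setEValueMaximum( 0.1 );
 * parser.setIgnoreDufs( true );
 * final List&lt;Protein&gt; proteins = parser.parse();
 * </pre>
 */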
public final class HmmPfamOutputParser {

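    // Substrings and prefixes of model names that are treated as "virus like";
    // domains whose (upper-cased) ids match any of these are skipped during
    // parsing when setIgnoreVirusLikeIds( true ) has been called.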
    private static final String     RETRO                       = "RETRO";
    private static final String     PHAGE                       = "PHAGE";
    private static final String     VIR                         = "VIR";
    private static final String     TRANSPOS                    = "TRANSPOS";
    private static final String     RV                          = "RV";
    private static final String     GAG                         = "GAG_";
    private static final String     HCV                         = "HCV_";     // New. Added on Jun 11, after 1st submission.
    private static final String     HERPES                      = "Herpes_";  // New. Added on Jun 11, after 1st submission.
    private static final int        E_VALUE_MAXIMUM_DEFAULT     = -1;
    private static final ReturnType RETURN_TYPE_DEFAULT         = ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN;
    private static final boolean    IGNORE_DUFS_DEFAULT         = false;
    private static final int        MAX_ALLOWED_OVERLAP_DEFAULT = -1;
    private final Set<String>       _filter;
    private final FilterType        _filter_type;
    private final File              _input_file;
    private final String            _species;
    private final String            _model_type;
    private double                  _e_value_maximum;
    private Map<String, String>     _individual_domain_score_cutoffs;
    private boolean                 _ignore_dufs;
    private boolean                 _ignore_virus_like_ids;
    private boolean                 _allow_non_unique_query;
    private boolean                 _verbose;
    private int                     _max_allowed_overlap;
    private boolean                 _ignore_engulfed_domains;
    private ReturnType              _return_type;
    private int                     _proteins_encountered;
    private int                     _proteins_ignored_due_to_filter;
    private int                     _proteins_stored;
    private int                     _domains_encountered;
    private int                     _domains_ignored_due_to_duf;
    private int                     _domains_ignored_due_to_overlap;
    private int                     _domains_ignored_due_to_e_value;
    private int                     _domains_ignored_due_to_individual_score_cutoff;
    private int                     _domains_stored;
    private SortedSet<String>       _domains_stored_set;
    private long                    _time;
    private int                     _domains_ignored_due_to_negative_domain_filter;
    private Map<String, Integer>    _domains_ignored_due_to_negative_domain_filter_counts_map;
    private int                     _domains_ignored_due_to_virus_like_id;
    private Map<String, Integer>    _domains_ignored_due_to_virus_like_id_counts_map;

    public HmmPfamOutputParser( final File input_file, final String species, final String model_type ) {
        _input_file = input_file;
        _species = species;
        _model_type = model_type;
        _filter = null;
        _filter_type = FilterType.NONE;
        init();
    }

    public HmmPfamOutputParser( final File input_file,
                                final String species,
                                final String model_type,
                                final Set<String> filter,
                                final FilterType filter_type ) {
        _input_file = input_file;
        _species = species;
        _model_type = model_type;
        _filter = filter;
        _filter_type = filter_type;
        init();
    }
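
    // Sketch of the filtering constructor (the domain id, species, and model
    // type below are hypothetical): a NEGATIVE_DOMAIN filter drops the listed
    // domain ids (counting them via getDomainsIgnoredDueToNegativeDomainFilter()),
    // while POSITIVE_PROTEIN / NEGATIVE_PROTEIN filters keep or drop whole
    // proteins depending on whether they contain any of the listed domains.
    //
    //   final Set<String> filter = new HashSet<String>();
    //   filter.add( "DUF1234" );
    //   final HmmPfamOutputParser parser = new HmmPfamOutputParser( input_file,
    //                                                               "HUMAN",
    //                                                               "pfam",
    //                                                               filter,
    //                                                               FilterType.NEGATIVE_DOMAIN );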

    private void actuallyAddProtein( final List<Protein> proteins, final Protein current_protein ) {
        final List<Domain> l = current_protein.getProteinDomains();
        for( final Domain d : l ) {
            getDomainsStoredSet().add( d.getDomainId() );
        }
        proteins.add( current_protein );
        ++_proteins_stored;
    }

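    // Applies the protein-level filter (if one is set): with a POSITIVE_PROTEIN
    // filter the protein is stored only if it contains at least one domain id
    // from the filter set, with a NEGATIVE_PROTEIN filter only if it contains
    // none; otherwise the protein is stored unconditionally.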
    private void addProtein( final List<Protein> proteins, final Protein current_protein ) {
        if ( ( getFilterType() == FilterType.POSITIVE_PROTEIN ) || ( getFilterType() == FilterType.NEGATIVE_PROTEIN ) ) {
            final Set<String> domain_ids_in_protein = new HashSet<String>();
            for( final Domain d : current_protein.getProteinDomains() ) {
                domain_ids_in_protein.add( d.getDomainId() );
            }
            domain_ids_in_protein.retainAll( getFilter() );
            if ( getFilterType() == FilterType.POSITIVE_PROTEIN ) {
                if ( domain_ids_in_protein.size() > 0 ) {
                    actuallyAddProtein( proteins, current_protein );
                }
                else {
                    ++_proteins_ignored_due_to_filter;
                }
            }
            else {
                if ( domain_ids_in_protein.size() < 1 ) {
                    actuallyAddProtein( proteins, current_protein );
                }
                else {
                    ++_proteins_ignored_due_to_filter;
                }
            }
        }
        else {
            actuallyAddProtein( proteins, current_protein );
        }
    }

    public int getDomainsEncountered() {
        return _domains_encountered;
    }

    public int getDomainsIgnoredDueToDuf() {
        return _domains_ignored_due_to_duf;
    }

    public int getDomainsIgnoredDueToEval() {
        return _domains_ignored_due_to_e_value;
    }

    public int getDomainsIgnoredDueToIndividualScoreCutoff() {
        return _domains_ignored_due_to_individual_score_cutoff;
    }

    public int getDomainsIgnoredDueToNegativeDomainFilter() {
        return _domains_ignored_due_to_negative_domain_filter;
    }

    public Map<String, Integer> getDomainsIgnoredDueToNegativeDomainFilterCountsMap() {
        return _domains_ignored_due_to_negative_domain_filter_counts_map;
    }

    public int getDomainsIgnoredDueToOverlap() {
        return _domains_ignored_due_to_overlap;
    }

    public Map<String, Integer> getDomainsIgnoredDueToVirusLikeIdCountsMap() {
        return _domains_ignored_due_to_virus_like_id_counts_map;
    }

    public int getDomainsIgnoredDueToVirusLikeIds() {
        return _domains_ignored_due_to_virus_like_id;
    }

    public int getDomainsStored() {
        return _domains_stored;
    }

    public SortedSet<String> getDomainsStoredSet() {
        return _domains_stored_set;
    }

    private double getEValueMaximum() {
        return _e_value_maximum;
    }

    private Set<String> getFilter() {
        return _filter;
    }

    private FilterType getFilterType() {
        return _filter_type;
    }

    private Map<String, String> getIndividualDomainScoreCutoffs() {
        return _individual_domain_score_cutoffs;
    }

    private File getInputFile() {
        return _input_file;
    }

    private int getMaxAllowedOverlap() {
        return _max_allowed_overlap;
    }

    private String getModelType() {
        return _model_type;
    }

    public int getProteinsEncountered() {
        return _proteins_encountered;
    }

    public int getProteinsIgnoredDueToFilter() {
        return _proteins_ignored_due_to_filter;
    }

    public int getProteinsStored() {
        return _proteins_stored;
    }

    private ReturnType getReturnType() {
        return _return_type;
    }

    private String getSpecies() {
        return _species;
    }

    public long getTime() {
        return _time;
    }

    private void init() {
        _e_value_maximum = HmmPfamOutputParser.E_VALUE_MAXIMUM_DEFAULT;
        setIgnoreDufs( HmmPfamOutputParser.IGNORE_DUFS_DEFAULT );
        setReturnType( HmmPfamOutputParser.RETURN_TYPE_DEFAULT );
        _max_allowed_overlap = HmmPfamOutputParser.MAX_ALLOWED_OVERLAP_DEFAULT;
        setIndividualDomainScoreCutoffs( null );
        setIgnoreEngulfedDomains( false );
        setIgnoreVirusLikeIds( false );
        setAllowNonUniqueQuery( false );
        setVerbose( false );
        initCounts();
    }

    private void initCounts() {
        setDomainsStoredSet( new TreeSet<String>() );
        setDomainsEncountered( 0 );
        setProteinsEncountered( 0 );
        setProteinsIgnoredDueToFilter( 0 );
        setDomainsIgnoredDueToNegativeFilter( 0 );
        setDomainsIgnoredDueToDuf( 0 );
        setDomainsIgnoredDueToEval( 0 );
        setDomainsIgnoredDueToIndividualScoreCutoff( 0 );
        setDomainsIgnoredDueToVirusLikeId( 0 );
        setDomainsIgnoredDueToOverlap( 0 );
        setDomainsStored( 0 );
        setProteinsStored( 0 );
        setTime( 0 );
        setDomainsIgnoredDueToVirusLikeIdCountsMap( new TreeMap<String, Integer>() );
        setDomainsIgnoredDueToNegativeDomainFilterCountsMap( new TreeMap<String, Integer>() );
    }

    private boolean isAllowNonUniqueQuery() {
        return _allow_non_unique_query;
    }

    private boolean isIgnoreDufs() {
        return _ignore_dufs;
    }

    private boolean isIgnoreEngulfedDomains() {
        return _ignore_engulfed_domains;
    }

    private boolean isIgnoreVirusLikeIds() {
        return _ignore_virus_like_ids;
    }

    private boolean isVerbose() {
        return _verbose;
    }

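    /**
     * Parses the hmmpfam output file given to the constructor and returns the
     * proteins that retained at least one domain after filtering. The parser
     * walks the report line by line: "Query sequence:" starts a new protein,
     * the dashed line following "Parsed for domains:" introduces the
     * per-domain table, and "//" ends the record.
     */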
    public List<Protein> parse() throws IOException {
        initCounts();
        final Set<String> queries = new HashSet<String>();
        final String error = ForesterUtil.isReadableFile( getInputFile() );
        if ( !ForesterUtil.isEmpty( error ) ) {
            throw new IOException( error );
        }
        final BufferedReader br = new BufferedReader( new FileReader( getInputFile() ) );
        String line;
        final List<Protein> proteins = new ArrayList<Protein>();
        Protein current_protein = null;
        int line_number = 0;
        boolean saw_double_slash = true;
        boolean can_parse_domains = false;
        boolean saw_parsed_for_domains = false;
        boolean saw_query_sequence = false;
        boolean was_not_unique = false;
        final long start_time = new Date().getTime();
        while ( ( line = br.readLine() ) != null ) {
            line_number++;
            if ( line.length() < 1 ) {
                continue;
            }
            else if ( line.startsWith( "Query sequence:" ) ) {
                ++_proteins_encountered;
                if ( !saw_double_slash ) {
                    throw new IOException( "unexpected format [line " + line_number + "] in ["
                            + getInputFile().getCanonicalPath() + "]" );
                }
                saw_double_slash = false;
                saw_query_sequence = true;
                was_not_unique = false;
                final String query = line.substring( 16 ).trim();
                if ( ForesterUtil.isEmpty( query ) ) {
                    throw new IOException( "query sequence cannot be empty [line " + line_number + "] in ["
                            + getInputFile().getCanonicalPath() + "]" );
                }
                if ( queries.contains( query ) ) {
                    if ( !isAllowNonUniqueQuery() ) {
                        throw new IOException( "query \"" + query + "\" is not unique [line " + line_number + "] in ["
                                + getInputFile().getCanonicalPath() + "]" );
                    }
                    else if ( isVerbose() ) {
                        ForesterUtil.printWarningMessage( getClass().getName(), "query \"" + query
                                + "\" is not unique [line " + line_number + "] in ["
                                + getInputFile().getCanonicalPath() + "]" );
                    }
                }
                else {
                    queries.add( query );
                }
                if ( current_protein != null ) {
                    throw new IOException( "unexpected format [line " + line_number + "] in ["
                            + getInputFile().getCanonicalPath() + "]" );
                }
                if ( getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ) {
                    current_protein = new BasicProtein( query, getSpecies(), 0 );
                }
                else {
                    throw new IllegalArgumentException( "unknown return type" );
                }
            }
            else if ( line.startsWith( "Accession:" ) ) {
                if ( !saw_query_sequence || ( current_protein == null ) ) {
                    throw new IOException( "unexpected format [line " + line_number + "] in ["
                            + getInputFile().getCanonicalPath() + "]" );
                }
                ( ( BasicProtein ) current_protein ).setAccession( line.substring( 11 ).trim() );
            }
            else if ( line.startsWith( "Description:" ) ) {
                if ( !saw_query_sequence || ( current_protein == null ) ) {
                    throw new IOException( "unexpected format [line " + line_number + "] in ["
                            + getInputFile().getCanonicalPath() + "]" );
                }
                if ( was_not_unique ) {
                    if ( getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ) {
                        current_protein = new BasicProtein( current_protein.getProteinId() + " "
                                + line.substring( 13 ).trim(), getSpecies(), 0 );
                    }
                }
                else {
                    ( ( BasicProtein ) current_protein ).setDescription( line.substring( 13 ).trim() );
                }
            }
            else if ( line.startsWith( "Parsed for domains:" ) ) {
                if ( !saw_query_sequence ) {
                    throw new IOException( "unexpected format [line " + line_number + "] in ["
                            + getInputFile().getCanonicalPath() + "]" );
                }
                saw_query_sequence = false;
                saw_parsed_for_domains = true;
            }
            else if ( saw_parsed_for_domains && line.startsWith( "--------" ) ) {
                can_parse_domains = true;
                saw_parsed_for_domains = false;
            }
            else if ( line.startsWith( "Alignments of top-scoring domains:" ) ) {
                if ( !can_parse_domains ) {
                    throw new IOException( "unexpected format [line " + line_number + "] in ["
                            + getInputFile().getCanonicalPath() + "]" );
                }
                can_parse_domains = false;
            }
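            // "//" terminates the current record: overlapping (and, optionally,
            // engulfed) domains are removed before the protein is handed to the
            // protein-level filter in addProtein().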
            else if ( line.startsWith( "//" ) ) {
                can_parse_domains = false;
                saw_double_slash = true;
                if ( current_protein.getProteinDomains().size() > 0 ) {
                    if ( ( getMaxAllowedOverlap() != HmmPfamOutputParser.MAX_ALLOWED_OVERLAP_DEFAULT )
                            || isIgnoreEngulfedDomains() ) {
                        final int domains_count = current_protein.getNumberOfProteinDomains();
                        current_protein = ForesterUtil.removeOverlappingDomains( getMaxAllowedOverlap(),
                                                                                 isIgnoreEngulfedDomains(),
                                                                                 current_protein );
                        final int domains_removed = domains_count - current_protein.getNumberOfProteinDomains();
                        _domains_stored -= domains_removed;
                        _domains_ignored_due_to_overlap += domains_removed;
                    }
                    addProtein( proteins, current_protein );
                }
                current_protein = null;
            }
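            // Any other line while can_parse_domains is set is expected to be a
            // row of the per-domain table, split below into 10 whitespace-separated
            // columns: model id, domain number as "n/total", seq-f, seq-t, the
            // sequence match symbols, two columns not used here (presumably
            // hmm-f and hmm-t), the HMM match symbols, score, and E-value.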
            else if ( can_parse_domains && ( line.indexOf( "[no hits above thresholds]" ) == -1 ) ) {
                final String[] s = line.split( "\\s+" );
                if ( s.length != 10 ) {
                    throw new IOException( "unexpected format in hmmpfam output:  \"" + line + "\" [line "
                            + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
                }
                final String id = s[ 0 ];
                final String domain_count_str = s[ 1 ];
                final String from_str = s[ 2 ];
                final String to_str = s[ 3 ];
                final String query_match_str = s[ 4 ];
                final String hmm_match_str = s[ 7 ];
                final String score_str = s[ 8 ];
                final String e_value_str = s[ 9 ];
                int from = -1;
                int to = -1;
                double e_value = -1;
                double score = -1;
                boolean is_complete_hmm_match = false;
                boolean is_complete_query_match = false;
                try {
                    from = Integer.valueOf( from_str ).intValue();
                }
                catch ( final NumberFormatException e ) {
                    throw new IOException( "could not parse seq-f from \"" + line + "\" [line " + line_number
                            + "] in [" + getInputFile().getCanonicalPath() + "]" );
                }
                try {
                    to = Integer.valueOf( to_str ).intValue();
                }
                catch ( final NumberFormatException e ) {
                    throw new IOException( "could not parse seq-t from \"" + line + "\" [line " + line_number
                            + "] in [" + getInputFile().getCanonicalPath() + "]" );
                }
                try {
                    score = Double.valueOf( score_str ).doubleValue();
                }
                catch ( final NumberFormatException e ) {
                    throw new IOException( "could not parse score from \"" + line + "\" [line " + line_number
                            + "] in [" + getInputFile().getCanonicalPath() + "]" );
                }
                try {
                    e_value = Double.valueOf( e_value_str ).doubleValue();
                }
                catch ( final NumberFormatException e ) {
                    throw new IOException( "could not parse E-value from \"" + line + "\" [line " + line_number
                            + "] in [" + getInputFile().getCanonicalPath() + "]" );
                }
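                // The two-character match-symbol columns may only be "[]", ".]",
                // "[.", or ".."; anything else is rejected as a format error.
                // (The two booleans set here are not used further down.)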
                if ( hmm_match_str.equals( "[]" ) ) {
                    is_complete_hmm_match = true;
                }
                else if ( !( hmm_match_str.equals( ".]" ) || hmm_match_str.equals( "[." ) || hmm_match_str
                        .equals( ".." ) ) ) {
                    throw new IOException( "unexpected format in hmmpfam output:  \"" + line + "\" [line "
                            + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
                }
                if ( query_match_str.equals( ".." ) ) {
                    is_complete_query_match = true;
                }
                else if ( !( query_match_str.equals( ".]" ) || query_match_str.equals( "[." ) || query_match_str
                        .equals( "[]" ) ) ) {
                    throw new IOException( "unexpected format in hmmpfam output:  \"" + line + "\" [line "
                            + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
                }
                final String[] ct = domain_count_str.split( "/" );
                if ( ct.length != 2 ) {
                    throw new IOException( "unexpected format in hmmpfam output:  \"" + line + "\" [line "
                            + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
                }
                final String number_str = ct[ 0 ];
                final String total_str = ct[ 1 ];
                int number = -1;
                int total = -1;
                try {
                    number = Integer.valueOf( ( number_str ) ).intValue();
                }
                catch ( final NumberFormatException e ) {
                    throw new IOException( "could not parse domain number from \"" + line + "\" [line " + line_number
                            + "] in [" + getInputFile().getCanonicalPath() + "]" );
                }
                try {
                    total = Integer.valueOf( ( total_str ) ).intValue();
                }
                catch ( final NumberFormatException e ) {
                    throw new IOException( "could not parse domain count from \"" + line + "\" [line " + line_number
                            + "] in [" + getInputFile().getCanonicalPath() + "]" );
                }
                ++_domains_encountered;
                boolean failed_cutoff = false;
                if ( getIndividualDomainScoreCutoffs() != null ) {
                    if ( getIndividualDomainScoreCutoffs().containsKey( id ) ) {
                        final double cutoff = Double.parseDouble( getIndividualDomainScoreCutoffs().get( id ) );
                        if ( score < cutoff ) {
                            failed_cutoff = true;
                        }
                    }
                    else {
                        throw new IOException( "could not find a score cutoff value for domain id \"" + id
                                + "\" [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
                    }
                }
                final String uc_id = id.toUpperCase();
                if ( failed_cutoff ) {
                    ++_domains_ignored_due_to_individual_score_cutoff;
                }
                else if ( ( getEValueMaximum() != HmmPfamOutputParser.E_VALUE_MAXIMUM_DEFAULT )
                        && ( e_value > getEValueMaximum() ) ) {
                    ++_domains_ignored_due_to_e_value;
                }
                else if ( isIgnoreDufs() && uc_id.startsWith( "DUF" ) ) {
                    ++_domains_ignored_due_to_duf;
                }
                else if ( isIgnoreVirusLikeIds()
                        && ( uc_id.contains( VIR ) || uc_id.contains( PHAGE ) || uc_id.contains( RETRO )
                                || uc_id.contains( TRANSPOS ) || uc_id.startsWith( RV ) || uc_id.startsWith( GAG )
                                || uc_id.startsWith( HCV ) || uc_id.startsWith( HERPES ) ) ) {
                    ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToVirusLikeIdCountsMap(), id );
                    ++_domains_ignored_due_to_virus_like_id;
                }
                else if ( ( getFilterType() == FilterType.NEGATIVE_DOMAIN ) && getFilter().contains( id ) ) {
                    ++_domains_ignored_due_to_negative_domain_filter;
                    ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToNegativeDomainFilterCountsMap(), id );
                }
                else {
                    final BasicDomain pd = new BasicDomain( id,
                                                            from,
                                                            to,
                                                            ( short ) number,
                                                            ( short ) total,
                                                            e_value,
                                                            score );
                    current_protein.addProteinDomain( pd );
                    ++_domains_stored;
                }
            }
        } // while ( ( line = br.readLine() ) != null )
        br.close();
        setTime( new Date().getTime() - start_time );
        if ( !saw_double_slash ) {
            throw new IOException( "file ends unexpectedly [line " + line_number + "]" );
        }
        return proteins;
    }

    public void setAllowNonUniqueQuery( final boolean allow_non_unique_query ) {
        _allow_non_unique_query = allow_non_unique_query;
    }

    private void setDomainsEncountered( final int domains_encountered ) {
        _domains_encountered = domains_encountered;
    }

    private void setDomainsIgnoredDueToDuf( final int domains_ignored_due_to_duf ) {
        _domains_ignored_due_to_duf = domains_ignored_due_to_duf;
    }

    public void setDomainsIgnoredDueToEval( final int domains_ignored_due_to_e_value ) {
        _domains_ignored_due_to_e_value = domains_ignored_due_to_e_value;
    }

    public void setDomainsIgnoredDueToIndividualScoreCutoff( final int domains_ignored_due_to_individual_score_cutoff ) {
        _domains_ignored_due_to_individual_score_cutoff = domains_ignored_due_to_individual_score_cutoff;
    }

    private void setDomainsIgnoredDueToNegativeDomainFilterCountsMap( final Map<String, Integer> domains_ignored_due_to_negative_domain_filter_counts_map ) {
        _domains_ignored_due_to_negative_domain_filter_counts_map = domains_ignored_due_to_negative_domain_filter_counts_map;
    }

    private void setDomainsIgnoredDueToNegativeFilter( final int domains_ignored_due_to_negative_domain_filter ) {
        _domains_ignored_due_to_negative_domain_filter = domains_ignored_due_to_negative_domain_filter;
    }

    private void setDomainsIgnoredDueToOverlap( final int domains_ignored_due_to_overlap ) {
        _domains_ignored_due_to_overlap = domains_ignored_due_to_overlap;
    }

    private void setDomainsIgnoredDueToVirusLikeId( final int i ) {
        _domains_ignored_due_to_virus_like_id = i;
    }

    private void setDomainsIgnoredDueToVirusLikeIdCountsMap( final Map<String, Integer> domains_ignored_due_to_virus_like_id_counts_map ) {
        _domains_ignored_due_to_virus_like_id_counts_map = domains_ignored_due_to_virus_like_id_counts_map;
    }

    private void setDomainsStored( final int domains_stored ) {
        _domains_stored = domains_stored;
    }

    private void setDomainsStoredSet( final SortedSet<String> domains_stored_set ) {
        _domains_stored_set = domains_stored_set;
    }

    public void setEValueMaximum( final double e_value_maximum ) {
        if ( e_value_maximum < 0.0 ) {
            throw new IllegalArgumentException( "attempt to set the maximum E-value to a negative value" );
        }
        _e_value_maximum = e_value_maximum;
    }

    public void setIgnoreDufs( final boolean ignore_dufs ) {
        _ignore_dufs = ignore_dufs;
    }

    /**
     * Sets whether to ignore domains which are completely engulfed by domains
     * (individual ones or stretches of overlapping ones) with better support
     * values.
     *
     * @param ignore_engulfed_domains
     */
    public void setIgnoreEngulfedDomains( final boolean ignore_engulfed_domains ) {
        _ignore_engulfed_domains = ignore_engulfed_domains;
    }

    public void setIgnoreVirusLikeIds( final boolean ignore_virus_like_ids ) {
        _ignore_virus_like_ids = ignore_virus_like_ids;
    }

    /**
     * Sets the individual domain score cutoff values (for example, gathering
     * thresholds from Pfam). Domain ids are the keys, cutoffs the values.
     *
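     * A minimal sketch of building such a map (the domain id and cutoff below
     * are hypothetical; cutoff values are given as strings and parsed with
     * Double.parseDouble during parsing, and every domain id encountered in
     * the hmmpfam output must have an entry, otherwise parse() throws an
     * IOException):
     *
     * <pre>
     * final Map&lt;String, String&gt; cutoffs = new HashMap&lt;String, String&gt;();
     * cutoffs.put( "Kinase_hypothetical", "20.3" );
     * parser.setIndividualDomainScoreCutoffs( cutoffs );
     * </pre>
     *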
     * @param individual_domain_score_cutoffs
     */
    public void setIndividualDomainScoreCutoffs( final Map<String, String> individual_domain_score_cutoffs ) {
        _individual_domain_score_cutoffs = individual_domain_score_cutoffs;
    }

    public void setMaxAllowedOverlap( final int max_allowed_overlap ) {
        if ( max_allowed_overlap < 0 ) {
            throw new IllegalArgumentException( "Attempt to set max allowed overlap to less than zero." );
        }
        _max_allowed_overlap = max_allowed_overlap;
    }

    private void setProteinsEncountered( final int proteins_encountered ) {
        _proteins_encountered = proteins_encountered;
    }

    private void setProteinsIgnoredDueToFilter( final int proteins_ignored_due_to_filter ) {
        _proteins_ignored_due_to_filter = proteins_ignored_due_to_filter;
    }

    private void setProteinsStored( final int proteins_stored ) {
        _proteins_stored = proteins_stored;
    }

    public void setReturnType( final ReturnType return_type ) {
        _return_type = return_type;
    }

    private void setTime( final long time ) {
        _time = time;
    }

    public void setVerbose( final boolean verbose ) {
        _verbose = verbose;
    }

    public static enum FilterType {
        NONE, POSITIVE_PROTEIN, NEGATIVE_PROTEIN, NEGATIVE_DOMAIN
    }

    public static enum ReturnType {
        UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN
    }
}