(no commit message)
[jalview.git] / forester / java / src / org / forester / io / parsers / HmmscanPerDomainTableParser.java
1 // $Id:
2 // $
3 //
4 // FORESTER -- software libraries and applications
5 // for evolutionary biology research and applications.
6 //
7 // Copyright (C) 2008-2009 Christian M. Zmasek
8 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // All rights reserved
10 //
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
15 //
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
20 //
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 //
25 // Contact: phylosoft @ gmail . com
26 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
27
28 package org.forester.io.parsers;
29
30 import java.io.BufferedReader;
31 import java.io.File;
32 import java.io.FileReader;
33 import java.io.IOException;
34 import java.util.ArrayList;
35 import java.util.Date;
36 import java.util.HashSet;
37 import java.util.List;
38 import java.util.Map;
39 import java.util.Set;
40 import java.util.SortedSet;
41 import java.util.TreeMap;
42 import java.util.TreeSet;
43
44 import org.forester.protein.BasicDomain;
45 import org.forester.protein.BasicProtein;
46 import org.forester.protein.Domain;
47 import org.forester.protein.Protein;
48 import org.forester.util.ForesterUtil;
49
50 public final class HmmscanPerDomainTableParser {
51
52     private static final String           RETRO                       = "RETRO";
53     private static final String           PHAGE                       = "PHAGE";
54     private static final String           VIR                         = "VIR";
55     private static final String           TRANSPOS                    = "TRANSPOS";
56     private static final String           RV                          = "RV";
57     private static final String           GAG                         = "GAG_";
58     private static final String           HCV                         = "HCV_";
59     private static final String           HERPES                      = "HERPES_";
60     private static final String           BACULO                      = "BACULO_";
61     private static final int              E_VALUE_MAXIMUM_DEFAULT     = -1;
62     private static final ReturnType       RETURN_TYPE_DEFAULT         = ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN;
63     private static final boolean          IGNORE_DUFS_DEFAULT         = false;
64     private static final int              MAX_ALLOWED_OVERLAP_DEFAULT = -1;
65     private static final boolean          IGNORE_REPLACED_RRMS        = false;
66     private static final boolean          IGNORE_hGDE_amylase         = true;                                                      //TODO eventually remove me, added 10/22/13
67     private final Set<String>             _filter;
68     private final FilterType              _filter_type;
69     private final File                    _input_file;
70     private final String                  _species;
71     private double                        _fs_e_value_maximum;
72     private double                        _i_e_value_maximum;
73     private Map<String, Double>           _individual_score_cutoffs;
74     private boolean                       _ignore_dufs;
75     private boolean                       _ignore_virus_like_ids;
76     private int                           _max_allowed_overlap;
77     private boolean                       _ignore_engulfed_domains;
78     private ReturnType                    _return_type;
79     private int                           _proteins_encountered;
80     private int                           _proteins_ignored_due_to_filter;
81     private int                           _proteins_stored;
82     private int                           _domains_encountered;
83     private int                           _domains_ignored_due_to_duf;
84     private int                           _domains_ignored_due_to_overlap;
85     private int                           _domains_ignored_due_to_fs_e_value;
86     private int                           _domains_ignored_due_to_i_e_value;
87     private int                           _domains_ignored_due_to_individual_score_cutoff;
88     private int                           _domains_stored;
89     private SortedSet<String>             _domains_stored_set;
90     private long                          _time;
91     private int                           _domains_ignored_due_to_negative_domain_filter;
92     private Map<String, Integer>          _domains_ignored_due_to_negative_domain_filter_counts_map;
93     private int                           _domains_ignored_due_to_virus_like_id;
94     private Map<String, Integer>          _domains_ignored_due_to_virus_like_id_counts_map;
95     private final INDIVIDUAL_SCORE_CUTOFF _ind_cutoff;
96     private final boolean                 _allow_proteins_with_same_name;
97
98     public HmmscanPerDomainTableParser( final File input_file,
99                                         final String species,
100                                         final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to ) {
101         _input_file = input_file;
102         _species = species;
103         _filter = null;
104         _filter_type = FilterType.NONE;
105         _ind_cutoff = individual_cutoff_applies_to;
106         _allow_proteins_with_same_name = false;
107         init();
108     }
109
110     public HmmscanPerDomainTableParser( final File input_file,
111                                         final String species,
112                                         final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to,
113                                         final boolean allow_proteins_with_same_name ) {
114         _input_file = input_file;
115         _species = species;
116         _filter = null;
117         _filter_type = FilterType.NONE;
118         _ind_cutoff = individual_cutoff_applies_to;
119         _allow_proteins_with_same_name = allow_proteins_with_same_name;
120         init();
121     }
122
123     public HmmscanPerDomainTableParser( final File input_file,
124                                         final String species,
125                                         final Set<String> filter,
126                                         final FilterType filter_type,
127                                         final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to ) {
128         _input_file = input_file;
129         _species = species;
130         _filter = filter;
131         _filter_type = filter_type;
132         _ind_cutoff = individual_cutoff_applies_to;
133         _allow_proteins_with_same_name = false;
134         init();
135     }
136
137     public HmmscanPerDomainTableParser( final File input_file,
138                                         final String species,
139                                         final Set<String> filter,
140                                         final FilterType filter_type,
141                                         final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to,
142                                         final boolean allow_proteins_with_same_name ) {
143         _input_file = input_file;
144         _species = species;
145         _filter = filter;
146         _filter_type = filter_type;
147         _ind_cutoff = individual_cutoff_applies_to;
148         _allow_proteins_with_same_name = allow_proteins_with_same_name;
149         init();
150     }
151
152     public boolean isAllowProteinsWithSameName() {
153         return _allow_proteins_with_same_name;
154     }
155
156     private void actuallyAddProtein( final List<Protein> proteins, final Protein current_protein ) {
157         final List<Domain> l = current_protein.getProteinDomains();
158         for( final Domain d : l ) {
159             getDomainsStoredSet().add( d.getDomainId() );
160         }
161         proteins.add( current_protein );
162         ++_proteins_stored;
163     }
164
165     private void addProtein( final List<Protein> proteins, Protein current_protein ) {
166         if ( ( getMaxAllowedOverlap() != HmmscanPerDomainTableParser.MAX_ALLOWED_OVERLAP_DEFAULT )
167                 || isIgnoreEngulfedDomains() ) {
168             final int domains_count = current_protein.getNumberOfProteinDomains();
169             current_protein = ForesterUtil.removeOverlappingDomains( getMaxAllowedOverlap(),
170                                                                      isIgnoreEngulfedDomains(),
171                                                                      current_protein );
172             final int domains_removed = domains_count - current_protein.getNumberOfProteinDomains();
173             _domains_stored -= domains_removed;
174             _domains_ignored_due_to_overlap += domains_removed;
175         }
176         if ( ( getFilterType() == FilterType.POSITIVE_PROTEIN ) || ( getFilterType() == FilterType.NEGATIVE_PROTEIN ) ) {
177             final Set<String> domain_ids_in_protein = new HashSet<String>();
178             for( final Domain d : current_protein.getProteinDomains() ) {
179                 domain_ids_in_protein.add( d.getDomainId() );
180             }
181             domain_ids_in_protein.retainAll( getFilter() );
182             if ( getFilterType() == FilterType.POSITIVE_PROTEIN ) {
183                 if ( domain_ids_in_protein.size() > 0 ) {
184                     actuallyAddProtein( proteins, current_protein );
185                 }
186                 else {
187                     ++_proteins_ignored_due_to_filter;
188                 }
189             }
190             else {
191                 if ( domain_ids_in_protein.size() < 1 ) {
192                     actuallyAddProtein( proteins, current_protein );
193                 }
194                 else {
195                     ++_proteins_ignored_due_to_filter;
196                 }
197             }
198         }
199         else {
200             actuallyAddProtein( proteins, current_protein );
201         }
202     }
203
204     public int getDomainsEncountered() {
205         return _domains_encountered;
206     }
207
208     public int getDomainsIgnoredDueToDuf() {
209         return _domains_ignored_due_to_duf;
210     }
211
212     public int getDomainsIgnoredDueToIEval() {
213         return _domains_ignored_due_to_i_e_value;
214     }
215
216     public int getDomainsIgnoredDueToFsEval() {
217         return _domains_ignored_due_to_fs_e_value;
218     }
219
220     public int getDomainsIgnoredDueToIndividualScoreCutoff() {
221         return _domains_ignored_due_to_individual_score_cutoff;
222     }
223
224     public int getDomainsIgnoredDueToNegativeDomainFilter() {
225         return _domains_ignored_due_to_negative_domain_filter;
226     }
227
228     public Map<String, Integer> getDomainsIgnoredDueToNegativeDomainFilterCountsMap() {
229         return _domains_ignored_due_to_negative_domain_filter_counts_map;
230     }
231
232     public int getDomainsIgnoredDueToOverlap() {
233         return _domains_ignored_due_to_overlap;
234     }
235
236     public Map<String, Integer> getDomainsIgnoredDueToVirusLikeIdCountsMap() {
237         return _domains_ignored_due_to_virus_like_id_counts_map;
238     }
239
240     public int getDomainsIgnoredDueToVirusLikeIds() {
241         return _domains_ignored_due_to_virus_like_id;
242     }
243
244     public int getDomainsStored() {
245         return _domains_stored;
246     }
247
248     public SortedSet<String> getDomainsStoredSet() {
249         return _domains_stored_set;
250     }
251
252     private double getFsEValueMaximum() {
253         return _fs_e_value_maximum;
254     }
255
256     private double getIEValueMaximum() {
257         return _i_e_value_maximum;
258     }
259
260     private Set<String> getFilter() {
261         return _filter;
262     }
263
264     private FilterType getFilterType() {
265         return _filter_type;
266     }
267
268     public INDIVIDUAL_SCORE_CUTOFF getIndividualCutoffAppliesTo() {
269         return _ind_cutoff;
270     }
271
272     private Map<String, Double> getIndividualScoreCutoffs() {
273         return _individual_score_cutoffs;
274     }
275
276     private File getInputFile() {
277         return _input_file;
278     }
279
280     private int getMaxAllowedOverlap() {
281         return _max_allowed_overlap;
282     }
283
284     public int getProteinsEncountered() {
285         return _proteins_encountered;
286     }
287
288     public int getProteinsIgnoredDueToFilter() {
289         return _proteins_ignored_due_to_filter;
290     }
291
292     public int getProteinsStored() {
293         return _proteins_stored;
294     }
295
296     private ReturnType getReturnType() {
297         return _return_type;
298     }
299
300     private String getSpecies() {
301         return _species;
302     }
303
304     public long getTime() {
305         return _time;
306     }
307
308     private void init() {
309         _fs_e_value_maximum = HmmscanPerDomainTableParser.E_VALUE_MAXIMUM_DEFAULT;
310         _i_e_value_maximum = HmmscanPerDomainTableParser.E_VALUE_MAXIMUM_DEFAULT;
311         setIgnoreDufs( HmmscanPerDomainTableParser.IGNORE_DUFS_DEFAULT );
312         setReturnType( HmmscanPerDomainTableParser.RETURN_TYPE_DEFAULT );
313         _max_allowed_overlap = HmmscanPerDomainTableParser.MAX_ALLOWED_OVERLAP_DEFAULT;
314         setIndividualScoreCutoffs( null );
315         setIgnoreEngulfedDomains( false );
316         setIgnoreVirusLikeIds( false );
317         intitCounts();
318     }
319
320     private void intitCounts() {
321         setDomainsStoredSet( new TreeSet<String>() );
322         setDomainsEncountered( 0 );
323         setProteinsEncountered( 0 );
324         setProteinsIgnoredDueToFilter( 0 );
325         setDomainsIgnoredDueToNegativeFilter( 0 );
326         setDomainsIgnoredDueToDuf( 0 );
327         setDomainsIgnoredDueToFsEval( 0 );
328         setDomainsIgnoredDueToIEval( 0 );
329         setDomainsIgnoredDueToIndividualScoreCutoff( 0 );
330         setDomainsIgnoredDueToVirusLikeId( 0 );
331         setDomainsIgnoredDueToOverlap( 0 );
332         setDomainsStored( 0 );
333         setProteinsStored( 0 );
334         setTime( 0 );
335         setDomainsIgnoredDueToVirusLikeIdCountsMap( new TreeMap<String, Integer>() );
336         setDomainsIgnoredDueToNegativeDomainFilterCountsMap( new TreeMap<String, Integer>() );
337     }
338
339     private boolean isIgnoreDufs() {
340         return _ignore_dufs;
341     }
342
343     private boolean isIgnoreEngulfedDomains() {
344         return _ignore_engulfed_domains;
345     }
346
347     private boolean isIgnoreVirusLikeIds() {
348         return _ignore_virus_like_ids;
349     }
350
351     public List<Protein> parse() throws IOException {
352         if ( ( getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.NONE )
353                 && ( ( getIndividualScoreCutoffs() == null ) || ( getIndividualScoreCutoffs().size() < 1 ) ) ) {
354             throw new RuntimeException( "attempt to use individual cuttoffs with having set them" );
355         }
356         intitCounts();
357         final Set<String> prev_queries = new HashSet<String>();
358         final String error = ForesterUtil.isReadableFile( getInputFile() );
359         if ( !ForesterUtil.isEmpty( error ) ) {
360             throw new IOException( error );
361         }
362         final BufferedReader br = new BufferedReader( new FileReader( getInputFile() ) );
363         String line;
364         final List<Protein> proteins = new ArrayList<Protein>();
365         Protein current_protein = null;
366         int line_number = 0;
367         final long start_time = new Date().getTime();
368         String prev_query = "";
369         int prev_qlen = -1;
370         while ( ( line = br.readLine() ) != null ) {
371             line_number++;
372             if ( ForesterUtil.isEmpty( line ) || line.startsWith( "#" ) ) {
373                 continue;
374             }
375             // 0                    1           2    3                      4           5      6        7      8      9  10  11        12        13     14    15      16  17      18  19      20  21  22
376             // #                                                                              --- full sequence --- -------------- this domain -------------   hmm coord   ali coord   env coord
377             // # target name        accession   tlen query name             accession   qlen   E-value  score  bias   #  of  c-Evalue  i-Evalue  score  bias  from    to  from    to  from    to  acc description of target
378             // #------------------- ---------- -----   -------------------- ---------- ----- --------- ------ ----- --- --- --------- --------- ------ ----- ----- ----- ----- ----- ----- ----- ---- ---------------------
379             // Ion_trans            PF00520.24   201 jgi|Nemve1|7|gw.28.1.1 -           1604  6.3e-169  557.4  95.3   1   4   1.5e-41     3e-38  130.8  11.1     3   171   140   307   139   346 0.81 Ion transport protein
380             // Ion_trans            PF00520.24   201 jgi|Nemve1|7|gw.28.1.1 -           1604  6.3e-169  557.4  95.3   2   4   9.1e-45   1.8e-41  141.3  13.1     4   200   479   664   476   665 0.97 Ion transport protein
381             // Ion_trans            PF00520.24   201 jgi|Nemve1|7|gw.28.1.1 -           1604  6.3e-169  557.4  95.3   3   4   5.2e-45     1e-41  142.1  14.0     1   201   900  1117   900  1117 0.96 Ion transport protein
382             // Ion_trans            PF00520.24   201 jgi|Nemve1|7|gw.28.1.1 -           1604  6.3e-169  557.4  95.3   4   4   9.2e-51   1.8e-47  160.9  11.3     1   201  1217  1423  1217  1423 0.97 Ion transport protein
383             // PKD_channel          PF08016.5    426 jgi|Nemve1|7|gw.28.1.1 -           1604   5.9e-19   67.4  70.5   1   8   0.00053       1.1    7.3   0.4   220   264   142   191   134   200 0.73 Polycystin cation channel
384             final String tokens[] = line.split( "\\s+" );
385             final String target_id = tokens[ 0 ];
386             final String target_acc = tokens[ 1 ];
387             final int tlen = parseInt( tokens[ 2 ], line_number, "tlen" );
388             final String query = tokens[ 3 ];
389             final String query_acc = tokens[ 4 ];
390             final int qlen = parseInt( tokens[ 5 ], line_number, "qlen" );
391             final double fs_e_value = parseDouble( tokens[ 6 ], line_number, "E-value" );
392             final double fs_score = parseDouble( tokens[ 7 ], line_number, "score" );
393             final int domain_number = parseInt( tokens[ 9 ], line_number, "count" );
394             final int total_domains = parseInt( tokens[ 10 ], line_number, "total" );
395             final double c_e_value = parseDouble( tokens[ 11 ], line_number, "c-Evalue" );
396             final double i_e_value = parseDouble( tokens[ 12 ], line_number, "i-Evalue" );
397             final double domain_score = parseDouble( tokens[ 13 ], line_number, "score" );
398             final int hmm_from = parseInt( tokens[ 15 ], line_number, "hmm from" );
399             final int hmm_to = parseInt( tokens[ 16 ], line_number, "hmm to" );
400             final int ali_from = parseInt( tokens[ 17 ], line_number, "ali from" );
401             final int ali_to = parseInt( tokens[ 18 ], line_number, "ali to" );
402             final int env_from = parseInt( tokens[ 19 ], line_number, "env from" );
403             final int env_to = parseInt( tokens[ 20 ], line_number, "env to" );
404             ++_domains_encountered;
405             if ( !query.equals( prev_query ) || ( qlen != prev_qlen ) ) {
406                 if ( !isAllowProteinsWithSameName() ) {
407                     if ( query.equals( prev_query ) ) {
408                         throw new IOException( "more than one protein named [" + query + "]" + " lengths: " + qlen
409                                                + ", " + prev_qlen );
410                     }
411                     if ( prev_queries.contains( query ) ) {
412                         throw new IOException( "more than one protein named [" + query + "]" );
413                     }
414                 }
415                 prev_query = query;
416                 prev_qlen = qlen;
417                 prev_queries.add( query );
418                 if ( ( current_protein != null ) && ( current_protein.getProteinDomains().size() > 0 ) ) {
419                     addProtein( proteins, current_protein );
420                 }
421                 if ( getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ) {
422                     current_protein = new BasicProtein( query, getSpecies(), qlen );
423                 }
424                 else {
425                     throw new IllegalArgumentException( "unknown return type" );
426                 }
427             }
428             boolean failed_cutoff = false;
429             if ( getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.NONE ) {
430                 if ( getIndividualScoreCutoffs().containsKey( target_id ) ) {
431                     final double cutoff = getIndividualScoreCutoffs().get( target_id );
432                     if ( getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.FULL_SEQUENCE ) {
433                         if ( fs_score < cutoff ) {
434                             failed_cutoff = true;
435                         }
436                     }
437                     else if ( getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.DOMAIN ) {
438                         if ( domain_score < cutoff ) {
439                             failed_cutoff = true;
440                         }
441                     }
442                 }
443                 else {
444                     throw new IOException( "could not find a score cutoff value for domain id \"" + target_id
445                                            + "\" [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
446                 }
447             }
448             final String uc_id = target_id.toUpperCase();
449             if ( failed_cutoff ) {
450                 ++_domains_ignored_due_to_individual_score_cutoff;
451             }
452             else if ( ali_from == ali_to ) {
453                 //Ignore
454             }
455             else if ( ( getFsEValueMaximum() != HmmscanPerDomainTableParser.E_VALUE_MAXIMUM_DEFAULT )
456                     && ( fs_e_value > getFsEValueMaximum() ) ) {
457                 ++_domains_ignored_due_to_fs_e_value;
458             }
459             else if ( ( getIEValueMaximum() != HmmscanPerDomainTableParser.E_VALUE_MAXIMUM_DEFAULT )
460                     && ( i_e_value > getIEValueMaximum() ) ) {
461                 ++_domains_ignored_due_to_i_e_value;
462             }
463             else if ( isIgnoreDufs() && uc_id.startsWith( "DUF" ) ) {
464                 ++_domains_ignored_due_to_duf;
465             }
466             else if ( IGNORE_REPLACED_RRMS
467                     && ( uc_id.contains( "RRM_1" ) || uc_id.contains( "RRM_3" ) || uc_id.contains( "RRM_5" ) || uc_id
468                             .contains( "RRM_6" ) ) ) {
469             }
470             else if ( IGNORE_hGDE_amylase && ( uc_id.equals( "hGDE_amylase" ) ) ) {
471             }
472             else if ( isIgnoreVirusLikeIds()
473                     && ( uc_id.contains( VIR ) || uc_id.contains( PHAGE ) || uc_id.contains( RETRO )
474                             || uc_id.contains( TRANSPOS ) || uc_id.startsWith( RV ) || uc_id.startsWith( GAG )
475                             || uc_id.startsWith( HCV ) || uc_id.startsWith( HERPES ) || uc_id.startsWith( BACULO ) ) ) {
476                 ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToVirusLikeIdCountsMap(), target_id );
477                 ++_domains_ignored_due_to_virus_like_id;
478             }
479             else if ( ( getFilterType() == FilterType.NEGATIVE_DOMAIN ) && getFilter().contains( target_id ) ) {
480                 ++_domains_ignored_due_to_negative_domain_filter;
481                 ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToNegativeDomainFilterCountsMap(), target_id );
482             }
483             else {
484                 try {
485                     final Domain pd = new BasicDomain( target_id,
486                                                        ali_from,
487                                                        ali_to,
488                                                        ( short ) domain_number,
489                                                        ( short ) total_domains,
490                                                        i_e_value,
491                                                        domain_score );
492                     current_protein.addProteinDomain( pd );
493                 }
494                 catch ( final IllegalArgumentException e ) {
495                     throw new IOException( "problem with domain parsing at line " + line_number + "[" + line + "]: "
496                             + e.getMessage() );
497                 }
498                 ++_domains_stored;
499             }
500         } // while ( ( line = br.readLine() ) != null )
501         if ( ( current_protein != null ) && ( current_protein.getProteinDomains().size() > 0 ) ) {
502             addProtein( proteins, current_protein );
503         }
504         setProteinsEncountered( prev_queries.size() );
505         setTime( new Date().getTime() - start_time );
506         return proteins;
507     }
508
509     private double parseDouble( final String double_str, final int line_number, final String label ) throws IOException {
510         double d = -1;
511         try {
512             d = Double.valueOf( double_str ).doubleValue();
513         }
514         catch ( final NumberFormatException e ) {
515             throw new IOException( "could not parse \" +label + \" from \"" + double_str + "\" [line " + line_number
516                                    + "] in [" + getInputFile().getCanonicalPath() + "]" );
517         }
518         return d;
519     }
520
521     private int parseInt( final String double_str, final int line_number, final String label ) throws IOException {
522         int i = -1;
523         try {
524             i = Integer.valueOf( double_str ).intValue();
525         }
526         catch ( final NumberFormatException e ) {
527             throw new IOException( "could not parse \"" + label + "\" from \"" + double_str + "\" [line " + line_number
528                                    + "] in [" + getInputFile().getCanonicalPath() + "]" );
529         }
530         return i;
531     }
532
533     private void setDomainsEncountered( final int domains_encountered ) {
534         _domains_encountered = domains_encountered;
535     }
536
537     private void setDomainsIgnoredDueToDuf( final int domains_ignored_due_to_duf ) {
538         _domains_ignored_due_to_duf = domains_ignored_due_to_duf;
539     }
540
541     private void setDomainsIgnoredDueToFsEval( final int domains_ignored_due_to_fs_e_value ) {
542         _domains_ignored_due_to_fs_e_value = domains_ignored_due_to_fs_e_value;
543     }
544
545     private void setDomainsIgnoredDueToIEval( final int domains_ignored_due_to_i_e_value ) {
546         _domains_ignored_due_to_i_e_value = domains_ignored_due_to_i_e_value;
547     }
548
549     private void setDomainsIgnoredDueToIndividualScoreCutoff( final int domains_ignored_due_to_individual_score_cutoff ) {
550         _domains_ignored_due_to_individual_score_cutoff = domains_ignored_due_to_individual_score_cutoff;
551     }
552
553     private void setDomainsIgnoredDueToNegativeDomainFilterCountsMap( final Map<String, Integer> domains_ignored_due_to_negative_domain_filter_counts_map ) {
554         _domains_ignored_due_to_negative_domain_filter_counts_map = domains_ignored_due_to_negative_domain_filter_counts_map;
555     }
556
557     private void setDomainsIgnoredDueToNegativeFilter( final int domains_ignored_due_to_negative_domain_filter ) {
558         _domains_ignored_due_to_negative_domain_filter = domains_ignored_due_to_negative_domain_filter;
559     }
560
561     private void setDomainsIgnoredDueToOverlap( final int domains_ignored_due_to_overlap ) {
562         _domains_ignored_due_to_overlap = domains_ignored_due_to_overlap;
563     }
564
565     private void setDomainsIgnoredDueToVirusLikeId( final int i ) {
566         _domains_ignored_due_to_virus_like_id = i;
567     }
568
569     private void setDomainsIgnoredDueToVirusLikeIdCountsMap( final Map<String, Integer> domains_ignored_due_to_virus_like_id_counts_map ) {
570         _domains_ignored_due_to_virus_like_id_counts_map = domains_ignored_due_to_virus_like_id_counts_map;
571     }
572
573     private void setDomainsStored( final int domains_stored ) {
574         _domains_stored = domains_stored;
575     }
576
577     private void setDomainsStoredSet( final SortedSet<String> _storeddomains_stored ) {
578         _domains_stored_set = _storeddomains_stored;
579     }
580
581     public void setFsEValueMaximum( final double fs_e_value_maximum ) {
582         if ( fs_e_value_maximum < 0.0 ) {
583             throw new IllegalArgumentException( "attempt to set the maximum E-value to a negative value" );
584         }
585         _fs_e_value_maximum = fs_e_value_maximum;
586     }
587
588     public void setIEValueMaximum( final double i_e_value_maximum ) {
589         if ( i_e_value_maximum < 0.0 ) {
590             throw new IllegalArgumentException( "attempt to set the maximum E-value to a negative value" );
591         }
592         _i_e_value_maximum = i_e_value_maximum;
593     }
594
595     public void setIgnoreDufs( final boolean ignore_dufs ) {
596         _ignore_dufs = ignore_dufs;
597     }
598
599     /**
600      * To ignore domains which are completely engulfed by domains (individual
601      * ones or stretches of overlapping ones) with better support values.
602      *
603      *
604      * @param ignored_engulfed_domains
605      */
606     public void setIgnoreEngulfedDomains( final boolean ignore_engulfed_domains ) {
607         _ignore_engulfed_domains = ignore_engulfed_domains;
608     }
609
610     public void setIgnoreVirusLikeIds( final boolean ignore_virus_like_ids ) {
611         _ignore_virus_like_ids = ignore_virus_like_ids;
612     }
613
614     /**
615      * Sets the individual  score cutoff values (for example, gathering
616      * thresholds from Pfam). Domain ids are the keys, cutoffs the values.
617      *
618      * @param individual_score_cutoffs
619      */
620     public void setIndividualScoreCutoffs( final Map<String, Double> individual_score_cutoffs ) {
621         _individual_score_cutoffs = individual_score_cutoffs;
622     }
623
624     public void setMaxAllowedOverlap( final int max_allowed_overlap ) {
625         if ( max_allowed_overlap < 0 ) {
626             throw new IllegalArgumentException( "Attempt to set max allowed overlap to less than zero." );
627         }
628         _max_allowed_overlap = max_allowed_overlap;
629     }
630
631     private void setProteinsEncountered( final int proteins_encountered ) {
632         _proteins_encountered = proteins_encountered;
633     }
634
635     private void setProteinsIgnoredDueToFilter( final int proteins_ignored_due_to_filter ) {
636         _proteins_ignored_due_to_filter = proteins_ignored_due_to_filter;
637     }
638
639     private void setProteinsStored( final int proteins_stored ) {
640         _proteins_stored = proteins_stored;
641     }
642
643     public void setReturnType( final ReturnType return_type ) {
644         _return_type = return_type;
645     }
646
647     private void setTime( final long time ) {
648         _time = time;
649     }
650
651     public static enum FilterType {
652         NONE, POSITIVE_PROTEIN, NEGATIVE_PROTEIN, NEGATIVE_DOMAIN
653     }
654
655     static public enum INDIVIDUAL_SCORE_CUTOFF {
656         FULL_SEQUENCE, DOMAIN, NONE;
657     }
658
659     public static enum ReturnType {
660         UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN
661     }
662 }