in progress
[jalview.git] / forester / java / src / org / forester / io / parsers / HmmscanPerDomainTableParser.java
1 // $Id:
2 // $
3 //
4 // FORESTER -- software libraries and applications
5 // for evolutionary biology research and applications.
6 //
7 // Copyright (C) 2008-2009 Christian M. Zmasek
8 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // All rights reserved
10 //
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
15 //
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
20 //
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 //
25 // Contact: phylosoft @ gmail . com
26 // WWW: www.phylosoft.org/forester
27
28 package org.forester.io.parsers;
29
30 import java.io.BufferedReader;
31 import java.io.File;
32 import java.io.FileReader;
33 import java.io.IOException;
34 import java.util.ArrayList;
35 import java.util.Date;
36 import java.util.HashSet;
37 import java.util.List;
38 import java.util.Map;
39 import java.util.Set;
40 import java.util.SortedSet;
41 import java.util.TreeMap;
42 import java.util.TreeSet;
43
44 import org.forester.surfacing.BasicDomain;
45 import org.forester.surfacing.BasicProtein;
46 import org.forester.surfacing.Domain;
47 import org.forester.surfacing.DomainId;
48 import org.forester.surfacing.Protein;
49 import org.forester.surfacing.SurfacingUtil;
50 import org.forester.util.ForesterUtil;
51
52 public final class HmmscanPerDomainTableParser {
53
54     private static final String           RETRO                       = "RETRO";
55     private static final String           PHAGE                       = "PHAGE";
56     private static final String           VIR                         = "VIR";
57     private static final String           TRANSPOS                    = "TRANSPOS";
58     private static final String           RV                          = "RV";
59     private static final String           GAG                         = "GAG_";
60     private static final String           HCV                         = "HCV_";
61     private static final String           HERPES                      = "HERPES_";
62     private static final String           BACULO                      = "BACULO_";
63     private static final int              E_VALUE_MAXIMUM_DEFAULT     = -1;
64     private static final ReturnType       RETURN_TYPE_DEFAULT         = ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN;
65     private static final boolean          IGNORE_DUFS_DEFAULT         = false;
66     private static final int              MAX_ALLOWED_OVERLAP_DEFAULT = -1;
67     private final Set<DomainId>           _filter;
68     private final FilterType              _filter_type;
69     private final File                    _input_file;
70     private final String                  _species;
71     private double                        _e_value_maximum;
72     private Map<String, Double>           _individual_score_cutoffs;
73     private boolean                       _ignore_dufs;
74     private boolean                       _ignore_virus_like_ids;
75     private int                           _max_allowed_overlap;
76     private boolean                       _ignore_engulfed_domains;
77     private ReturnType                    _return_type;
78     private int                           _proteins_encountered;
79     private int                           _proteins_ignored_due_to_filter;
80     private int                           _proteins_stored;
81     private int                           _domains_encountered;
82     private int                           _domains_ignored_due_to_duf;
83     private int                           _domains_ignored_due_to_overlap;
84     private int                           _domains_ignored_due_to_e_value;
85     private int                           _domains_ignored_due_to_individual_score_cutoff;
86     private int                           _domains_stored;
87     private SortedSet<DomainId>           _domains_stored_set;
88     private long                          _time;
89     private int                           _domains_ignored_due_to_negative_domain_filter;
90     private Map<String, Integer>          _domains_ignored_due_to_negative_domain_filter_counts_map;
91     private int                           _domains_ignored_due_to_virus_like_id;
92     private Map<String, Integer>          _domains_ignored_due_to_virus_like_id_counts_map;
93     private final INDIVIDUAL_SCORE_CUTOFF _ind_cutoff;
94
95     public HmmscanPerDomainTableParser( final File input_file,
96                                         final String species,
97                                         final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to ) {
98         _input_file = input_file;
99         _species = species;
100         _filter = null;
101         _filter_type = FilterType.NONE;
102         _ind_cutoff = individual_cutoff_applies_to;
103         init();
104     }
105
106     public HmmscanPerDomainTableParser( final File input_file,
107                                         final String species,
108                                         final Set<DomainId> filter,
109                                         final FilterType filter_type,
110                                         final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to ) {
111         _input_file = input_file;
112         _species = species;
113         _filter = filter;
114         _filter_type = filter_type;
115         _ind_cutoff = individual_cutoff_applies_to;
116         init();
117     }
118
119     private void actuallyAddProtein( final List<Protein> proteins, final Protein current_protein ) {
120         final List<Domain> l = current_protein.getProteinDomains();
121         for( final Domain d : l ) {
122             getDomainsStoredSet().add( d.getDomainId() );
123         }
124         proteins.add( current_protein );
125         ++_proteins_stored;
126     }
127
128     private void addProtein( final List<Protein> proteins, Protein current_protein ) {
129         if ( ( getMaxAllowedOverlap() != HmmscanPerDomainTableParser.MAX_ALLOWED_OVERLAP_DEFAULT )
130                 || isIgnoreEngulfedDomains() ) {
131             final int domains_count = current_protein.getNumberOfProteinDomains();
132             current_protein = SurfacingUtil.removeOverlappingDomains( getMaxAllowedOverlap(),
133                                                                       isIgnoreEngulfedDomains(),
134                                                                       current_protein );
135             final int domains_removed = domains_count - current_protein.getNumberOfProteinDomains();
136             _domains_stored -= domains_removed;
137             _domains_ignored_due_to_overlap += domains_removed;
138         }
139         if ( ( getFilterType() == FilterType.POSITIVE_PROTEIN ) || ( getFilterType() == FilterType.NEGATIVE_PROTEIN ) ) {
140             final Set<DomainId> domain_ids_in_protein = new HashSet<DomainId>();
141             for( final Domain d : current_protein.getProteinDomains() ) {
142                 domain_ids_in_protein.add( d.getDomainId() );
143             }
144             domain_ids_in_protein.retainAll( getFilter() );
145             if ( getFilterType() == FilterType.POSITIVE_PROTEIN ) {
146                 if ( domain_ids_in_protein.size() > 0 ) {
147                     actuallyAddProtein( proteins, current_protein );
148                 }
149                 else {
150                     ++_proteins_ignored_due_to_filter;
151                 }
152             }
153             else {
154                 if ( domain_ids_in_protein.size() < 1 ) {
155                     actuallyAddProtein( proteins, current_protein );
156                 }
157                 else {
158                     ++_proteins_ignored_due_to_filter;
159                 }
160             }
161         }
162         else {
163             actuallyAddProtein( proteins, current_protein );
164         }
165     }
166
167     public int getDomainsEncountered() {
168         return _domains_encountered;
169     }
170
171     public int getDomainsIgnoredDueToDuf() {
172         return _domains_ignored_due_to_duf;
173     }
174
175     public int getDomainsIgnoredDueToEval() {
176         return _domains_ignored_due_to_e_value;
177     }
178
179     public int getDomainsIgnoredDueToIndividualScoreCutoff() {
180         return _domains_ignored_due_to_individual_score_cutoff;
181     }
182
183     public int getDomainsIgnoredDueToNegativeDomainFilter() {
184         return _domains_ignored_due_to_negative_domain_filter;
185     }
186
187     public Map<String, Integer> getDomainsIgnoredDueToNegativeDomainFilterCountsMap() {
188         return _domains_ignored_due_to_negative_domain_filter_counts_map;
189     }
190
191     public int getDomainsIgnoredDueToOverlap() {
192         return _domains_ignored_due_to_overlap;
193     }
194
195     public Map<String, Integer> getDomainsIgnoredDueToVirusLikeIdCountsMap() {
196         return _domains_ignored_due_to_virus_like_id_counts_map;
197     }
198
199     public int getDomainsIgnoredDueToVirusLikeIds() {
200         return _domains_ignored_due_to_virus_like_id;
201     }
202
203     public int getDomainsStored() {
204         return _domains_stored;
205     }
206
207     public SortedSet<DomainId> getDomainsStoredSet() {
208         return _domains_stored_set;
209     }
210
211     private double getEValueMaximum() {
212         return _e_value_maximum;
213     }
214
215     private Set<DomainId> getFilter() {
216         return _filter;
217     }
218
219     private FilterType getFilterType() {
220         return _filter_type;
221     }
222
223     public INDIVIDUAL_SCORE_CUTOFF getIndividualCutoffAppliesTo() {
224         return _ind_cutoff;
225     }
226
227     private Map<String, Double> getIndividualScoreCutoffs() {
228         return _individual_score_cutoffs;
229     }
230
231     private File getInputFile() {
232         return _input_file;
233     }
234
235     private int getMaxAllowedOverlap() {
236         return _max_allowed_overlap;
237     }
238
239     public int getProteinsEncountered() {
240         return _proteins_encountered;
241     }
242
243     public int getProteinsIgnoredDueToFilter() {
244         return _proteins_ignored_due_to_filter;
245     }
246
247     public int getProteinsStored() {
248         return _proteins_stored;
249     }
250
251     private ReturnType getReturnType() {
252         return _return_type;
253     }
254
255     private String getSpecies() {
256         return _species;
257     }
258
259     public long getTime() {
260         return _time;
261     }
262
263     private void init() {
264         _e_value_maximum = HmmscanPerDomainTableParser.E_VALUE_MAXIMUM_DEFAULT;
265         setIgnoreDufs( HmmscanPerDomainTableParser.IGNORE_DUFS_DEFAULT );
266         setReturnType( HmmscanPerDomainTableParser.RETURN_TYPE_DEFAULT );
267         _max_allowed_overlap = HmmscanPerDomainTableParser.MAX_ALLOWED_OVERLAP_DEFAULT;
268         setIndividualScoreCutoffs( null );
269         setIgnoreEngulfedDomains( false );
270         setIgnoreVirusLikeIds( false );
271         intitCounts();
272     }
273
274     private void intitCounts() {
275         setDomainsStoredSet( new TreeSet<DomainId>() );
276         setDomainsEncountered( 0 );
277         setProteinsEncountered( 0 );
278         setProteinsIgnoredDueToFilter( 0 );
279         setDomainsIgnoredDueToNegativeFilter( 0 );
280         setDomainsIgnoredDueToDuf( 0 );
281         setDomainsIgnoredDueToEval( 0 );
282         setDomainsIgnoredDueToIndividualScoreCutoff( 0 );
283         setDomainsIgnoredDueToVirusLikeId( 0 );
284         setDomainsIgnoredDueToOverlap( 0 );
285         setDomainsStored( 0 );
286         setProteinsStored( 0 );
287         setTime( 0 );
288         setDomainsIgnoredDueToVirusLikeIdCountsMap( new TreeMap<String, Integer>() );
289         setDomainsIgnoredDueToNegativeDomainFilterCountsMap( new TreeMap<String, Integer>() );
290     }
291
292     private boolean isIgnoreDufs() {
293         return _ignore_dufs;
294     }
295
296     private boolean isIgnoreEngulfedDomains() {
297         return _ignore_engulfed_domains;
298     }
299
300     private boolean isIgnoreVirusLikeIds() {
301         return _ignore_virus_like_ids;
302     }
303
304     public List<Protein> parse() throws IOException {
305         if ( ( getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.NONE )
306                 && ( ( getIndividualScoreCutoffs() == null ) || ( getIndividualScoreCutoffs().size() < 1 ) ) ) {
307             throw new RuntimeException( "attempt to use individual cuttoffs with having set them" );
308         }
309         intitCounts();
310         final Set<String> prev_queries = new HashSet<String>();
311         final String error = ForesterUtil.isReadableFile( getInputFile() );
312         if ( !ForesterUtil.isEmpty( error ) ) {
313             throw new IOException( error );
314         }
315         final BufferedReader br = new BufferedReader( new FileReader( getInputFile() ) );
316         String line;
317         final List<Protein> proteins = new ArrayList<Protein>();
318         Protein current_protein = null;
319         int line_number = 0;
320         final long start_time = new Date().getTime();
321         String prev_query = "";
322         int prev_qlen = -1;
323         while ( ( line = br.readLine() ) != null ) {
324             line_number++;
325             if ( ForesterUtil.isEmpty( line ) || line.startsWith( "#" ) ) {
326                 continue;
327             }
328             // 0                    1           2    3                      4           5      6        7      8      9  10  11        12        13     14    15      16  17      18  19      20  21  22      
329             // #                                                                              --- full sequence --- -------------- this domain -------------   hmm coord   ali coord   env coord
330             // # target name        accession   tlen query name             accession   qlen   E-value  score  bias   #  of  c-Evalue  i-Evalue  score  bias  from    to  from    to  from    to  acc description of target
331             // #------------------- ---------- -----   -------------------- ---------- ----- --------- ------ ----- --- --- --------- --------- ------ ----- ----- ----- ----- ----- ----- ----- ---- ---------------------
332             // Ion_trans            PF00520.24   201 jgi|Nemve1|7|gw.28.1.1 -           1604  6.3e-169  557.4  95.3   1   4   1.5e-41     3e-38  130.8  11.1     3   171   140   307   139   346 0.81 Ion transport protein
333             // Ion_trans            PF00520.24   201 jgi|Nemve1|7|gw.28.1.1 -           1604  6.3e-169  557.4  95.3   2   4   9.1e-45   1.8e-41  141.3  13.1     4   200   479   664   476   665 0.97 Ion transport protein
334             // Ion_trans            PF00520.24   201 jgi|Nemve1|7|gw.28.1.1 -           1604  6.3e-169  557.4  95.3   3   4   5.2e-45     1e-41  142.1  14.0     1   201   900  1117   900  1117 0.96 Ion transport protein
335             // Ion_trans            PF00520.24   201 jgi|Nemve1|7|gw.28.1.1 -           1604  6.3e-169  557.4  95.3   4   4   9.2e-51   1.8e-47  160.9  11.3     1   201  1217  1423  1217  1423 0.97 Ion transport protein
336             // PKD_channel          PF08016.5    426 jgi|Nemve1|7|gw.28.1.1 -           1604   5.9e-19   67.4  70.5   1   8   0.00053       1.1    7.3   0.4   220   264   142   191   134   200 0.73 Polycystin cation channel
337             final String tokens[] = line.split( "\\s+" );
338             final String target_id = tokens[ 0 ];
339             final String target_acc = tokens[ 1 ];
340             final int tlen = parseInt( tokens[ 2 ], line_number, "tlen" );
341             final String query = tokens[ 3 ];
342             final String query_acc = tokens[ 4 ];
343             final int qlen = parseInt( tokens[ 5 ], line_number, "qlen" );
344             final double fs_e_value = parseDouble( tokens[ 6 ], line_number, "E-value" );
345             final double fs_score = parseDouble( tokens[ 7 ], line_number, "score" );
346             final int domain_number = parseInt( tokens[ 9 ], line_number, "count" );
347             final int total_domains = parseInt( tokens[ 10 ], line_number, "total" );
348             final double c_e_value = parseDouble( tokens[ 11 ], line_number, "c-Evalue" );
349             final double i_e_value = parseDouble( tokens[ 12 ], line_number, "i-Evalue" );
350             final double domain_score = parseDouble( tokens[ 13 ], line_number, "score" );
351             final int hmm_from = parseInt( tokens[ 15 ], line_number, "hmm from" );
352             final int hmm_to = parseInt( tokens[ 16 ], line_number, "hmm to" );
353             final int ali_from = parseInt( tokens[ 17 ], line_number, "ali from" );
354             final int ali_to = parseInt( tokens[ 18 ], line_number, "ali to" );
355             final int env_from = parseInt( tokens[ 19 ], line_number, "env from" );
356             final int env_to = parseInt( tokens[ 20 ], line_number, "env to" );
357             ++_domains_encountered;
358             if ( !query.equals( prev_query ) || ( qlen != prev_qlen ) ) {
359                 if ( query.equals( prev_query ) ) {
360                     throw new IOException( "more than one protein named [" + query + "]" + " lengths: " + qlen + ", "
361                             + prev_qlen );
362                 }
363                 if ( prev_queries.contains( query ) ) {
364                     throw new IOException( "more than one protein named [" + query + "]" );
365                 }
366                 prev_query = query;
367                 prev_qlen = qlen;
368                 prev_queries.add( query );
369                 if ( ( current_protein != null ) && ( current_protein.getProteinDomains().size() > 0 ) ) {
370                     addProtein( proteins, current_protein );
371                 }
372                 if ( getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ) {
373                     current_protein = new BasicProtein( query, getSpecies() );
374                 }
375                 else {
376                     throw new IllegalArgumentException( "unknown return type" );
377                 }
378             }
379             boolean failed_cutoff = false;
380             if ( getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.NONE ) {
381                 if ( getIndividualScoreCutoffs().containsKey( target_id ) ) {
382                     final double cutoff = getIndividualScoreCutoffs().get( target_id );
383                     if ( getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.FULL_SEQUENCE ) {
384                         if ( fs_score < cutoff ) {
385                             failed_cutoff = true;
386                         }
387                     }
388                     else if ( getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.DOMAIN ) {
389                         if ( domain_score < cutoff ) {
390                             failed_cutoff = true;
391                         }
392                     }
393                 }
394                 else {
395                     throw new IOException( "could not find a score cutoff value for domain id \"" + target_id
396                             + "\" [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
397                 }
398             }
399             final String uc_id = target_id.toUpperCase();
400             if ( failed_cutoff ) {
401                 ++_domains_ignored_due_to_individual_score_cutoff;
402             }
403             else if ( ali_from == ali_to ) {
404                 //Ignore
405             }
406             else if ( ( getEValueMaximum() != HmmscanPerDomainTableParser.E_VALUE_MAXIMUM_DEFAULT )
407                     && ( fs_e_value > getEValueMaximum() ) ) {
408                 ++_domains_ignored_due_to_e_value;
409             }
410             else if ( isIgnoreDufs() && uc_id.startsWith( "DUF" ) ) {
411                 ++_domains_ignored_due_to_duf;
412             }
413             else if ( isIgnoreVirusLikeIds()
414                     && ( uc_id.contains( VIR ) || uc_id.contains( PHAGE ) || uc_id.contains( RETRO )
415                             || uc_id.contains( TRANSPOS ) || uc_id.startsWith( RV ) || uc_id.startsWith( GAG )
416                             || uc_id.startsWith( HCV ) || uc_id.startsWith( HERPES ) || uc_id.startsWith( BACULO ) ) ) {
417                 ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToVirusLikeIdCountsMap(), target_id );
418                 ++_domains_ignored_due_to_virus_like_id;
419             }
420             else if ( ( getFilterType() == FilterType.NEGATIVE_DOMAIN )
421                     && getFilter().contains( new DomainId( target_id ) ) ) {
422                 ++_domains_ignored_due_to_negative_domain_filter;
423                 ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToNegativeDomainFilterCountsMap(), target_id );
424             }
425             else {
426                 try {
427                     final Domain pd = new BasicDomain( target_id,
428                                                        ali_from,
429                                                        ali_to,
430                                                        ( short ) domain_number,
431                                                        ( short ) total_domains,
432                                                        fs_e_value,
433                                                        fs_score,
434                                                        i_e_value,
435                                                        domain_score );
436                     current_protein.addProteinDomain( pd );
437                 }
438                 catch ( final IllegalArgumentException e ) {
439                     throw new IOException( "problem with domain parsing at line " + line_number + "[" + line + "]: "
440                             + e.getMessage() );
441                 }
442                 ++_domains_stored;
443             }
444         } // while ( ( line = br.readLine() ) != null )
445         if ( ( current_protein != null ) && ( current_protein.getProteinDomains().size() > 0 ) ) {
446             addProtein( proteins, current_protein );
447         }
448         setProteinsEncountered( prev_queries.size() );
449         setTime( new Date().getTime() - start_time );
450         return proteins;
451     }
452
453     private double parseDouble( final String double_str, final int line_number, final String label ) throws IOException {
454         double d = -1;
455         try {
456             d = Double.valueOf( double_str ).doubleValue();
457         }
458         catch ( final NumberFormatException e ) {
459             throw new IOException( "could not parse \" +label + \" from \"" + double_str + "\" [line " + line_number
460                     + "] in [" + getInputFile().getCanonicalPath() + "]" );
461         }
462         return d;
463     }
464
465     private int parseInt( final String double_str, final int line_number, final String label ) throws IOException {
466         int i = -1;
467         try {
468             i = Integer.valueOf( double_str ).intValue();
469         }
470         catch ( final NumberFormatException e ) {
471             throw new IOException( "could not parse \"" + label + "\" from \"" + double_str + "\" [line " + line_number
472                     + "] in [" + getInputFile().getCanonicalPath() + "]" );
473         }
474         return i;
475     }
476
477     private void setDomainsEncountered( final int domains_encountered ) {
478         _domains_encountered = domains_encountered;
479     }
480
481     private void setDomainsIgnoredDueToDuf( final int domains_ignored_due_to_duf ) {
482         _domains_ignored_due_to_duf = domains_ignored_due_to_duf;
483     }
484
485     private void setDomainsIgnoredDueToEval( final int domains_ignored_due_to_e_value ) {
486         _domains_ignored_due_to_e_value = domains_ignored_due_to_e_value;
487     }
488
489     private void setDomainsIgnoredDueToIndividualScoreCutoff( final int domains_ignored_due_to_individual_score_cutoff ) {
490         _domains_ignored_due_to_individual_score_cutoff = domains_ignored_due_to_individual_score_cutoff;
491     }
492
493     private void setDomainsIgnoredDueToNegativeDomainFilterCountsMap( final Map<String, Integer> domains_ignored_due_to_negative_domain_filter_counts_map ) {
494         _domains_ignored_due_to_negative_domain_filter_counts_map = domains_ignored_due_to_negative_domain_filter_counts_map;
495     }
496
497     private void setDomainsIgnoredDueToNegativeFilter( final int domains_ignored_due_to_negative_domain_filter ) {
498         _domains_ignored_due_to_negative_domain_filter = domains_ignored_due_to_negative_domain_filter;
499     }
500
501     private void setDomainsIgnoredDueToOverlap( final int domains_ignored_due_to_overlap ) {
502         _domains_ignored_due_to_overlap = domains_ignored_due_to_overlap;
503     }
504
505     private void setDomainsIgnoredDueToVirusLikeId( final int i ) {
506         _domains_ignored_due_to_virus_like_id = i;
507     }
508
509     private void setDomainsIgnoredDueToVirusLikeIdCountsMap( final Map<String, Integer> domains_ignored_due_to_virus_like_id_counts_map ) {
510         _domains_ignored_due_to_virus_like_id_counts_map = domains_ignored_due_to_virus_like_id_counts_map;
511     }
512
513     private void setDomainsStored( final int domains_stored ) {
514         _domains_stored = domains_stored;
515     }
516
517     private void setDomainsStoredSet( final SortedSet<DomainId> _storeddomains_stored ) {
518         _domains_stored_set = _storeddomains_stored;
519     }
520
521     public void setEValueMaximum( final double e_value_maximum ) {
522         if ( e_value_maximum < 0.0 ) {
523             throw new IllegalArgumentException( "attempt to set the maximum E-value to a negative value" );
524         }
525         _e_value_maximum = e_value_maximum;
526     }
527
528     public void setIgnoreDufs( final boolean ignore_dufs ) {
529         _ignore_dufs = ignore_dufs;
530     }
531
532     /**
533      * To ignore domains which are completely engulfed by domains (individual
534      * ones or stretches of overlapping ones) with better support values.
535      * 
536      * 
537      * @param ignored_engulfed_domains
538      */
539     public void setIgnoreEngulfedDomains( final boolean ignore_engulfed_domains ) {
540         _ignore_engulfed_domains = ignore_engulfed_domains;
541     }
542
543     public void setIgnoreVirusLikeIds( final boolean ignore_virus_like_ids ) {
544         _ignore_virus_like_ids = ignore_virus_like_ids;
545     }
546
547     /**
548      * Sets the individual  score cutoff values (for example, gathering
549      * thresholds from Pfam). Domain ids are the keys, cutoffs the values.
550      * 
551      * @param individual_score_cutoffs
552      */
553     public void setIndividualScoreCutoffs( final Map<String, Double> individual_score_cutoffs ) {
554         _individual_score_cutoffs = individual_score_cutoffs;
555     }
556
557     public void setMaxAllowedOverlap( final int max_allowed_overlap ) {
558         if ( max_allowed_overlap < 0 ) {
559             throw new IllegalArgumentException( "Attempt to set max allowed overlap to less than zero." );
560         }
561         _max_allowed_overlap = max_allowed_overlap;
562     }
563
564     private void setProteinsEncountered( final int proteins_encountered ) {
565         _proteins_encountered = proteins_encountered;
566     }
567
568     private void setProteinsIgnoredDueToFilter( final int proteins_ignored_due_to_filter ) {
569         _proteins_ignored_due_to_filter = proteins_ignored_due_to_filter;
570     }
571
572     private void setProteinsStored( final int proteins_stored ) {
573         _proteins_stored = proteins_stored;
574     }
575
576     public void setReturnType( final ReturnType return_type ) {
577         _return_type = return_type;
578     }
579
580     private void setTime( final long time ) {
581         _time = time;
582     }
583
584     public static enum FilterType {
585         NONE, POSITIVE_PROTEIN, NEGATIVE_PROTEIN, NEGATIVE_DOMAIN
586     }
587
588     static public enum INDIVIDUAL_SCORE_CUTOFF {
589         FULL_SEQUENCE, DOMAIN, NONE;
590     }
591
592     public static enum ReturnType {
593         UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN
594     }
595 }