hDGE_amylase misspelled issue
[jalview.git] / forester / java / src / org / forester / io / parsers / HmmscanPerDomainTableParser.java
1 // $Id:
2 // $
3 //
4 // FORESTER -- software libraries and applications
5 // for evolutionary biology research and applications.
6 //
7 // Copyright (C) 2008-2009 Christian M. Zmasek
8 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // All rights reserved
10 //
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
15 //
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
20 //
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 //
25 // Contact: phylosoft @ gmail . com
26 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
27
28 package org.forester.io.parsers;
29
30 import java.io.BufferedReader;
31 import java.io.File;
32 import java.io.FileReader;
33 import java.io.IOException;
34 import java.util.ArrayList;
35 import java.util.Date;
36 import java.util.HashSet;
37 import java.util.List;
38 import java.util.Map;
39 import java.util.Set;
40 import java.util.SortedSet;
41 import java.util.TreeMap;
42 import java.util.TreeSet;
43
44 import org.forester.protein.BasicDomain;
45 import org.forester.protein.BasicProtein;
46 import org.forester.protein.Domain;
47 import org.forester.protein.Protein;
48 import org.forester.surfacing.SurfacingUtil;
49 import org.forester.util.ForesterUtil;
50
51 public final class HmmscanPerDomainTableParser {
52
53     private static final String           RETRO                       = "RETRO";
54     private static final String           PHAGE                       = "PHAGE";
55     private static final String           VIR                         = "VIR";
56     private static final String           TRANSPOS                    = "TRANSPOS";
57     private static final String           RV                          = "RV";
58     private static final String           GAG                         = "GAG_";
59     private static final String           HCV                         = "HCV_";
60     private static final String           HERPES                      = "HERPES_";
61     private static final String           BACULO                      = "BACULO_";
62     private static final int              E_VALUE_MAXIMUM_DEFAULT     = -1;
63     private static final ReturnType       RETURN_TYPE_DEFAULT         = ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN;
64     private static final boolean          IGNORE_DUFS_DEFAULT         = false;
65     private static final int              MAX_ALLOWED_OVERLAP_DEFAULT = -1;
66     private static final boolean          IGNORE_REPLACED_RRMS        = false;
67     private static final boolean          IGNORE_hDGE_amylase         = true;                                                      //TODO eventually remove me, added 10/22/13
68     private final Set<String>             _filter;
69     private final FilterType              _filter_type;
70     private final File                    _input_file;
71     private final String                  _species;
72     private double                        _e_value_maximum;
73     private Map<String, Double>           _individual_score_cutoffs;
74     private boolean                       _ignore_dufs;
75     private boolean                       _ignore_virus_like_ids;
76     private int                           _max_allowed_overlap;
77     private boolean                       _ignore_engulfed_domains;
78     private ReturnType                    _return_type;
79     private int                           _proteins_encountered;
80     private int                           _proteins_ignored_due_to_filter;
81     private int                           _proteins_stored;
82     private int                           _domains_encountered;
83     private int                           _domains_ignored_due_to_duf;
84     private int                           _domains_ignored_due_to_overlap;
85     private int                           _domains_ignored_due_to_e_value;
86     private int                           _domains_ignored_due_to_individual_score_cutoff;
87     private int                           _domains_stored;
88     private SortedSet<String>             _domains_stored_set;
89     private long                          _time;
90     private int                           _domains_ignored_due_to_negative_domain_filter;
91     private Map<String, Integer>          _domains_ignored_due_to_negative_domain_filter_counts_map;
92     private int                           _domains_ignored_due_to_virus_like_id;
93     private Map<String, Integer>          _domains_ignored_due_to_virus_like_id_counts_map;
94     private final INDIVIDUAL_SCORE_CUTOFF _ind_cutoff;
95     private final boolean                 _allow_proteins_with_same_name;
96
97     public HmmscanPerDomainTableParser( final File input_file,
98                                         final String species,
99                                         final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to ) {
100         _input_file = input_file;
101         _species = species;
102         _filter = null;
103         _filter_type = FilterType.NONE;
104         _ind_cutoff = individual_cutoff_applies_to;
105         _allow_proteins_with_same_name = false;
106         init();
107     }
108
109     public HmmscanPerDomainTableParser( final File input_file,
110                                         final String species,
111                                         final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to,
112                                         final boolean allow_proteins_with_same_name ) {
113         _input_file = input_file;
114         _species = species;
115         _filter = null;
116         _filter_type = FilterType.NONE;
117         _ind_cutoff = individual_cutoff_applies_to;
118         _allow_proteins_with_same_name = allow_proteins_with_same_name;
119         init();
120     }
121
122     public HmmscanPerDomainTableParser( final File input_file,
123                                         final String species,
124                                         final Set<String> filter,
125                                         final FilterType filter_type,
126                                         final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to ) {
127         _input_file = input_file;
128         _species = species;
129         _filter = filter;
130         _filter_type = filter_type;
131         _ind_cutoff = individual_cutoff_applies_to;
132         _allow_proteins_with_same_name = false;
133         init();
134     }
135
136     public HmmscanPerDomainTableParser( final File input_file,
137                                         final String species,
138                                         final Set<String> filter,
139                                         final FilterType filter_type,
140                                         final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to,
141                                         final boolean allow_proteins_with_same_name ) {
142         _input_file = input_file;
143         _species = species;
144         _filter = filter;
145         _filter_type = filter_type;
146         _ind_cutoff = individual_cutoff_applies_to;
147         _allow_proteins_with_same_name = allow_proteins_with_same_name;
148         init();
149     }
150
151     public boolean isAllowProteinsWithSameName() {
152         return _allow_proteins_with_same_name;
153     }
154
155     private void actuallyAddProtein( final List<Protein> proteins, final Protein current_protein ) {
156         final List<Domain> l = current_protein.getProteinDomains();
157         for( final Domain d : l ) {
158             getDomainsStoredSet().add( d.getDomainId() );
159         }
160         proteins.add( current_protein );
161         ++_proteins_stored;
162     }
163
164     private void addProtein( final List<Protein> proteins, Protein current_protein ) {
165         if ( ( getMaxAllowedOverlap() != HmmscanPerDomainTableParser.MAX_ALLOWED_OVERLAP_DEFAULT )
166                 || isIgnoreEngulfedDomains() ) {
167             final int domains_count = current_protein.getNumberOfProteinDomains();
168             current_protein = SurfacingUtil.removeOverlappingDomains( getMaxAllowedOverlap(),
169                                                                       isIgnoreEngulfedDomains(),
170                                                                       current_protein );
171             final int domains_removed = domains_count - current_protein.getNumberOfProteinDomains();
172             _domains_stored -= domains_removed;
173             _domains_ignored_due_to_overlap += domains_removed;
174         }
175         if ( ( getFilterType() == FilterType.POSITIVE_PROTEIN ) || ( getFilterType() == FilterType.NEGATIVE_PROTEIN ) ) {
176             final Set<String> domain_ids_in_protein = new HashSet<String>();
177             for( final Domain d : current_protein.getProteinDomains() ) {
178                 domain_ids_in_protein.add( d.getDomainId() );
179             }
180             domain_ids_in_protein.retainAll( getFilter() );
181             if ( getFilterType() == FilterType.POSITIVE_PROTEIN ) {
182                 if ( domain_ids_in_protein.size() > 0 ) {
183                     actuallyAddProtein( proteins, current_protein );
184                 }
185                 else {
186                     ++_proteins_ignored_due_to_filter;
187                 }
188             }
189             else {
190                 if ( domain_ids_in_protein.size() < 1 ) {
191                     actuallyAddProtein( proteins, current_protein );
192                 }
193                 else {
194                     ++_proteins_ignored_due_to_filter;
195                 }
196             }
197         }
198         else {
199             actuallyAddProtein( proteins, current_protein );
200         }
201     }
202
203     public int getDomainsEncountered() {
204         return _domains_encountered;
205     }
206
207     public int getDomainsIgnoredDueToDuf() {
208         return _domains_ignored_due_to_duf;
209     }
210
211     public int getDomainsIgnoredDueToEval() {
212         return _domains_ignored_due_to_e_value;
213     }
214
215     public int getDomainsIgnoredDueToIndividualScoreCutoff() {
216         return _domains_ignored_due_to_individual_score_cutoff;
217     }
218
219     public int getDomainsIgnoredDueToNegativeDomainFilter() {
220         return _domains_ignored_due_to_negative_domain_filter;
221     }
222
223     public Map<String, Integer> getDomainsIgnoredDueToNegativeDomainFilterCountsMap() {
224         return _domains_ignored_due_to_negative_domain_filter_counts_map;
225     }
226
227     public int getDomainsIgnoredDueToOverlap() {
228         return _domains_ignored_due_to_overlap;
229     }
230
231     public Map<String, Integer> getDomainsIgnoredDueToVirusLikeIdCountsMap() {
232         return _domains_ignored_due_to_virus_like_id_counts_map;
233     }
234
235     public int getDomainsIgnoredDueToVirusLikeIds() {
236         return _domains_ignored_due_to_virus_like_id;
237     }
238
239     public int getDomainsStored() {
240         return _domains_stored;
241     }
242
243     public SortedSet<String> getDomainsStoredSet() {
244         return _domains_stored_set;
245     }
246
247     private double getEValueMaximum() {
248         return _e_value_maximum;
249     }
250
251     private Set<String> getFilter() {
252         return _filter;
253     }
254
255     private FilterType getFilterType() {
256         return _filter_type;
257     }
258
259     public INDIVIDUAL_SCORE_CUTOFF getIndividualCutoffAppliesTo() {
260         return _ind_cutoff;
261     }
262
263     private Map<String, Double> getIndividualScoreCutoffs() {
264         return _individual_score_cutoffs;
265     }
266
267     private File getInputFile() {
268         return _input_file;
269     }
270
271     private int getMaxAllowedOverlap() {
272         return _max_allowed_overlap;
273     }
274
275     public int getProteinsEncountered() {
276         return _proteins_encountered;
277     }
278
279     public int getProteinsIgnoredDueToFilter() {
280         return _proteins_ignored_due_to_filter;
281     }
282
283     public int getProteinsStored() {
284         return _proteins_stored;
285     }
286
287     private ReturnType getReturnType() {
288         return _return_type;
289     }
290
291     private String getSpecies() {
292         return _species;
293     }
294
295     public long getTime() {
296         return _time;
297     }
298
299     private void init() {
300         _e_value_maximum = HmmscanPerDomainTableParser.E_VALUE_MAXIMUM_DEFAULT;
301         setIgnoreDufs( HmmscanPerDomainTableParser.IGNORE_DUFS_DEFAULT );
302         setReturnType( HmmscanPerDomainTableParser.RETURN_TYPE_DEFAULT );
303         _max_allowed_overlap = HmmscanPerDomainTableParser.MAX_ALLOWED_OVERLAP_DEFAULT;
304         setIndividualScoreCutoffs( null );
305         setIgnoreEngulfedDomains( false );
306         setIgnoreVirusLikeIds( false );
307         intitCounts();
308     }
309
310     private void intitCounts() {
311         setDomainsStoredSet( new TreeSet<String>() );
312         setDomainsEncountered( 0 );
313         setProteinsEncountered( 0 );
314         setProteinsIgnoredDueToFilter( 0 );
315         setDomainsIgnoredDueToNegativeFilter( 0 );
316         setDomainsIgnoredDueToDuf( 0 );
317         setDomainsIgnoredDueToEval( 0 );
318         setDomainsIgnoredDueToIndividualScoreCutoff( 0 );
319         setDomainsIgnoredDueToVirusLikeId( 0 );
320         setDomainsIgnoredDueToOverlap( 0 );
321         setDomainsStored( 0 );
322         setProteinsStored( 0 );
323         setTime( 0 );
324         setDomainsIgnoredDueToVirusLikeIdCountsMap( new TreeMap<String, Integer>() );
325         setDomainsIgnoredDueToNegativeDomainFilterCountsMap( new TreeMap<String, Integer>() );
326     }
327
328     private boolean isIgnoreDufs() {
329         return _ignore_dufs;
330     }
331
332     private boolean isIgnoreEngulfedDomains() {
333         return _ignore_engulfed_domains;
334     }
335
336     private boolean isIgnoreVirusLikeIds() {
337         return _ignore_virus_like_ids;
338     }
339
340     public List<Protein> parse() throws IOException {
341         if ( ( getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.NONE )
342                 && ( ( getIndividualScoreCutoffs() == null ) || ( getIndividualScoreCutoffs().size() < 1 ) ) ) {
343             throw new RuntimeException( "attempt to use individual cuttoffs with having set them" );
344         }
345         intitCounts();
346         final Set<String> prev_queries = new HashSet<String>();
347         final String error = ForesterUtil.isReadableFile( getInputFile() );
348         if ( !ForesterUtil.isEmpty( error ) ) {
349             throw new IOException( error );
350         }
351         final BufferedReader br = new BufferedReader( new FileReader( getInputFile() ) );
352         String line;
353         final List<Protein> proteins = new ArrayList<Protein>();
354         Protein current_protein = null;
355         int line_number = 0;
356         final long start_time = new Date().getTime();
357         String prev_query = "";
358         int prev_qlen = -1;
359         while ( ( line = br.readLine() ) != null ) {
360             line_number++;
361             if ( ForesterUtil.isEmpty( line ) || line.startsWith( "#" ) ) {
362                 continue;
363             }
364             // 0                    1           2    3                      4           5      6        7      8      9  10  11        12        13     14    15      16  17      18  19      20  21  22      
365             // #                                                                              --- full sequence --- -------------- this domain -------------   hmm coord   ali coord   env coord
366             // # target name        accession   tlen query name             accession   qlen   E-value  score  bias   #  of  c-Evalue  i-Evalue  score  bias  from    to  from    to  from    to  acc description of target
367             // #------------------- ---------- -----   -------------------- ---------- ----- --------- ------ ----- --- --- --------- --------- ------ ----- ----- ----- ----- ----- ----- ----- ---- ---------------------
368             // Ion_trans            PF00520.24   201 jgi|Nemve1|7|gw.28.1.1 -           1604  6.3e-169  557.4  95.3   1   4   1.5e-41     3e-38  130.8  11.1     3   171   140   307   139   346 0.81 Ion transport protein
369             // Ion_trans            PF00520.24   201 jgi|Nemve1|7|gw.28.1.1 -           1604  6.3e-169  557.4  95.3   2   4   9.1e-45   1.8e-41  141.3  13.1     4   200   479   664   476   665 0.97 Ion transport protein
370             // Ion_trans            PF00520.24   201 jgi|Nemve1|7|gw.28.1.1 -           1604  6.3e-169  557.4  95.3   3   4   5.2e-45     1e-41  142.1  14.0     1   201   900  1117   900  1117 0.96 Ion transport protein
371             // Ion_trans            PF00520.24   201 jgi|Nemve1|7|gw.28.1.1 -           1604  6.3e-169  557.4  95.3   4   4   9.2e-51   1.8e-47  160.9  11.3     1   201  1217  1423  1217  1423 0.97 Ion transport protein
372             // PKD_channel          PF08016.5    426 jgi|Nemve1|7|gw.28.1.1 -           1604   5.9e-19   67.4  70.5   1   8   0.00053       1.1    7.3   0.4   220   264   142   191   134   200 0.73 Polycystin cation channel
373             final String tokens[] = line.split( "\\s+" );
374             final String target_id = tokens[ 0 ];
375             final String target_acc = tokens[ 1 ];
376             final int tlen = parseInt( tokens[ 2 ], line_number, "tlen" );
377             final String query = tokens[ 3 ];
378             final String query_acc = tokens[ 4 ];
379             final int qlen = parseInt( tokens[ 5 ], line_number, "qlen" );
380             final double fs_e_value = parseDouble( tokens[ 6 ], line_number, "E-value" );
381             final double fs_score = parseDouble( tokens[ 7 ], line_number, "score" );
382             final int domain_number = parseInt( tokens[ 9 ], line_number, "count" );
383             final int total_domains = parseInt( tokens[ 10 ], line_number, "total" );
384             final double c_e_value = parseDouble( tokens[ 11 ], line_number, "c-Evalue" );
385             final double i_e_value = parseDouble( tokens[ 12 ], line_number, "i-Evalue" );
386             final double domain_score = parseDouble( tokens[ 13 ], line_number, "score" );
387             final int hmm_from = parseInt( tokens[ 15 ], line_number, "hmm from" );
388             final int hmm_to = parseInt( tokens[ 16 ], line_number, "hmm to" );
389             final int ali_from = parseInt( tokens[ 17 ], line_number, "ali from" );
390             final int ali_to = parseInt( tokens[ 18 ], line_number, "ali to" );
391             final int env_from = parseInt( tokens[ 19 ], line_number, "env from" );
392             final int env_to = parseInt( tokens[ 20 ], line_number, "env to" );
393             ++_domains_encountered;
394             if ( !query.equals( prev_query ) || ( qlen != prev_qlen ) ) {
395                 if ( !isAllowProteinsWithSameName() ) {
396                     if ( query.equals( prev_query ) ) {
397                         throw new IOException( "more than one protein named [" + query + "]" + " lengths: " + qlen
398                                 + ", " + prev_qlen );
399                     }
400                     if ( prev_queries.contains( query ) ) {
401                         throw new IOException( "more than one protein named [" + query + "]" );
402                     }
403                 }
404                 prev_query = query;
405                 prev_qlen = qlen;
406                 prev_queries.add( query );
407                 if ( ( current_protein != null ) && ( current_protein.getProteinDomains().size() > 0 ) ) {
408                     addProtein( proteins, current_protein );
409                 }
410                 if ( getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ) {
411                     current_protein = new BasicProtein( query, getSpecies(), qlen );
412                 }
413                 else {
414                     throw new IllegalArgumentException( "unknown return type" );
415                 }
416             }
417             boolean failed_cutoff = false;
418             if ( getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.NONE ) {
419                 if ( getIndividualScoreCutoffs().containsKey( target_id ) ) {
420                     final double cutoff = getIndividualScoreCutoffs().get( target_id );
421                     if ( getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.FULL_SEQUENCE ) {
422                         if ( fs_score < cutoff ) {
423                             failed_cutoff = true;
424                         }
425                     }
426                     else if ( getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.DOMAIN ) {
427                         if ( domain_score < cutoff ) {
428                             failed_cutoff = true;
429                         }
430                     }
431                 }
432                 else {
433                     throw new IOException( "could not find a score cutoff value for domain id \"" + target_id
434                             + "\" [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
435                 }
436             }
437             final String uc_id = target_id.toUpperCase();
438             if ( failed_cutoff ) {
439                 ++_domains_ignored_due_to_individual_score_cutoff;
440             }
441             else if ( ali_from == ali_to ) {
442                 //Ignore
443             }
444             else if ( ( getEValueMaximum() != HmmscanPerDomainTableParser.E_VALUE_MAXIMUM_DEFAULT )
445                     && ( fs_e_value > getEValueMaximum() ) ) {
446                 ++_domains_ignored_due_to_e_value;
447             }
448             else if ( isIgnoreDufs() && uc_id.startsWith( "DUF" ) ) {
449                 ++_domains_ignored_due_to_duf;
450             }
451             else if ( IGNORE_REPLACED_RRMS
452                     && ( uc_id.contains( "RRM_1" ) || uc_id.contains( "RRM_3" ) || uc_id.contains( "RRM_5" ) || uc_id
453                             .contains( "RRM_6" ) ) ) {
454             }
455             else if ( IGNORE_hDGE_amylase && ( uc_id.equals( "hDGE_amylase" ) ) ) {
456             }
457             else if ( isIgnoreVirusLikeIds()
458                     && ( uc_id.contains( VIR ) || uc_id.contains( PHAGE ) || uc_id.contains( RETRO )
459                             || uc_id.contains( TRANSPOS ) || uc_id.startsWith( RV ) || uc_id.startsWith( GAG )
460                             || uc_id.startsWith( HCV ) || uc_id.startsWith( HERPES ) || uc_id.startsWith( BACULO ) ) ) {
461                 ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToVirusLikeIdCountsMap(), target_id );
462                 ++_domains_ignored_due_to_virus_like_id;
463             }
464             else if ( ( getFilterType() == FilterType.NEGATIVE_DOMAIN ) && getFilter().contains( target_id ) ) {
465                 ++_domains_ignored_due_to_negative_domain_filter;
466                 ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToNegativeDomainFilterCountsMap(), target_id );
467             }
468             else {
469                 try {
470                     final Domain pd = new BasicDomain( target_id,
471                                                        ali_from,
472                                                        ali_to,
473                                                        ( short ) domain_number,
474                                                        ( short ) total_domains,
475                                                        fs_e_value,
476                                                        fs_score,
477                                                        i_e_value,
478                                                        domain_score );
479                     current_protein.addProteinDomain( pd );
480                 }
481                 catch ( final IllegalArgumentException e ) {
482                     throw new IOException( "problem with domain parsing at line " + line_number + "[" + line + "]: "
483                             + e.getMessage() );
484                 }
485                 ++_domains_stored;
486             }
487         } // while ( ( line = br.readLine() ) != null )
488         if ( ( current_protein != null ) && ( current_protein.getProteinDomains().size() > 0 ) ) {
489             addProtein( proteins, current_protein );
490         }
491         setProteinsEncountered( prev_queries.size() );
492         setTime( new Date().getTime() - start_time );
493         return proteins;
494     }
495
496     private double parseDouble( final String double_str, final int line_number, final String label ) throws IOException {
497         double d = -1;
498         try {
499             d = Double.valueOf( double_str ).doubleValue();
500         }
501         catch ( final NumberFormatException e ) {
502             throw new IOException( "could not parse \" +label + \" from \"" + double_str + "\" [line " + line_number
503                     + "] in [" + getInputFile().getCanonicalPath() + "]" );
504         }
505         return d;
506     }
507
508     private int parseInt( final String double_str, final int line_number, final String label ) throws IOException {
509         int i = -1;
510         try {
511             i = Integer.valueOf( double_str ).intValue();
512         }
513         catch ( final NumberFormatException e ) {
514             throw new IOException( "could not parse \"" + label + "\" from \"" + double_str + "\" [line " + line_number
515                     + "] in [" + getInputFile().getCanonicalPath() + "]" );
516         }
517         return i;
518     }
519
520     private void setDomainsEncountered( final int domains_encountered ) {
521         _domains_encountered = domains_encountered;
522     }
523
524     private void setDomainsIgnoredDueToDuf( final int domains_ignored_due_to_duf ) {
525         _domains_ignored_due_to_duf = domains_ignored_due_to_duf;
526     }
527
528     private void setDomainsIgnoredDueToEval( final int domains_ignored_due_to_e_value ) {
529         _domains_ignored_due_to_e_value = domains_ignored_due_to_e_value;
530     }
531
532     private void setDomainsIgnoredDueToIndividualScoreCutoff( final int domains_ignored_due_to_individual_score_cutoff ) {
533         _domains_ignored_due_to_individual_score_cutoff = domains_ignored_due_to_individual_score_cutoff;
534     }
535
536     private void setDomainsIgnoredDueToNegativeDomainFilterCountsMap( final Map<String, Integer> domains_ignored_due_to_negative_domain_filter_counts_map ) {
537         _domains_ignored_due_to_negative_domain_filter_counts_map = domains_ignored_due_to_negative_domain_filter_counts_map;
538     }
539
540     private void setDomainsIgnoredDueToNegativeFilter( final int domains_ignored_due_to_negative_domain_filter ) {
541         _domains_ignored_due_to_negative_domain_filter = domains_ignored_due_to_negative_domain_filter;
542     }
543
544     private void setDomainsIgnoredDueToOverlap( final int domains_ignored_due_to_overlap ) {
545         _domains_ignored_due_to_overlap = domains_ignored_due_to_overlap;
546     }
547
548     private void setDomainsIgnoredDueToVirusLikeId( final int i ) {
549         _domains_ignored_due_to_virus_like_id = i;
550     }
551
552     private void setDomainsIgnoredDueToVirusLikeIdCountsMap( final Map<String, Integer> domains_ignored_due_to_virus_like_id_counts_map ) {
553         _domains_ignored_due_to_virus_like_id_counts_map = domains_ignored_due_to_virus_like_id_counts_map;
554     }
555
556     private void setDomainsStored( final int domains_stored ) {
557         _domains_stored = domains_stored;
558     }
559
560     private void setDomainsStoredSet( final SortedSet<String> _storeddomains_stored ) {
561         _domains_stored_set = _storeddomains_stored;
562     }
563
564     public void setEValueMaximum( final double e_value_maximum ) {
565         if ( e_value_maximum < 0.0 ) {
566             throw new IllegalArgumentException( "attempt to set the maximum E-value to a negative value" );
567         }
568         _e_value_maximum = e_value_maximum;
569     }
570
571     public void setIgnoreDufs( final boolean ignore_dufs ) {
572         _ignore_dufs = ignore_dufs;
573     }
574
575     /**
576      * To ignore domains which are completely engulfed by domains (individual
577      * ones or stretches of overlapping ones) with better support values.
578      * 
579      * 
580      * @param ignored_engulfed_domains
581      */
582     public void setIgnoreEngulfedDomains( final boolean ignore_engulfed_domains ) {
583         _ignore_engulfed_domains = ignore_engulfed_domains;
584     }
585
586     public void setIgnoreVirusLikeIds( final boolean ignore_virus_like_ids ) {
587         _ignore_virus_like_ids = ignore_virus_like_ids;
588     }
589
590     /**
591      * Sets the individual  score cutoff values (for example, gathering
592      * thresholds from Pfam). Domain ids are the keys, cutoffs the values.
593      * 
594      * @param individual_score_cutoffs
595      */
596     public void setIndividualScoreCutoffs( final Map<String, Double> individual_score_cutoffs ) {
597         _individual_score_cutoffs = individual_score_cutoffs;
598     }
599
600     public void setMaxAllowedOverlap( final int max_allowed_overlap ) {
601         if ( max_allowed_overlap < 0 ) {
602             throw new IllegalArgumentException( "Attempt to set max allowed overlap to less than zero." );
603         }
604         _max_allowed_overlap = max_allowed_overlap;
605     }
606
607     private void setProteinsEncountered( final int proteins_encountered ) {
608         _proteins_encountered = proteins_encountered;
609     }
610
611     private void setProteinsIgnoredDueToFilter( final int proteins_ignored_due_to_filter ) {
612         _proteins_ignored_due_to_filter = proteins_ignored_due_to_filter;
613     }
614
615     private void setProteinsStored( final int proteins_stored ) {
616         _proteins_stored = proteins_stored;
617     }
618
619     public void setReturnType( final ReturnType return_type ) {
620         _return_type = return_type;
621     }
622
623     private void setTime( final long time ) {
624         _time = time;
625     }
626
627     public static enum FilterType {
628         NONE, POSITIVE_PROTEIN, NEGATIVE_PROTEIN, NEGATIVE_DOMAIN
629     }
630
631     static public enum INDIVIDUAL_SCORE_CUTOFF {
632         FULL_SEQUENCE, DOMAIN, NONE;
633     }
634
635     public static enum ReturnType {
636         UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN
637     }
638 }