in progress
[jalview.git] / forester / java / src / org / forester / io / parsers / HmmPfamOutputParser.java
1 // $Id:
2 //
3 // FORESTER -- software libraries and applications
4 // for evolutionary biology research and applications.
5 //
6 // Copyright (C) 2008-2009 Christian M. Zmasek
7 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
8 // All rights reserved
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 //
24 // Contact: phylosoft @ gmail . com
25 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26
27 package org.forester.io.parsers;
28
29 import java.io.BufferedReader;
30 import java.io.File;
31 import java.io.FileReader;
32 import java.io.IOException;
33 import java.util.ArrayList;
34 import java.util.Date;
35 import java.util.HashSet;
36 import java.util.List;
37 import java.util.Map;
38 import java.util.Set;
39 import java.util.SortedSet;
40 import java.util.TreeMap;
41 import java.util.TreeSet;
42
43 import org.forester.protein.BasicDomain;
44 import org.forester.protein.BasicProtein;
45 import org.forester.protein.Domain;
46 import org.forester.protein.Protein;
47 import org.forester.surfacing.SurfacingUtil;
48 import org.forester.util.ForesterUtil;
49
50 public final class HmmPfamOutputParser {
51
52     private static final String     RETRO                       = "RETRO";
53     private static final String     PHAGE                       = "PHAGE";
54     private static final String     VIR                         = "VIR";
55     private static final String     TRANSPOS                    = "TRANSPOS";
56     private static final String     RV                          = "RV";
57     private static final String     GAG                         = "GAG_";
58     private static final String     HCV                         = "HCV_";                                                    // New. Added on Jun 11, after 1st submission.
59     private static final String     HERPES                      = "Herpes_";                                                 // New. Added on Jun 11, after 1st submission.
60     private static final int        E_VALUE_MAXIMUM_DEFAULT     = -1;
61     private static final ReturnType RETURN_TYPE_DEFAULT         = ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN;
62     private static final boolean    IGNORE_DUFS_DEFAULT         = false;
63     private static final int        MAX_ALLOWED_OVERLAP_DEFAULT = -1;
64     private final Set<String>       _filter;
65     private final FilterType        _filter_type;
66     private final File              _input_file;
67     private final String            _species;
68     private final String            _model_type;
69     private double                  _e_value_maximum;
70     private Map<String, String>     _individual_domain_score_cutoffs;
71     private boolean                 _ignore_dufs;
72     private boolean                 _ignore_virus_like_ids;
73     private boolean                 _allow_non_unique_query;
74     private boolean                 _verbose;
75     private int                     _max_allowed_overlap;
76     private boolean                 _ignore_engulfed_domains;
77     private ReturnType              _return_type;
78     private int                     _proteins_encountered;
79     private int                     _proteins_ignored_due_to_filter;
80     private int                     _proteins_stored;
81     private int                     _domains_encountered;
82     private int                     _domains_ignored_due_to_duf;
83     private int                     _domains_ignored_due_to_overlap;
84     private int                     _domains_ignored_due_to_e_value;
85     private int                     _domains_ignored_due_to_individual_score_cutoff;
86     private int                     _domains_stored;
87     private SortedSet<String>       _domains_stored_set;
88     private long                    _time;
89     private int                     _domains_ignored_due_to_negative_domain_filter;
90     private Map<String, Integer>    _domains_ignored_due_to_negative_domain_filter_counts_map;
91     private int                     _domains_ignored_due_to_virus_like_id;
92     private Map<String, Integer>    _domains_ignored_due_to_virus_like_id_counts_map;
93
94     public HmmPfamOutputParser( final File input_file, final String species, final String model_type ) {
95         _input_file = input_file;
96         _species = species;
97         _model_type = model_type;
98         _filter = null;
99         _filter_type = FilterType.NONE;
100         init();
101     }
102
103     public HmmPfamOutputParser( final File input_file,
104                                 final String species,
105                                 final String model_type,
106                                 final Set<String> filter,
107                                 final FilterType filter_type ) {
108         _input_file = input_file;
109         _species = species;
110         _model_type = model_type;
111         _filter = filter;
112         _filter_type = filter_type;
113         init();
114     }
115
116     private void actuallyAddProtein( final List<Protein> proteins, final Protein current_protein ) {
117         final List<Domain> l = current_protein.getProteinDomains();
118         for( final Domain d : l ) {
119             getDomainsStoredSet().add( d.getDomainId() );
120         }
121         proteins.add( current_protein );
122         ++_proteins_stored;
123     }
124
125     private void addProtein( final List<Protein> proteins, final Protein current_protein ) {
126         if ( ( getFilterType() == FilterType.POSITIVE_PROTEIN ) || ( getFilterType() == FilterType.NEGATIVE_PROTEIN ) ) {
127             final Set<String> domain_ids_in_protein = new HashSet<String>();
128             for( final Domain d : current_protein.getProteinDomains() ) {
129                 domain_ids_in_protein.add( d.getDomainId() );
130             }
131             domain_ids_in_protein.retainAll( getFilter() );
132             if ( getFilterType() == FilterType.POSITIVE_PROTEIN ) {
133                 if ( domain_ids_in_protein.size() > 0 ) {
134                     actuallyAddProtein( proteins, current_protein );
135                 }
136                 else {
137                     ++_proteins_ignored_due_to_filter;
138                 }
139             }
140             else {
141                 if ( domain_ids_in_protein.size() < 1 ) {
142                     actuallyAddProtein( proteins, current_protein );
143                 }
144                 else {
145                     ++_proteins_ignored_due_to_filter;
146                 }
147             }
148         }
149         else {
150             actuallyAddProtein( proteins, current_protein );
151         }
152     }
153
154     public int getDomainsEncountered() {
155         return _domains_encountered;
156     }
157
158     public int getDomainsIgnoredDueToDuf() {
159         return _domains_ignored_due_to_duf;
160     }
161
162     public int getDomainsIgnoredDueToEval() {
163         return _domains_ignored_due_to_e_value;
164     }
165
166     public int getDomainsIgnoredDueToIndividualScoreCutoff() {
167         return _domains_ignored_due_to_individual_score_cutoff;
168     }
169
170     public int getDomainsIgnoredDueToNegativeDomainFilter() {
171         return _domains_ignored_due_to_negative_domain_filter;
172     }
173
174     public Map<String, Integer> getDomainsIgnoredDueToNegativeDomainFilterCountsMap() {
175         return _domains_ignored_due_to_negative_domain_filter_counts_map;
176     }
177
178     public int getDomainsIgnoredDueToOverlap() {
179         return _domains_ignored_due_to_overlap;
180     }
181
182     public Map<String, Integer> getDomainsIgnoredDueToVirusLikeIdCountsMap() {
183         return _domains_ignored_due_to_virus_like_id_counts_map;
184     }
185
186     public int getDomainsIgnoredDueToVirusLikeIds() {
187         return _domains_ignored_due_to_virus_like_id;
188     }
189
190     public int getDomainsStored() {
191         return _domains_stored;
192     }
193
194     public SortedSet<String> getDomainsStoredSet() {
195         return _domains_stored_set;
196     }
197
198     private double getEValueMaximum() {
199         return _e_value_maximum;
200     }
201
202     private Set<String> getFilter() {
203         return _filter;
204     }
205
206     private FilterType getFilterType() {
207         return _filter_type;
208     }
209
210     private Map<String, String> getIndividualDomainScoreCutoffs() {
211         return _individual_domain_score_cutoffs;
212     }
213
214     private File getInputFile() {
215         return _input_file;
216     }
217
218     private int getMaxAllowedOverlap() {
219         return _max_allowed_overlap;
220     }
221
222     private String getModelType() {
223         return _model_type;
224     }
225
226     public int getProteinsEncountered() {
227         return _proteins_encountered;
228     }
229
230     public int getProteinsIgnoredDueToFilter() {
231         return _proteins_ignored_due_to_filter;
232     }
233
234     public int getProteinsStored() {
235         return _proteins_stored;
236     }
237
238     private ReturnType getReturnType() {
239         return _return_type;
240     }
241
242     private String getSpecies() {
243         return _species;
244     }
245
246     public long getTime() {
247         return _time;
248     }
249
250     private void init() {
251         _e_value_maximum = HmmPfamOutputParser.E_VALUE_MAXIMUM_DEFAULT;
252         setIgnoreDufs( HmmPfamOutputParser.IGNORE_DUFS_DEFAULT );
253         setReturnType( HmmPfamOutputParser.RETURN_TYPE_DEFAULT );
254         _max_allowed_overlap = HmmPfamOutputParser.MAX_ALLOWED_OVERLAP_DEFAULT;
255         setIndividualDomainScoreCutoffs( null );
256         setIgnoreEngulfedDomains( false );
257         setIgnoreVirusLikeIds( false );
258         setAllowNonUniqueQuery( false );
259         setVerbose( false );
260         intitCounts();
261     }
262
263     private void intitCounts() {
264         setDomainsStoredSet( new TreeSet<String>() );
265         setDomainsEncountered( 0 );
266         setProteinsEncountered( 0 );
267         setProteinsIgnoredDueToFilter( 0 );
268         setDomainsIgnoredDueToNegativeFilter( 0 );
269         setDomainsIgnoredDueToDuf( 0 );
270         setDomainsIgnoredDueToEval( 0 );
271         setDomainsIgnoredDueToIndividualScoreCutoff( 0 );
272         setDomainsIgnoredDueToVirusLikeId( 0 );
273         setDomainsIgnoredDueToOverlap( 0 );
274         setDomainsStored( 0 );
275         setProteinsStored( 0 );
276         setTime( 0 );
277         setDomainsIgnoredDueToVirusLikeIdCountsMap( new TreeMap<String, Integer>() );
278         setDomainsIgnoredDueToNegativeDomainFilterCountsMap( new TreeMap<String, Integer>() );
279     }
280
281     private boolean isAllowNonUniqueQuery() {
282         return _allow_non_unique_query;
283     }
284
285     private boolean isIgnoreDufs() {
286         return _ignore_dufs;
287     }
288
289     private boolean isIgnoreEngulfedDomains() {
290         return _ignore_engulfed_domains;
291     }
292
293     private boolean isIgnoreVirusLikeIds() {
294         return _ignore_virus_like_ids;
295     }
296
297     private boolean isVerbose() {
298         return _verbose;
299     }
300
301     public List<Protein> parse() throws IOException {
302         intitCounts();
303         final Set<String> queries = new HashSet<String>();
304         final String error = ForesterUtil.isReadableFile( getInputFile() );
305         if ( !ForesterUtil.isEmpty( error ) ) {
306             throw new IOException( error );
307         }
308         final BufferedReader br = new BufferedReader( new FileReader( getInputFile() ) );
309         String line;
310         final List<Protein> proteins = new ArrayList<Protein>();
311         Protein current_protein = null;
312         int line_number = 0;
313         boolean saw_double_slash = true;
314         boolean can_parse_domains = false;
315         boolean saw_parsed_for_domains = false;
316         boolean saw_query_sequence = false;
317         boolean was_not_unique = false;
318         final long start_time = new Date().getTime();
319         while ( ( line = br.readLine() ) != null ) {
320             line_number++;
321             if ( line.length() < 1 ) {
322                 continue;
323             }
324             else if ( line.startsWith( "Query sequence:" ) ) {
325                 ++_proteins_encountered;
326                 if ( !saw_double_slash ) {
327                     throw new IOException( "unexpected format [line " + line_number + "] in ["
328                             + getInputFile().getCanonicalPath() + "]" );
329                 }
330                 saw_double_slash = false;
331                 saw_query_sequence = true;
332                 was_not_unique = false;
333                 final String query = line.substring( 16 ).trim();
334                 if ( ForesterUtil.isEmpty( query ) ) {
335                     throw new IOException( "query sequence cannot be empty [line " + line_number + "] in ["
336                             + getInputFile().getCanonicalPath() + "]" );
337                 }
338                 if ( queries.contains( query ) ) {
339                     if ( !isAllowNonUniqueQuery() ) {
340                         throw new IOException( "query \"" + query + "\" is not unique [line " + line_number + "] in ["
341                                 + getInputFile().getCanonicalPath() + "]" );
342                     }
343                     else if ( isVerbose() ) {
344                         ForesterUtil.printWarningMessage( getClass().getName(), "query \"" + query
345                                 + "\" is not unique [line " + line_number + "] in ["
346                                 + getInputFile().getCanonicalPath() + "]" );
347                     }
348                 }
349                 else {
350                     queries.add( query );
351                 }
352                 if ( current_protein != null ) {
353                     throw new IOException( "unexpected format [line " + line_number + "] in ["
354                             + getInputFile().getCanonicalPath() + "]" );
355                 }
356                 if ( getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ) {
357                     current_protein = new BasicProtein( query, getSpecies(), 0 );
358                 }
359                 else {
360                     throw new IllegalArgumentException( "unknown return type" );
361                 }
362             }
363             else if ( line.startsWith( "Accession:" ) ) {
364                 if ( !saw_query_sequence || ( current_protein == null ) ) {
365                     throw new IOException( "unexpected format [line " + line_number + "] in ["
366                             + getInputFile().getCanonicalPath() + "]" );
367                 }
368                 ( ( BasicProtein ) current_protein ).setAccession( line.substring( 11 ).trim() );
369             }
370             else if ( line.startsWith( "Description:" ) ) {
371                 if ( !saw_query_sequence || ( current_protein == null ) ) {
372                     throw new IOException( "unexpected format [line " + line_number + "] in ["
373                             + getInputFile().getCanonicalPath() + "]" );
374                 }
375                 if ( was_not_unique ) {
376                     if ( getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ) {
377                         current_protein = new BasicProtein( current_protein.getProteinId() + " "
378                                 + line.substring( 13 ).trim(), getSpecies(), 0 );
379                     }
380                 }
381                 else {
382                     ( ( BasicProtein ) current_protein ).setDescription( line.substring( 13 ).trim() );
383                 }
384             }
385             else if ( line.startsWith( "Parsed for domains:" ) ) {
386                 if ( !saw_query_sequence ) {
387                     throw new IOException( "unexpected format [line " + line_number + "] in ["
388                             + getInputFile().getCanonicalPath() + "]" );
389                 }
390                 saw_query_sequence = false;
391                 saw_parsed_for_domains = true;
392             }
393             else if ( saw_parsed_for_domains && line.startsWith( "--------" ) ) {
394                 can_parse_domains = true;
395                 saw_parsed_for_domains = false;
396             }
397             else if ( line.startsWith( "Alignments of top-scoring domains:" ) ) {
398                 if ( !can_parse_domains ) {
399                     throw new IOException( "unexpected format [line " + line_number + "] in ["
400                             + getInputFile().getCanonicalPath() + "]" );
401                 }
402                 can_parse_domains = false;
403             }
404             else if ( line.startsWith( "//" ) ) {
405                 can_parse_domains = false;
406                 saw_double_slash = true;
407                 if ( current_protein.getProteinDomains().size() > 0 ) {
408                     if ( ( getMaxAllowedOverlap() != HmmPfamOutputParser.MAX_ALLOWED_OVERLAP_DEFAULT )
409                             || isIgnoreEngulfedDomains() ) {
410                         final int domains_count = current_protein.getNumberOfProteinDomains();
411                         current_protein = SurfacingUtil.removeOverlappingDomains( getMaxAllowedOverlap(),
412                                                                                   isIgnoreEngulfedDomains(),
413                                                                                   current_protein );
414                         final int domains_removed = domains_count - current_protein.getNumberOfProteinDomains();
415                         _domains_stored -= domains_removed;
416                         _domains_ignored_due_to_overlap += domains_removed;
417                     }
418                     addProtein( proteins, current_protein );
419                 }
420                 current_protein = null;
421             }
422             else if ( can_parse_domains && ( line.indexOf( "[no hits above thresholds]" ) == -1 ) ) {
423                 final String[] s = line.split( "\\s+" );
424                 if ( s.length != 10 ) {
425                     throw new IOException( "unexpected format in hmmpfam output:  \"" + line + "\" [line "
426                             + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
427                 }
428                 final String id = s[ 0 ];
429                 final String domain_count_str = s[ 1 ];
430                 final String from_str = s[ 2 ];
431                 final String to_str = s[ 3 ];
432                 final String query_match_str = s[ 4 ];
433                 final String hmm_match_str = s[ 7 ];
434                 final String score_str = s[ 8 ];
435                 final String e_value_str = s[ 9 ];
436                 int from = -1;
437                 int to = -1;
438                 double e_value = -1;
439                 double score = -1;
440                 boolean is_complete_hmm_match = false;
441                 boolean is_complete_query_match = false;
442                 try {
443                     from = Integer.valueOf( from_str ).intValue();
444                 }
445                 catch ( final NumberFormatException e ) {
446                     throw new IOException( "could not parse seq-f from \"" + line + "\" [line " + line_number
447                             + "] in [" + getInputFile().getCanonicalPath() + "]" );
448                 }
449                 try {
450                     to = Integer.valueOf( to_str ).intValue();
451                 }
452                 catch ( final NumberFormatException e ) {
453                     throw new IOException( "could not parse seq-t from \"" + line + "\" [line " + line_number
454                             + "] in [" + getInputFile().getCanonicalPath() + "]" );
455                 }
456                 try {
457                     score = Double.valueOf( score_str ).doubleValue();
458                 }
459                 catch ( final NumberFormatException e ) {
460                     throw new IOException( "could not parse score from \"" + line + "\" [line " + line_number
461                             + "] in [" + getInputFile().getCanonicalPath() + "]" );
462                 }
463                 try {
464                     e_value = Double.valueOf( e_value_str ).doubleValue();
465                 }
466                 catch ( final NumberFormatException e ) {
467                     throw new IOException( "could not parse E-value from \"" + line + "\" [line " + line_number
468                             + "] in [" + getInputFile().getCanonicalPath() + "]" );
469                 }
470                 if ( hmm_match_str.equals( "[]" ) ) {
471                     is_complete_hmm_match = true;
472                 }
473                 else if ( !( hmm_match_str.equals( ".]" ) || hmm_match_str.equals( "[." ) || hmm_match_str
474                         .equals( ".." ) ) ) {
475                     throw new IOException( "unexpected format in hmmpfam output:  \"" + line + "\" [line "
476                             + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
477                 }
478                 if ( query_match_str.equals( ".." ) ) {
479                     is_complete_query_match = true;
480                 }
481                 else if ( !( query_match_str.equals( ".]" ) || query_match_str.equals( "[." ) || query_match_str
482                         .equals( "[]" ) ) ) {
483                     throw new IOException( "unexpected format in hmmpfam output:  \"" + line + "\" [line "
484                             + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
485                 }
486                 final String[] ct = domain_count_str.split( "/" );
487                 if ( ct.length != 2 ) {
488                     throw new IOException( "unexpected format in hmmpfam output:  \"" + line + "\" [line "
489                             + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
490                 }
491                 final String number_str = ct[ 0 ];
492                 final String total_str = ct[ 1 ];
493                 int number = -1;
494                 int total = -1;
495                 try {
496                     number = Integer.valueOf( ( number_str ) ).intValue();
497                 }
498                 catch ( final NumberFormatException e ) {
499                     throw new IOException( "could not parse domain number from \"" + line + "\" [line " + line_number
500                             + "] in [" + getInputFile().getCanonicalPath() + "]" );
501                 }
502                 try {
503                     total = Integer.valueOf( ( total_str ) ).intValue();
504                 }
505                 catch ( final NumberFormatException e ) {
506                     throw new IOException( "could not parse domain count from \"" + line + "\" [line " + line_number
507                             + "] in [" + getInputFile().getCanonicalPath() + "]" );
508                 }
509                 ++_domains_encountered;
510                 boolean failed_cutoff = false;
511                 if ( getIndividualDomainScoreCutoffs() != null ) {
512                     if ( getIndividualDomainScoreCutoffs().containsKey( id ) ) {
513                         final double cutoff = Double.parseDouble( getIndividualDomainScoreCutoffs().get( id ) );
514                         if ( score < cutoff ) {
515                             failed_cutoff = true;
516                         }
517                     }
518                     else {
519                         throw new IOException( "could not find a score cutoff value for domain id \"" + id
520                                 + "\" [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
521                     }
522                 }
523                 final String uc_id = id.toUpperCase();
524                 if ( failed_cutoff ) {
525                     ++_domains_ignored_due_to_individual_score_cutoff;
526                 }
527                 else if ( ( getEValueMaximum() != HmmPfamOutputParser.E_VALUE_MAXIMUM_DEFAULT )
528                         && ( e_value > getEValueMaximum() ) ) {
529                     ++_domains_ignored_due_to_e_value;
530                 }
531                 else if ( isIgnoreDufs() && uc_id.startsWith( "DUF" ) ) {
532                     ++_domains_ignored_due_to_duf;
533                 }
534                 else if ( isIgnoreVirusLikeIds()
535                         && ( uc_id.contains( VIR ) || uc_id.contains( PHAGE ) || uc_id.contains( RETRO )
536                                 || uc_id.contains( TRANSPOS ) || uc_id.startsWith( RV ) || uc_id.startsWith( GAG )
537                                 || uc_id.startsWith( HCV ) || uc_id.startsWith( HERPES ) ) ) {
538                     ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToVirusLikeIdCountsMap(), id );
539                     ++_domains_ignored_due_to_virus_like_id;
540                 }
541                 else if ( ( getFilterType() == FilterType.NEGATIVE_DOMAIN ) && getFilter().contains( id ) ) {
542                     ++_domains_ignored_due_to_negative_domain_filter;
543                     ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToNegativeDomainFilterCountsMap(), id );
544                 }
545                 else {
546                     final BasicDomain pd = new BasicDomain( id,
547                                                             from,
548                                                             to,
549                                                             ( short ) number,
550                                                             ( short ) total,
551                                                             e_value,
552                                                             score );
553                     current_protein.addProteinDomain( pd );
554                     ++_domains_stored;
555                 }
556             }
557         } // while ( ( line = br.readLine() ) != null )
558         setTime( new Date().getTime() - start_time );
559         if ( !saw_double_slash ) {
560             throw new IOException( "file ends unexpectedly [line " + line_number + "]" );
561         }
562         return proteins;
563     }
564
565     public void setAllowNonUniqueQuery( final boolean allow_non_unique_query ) {
566         _allow_non_unique_query = allow_non_unique_query;
567     }
568
569     private void setDomainsEncountered( final int domains_encountered ) {
570         _domains_encountered = domains_encountered;
571     }
572
573     private void setDomainsIgnoredDueToDuf( final int domains_ignored_due_to_duf ) {
574         _domains_ignored_due_to_duf = domains_ignored_due_to_duf;
575     }
576
577     public void setDomainsIgnoredDueToEval( final int domains_ignored_due_to_e_value ) {
578         _domains_ignored_due_to_e_value = domains_ignored_due_to_e_value;
579     }
580
581     public void setDomainsIgnoredDueToIndividualScoreCutoff( final int domains_ignored_due_to_individual_score_cutoff ) {
582         _domains_ignored_due_to_individual_score_cutoff = domains_ignored_due_to_individual_score_cutoff;
583     }
584
585     private void setDomainsIgnoredDueToNegativeDomainFilterCountsMap( final Map<String, Integer> domains_ignored_due_to_negative_domain_filter_counts_map ) {
586         _domains_ignored_due_to_negative_domain_filter_counts_map = domains_ignored_due_to_negative_domain_filter_counts_map;
587     }
588
589     private void setDomainsIgnoredDueToNegativeFilter( final int domains_ignored_due_to_negative_domain_filter ) {
590         _domains_ignored_due_to_negative_domain_filter = domains_ignored_due_to_negative_domain_filter;
591     }
592
593     private void setDomainsIgnoredDueToOverlap( final int domains_ignored_due_to_overlap ) {
594         _domains_ignored_due_to_overlap = domains_ignored_due_to_overlap;
595     }
596
597     private void setDomainsIgnoredDueToVirusLikeId( final int i ) {
598         _domains_ignored_due_to_virus_like_id = i;
599     }
600
601     private void setDomainsIgnoredDueToVirusLikeIdCountsMap( final Map<String, Integer> domains_ignored_due_to_virus_like_id_counts_map ) {
602         _domains_ignored_due_to_virus_like_id_counts_map = domains_ignored_due_to_virus_like_id_counts_map;
603     }
604
605     private void setDomainsStored( final int domains_stored ) {
606         _domains_stored = domains_stored;
607     }
608
609     private void setDomainsStoredSet( final SortedSet<String> _storeddomains_stored ) {
610         _domains_stored_set = _storeddomains_stored;
611     }
612
613     public void setEValueMaximum( final double e_value_maximum ) {
614         if ( e_value_maximum < 0.0 ) {
615             throw new IllegalArgumentException( "attempt to set the maximum E-value to a negative value" );
616         }
617         _e_value_maximum = e_value_maximum;
618     }
619
620     public void setIgnoreDufs( final boolean ignore_dufs ) {
621         _ignore_dufs = ignore_dufs;
622     }
623
624     /**
625      * To ignore domains which are completely engulfed by domains (individual
626      * ones or stretches of overlapping ones) with better support values.
627      * 
628      * 
629      * @param ignored_engulfed_domains
630      */
631     public void setIgnoreEngulfedDomains( final boolean ignore_engulfed_domains ) {
632         _ignore_engulfed_domains = ignore_engulfed_domains;
633     }
634
635     public void setIgnoreVirusLikeIds( final boolean ignore_virus_like_ids ) {
636         _ignore_virus_like_ids = ignore_virus_like_ids;
637     }
638
639     /**
640      * Sets the individual domain score cutoff values (for example, gathering
641      * thresholds from Pfam). Domain ids are the keys, cutoffs the values.
642      * 
643      * @param individual_domain_score_cutoffs
644      */
645     public void setIndividualDomainScoreCutoffs( final Map<String, String> individual_domain_score_cutoffs ) {
646         _individual_domain_score_cutoffs = individual_domain_score_cutoffs;
647     }
648
649     public void setMaxAllowedOverlap( final int max_allowed_overlap ) {
650         if ( max_allowed_overlap < 0 ) {
651             throw new IllegalArgumentException( "Attempt to set max allowed overlap to less than zero." );
652         }
653         _max_allowed_overlap = max_allowed_overlap;
654     }
655
656     private void setProteinsEncountered( final int proteins_encountered ) {
657         _proteins_encountered = proteins_encountered;
658     }
659
660     private void setProteinsIgnoredDueToFilter( final int proteins_ignored_due_to_filter ) {
661         _proteins_ignored_due_to_filter = proteins_ignored_due_to_filter;
662     }
663
664     private void setProteinsStored( final int proteins_stored ) {
665         _proteins_stored = proteins_stored;
666     }
667
668     public void setReturnType( final ReturnType return_type ) {
669         _return_type = return_type;
670     }
671
672     private void setTime( final long time ) {
673         _time = time;
674     }
675
676     public void setVerbose( final boolean verbose ) {
677         _verbose = verbose;
678     }
679
680     public static enum FilterType {
681         NONE, POSITIVE_PROTEIN, NEGATIVE_PROTEIN, NEGATIVE_DOMAIN
682     }
683
684     public static enum ReturnType {
685         UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN
686     }
687 }