3 // FORESTER -- software libraries and applications
4 // for evolutionary biology research and applications.
6 // Copyright (C) 2008-2009 Christian M. Zmasek
7 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 // Contact: phylosoft @ gmail . com
25 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
27 package org.forester.protein;
29 import java.util.ArrayList;
30 import java.util.Collections;
31 import java.util.Comparator;
32 import java.util.List;
33 import java.util.SortedSet;
34 import java.util.TreeSet;
36 import org.forester.species.BasicSpecies;
37 import org.forester.species.Species;
38 import org.forester.util.ForesterUtil;
40 // Note: when implementing any "equals" method need to keep in mind that
41 // proteins could have the same name and/or id!
42 public class BasicProtein implements Protein {
// Identifier of this protein; the constructor builds it from the id string.
44 private final ProteinId _id;
// Protein length; presumably the length given to the constructor
// (the assignment is not visible in this listing -- TODO confirm).
45 private final int _length;
// Species of this protein; the constructor wraps the species string in a BasicSpecies.
46 private final Species _species;
// Accession; not final, assigned through setAccession().
49 private String _accession;
// Backing list of domains; created empty in the constructor and handed out
// directly (not copied) by getProteinDomains().
50 private final List<Domain> _protein_domains;
// Comparator used by getDomainsSortedByPosition() to order the domains of a protein.
51 public static Comparator<Domain> DomainMidPositionComparator = new Comparator<Domain>() {
54 public int compare( final Domain d1,
// m1 and m2 are the sums of end and start coordinate, i.e. twice the midpoint
// of each domain. Presumably these are the sort keys; the comparison and
// return statement are not visible in this listing -- TODO confirm.
56 final int m1 = ( d1.getTo() + d1.getFrom() );
57 final int m2 = ( d2.getTo() + d2.getFrom() );
// Creates a protein with an empty domain list from an identifier string,
// a species string and a length.
// Throws IllegalArgumentException when ForesterUtil.isEmpty() reports id_str or
// species_str as empty (the messages say "null or empty"), and for a rejected
// length. The length condition is not visible in this listing -- presumably
// length < 0, TODO confirm.
66 public BasicProtein( final String id_str, final String species_str, final int length ) {
68 throw new IllegalArgumentException( "attempt to create protein of length " + length );
70 if ( ForesterUtil.isEmpty( id_str ) ) {
71 throw new IllegalArgumentException( "attempt to create protein with null or empty identifier" );
73 if ( ForesterUtil.isEmpty( species_str ) ) {
74 throw new IllegalArgumentException( "attempt to create protein with null or empty species" );
// Wrap the validated strings in their value types.
76 _id = new ProteinId( id_str );
77 _species = new BasicSpecies( species_str );
79 _protein_domains = new ArrayList<Domain>();
// Appends protein_domain to the end of this protein's domain list.
// No validation is performed here (null and duplicates are accepted by the list).
84 public void addProteinDomain( final Domain protein_domain ) {
85 getProteinDomains().add( protein_domain );
90 * If in_nc_order is set to true, this returns true if and only if
91 * the order in List 'query_domain_ids' and this protein (as determined by the start positions
92 * of the domains of this protein, _not_ by their index) are the same
93 * (interspersing, 'other', domains in this are ignored).
94 * If in_nc_order is set to false, this returns true if and only if
95 * this contains all domains listed in 'query_domain_ids' (order and count do not matter).
97 * @param query_domain_ids a list of domain ids in a certain order.
98 * @param in_nc_order to consider order
101 public boolean contains( final List<String> query_domain_ids, final boolean in_nc_order ) {
// Unordered mode: every queried id only has to occur among this protein's domain ids.
// Note that getProteinDomainIds() builds a fresh id list on every call.
102 if ( !in_nc_order ) {
103 for( final String query_domain_id : query_domain_ids ) {
104 if ( !getProteinDomainIds().contains( query_domain_id ) ) {
// Ordered mode: current_start_position is the start of the domain matched for the
// previous query id; -1 means nothing has been matched yet.
111 int current_start_position = -1;
112 I: for( final String query_domain_id : query_domain_ids ) {
113 if ( getProteinDomainIds().contains( query_domain_id ) ) {
114 final List<Domain> found_domains = getProteinDomains( query_domain_id );
// The TreeSet yields the start positions of all copies of this domain in
// ascending order, without duplicates.
115 final SortedSet<Integer> ordered_start_positions = new TreeSet<Integer>();
116 for( final Domain found_domain : found_domains ) {
117 ordered_start_positions.add( found_domain.getFrom() );
// Look for a copy that starts after the previously matched position and advance to it.
// Presumably the loop then continues with the next query id via label I
// (that statement is not visible in this listing -- TODO confirm).
119 for( final int start_position : ordered_start_positions ) {
120 if ( start_position > current_start_position ) {
121 current_start_position = start_position;
136 public String getAccession() {
141 public String getDescription() {
// Sorts a copy of this protein's domains with DomainMidPositionComparator.
// Working on a copy leaves the order of the internal domain list untouched.
146 public List<Domain> getDomainsSortedByPosition() {
147 final List<Domain> domains = new ArrayList<Domain>( getProteinDomains().size() );
148 for( final Domain domain : getProteinDomains() ) {
149 domains.add( domain );
151 Collections.sort( domains, DomainMidPositionComparator );
156 public int getLength() {
161 public String getName() {
// Returns the total number of domains in this protein's domain list
// (every entry counts, including repeated domain ids).
166 public int getNumberOfProteinDomains() {
167 return getProteinDomains().size();
// Returns the domain at the given index of the internal list, i.e. in the order
// the domains were added, not in positional order.
// An out-of-range index makes List.get throw IndexOutOfBoundsException.
171 public Domain getProteinDomain( final int index ) {
172 return _protein_domains.get( index );
// Returns how many domains of this protein carry the given domain id.
176 public int getProteinDomainCount( final String domain_id ) {
177 return getProteinDomains( domain_id ).size();
// Returns the internal domain list itself, not a copy: changes made by the
// caller to the returned list change this protein (addProteinDomain relies on this).
181 public List<Domain> getProteinDomains() {
182 return _protein_domains;
// Collects, into a new list, the domains of this protein whose id equals domain_id.
// The comparison is domain.getDomainId().equals( domain_id ), so a null domain_id
// matches nothing.
186 public List<Domain> getProteinDomains( final String domain_id ) {
187 final List<Domain> domains = new ArrayList<Domain>();
188 for( final Domain domain : getProteinDomains() ) {
189 if ( domain.getDomainId().equals( domain_id ) ) {
190 domains.add( domain );
197 public ProteinId getProteinId() {
202 public Species getSpecies() {
// Stores the given accession as-is; no validation is performed.
206 public void setAccession( final String accession ) {
207 _accession = accession;
210 public void setDescription( final String description ) {
214 public void setName( final String name ) {
// Builds a string of domain ids, taken in the order of getDomainsSortedByPosition()
// and joined by separator.
// ie_cutoff: a domain is included when its per-domain E-value is <= ie_cutoff;
// any value <= -1 turns the filter off, so all domains are included.
219 public final String toDomainArchitectureString( final String separator, final double ie_cutoff ) {
220 final StringBuilder sb = new StringBuilder();
// Presumably used to suppress the separator in front of the first id
// (the statements that read and clear the flag are not visible in this listing).
221 boolean first = true;
222 for( final Domain d : getDomainsSortedByPosition() ) {
223 if ( ( ie_cutoff <= -1 ) || ( d.getPerDomainEvalue() <= ie_cutoff ) ) {
228 sb.append( separator );
230 sb.append( d.getDomainId() );
233 return sb.toString();
// Convenience overload: domain architecture string without E-value filtering.
// The cutoff of -1 satisfies the "ie_cutoff <= -1" test of the two-argument
// overload, so every domain is included.
238 public final String toDomainArchitectureString( final String separator ) {
239 return toDomainArchitectureString( separator, -1 );
// Builds a domain architecture string from the domains in the order of
// getDomainsSortedByPosition(), treating runs of consecutive identical domain ids
// specially once a run reaches repeats_limit (repeat_separator is used there).
// Large parts of the loop body are not visible in this listing, so the exact
// output format for repeats cannot be stated here -- TODO confirm against the full file.
// Throws IllegalArgumentException if repeats_limit is smaller than 3.
242 public String toDomainArchitectureString( final String separator,
243 final int repeats_limit,
244 final String repeat_separator ) {
245 if ( repeats_limit < 3 ) {
246 throw new IllegalArgumentException( "repeats limit cannot be smaller than 3" );
248 final StringBuilder sb = new StringBuilder();
// buffer holds the text of the run currently being assembled; it is replaced by
// a fresh builder at several points below.
249 StringBuilder buffer = new StringBuilder();
252 for( final Domain d : getDomainsSortedByPosition() ) {
253 final String id = d.getDomainId();
// Same id as the previous domain: presumably the current run continues.
254 if ( prev_id.equals( id ) ) {
260 buffer = new StringBuilder();
262 if ( counter < repeats_limit ) {
264 buffer.append( separator );
266 else if ( counter == repeats_limit ) {
267 buffer = new StringBuilder();
269 buffer.append( repeat_separator );
271 buffer.append( repeat_separator );
273 buffer.append( separator );
// Drops the last character of buffer, presumably a trailing separator.
// NOTE(review): this removes exactly one character, so it only strips a whole
// separator when the separator is one character long, and substring( 0, -1 )
// throws StringIndexOutOfBoundsException if buffer is empty here (e.g. a protein
// without domains) -- confirm against the lines missing from this listing.
277 sb.append( buffer.substring( 0, buffer.length() - 1 ) );
278 return sb.toString();
// Returns the domain architecture with "--" between domain ids.
// The two-argument call resolves to the ( String, double ) overload, so the 1 is
// an E-value cutoff (domains with a per-domain E-value above 1 are left out),
// not a repeats limit.
282 public String toString() {
283 return toDomainArchitectureString( "--", 1 );
// Builds a new list with the domain id of every domain of this protein, in the
// order of the internal domain list; ids of repeated domains appear repeatedly.
// The list is rebuilt on every call, which matters for callers that invoke this
// inside a loop.
286 private List<String> getProteinDomainIds() {
287 final List<String> ids = new ArrayList<String>( getProteinDomains().size() );
288 for( final Domain domain : getProteinDomains() ) {
289 ids.add( domain.getDomainId() );
294 private void init() {