// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/forester
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.protein;
import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;
// proteins could have the same name and/or id!
public class BasicProtein implements Protein {
- private final ProteinId _id;
- private final int _length;
- private final Species _species;
- private String _name;
- private String _desc;
- private String _accession;
- private final List<Domain> _protein_domains;
+ // Assigned once (final); _length is what getLength() returns.
+ private final ProteinId _id;
+ private final int _length;
+ private final Species _species;
+ // Mutable text attributes; init() sets each of them to the empty string.
+ private String _name;
+ private String _desc;
+ private String _accession;
+ // Returned directly (not copied) by getProteinDomains().
+ private final List<Domain> _protein_domains;
+ /**
+  * Orders domains by the midpoint of their span (from + to), smallest first.
+  * Domains with the same midpoint are ordered by comparing their domain ids.
+  * Declared final so the shared instance cannot be reassigned by other code.
+  */
+ public static final Comparator<Domain> DomainMidPositionComparator = new Comparator<Domain>() {
+
+     @Override
+     public int compare( final Domain d1, final Domain d2 ) {
+         // from + to is twice the midpoint, so comparing the sums needs no division.
+         // The sums are widened to long so that two large positions cannot overflow.
+         final long m1 = ( long ) d1.getTo() + d1.getFrom();
+         final long m2 = ( long ) d2.getTo() + d2.getFrom();
+         if ( m1 != m2 ) {
+             return m1 < m2 ? -1 : 1;
+         }
+         // Same midpoint: fall back to the ordering of the domain ids.
+         return d1.getDomainId().compareTo( d2.getDomainId() );
+     }
+ };
public BasicProtein( final String id_str, final String species_str, final int length ) {
if ( length < 0 ) {
* @param in_nc_order to consider order
* @return
*/
- public boolean contains( final List<DomainId> query_domain_ids, final boolean in_nc_order ) {
+ public boolean contains( final List<String> query_domain_ids, final boolean in_nc_order ) {
if ( !in_nc_order ) {
- for( final DomainId query_domain_id : query_domain_ids ) {
+ for( final String query_domain_id : query_domain_ids ) {
if ( !getProteinDomainIds().contains( query_domain_id ) ) {
return false;
}
}
else {
int current_start_position = -1;
- I: for( final DomainId query_domain_id : query_domain_ids ) {
+ I: for( final String query_domain_id : query_domain_ids ) {
if ( getProteinDomainIds().contains( query_domain_id ) ) {
final List<Domain> found_domains = getProteinDomains( query_domain_id );
final SortedSet<Integer> ordered_start_positions = new TreeSet<Integer>();
}
@Override
+ public List<Domain> getDomainsSortedByPosition() {
+     // Sort a copy, so the list returned by getProteinDomains() keeps its order.
+     final List<Domain> sorted = new ArrayList<Domain>( getProteinDomains() );
+     Collections.sort( sorted, DomainMidPositionComparator );
+     return sorted;
+ }
+
+ /**
+  * Returns the length recorded for this protein.
+  *
+  * @return the value of the {@code _length} field
+  */
+ @Override
+ public int getLength() {
+     return this._length;
+ }
+
+ /**
+  * Returns the current value of the {@code _name} field.
+  */
+ @Override
public String getName() {
return _name;
}
}
+ /**
+  * Returns the size of the list produced by getProteinDomains( domain_id ).
+  *
+  * @param domain_id the domain id passed on to getProteinDomains
+  */
@Override
- public int getProteinDomainCount( final DomainId domain_id ) {
+ public int getProteinDomainCount( final String domain_id ) {
return getProteinDomains( domain_id ).size();
}
- private List<DomainId> getProteinDomainIds() {
- final List<DomainId> ids = new ArrayList<DomainId>( getProteinDomains().size() );
- for( final Domain domain : getProteinDomains() ) {
- ids.add( domain.getDomainId() );
- }
- return ids;
- }
-
+ /**
+  * Returns the {@code _protein_domains} list itself, not a copy, so changes
+  * made through the returned reference are visible to this protein.
+  */
@Override
public List<Domain> getProteinDomains() {
return _protein_domains;
}
@Override
- public List<Domain> getProteinDomains( final DomainId domain_id ) {
+ public List<Domain> getProteinDomains( final String domain_id ) {
final List<Domain> domains = new ArrayList<Domain>();
for( final Domain domain : getProteinDomains() ) {
if ( domain.getDomainId().equals( domain_id ) ) {
return _species;
}
- private void init() {
- _desc = "";
- _accession = "";
- _name = "";
- }
-
+ /**
+  * Stores the given value in {@code _accession}; nothing is checked here.
+  *
+  * @param accession the new accession, stored as-is
+  */
public void setAccession( final String accession ) {
_accession = accession;
}
_name = name;
}
+ /**
+  * Joins the ids of this protein's domains, taken in the order returned by
+  * getDomainsSortedByPosition(), with the given separator between them.
+  *
+  * @param separator text placed between consecutive domain ids
+  * @return the joined ids; the empty string when there are no domains
+  */
+ public String toDomainArchitectureString( final String separator ) {
+     final StringBuilder architecture = new StringBuilder();
+     // Empty in front of the first id, the real separator from then on.
+     String glue = "";
+     for( final Domain domain : getDomainsSortedByPosition() ) {
+         architecture.append( glue );
+         architecture.append( domain.getDomainId() );
+         glue = separator;
+     }
+     return architecture.toString();
+ }
+
+ /**
+  * Joins the ids of this protein's domains, taken in the order returned by
+  * getDomainsSortedByPosition(), collapsing long runs of the same id.
+  * <p>
+  * A run shorter than repeats_limit is written out in full, its ids joined by
+  * separator. A run of repeats_limit or more ids is written as exactly three
+  * copies of the id joined by repeat_separator.
+  * <p>
+  * The trailing separator is removed by its real length, so separators of any
+  * length (including the empty string) are handled, and a protein without
+  * domains yields the empty string instead of an exception.
+  *
+  * @param separator        text placed between ids and between runs
+  * @param repeats_limit    run length from which a run is collapsed; at least 3
+  * @param repeat_separator text placed between the three ids of a collapsed run
+  * @return the domain architecture string; empty when there are no domains
+  * @throws IllegalArgumentException if repeats_limit is smaller than 3
+  */
+ public String toDomainArchitectureString( final String separator,
+                                           final int repeats_limit,
+                                           final String repeat_separator ) {
+     if ( repeats_limit < 3 ) {
+         throw new IllegalArgumentException( "repeats limit cannot be smaller than 3" );
+     }
+     final StringBuilder sb = new StringBuilder();
+     // Text of the run currently being read; flushed to sb when the id changes.
+     StringBuilder buffer = new StringBuilder();
+     String prev_id = "";
+     int counter = 1;
+     for( final Domain d : getDomainsSortedByPosition() ) {
+         final String id = d.getDomainId();
+         if ( prev_id.equals( id ) ) {
+             counter++;
+         }
+         else {
+             counter = 1;
+             sb.append( buffer );
+             buffer = new StringBuilder();
+         }
+         if ( counter < repeats_limit ) {
+             buffer.append( id );
+             buffer.append( separator );
+         }
+         else if ( counter == repeats_limit ) {
+             // The run just reached the limit: discard what was written for it
+             // and replace it by the fixed three-id form.
+             buffer = new StringBuilder();
+             buffer.append( id );
+             buffer.append( repeat_separator );
+             buffer.append( id );
+             buffer.append( repeat_separator );
+             buffer.append( id );
+             buffer.append( separator );
+         }
+         // counter > repeats_limit: further repeats add nothing to the output.
+         prev_id = id;
+     }
+     // Everything written to buffer ends with the separator; cut exactly that
+     // much. String.valueOf mirrors StringBuilder.append, which writes a null
+     // separator as the four characters "null".
+     if ( buffer.length() > 0 ) {
+         buffer.setLength( buffer.length() - String.valueOf( separator ).length() );
+     }
+     sb.append( buffer );
+     return sb.toString();
+ }
+
+ /**
+  * Returns the result of toDomainArchitectureString with "~" as the separator.
+  */
@Override
- public int getLength() {
- return _length;
+ public String toString() {
+ return toDomainArchitectureString( "~" );
+ }
+
+ /**
+  * Collects the id of every domain of this protein, in the order of the list
+  * returned by getProteinDomains(). An id carried by several domains appears
+  * once per domain.
+  *
+  * @return a new list holding one id per domain
+  */
+ private List<String> getProteinDomainIds() {
+     final List<Domain> all_domains = getProteinDomains();
+     final List<String> domain_ids = new ArrayList<String>( all_domains.size() );
+     for( final Domain d : all_domains ) {
+         domain_ids.add( d.getDomainId() );
+     }
+     return domain_ids;
+ }
+
+ /**
+  * Sets description, accession and name to the empty string.
+  */
+ private void init() {
+     // One chained assignment: all three fields receive the same empty string.
+     _desc = _accession = _name = "";
}
}