X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Futil%2FForesterUtil.java;h=a8c81b0dd23ae214f2280b324a659ae3f3f544f2;hb=aec065f948a075773794133f102ea19eb1d59f64;hp=cb04e6521e93833a056a9756c4cad769ed6647ac;hpb=cc75486aa58b98ab6fa53d8de4cb9b984a86bf83;p=jalview.git diff --git a/forester/java/src/org/forester/util/ForesterUtil.java b/forester/java/src/org/forester/util/ForesterUtil.java index cb04e65..a8c81b0 100644 --- a/forester/java/src/org/forester/util/ForesterUtil.java +++ b/forester/java/src/org/forester/util/ForesterUtil.java @@ -21,7 +21,7 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.util; @@ -59,6 +59,7 @@ import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; +import java.util.regex.Matcher; import java.util.regex.Pattern; import org.forester.phylogeny.PhylogenyNode; @@ -82,6 +83,14 @@ public final class ForesterUtil { public static final NumberFormat FORMATTER_6; public static final NumberFormat FORMATTER_06; public static final NumberFormat FORMATTER_3; + public static final String NCBI_PROTEIN = "http://www.ncbi.nlm.nih.gov/protein/"; + public static final String NCBI_NUCCORE = "http://www.ncbi.nlm.nih.gov/nuccore/"; + public final static String UNIPROT_KB = "http://www.uniprot.org/uniprot/"; + public final static Pattern UNIPROT_KB_PATTERN_1 = Pattern + .compile( "(?:\\b|_)(?:sp|tr)[\\.|\\-_=/\\\\]([A-Z][0-9][A-Z0-9]{3}[0-9])(?:\\b|_)" ); + public final static Pattern UNIPROT_KB_PATTERN_2 = Pattern + .compile( "\\b(?:[A-Z0-9]{2,5}|(?:[A-Z][0-9][A-Z0-9]{3}[0-9]))_(([A-Z9][A-Z]{2}[A-Z0-9]{2})|RAT|PIG|PEA)\\b" ); + public static final String NCBI_GI = "http://www.ncbi.nlm.nih.gov/protein/gi:"; static { final DecimalFormatSymbols dfs = new DecimalFormatSymbols(); dfs.setDecimalSeparator( '.' ); @@ -95,6 +104,124 @@ public final class ForesterUtil { private ForesterUtil() { } + public static String extractRefSeqAccessorAccessor( final PhylogenyNode node ) { + String v = null; + if ( node.getNodeData().isHasSequence() ) { + final Sequence seq = node.getNodeData().getSequence(); + if ( !isEmpty( seq.getSymbol() ) ) { + v = SequenceIdParser.parseRefSeqAccessor( seq.getSymbol() ); + } + if ( isEmpty( v ) && !isEmpty( seq.getName() ) ) { + v = SequenceIdParser.parseRefSeqAccessor( seq.getName() ); + } + if ( isEmpty( v ) && ( node.getNodeData().getSequence().getAccession() != null ) + && !isEmpty( seq.getAccession().getValue() ) ) { + v = SequenceIdParser.parseRefSeqAccessor( seq.getAccession().getValue() ); + } + } + if ( isEmpty( v ) && !isEmpty( node.getName() ) ) { + v = SequenceIdParser.parseRefSeqAccessor( node.getName() ); + } + return v; + } + + public static String extractGenbankAccessor( final PhylogenyNode node ) { + String v = null; + if ( node.getNodeData().isHasSequence() ) { + final Sequence seq = node.getNodeData().getSequence(); + if ( !isEmpty( seq.getSymbol() ) ) { + v = SequenceIdParser.parseGenbankAccessor( seq.getSymbol() ); + } + if ( isEmpty( v ) && !isEmpty( seq.getName() ) ) { + v = SequenceIdParser.parseGenbankAccessor( seq.getName() ); + } + if ( isEmpty( v ) && ( node.getNodeData().getSequence().getAccession() != null ) + && !isEmpty( seq.getAccession().getValue() ) ) { + v = SequenceIdParser.parseGenbankAccessor( seq.getAccession().getValue() ); + } + } + if ( isEmpty( v ) && !isEmpty( node.getName() ) ) { + v = SequenceIdParser.parseGenbankAccessor( node.getName() ); + } + return v; + } + + public static String extractGInumber( final PhylogenyNode node ) { + String v = null; + if ( node.getNodeData().isHasSequence() ) { + final Sequence seq = node.getNodeData().getSequence(); + if ( isEmpty( v ) && !isEmpty( seq.getName() ) ) { + v = SequenceIdParser.parseGInumber( seq.getName() ); + } + if ( isEmpty( v ) && ( node.getNodeData().getSequence().getAccession() != null ) + && !isEmpty( seq.getAccession().getValue() ) ) { + v = SequenceIdParser.parseGInumber( seq.getAccession().getValue() ); + } + } + if ( isEmpty( v ) && !isEmpty( node.getName() ) ) { + v = SequenceIdParser.parseGInumber( node.getName() ); + } + return v; + } + + public static String extractUniProtKbProteinSeqIdentifier( final PhylogenyNode node ) { + String upkb = null; + if ( node.getNodeData().isHasSequence() ) { + final Sequence seq = node.getNodeData().getSequence(); + Matcher m; + if ( !isEmpty( seq.getSymbol() ) ) { + m = UNIPROT_KB_PATTERN_1.matcher( seq.getSymbol() ); + if ( m.find() ) { + upkb = m.group( 1 ); + } + else { + m = UNIPROT_KB_PATTERN_2.matcher( seq.getSymbol() ); + if ( m.find() ) { + upkb = m.group(); + } + } + } + if ( isEmpty( upkb ) && !isEmpty( seq.getName() ) ) { + m = UNIPROT_KB_PATTERN_1.matcher( seq.getName() ); + if ( m.find() ) { + upkb = m.group( 1 ); + } + else { + m = UNIPROT_KB_PATTERN_2.matcher( seq.getName() ); + if ( m.find() ) { + upkb = m.group(); + } + } + } + if ( isEmpty( upkb ) && ( node.getNodeData().getSequence().getAccession() != null ) + && !isEmpty( seq.getAccession().getValue() ) ) { + m = UNIPROT_KB_PATTERN_1.matcher( seq.getAccession().getValue() ); + if ( m.find() ) { + upkb = m.group( 1 ); + } + else { + m = UNIPROT_KB_PATTERN_2.matcher( seq.getAccession().getValue() ); + if ( m.find() ) { + upkb = m.group(); + } + } + } + } + if ( isEmpty( upkb ) && !isEmpty( node.getName() ) ) { + final Matcher m1 = UNIPROT_KB_PATTERN_1.matcher( node.getName() ); + if ( m1.find() ) { + upkb = m1.group( 1 ); + } + else { + final Matcher m2 = UNIPROT_KB_PATTERN_2.matcher( node.getName() ); + if ( m2.find() ) { + upkb = m2.group(); + } + } + } + return upkb; + } + final public static void appendSeparatorIfNotEmpty( final StringBuffer sb, final char separator ) { if ( sb.length() > 0 ) { sb.append( separator ); @@ -996,6 +1123,10 @@ public final class ForesterUtil { System.out.print( "]" ); } + public final static void updateProgress( final int i, final DecimalFormat f ) { + System.out.print( "\r[" + f.format( i ) + "]" ); + } + public final static String wordWrap( final String str, final int width ) { final StringBuilder sb = new StringBuilder( str ); int start = 0;