From: cmzmasek@gmail.com Date: Thu, 29 Mar 2012 01:00:16 +0000 (+0000) Subject: in progress X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=3be5f9e46469980f068a9f8170e1d20849b5eaf8;p=jalview.git in progress --- diff --git a/forester/java/src/org/forester/archaeopteryx/AptxUtil.java b/forester/java/src/org/forester/archaeopteryx/AptxUtil.java index cb3a2c6..cc0c365 100644 --- a/forester/java/src/org/forester/archaeopteryx/AptxUtil.java +++ b/forester/java/src/org/forester/archaeopteryx/AptxUtil.java @@ -50,6 +50,8 @@ import java.util.Map; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import javax.imageio.IIOImage; import javax.imageio.ImageIO; @@ -68,6 +70,7 @@ import org.forester.io.parsers.util.ParserUtils; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; +import org.forester.phylogeny.data.Accession; import org.forester.phylogeny.data.BranchColor; import org.forester.phylogeny.data.Distribution; import org.forester.phylogeny.data.Sequence; @@ -82,12 +85,38 @@ import org.forester.ws.uniprot.UniProtTaxonomy; public final class AptxUtil { + private final static Pattern seq_identifier_pattern_1 = Pattern + .compile( "^([A-Za-z]{2,5})[|=:]([0-9A-Za-z_\\.]{5,40})\\s*$" ); + private final static Pattern seq_identifier_pattern_2 = Pattern + .compile( "^([A-Za-z]{2,5})[|=:]([0-9A-Za-z_\\.]{5,40})[|,; ].*$" ); private final static String[] AVAILABLE_FONT_FAMILIES_SORTED = GraphicsEnvironment.getLocalGraphicsEnvironment() .getAvailableFontFamilyNames(); static { Arrays.sort( AVAILABLE_FONT_FAMILIES_SORTED ); } + public final static Accession obtainSequenceAccessionFromName( final String sequence_name ) { + final String n = sequence_name.trim(); + final Matcher matcher1 = seq_identifier_pattern_1.matcher( n ); + String group1 = ""; + String group2 = ""; + if ( matcher1.matches() ) { + group1 = matcher1.group( 1 ); + group2 = matcher1.group( 2 ); + } + else { + final Matcher matcher2 = seq_identifier_pattern_2.matcher( n ); + if ( matcher2.matches() ) { + group1 = matcher2.group( 1 ); + group2 = matcher2.group( 2 ); + } + } + if ( ForesterUtil.isEmpty( group1 ) || ForesterUtil.isEmpty( group2 ) ) { + return null; + } + return new Accession( group2, group1 ); + } + public static void ensurePresenceOfTaxonomy( final PhylogenyNode node ) { if ( !node.getNodeData().isHasTaxonomy() ) { node.getNodeData().setTaxonomy( new Taxonomy() ); diff --git a/forester/java/src/org/forester/archaeopteryx/Constants.java b/forester/java/src/org/forester/archaeopteryx/Constants.java index 9cf4f40..f67902c 100644 --- a/forester/java/src/org/forester/archaeopteryx/Constants.java +++ b/forester/java/src/org/forester/archaeopteryx/Constants.java @@ -37,13 +37,13 @@ import org.forester.util.ForesterConstants; public final class Constants { final static boolean __ALLOW_PHYLOGENETIC_INFERENCE = false; - public final static boolean __RELEASE = false; // TODO remove me + public final static boolean __RELEASE = true; // TODO remove me public final static boolean __SNAPSHOT_RELEASE = true; // TODO remove me public final static boolean __SYNTH_LF = false; // TODO remove me public final static boolean ALLOW_DDBJ_BLAST = false; public final static String PRG_NAME = "Archaeopteryx"; - final static String VERSION = "0.969 NM"; - final static String PRG_DATE = "2012.03.05"; + final static String VERSION = "0.970 9M"; + final static String PRG_DATE = "2012.03.28"; final static String DEFAULT_CONFIGURATION_FILE_NAME = "_aptx_configuration_file"; final static String[] DEFAULT_FONT_CHOICES = { "Verdana", "Tahoma", "Arial", "Helvetica", "Dialog", "Lucida Sans", "SansSerif", "Sans-serif", "Sans" }; diff --git a/forester/java/src/org/forester/archaeopteryx/TreePanel.java b/forester/java/src/org/forester/archaeopteryx/TreePanel.java index 06d4e2f..18d4089 100644 --- a/forester/java/src/org/forester/archaeopteryx/TreePanel.java +++ b/forester/java/src/org/forester/archaeopteryx/TreePanel.java @@ -465,12 +465,26 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee } if ( node.getNodeData().isHasSequence() ) { final String query = Blast.obtainQueryForBlast( node ); + boolean nucleotide = false; + if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getType() ) ) { + if ( !node.getNodeData().getSequence().getType().toLowerCase().equals( PhyloXmlUtil.SEQ_TYPE_PROTEIN ) ) { + nucleotide = true; + } + } + else if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getMolecularSequence() ) ) { + nucleotide = !ForesterUtil.seqIsLikelyToBeAa( node.getNodeData().getSequence().getMolecularSequence() ); + } if ( !ForesterUtil.isEmpty( query ) ) { JApplet applet = null; if ( isApplet() ) { applet = obtainApplet(); } - Blast.NcbiBlastWeb( query, applet, this ); + try { + Blast.openNcbiBlastWeb( query, nucleotide, applet, this ); + } + catch ( final Exception e ) { + e.printStackTrace(); + } if ( Constants.ALLOW_DDBJ_BLAST ) { try { System.out.println( "trying: " + query ); @@ -1438,11 +1452,12 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee } final private boolean isCanBlast( final PhylogenyNode node ) { - return ( node.getNodeData().isHasSequence() && ( ( ( node.getNodeData().getSequence().getAccession() != null ) && !ForesterUtil - .isEmpty( node.getNodeData().getSequence().getAccession().getValue() ) ) - || !ForesterUtil.isEmpty( node.getNodeData().getSequence().getName() ) - || !ForesterUtil.isEmpty( node.getNodeData().getSequence().getSymbol() ) || !ForesterUtil.isEmpty( node - .getNodeData().getSequence().getMolecularSequence() ) ) ); + return ( node.getNodeData().isHasSequence() + && ( ( ( node.getNodeData().getSequence().getAccession() != null ) && !ForesterUtil.isEmpty( node + .getNodeData().getSequence().getAccession().getValue() ) ) + || !ForesterUtil.isEmpty( node.getNodeData().getSequence().getName() ) || !ForesterUtil + .isEmpty( node.getNodeData().getSequence().getMolecularSequence() ) ) && Blast + .isContainsQueryForBlast( node ) ); } final boolean isCanCollapse() { diff --git a/forester/java/src/org/forester/archaeopteryx/tools/Blast.java b/forester/java/src/org/forester/archaeopteryx/tools/Blast.java index 38519e4..97120ad 100644 --- a/forester/java/src/org/forester/archaeopteryx/tools/Blast.java +++ b/forester/java/src/org/forester/archaeopteryx/tools/Blast.java @@ -32,30 +32,32 @@ import java.util.Arrays; import java.util.Enumeration; import java.util.Hashtable; import java.util.Vector; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import javax.swing.JApplet; import org.forester.archaeopteryx.AptxUtil; import org.forester.archaeopteryx.TreePanel; import org.forester.phylogeny.PhylogenyNode; +import org.forester.phylogeny.data.Accession; import org.forester.util.ForesterUtil; import org.forester.ws.wabi.RestUtil; -public class Blast { +public final class Blast { - final static Pattern identifier_pattern_1 = Pattern.compile( "^([A-Za-z]{2,5})[|=:]([0-9A-Za-z\\.]{4,40})\\s*$" ); - final static Pattern identifier_pattern_2 = Pattern - .compile( "^([A-Za-z]{2,5})[|=:]([0-9A-Za-z\\.]{4,40})[|,; ].*$" ); - - public Blast() { - } - - public static void NcbiBlastWeb( final String query, final JApplet applet, final TreePanel p ) { + final public static void openNcbiBlastWeb( final String query, + final boolean is_nucleic_acids, + final JApplet applet, + final TreePanel p ) { //http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Web&PAGE=Proteins&DATABASE=swissprot&QUERY=gi|163848401 final StringBuilder uri_str = new StringBuilder(); - uri_str.append( "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Web&DATABASE=nr&PAGE=Proteins&QUERY=" ); + uri_str.append( "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Web&DATABASE=nr&PAGE=" ); + if ( is_nucleic_acids ) { + uri_str.append( "Nucleotide" ); + } + else { + uri_str.append( "Proteins" ); + } + uri_str.append( "&QUERY=" ); uri_str.append( query ); try { AptxUtil.launchWebBrowser( new URI( uri_str.toString() ), applet != null, applet, "_aptx_blast" ); @@ -70,7 +72,7 @@ public class Blast { } } - public static String obtainQueryForBlast( final PhylogenyNode node ) { + final public static String obtainQueryForBlast( final PhylogenyNode node ) { String query = ""; if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getMolecularSequence() ) ) { query = node.getNodeData().getSequence().getMolecularSequence(); @@ -83,32 +85,19 @@ public class Blast { query += node.getNodeData().getSequence().getAccession().getValue(); } else if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getName() ) ) { - final String name = node.getNodeData().getSequence().getName(); - final Matcher matcher1 = identifier_pattern_1.matcher( name ); - final Matcher matcher2 = identifier_pattern_2.matcher( name ); - String group1 = ""; - String group2 = ""; - if ( matcher1.matches() ) { - group1 = matcher1.group( 1 ); - group2 = matcher1.group( 2 ); - System.out.println( "1 1=" + group1 ); - System.out.println( "1 2=" + group2 ); - } - if ( matcher2.matches() ) { - group1 = matcher2.group( 1 ); - group2 = matcher2.group( 2 ); - System.out.println( "2 1=" + group1 ); - System.out.println( "2 2=" + group2 ); - } - if ( !ForesterUtil.isEmpty( group1 ) && !ForesterUtil.isEmpty( group2 ) ) { - query = group1 + "%7C" + group2; + final Accession acc = AptxUtil.obtainSequenceAccessionFromName( node.getNodeData().getSequence().getName() ); + if ( acc != null ) { + query = acc.getSource() + "%7C" + acc.getValue(); } } - System.out.println( query ); return query; } - public void ddbjBlast( final String geneName ) { + final public static boolean isContainsQueryForBlast( final PhylogenyNode node ) { + return !ForesterUtil.isEmpty( obtainQueryForBlast( node ) ); + } + + final public void ddbjBlast( final String geneName ) { // Retrieve accession number list which has specified gene name from searchByXMLPath of ARSA. Please click here for details of ARSA. /*target: Sequence length is between 300bp and 1000bp. Feature key is CDS. diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlUtil.java b/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlUtil.java index 156c65a..9ccfdf9 100644 --- a/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlUtil.java +++ b/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlUtil.java @@ -47,10 +47,13 @@ public final class PhyloXmlUtil { public static final String VECTOR_PROPERTY_REF = "vector:index="; public static final String VECTOR_PROPERTY_TYPE = "xsd:decimal"; public static final String UNIPROT_TAX_PROVIDER = "uniprot"; + public static final String SEQ_TYPE_RNA = "rna"; + public static final String SEQ_TYPE_DNA = "dna"; + public static final String SEQ_TYPE_PROTEIN = "protein"; static { - SEQUENCE_TYPES.add( "rna" ); - SEQUENCE_TYPES.add( "protein" ); - SEQUENCE_TYPES.add( "dna" ); + SEQUENCE_TYPES.add( SEQ_TYPE_RNA ); + SEQUENCE_TYPES.add( SEQ_TYPE_PROTEIN ); + SEQUENCE_TYPES.add( SEQ_TYPE_DNA ); TAXONOMY_RANKS_LIST.add( "domain" ); TAXONOMY_RANKS_LIST.add( "superkingdom" ); TAXONOMY_RANKS_LIST.add( "kingdom" ); diff --git a/forester/java/src/org/forester/util/ForesterUtil.java b/forester/java/src/org/forester/util/ForesterUtil.java index de93bd1..58dbf45 100644 --- a/forester/java/src/org/forester/util/ForesterUtil.java +++ b/forester/java/src/org/forester/util/ForesterUtil.java @@ -96,6 +96,17 @@ public final class ForesterUtil { } } + public static boolean seqIsLikelyToBeAa( final String s ) { + final String seq = s.toLowerCase(); + if ( ( seq.indexOf( 'r' ) > -1 ) || ( seq.indexOf( 'd' ) > -1 ) || ( seq.indexOf( 'e' ) > -1 ) + || ( seq.indexOf( 'q' ) > -1 ) || ( seq.indexOf( 'h' ) > -1 ) || ( seq.indexOf( 'k' ) > -1 ) + || ( seq.indexOf( 'w' ) > -1 ) || ( seq.indexOf( 's' ) > -1 ) || ( seq.indexOf( 'm' ) > -1 ) + || ( seq.indexOf( 'p' ) > -1 ) || ( seq.indexOf( 'v' ) > -1 ) ) { + return true; + } + return false; + } + /** * This calculates a color. If value is equal to min the returned color is * minColor, if value is equal to max the returned color is maxColor,