X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Farchaeopteryx%2Ftools%2FBlast.java;h=839a328fb61a0daf20b78c04a51906d924fb219e;hb=a9cc2cc5c9cda0ab49f26ccfb7a1aa30531e6ecf;hp=38519e443d366905923c42ed480d7e358a4b1b56;hpb=0feab495c350ff488c86253826bca4b202c46f4d;p=jalview.git diff --git a/forester/java/src/org/forester/archaeopteryx/tools/Blast.java b/forester/java/src/org/forester/archaeopteryx/tools/Blast.java index 38519e4..839a328 100644 --- a/forester/java/src/org/forester/archaeopteryx/tools/Blast.java +++ b/forester/java/src/org/forester/archaeopteryx/tools/Blast.java @@ -21,7 +21,7 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.archaeopteryx.tools; @@ -32,30 +32,33 @@ import java.util.Arrays; import java.util.Enumeration; import java.util.Hashtable; import java.util.Vector; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import javax.swing.JApplet; import org.forester.archaeopteryx.AptxUtil; import org.forester.archaeopteryx.TreePanel; import org.forester.phylogeny.PhylogenyNode; +import org.forester.phylogeny.data.Identifier; import org.forester.util.ForesterUtil; +import org.forester.util.SequenceIdParser; import org.forester.ws.wabi.RestUtil; -public class Blast { +public final class Blast { - final static Pattern identifier_pattern_1 = Pattern.compile( "^([A-Za-z]{2,5})[|=:]([0-9A-Za-z\\.]{4,40})\\s*$" ); - final static Pattern identifier_pattern_2 = Pattern - .compile( "^([A-Za-z]{2,5})[|=:]([0-9A-Za-z\\.]{4,40})[|,; ].*$" ); - - public Blast() { - } - - public static void NcbiBlastWeb( final String query, final JApplet applet, final TreePanel p ) { + final public static void openNcbiBlastWeb( final String query, + final boolean is_nucleic_acids, + final JApplet applet, + final TreePanel p ) { //http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Web&PAGE=Proteins&DATABASE=swissprot&QUERY=gi|163848401 final StringBuilder uri_str = new StringBuilder(); - uri_str.append( "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Web&DATABASE=nr&PAGE=Proteins&QUERY=" ); + uri_str.append( "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Web&DATABASE=nr&PAGE=" ); + if ( is_nucleic_acids ) { + uri_str.append( "Nucleotide" ); + } + else { + uri_str.append( "Proteins" ); + } + uri_str.append( "&QUERY=" ); uri_str.append( query ); try { AptxUtil.launchWebBrowser( new URI( uri_str.toString() ), applet != null, applet, "_aptx_blast" ); @@ -70,45 +73,47 @@ public class Blast { } } - public static String obtainQueryForBlast( final PhylogenyNode node ) { + final public static String obtainQueryForBlast( final PhylogenyNode node ) { String query = ""; - if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getMolecularSequence() ) ) { - query = node.getNodeData().getSequence().getMolecularSequence(); - } - else if ( ( node.getNodeData().getSequence().getAccession() != null ) - && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getValue() ) ) { - if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getSource() ) ) { - query = node.getNodeData().getSequence().getAccession().getSource() + "%7C"; + if ( node.getNodeData().isHasSequence() ) { + if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getMolecularSequence() ) ) { + query = node.getNodeData().getSequence().getMolecularSequence(); + } + if ( ForesterUtil.isEmpty( query ) && ( node.getNodeData().getSequence().getAccession() != null ) + && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getValue() ) ) { + final Identifier id = SequenceIdParser.parse( node.getNodeData().getSequence().getAccession() + .getValue() ); + if ( id != null ) { + query = id.getValue(); + } } - query += node.getNodeData().getSequence().getAccession().getValue(); - } - else if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getName() ) ) { - final String name = node.getNodeData().getSequence().getName(); - final Matcher matcher1 = identifier_pattern_1.matcher( name ); - final Matcher matcher2 = identifier_pattern_2.matcher( name ); - String group1 = ""; - String group2 = ""; - if ( matcher1.matches() ) { - group1 = matcher1.group( 1 ); - group2 = matcher1.group( 2 ); - System.out.println( "1 1=" + group1 ); - System.out.println( "1 2=" + group2 ); + if ( ForesterUtil.isEmpty( query ) && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getName() ) ) { + final Identifier id = SequenceIdParser.parse( node.getNodeData().getSequence().getName() ); + if ( id != null ) { + query = id.getValue(); + } } - if ( matcher2.matches() ) { - group1 = matcher2.group( 1 ); - group2 = matcher2.group( 2 ); - System.out.println( "2 1=" + group1 ); - System.out.println( "2 2=" + group2 ); + if ( ForesterUtil.isEmpty( query ) && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getSymbol() ) ) { + final Identifier id = SequenceIdParser.parse( node.getNodeData().getSequence().getSymbol() ); + if ( id != null ) { + query = id.getValue(); + } } - if ( !ForesterUtil.isEmpty( group1 ) && !ForesterUtil.isEmpty( group2 ) ) { - query = group1 + "%7C" + group2; + } + if ( ForesterUtil.isEmpty( query ) && !ForesterUtil.isEmpty( node.getName() ) ) { + final Identifier id = SequenceIdParser.parse( node.getName() ); + if ( id != null ) { + query = id.getValue(); } } - System.out.println( query ); return query; } - public void ddbjBlast( final String geneName ) { + final public static boolean isContainsQueryForBlast( final PhylogenyNode node ) { + return !ForesterUtil.isEmpty( obtainQueryForBlast( node ) ); + } + + final public void ddbjBlast( final String geneName ) { // Retrieve accession number list which has specified gene name from searchByXMLPath of ARSA. Please click here for details of ARSA. /*target: Sequence length is between 300bp and 1000bp. Feature key is CDS.