in progress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Thu, 29 Mar 2012 01:00:16 +0000 (01:00 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Thu, 29 Mar 2012 01:00:16 +0000 (01:00 +0000)
forester/java/src/org/forester/archaeopteryx/AptxUtil.java
forester/java/src/org/forester/archaeopteryx/Constants.java
forester/java/src/org/forester/archaeopteryx/TreePanel.java
forester/java/src/org/forester/archaeopteryx/tools/Blast.java
forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlUtil.java
forester/java/src/org/forester/util/ForesterUtil.java

index cb3a2c6..cc0c365 100644 (file)
@@ -50,6 +50,8 @@ import java.util.Map;
 import java.util.Set;
 import java.util.SortedSet;
 import java.util.TreeSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import javax.imageio.IIOImage;
 import javax.imageio.ImageIO;
@@ -68,6 +70,7 @@ import org.forester.io.parsers.util.ParserUtils;
 import org.forester.phylogeny.Phylogeny;
 import org.forester.phylogeny.PhylogenyMethods;
 import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.Accession;
 import org.forester.phylogeny.data.BranchColor;
 import org.forester.phylogeny.data.Distribution;
 import org.forester.phylogeny.data.Sequence;
@@ -82,12 +85,38 @@ import org.forester.ws.uniprot.UniProtTaxonomy;
 
 public final class AptxUtil {
 
+    private final static Pattern  seq_identifier_pattern_1       = Pattern
+                                                                         .compile( "^([A-Za-z]{2,5})[|=:]([0-9A-Za-z_\\.]{5,40})\\s*$" );
+    private final static Pattern  seq_identifier_pattern_2       = Pattern
+                                                                         .compile( "^([A-Za-z]{2,5})[|=:]([0-9A-Za-z_\\.]{5,40})[|,; ].*$" );
     private final static String[] AVAILABLE_FONT_FAMILIES_SORTED = GraphicsEnvironment.getLocalGraphicsEnvironment()
                                                                          .getAvailableFontFamilyNames();
     static {
         Arrays.sort( AVAILABLE_FONT_FAMILIES_SORTED );
     }
 
+    public final static Accession obtainSequenceAccessionFromName( final String sequence_name ) {
+        final String n = sequence_name.trim();
+        final Matcher matcher1 = seq_identifier_pattern_1.matcher( n );
+        String group1 = "";
+        String group2 = "";
+        if ( matcher1.matches() ) {
+            group1 = matcher1.group( 1 );
+            group2 = matcher1.group( 2 );
+        }
+        else {
+            final Matcher matcher2 = seq_identifier_pattern_2.matcher( n );
+            if ( matcher2.matches() ) {
+                group1 = matcher2.group( 1 );
+                group2 = matcher2.group( 2 );
+            }
+        }
+        if ( ForesterUtil.isEmpty( group1 ) || ForesterUtil.isEmpty( group2 ) ) {
+            return null;
+        }
+        return new Accession( group2, group1 );
+    }
+
     public static void ensurePresenceOfTaxonomy( final PhylogenyNode node ) {
         if ( !node.getNodeData().isHasTaxonomy() ) {
             node.getNodeData().setTaxonomy( new Taxonomy() );
index 9cf4f40..f67902c 100644 (file)
@@ -37,13 +37,13 @@ import org.forester.util.ForesterConstants;
 public final class Constants {
 
     final static boolean        __ALLOW_PHYLOGENETIC_INFERENCE                                = false;
-    public final static boolean __RELEASE                                                     = false;                                                    // TODO remove me
+    public final static boolean __RELEASE                                                     = true;                                                     // TODO remove me
     public final static boolean __SNAPSHOT_RELEASE                                            = true;                                                     // TODO remove me
     public final static boolean __SYNTH_LF                                                    = false;                                                    // TODO remove me
     public final static boolean ALLOW_DDBJ_BLAST                                              = false;
     public final static String  PRG_NAME                                                      = "Archaeopteryx";
-    final static String         VERSION                                                       = "0.969 NM";
-    final static String         PRG_DATE                                                      = "2012.03.05";
+    final static String         VERSION                                                       = "0.970 9M";
+    final static String         PRG_DATE                                                      = "2012.03.28";
     final static String         DEFAULT_CONFIGURATION_FILE_NAME                               = "_aptx_configuration_file";
     final static String[]       DEFAULT_FONT_CHOICES                                          = { "Verdana", "Tahoma",
             "Arial", "Helvetica", "Dialog", "Lucida Sans", "SansSerif", "Sans-serif", "Sans" };
index 06d4e2f..18d4089 100644 (file)
@@ -465,12 +465,26 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee
         }
         if ( node.getNodeData().isHasSequence() ) {
             final String query = Blast.obtainQueryForBlast( node );
+            boolean nucleotide = false;
+            if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getType() ) ) {
+                if ( !node.getNodeData().getSequence().getType().toLowerCase().equals( PhyloXmlUtil.SEQ_TYPE_PROTEIN ) ) {
+                    nucleotide = true;
+                }
+            }
+            else if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getMolecularSequence() ) ) {
+                nucleotide = !ForesterUtil.seqIsLikelyToBeAa( node.getNodeData().getSequence().getMolecularSequence() );
+            }
             if ( !ForesterUtil.isEmpty( query ) ) {
                 JApplet applet = null;
                 if ( isApplet() ) {
                     applet = obtainApplet();
                 }
-                Blast.NcbiBlastWeb( query, applet, this );
+                try {
+                    Blast.openNcbiBlastWeb( query, nucleotide, applet, this );
+                }
+                catch ( final Exception e ) {
+                    e.printStackTrace();
+                }
                 if ( Constants.ALLOW_DDBJ_BLAST ) {
                     try {
                         System.out.println( "trying: " + query );
@@ -1438,11 +1452,12 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee
     }
 
     final private boolean isCanBlast( final PhylogenyNode node ) {
-        return ( node.getNodeData().isHasSequence() && ( ( ( node.getNodeData().getSequence().getAccession() != null ) && !ForesterUtil
-                .isEmpty( node.getNodeData().getSequence().getAccession().getValue() ) )
-                || !ForesterUtil.isEmpty( node.getNodeData().getSequence().getName() )
-                || !ForesterUtil.isEmpty( node.getNodeData().getSequence().getSymbol() ) || !ForesterUtil.isEmpty( node
-                .getNodeData().getSequence().getMolecularSequence() ) ) );
+        return ( node.getNodeData().isHasSequence()
+                && ( ( ( node.getNodeData().getSequence().getAccession() != null ) && !ForesterUtil.isEmpty( node
+                        .getNodeData().getSequence().getAccession().getValue() ) )
+                        || !ForesterUtil.isEmpty( node.getNodeData().getSequence().getName() ) || !ForesterUtil
+                        .isEmpty( node.getNodeData().getSequence().getMolecularSequence() ) ) && Blast
+                .isContainsQueryForBlast( node ) );
     }
 
     final boolean isCanCollapse() {
index 38519e4..97120ad 100644 (file)
@@ -32,30 +32,32 @@ import java.util.Arrays;
 import java.util.Enumeration;
 import java.util.Hashtable;
 import java.util.Vector;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
 
 import javax.swing.JApplet;
 
 import org.forester.archaeopteryx.AptxUtil;
 import org.forester.archaeopteryx.TreePanel;
 import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.Accession;
 import org.forester.util.ForesterUtil;
 import org.forester.ws.wabi.RestUtil;
 
-public class Blast {
+public final class Blast {
 
-    final static Pattern identifier_pattern_1 = Pattern.compile( "^([A-Za-z]{2,5})[|=:]([0-9A-Za-z\\.]{4,40})\\s*$" );
-    final static Pattern identifier_pattern_2 = Pattern
-                                                      .compile( "^([A-Za-z]{2,5})[|=:]([0-9A-Za-z\\.]{4,40})[|,; ].*$" );
-
-    public Blast() {
-    }
-
-    public static void NcbiBlastWeb( final String query, final JApplet applet, final TreePanel p ) {
+    final public static void openNcbiBlastWeb( final String query,
+                                               final boolean is_nucleic_acids,
+                                               final JApplet applet,
+                                               final TreePanel p ) {
         //http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Web&PAGE=Proteins&DATABASE=swissprot&QUERY=gi|163848401
         final StringBuilder uri_str = new StringBuilder();
-        uri_str.append( "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Web&DATABASE=nr&PAGE=Proteins&QUERY=" );
+        uri_str.append( "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Web&DATABASE=nr&PAGE=" );
+        if ( is_nucleic_acids ) {
+            uri_str.append( "Nucleotide" );
+        }
+        else {
+            uri_str.append( "Proteins" );
+        }
+        uri_str.append( "&QUERY=" );
         uri_str.append( query );
         try {
             AptxUtil.launchWebBrowser( new URI( uri_str.toString() ), applet != null, applet, "_aptx_blast" );
@@ -70,7 +72,7 @@ public class Blast {
         }
     }
 
-    public static String obtainQueryForBlast( final PhylogenyNode node ) {
+    final public static String obtainQueryForBlast( final PhylogenyNode node ) {
         String query = "";
         if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getMolecularSequence() ) ) {
             query = node.getNodeData().getSequence().getMolecularSequence();
@@ -83,32 +85,19 @@ public class Blast {
             query += node.getNodeData().getSequence().getAccession().getValue();
         }
         else if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getName() ) ) {
-            final String name = node.getNodeData().getSequence().getName();
-            final Matcher matcher1 = identifier_pattern_1.matcher( name );
-            final Matcher matcher2 = identifier_pattern_2.matcher( name );
-            String group1 = "";
-            String group2 = "";
-            if ( matcher1.matches() ) {
-                group1 = matcher1.group( 1 );
-                group2 = matcher1.group( 2 );
-                System.out.println( "1 1=" + group1 );
-                System.out.println( "1 2=" + group2 );
-            }
-            if ( matcher2.matches() ) {
-                group1 = matcher2.group( 1 );
-                group2 = matcher2.group( 2 );
-                System.out.println( "2 1=" + group1 );
-                System.out.println( "2 2=" + group2 );
-            }
-            if ( !ForesterUtil.isEmpty( group1 ) && !ForesterUtil.isEmpty( group2 ) ) {
-                query = group1 + "%7C" + group2;
+            final Accession acc = AptxUtil.obtainSequenceAccessionFromName( node.getNodeData().getSequence().getName() );
+            if ( acc != null ) {
+                query = acc.getSource() + "%7C" + acc.getValue();
             }
         }
-        System.out.println( query );
         return query;
     }
 
-    public void ddbjBlast( final String geneName ) {
+    final public static boolean isContainsQueryForBlast( final PhylogenyNode node ) {
+        return !ForesterUtil.isEmpty( obtainQueryForBlast( node ) );
+    }
+
+    final public void ddbjBlast( final String geneName ) {
         // Retrieve accession number list which has specified gene name from searchByXMLPath of ARSA. Please click here for details of ARSA.
         /*target: Sequence length is between 300bp and 1000bp.
         Feature key is CDS.
index 156c65a..9ccfdf9 100644 (file)
@@ -47,10 +47,13 @@ public final class PhyloXmlUtil {
     public static final String       VECTOR_PROPERTY_REF                        = "vector:index=";
     public static final String       VECTOR_PROPERTY_TYPE                       = "xsd:decimal";
     public static final String       UNIPROT_TAX_PROVIDER                       = "uniprot";
+    public static final String       SEQ_TYPE_RNA                               = "rna";
+    public static final String       SEQ_TYPE_DNA                               = "dna";
+    public static final String       SEQ_TYPE_PROTEIN                           = "protein";
     static {
-        SEQUENCE_TYPES.add( "rna" );
-        SEQUENCE_TYPES.add( "protein" );
-        SEQUENCE_TYPES.add( "dna" );
+        SEQUENCE_TYPES.add( SEQ_TYPE_RNA );
+        SEQUENCE_TYPES.add( SEQ_TYPE_PROTEIN );
+        SEQUENCE_TYPES.add( SEQ_TYPE_DNA );
         TAXONOMY_RANKS_LIST.add( "domain" );
         TAXONOMY_RANKS_LIST.add( "superkingdom" );
         TAXONOMY_RANKS_LIST.add( "kingdom" );
index de93bd1..58dbf45 100644 (file)
@@ -96,6 +96,17 @@ public final class ForesterUtil {
         }
     }
 
+    public static boolean seqIsLikelyToBeAa( final String s ) {
+        final String seq = s.toLowerCase();
+        if ( ( seq.indexOf( 'r' ) > -1 ) || ( seq.indexOf( 'd' ) > -1 ) || ( seq.indexOf( 'e' ) > -1 )
+                || ( seq.indexOf( 'q' ) > -1 ) || ( seq.indexOf( 'h' ) > -1 ) || ( seq.indexOf( 'k' ) > -1 )
+                || ( seq.indexOf( 'w' ) > -1 ) || ( seq.indexOf( 's' ) > -1 ) || ( seq.indexOf( 'm' ) > -1 )
+                || ( seq.indexOf( 'p' ) > -1 ) || ( seq.indexOf( 'v' ) > -1 ) ) {
+            return true;
+        }
+        return false;
+    }
+
     /**
      * This calculates a color. If value is equal to min the returned color is
      * minColor, if value is equal to max the returned color is maxColor,