import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import javax.imageio.IIOImage;
import javax.imageio.ImageIO;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyMethods;
import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.Accession;
import org.forester.phylogeny.data.BranchColor;
import org.forester.phylogeny.data.Distribution;
import org.forester.phylogeny.data.Sequence;
public final class AptxUtil {
+ private final static Pattern seq_identifier_pattern_1 = Pattern
+ .compile( "^([A-Za-z]{2,5})[|=:]([0-9A-Za-z_\\.]{5,40})\\s*$" ); // whole name: 2-5 letters (group 1), one of | = :, then 5-40 chars from [0-9A-Za-z_.] (group 2), optional trailing whitespace
+ private final static Pattern seq_identifier_pattern_2 = Pattern
+ .compile( "^([A-Za-z]{2,5})[|=:]([0-9A-Za-z_\\.]{5,40})[|,; ].*$" ); // same two groups, but the identifier must be followed by one of | , ; or a space and then arbitrary text
private final static String[] AVAILABLE_FONT_FAMILIES_SORTED = GraphicsEnvironment.getLocalGraphicsEnvironment()
.getAvailableFontFamilyNames(); // font family names as reported by the local graphics environment
static {
Arrays.sort( AVAILABLE_FONT_FAMILIES_SORTED ); // sort the array in place once, when the class is initialized
}
+ public final static Accession obtainSequenceAccessionFromName( final String sequence_name ) {
+ final String n = sequence_name.trim();
+ final Matcher matcher1 = seq_identifier_pattern_1.matcher( n );
+ String group1 = "";
+ String group2 = "";
+ if ( matcher1.matches() ) {
+ group1 = matcher1.group( 1 );
+ group2 = matcher1.group( 2 );
+ }
+ else {
+ final Matcher matcher2 = seq_identifier_pattern_2.matcher( n );
+ if ( matcher2.matches() ) {
+ group1 = matcher2.group( 1 );
+ group2 = matcher2.group( 2 );
+ }
+ }
+ if ( ForesterUtil.isEmpty( group1 ) || ForesterUtil.isEmpty( group2 ) ) {
+ return null;
+ }
+ return new Accession( group2, group1 );
+ }
+
public static void ensurePresenceOfTaxonomy( final PhylogenyNode node ) {
if ( !node.getNodeData().isHasTaxonomy() ) {
node.getNodeData().setTaxonomy( new Taxonomy() );
public final class Constants { // compile-time settings for the program named in PRG_NAME
final static boolean __ALLOW_PHYLOGENETIC_INFERENCE = false;
- public final static boolean __RELEASE = false; // TODO remove me
+ public final static boolean __RELEASE = true; // TODO remove me -- NOTE(review): __SNAPSHOT_RELEASE is also true; confirm both flags are meant to be set
public final static boolean __SNAPSHOT_RELEASE = true; // TODO remove me
public final static boolean __SYNTH_LF = false; // TODO remove me
public final static boolean ALLOW_DDBJ_BLAST = false;
public final static String PRG_NAME = "Archaeopteryx";
- final static String VERSION = "0.969 NM";
- final static String PRG_DATE = "2012.03.05";
+ final static String VERSION = "0.970 9M"; // version label of this build
+ final static String PRG_DATE = "2012.03.28"; // date label of this build, written as yyyy.MM.dd
final static String DEFAULT_CONFIGURATION_FILE_NAME = "_aptx_configuration_file";
final static String[] DEFAULT_FONT_CHOICES = { "Verdana", "Tahoma",
"Arial", "Helvetica", "Dialog", "Lucida Sans", "SansSerif", "Sans-serif", "Sans" };
}
if ( node.getNodeData().isHasSequence() ) {
final String query = Blast.obtainQueryForBlast( node );
+ boolean nucleotide = false;
+ if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getType() ) ) {
+ if ( !node.getNodeData().getSequence().getType().toLowerCase().equals( PhyloXmlUtil.SEQ_TYPE_PROTEIN ) ) {
+ nucleotide = true;
+ }
+ }
+ else if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getMolecularSequence() ) ) {
+ nucleotide = !ForesterUtil.seqIsLikelyToBeAa( node.getNodeData().getSequence().getMolecularSequence() );
+ }
if ( !ForesterUtil.isEmpty( query ) ) {
JApplet applet = null;
if ( isApplet() ) {
applet = obtainApplet();
}
- Blast.NcbiBlastWeb( query, applet, this );
+ try {
+ Blast.openNcbiBlastWeb( query, nucleotide, applet, this );
+ }
+ catch ( final Exception e ) {
+ e.printStackTrace();
+ }
if ( Constants.ALLOW_DDBJ_BLAST ) {
try {
System.out.println( "trying: " + query );
}
final private boolean isCanBlast( final PhylogenyNode node ) {
- return ( node.getNodeData().isHasSequence() && ( ( ( node.getNodeData().getSequence().getAccession() != null ) && !ForesterUtil
- .isEmpty( node.getNodeData().getSequence().getAccession().getValue() ) )
- || !ForesterUtil.isEmpty( node.getNodeData().getSequence().getName() )
- || !ForesterUtil.isEmpty( node.getNodeData().getSequence().getSymbol() ) || !ForesterUtil.isEmpty( node
- .getNodeData().getSequence().getMolecularSequence() ) ) );
+ return ( node.getNodeData().isHasSequence() // the node must carry sequence data ...
+ && ( ( ( node.getNodeData().getSequence().getAccession() != null ) && !ForesterUtil.isEmpty( node // ... with at least one of: a non-empty accession value,
+ .getNodeData().getSequence().getAccession().getValue() ) )
+ || !ForesterUtil.isEmpty( node.getNodeData().getSequence().getName() ) || !ForesterUtil // a non-empty sequence name,
+ .isEmpty( node.getNodeData().getSequence().getMolecularSequence() ) ) && Blast // or a non-empty molecular sequence (the sequence symbol is no longer considered) ...
+ .isContainsQueryForBlast( node ) ); // ... and Blast must be able to derive a non-empty query from the node
}
final boolean isCanCollapse() {
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Vector;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
import javax.swing.JApplet;
import org.forester.archaeopteryx.AptxUtil;
import org.forester.archaeopteryx.TreePanel;
import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.Accession;
import org.forester.util.ForesterUtil;
import org.forester.ws.wabi.RestUtil;
-public class Blast {
+public final class Blast {
- final static Pattern identifier_pattern_1 = Pattern.compile( "^([A-Za-z]{2,5})[|=:]([0-9A-Za-z\\.]{4,40})\\s*$" );
- final static Pattern identifier_pattern_2 = Pattern
- .compile( "^([A-Za-z]{2,5})[|=:]([0-9A-Za-z\\.]{4,40})[|,; ].*$" );
-
- public Blast() {
- }
-
- public static void NcbiBlastWeb( final String query, final JApplet applet, final TreePanel p ) {
+ final public static void openNcbiBlastWeb( final String query,
+ final boolean is_nucleic_acids, // true selects the "Nucleotide" page, false the "Proteins" page
+ final JApplet applet,
+ final TreePanel p ) {
//http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Web&PAGE=Proteins&DATABASE=swissprot&QUERY=gi|163848401
final StringBuilder uri_str = new StringBuilder();
- uri_str.append( "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Web&DATABASE=nr&PAGE=Proteins&QUERY=" );
+ uri_str.append( "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Web&DATABASE=nr&PAGE=" ); // the PAGE value is appended next, depending on the sequence kind
+ if ( is_nucleic_acids ) {
+ uri_str.append( "Nucleotide" );
+ }
+ else {
+ uri_str.append( "Proteins" );
+ }
+ uri_str.append( "&QUERY=" );
uri_str.append( query ); // appended verbatim; no URL-encoding is applied in this method
try {
AptxUtil.launchWebBrowser( new URI( uri_str.toString() ), applet != null, applet, "_aptx_blast" ); // second argument is true only when an applet was supplied
}
}
- public static String obtainQueryForBlast( final PhylogenyNode node ) {
+ final public static String obtainQueryForBlast( final PhylogenyNode node ) { // returns "" when no query can be derived from the node's sequence data
String query = "";
if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getMolecularSequence() ) ) {
query = node.getNodeData().getSequence().getMolecularSequence();
query += node.getNodeData().getSequence().getAccession().getValue(); // NOTE(review): as shown, this appends the accession value to the sequence without a null check on getAccession(); lines may be elided in this view -- confirm against the full file
}
else if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getName() ) ) {
- final String name = node.getNodeData().getSequence().getName();
- final Matcher matcher1 = identifier_pattern_1.matcher( name );
- final Matcher matcher2 = identifier_pattern_2.matcher( name );
- String group1 = "";
- String group2 = "";
- if ( matcher1.matches() ) {
- group1 = matcher1.group( 1 );
- group2 = matcher1.group( 2 );
- System.out.println( "1 1=" + group1 );
- System.out.println( "1 2=" + group2 );
- }
- if ( matcher2.matches() ) {
- group1 = matcher2.group( 1 );
- group2 = matcher2.group( 2 );
- System.out.println( "2 1=" + group1 );
- System.out.println( "2 2=" + group2 );
- }
- if ( !ForesterUtil.isEmpty( group1 ) && !ForesterUtil.isEmpty( group2 ) ) {
- query = group1 + "%7C" + group2;
+ final Accession acc = AptxUtil.obtainSequenceAccessionFromName( node.getNodeData().getSequence().getName() ); // name parsing is delegated to AptxUtil; null means the name holds no recognizable accession
+ if ( acc != null ) {
+ query = acc.getSource() + "%7C" + acc.getValue(); // "%7C" is the URL-encoded '|' between source and value
}
}
- System.out.println( query );
return query;
}
- public void ddbjBlast( final String geneName ) {
+ /**
+  * Reports whether a BLAST query can be derived from the given node.
+  *
+  * @param node
+  *            the node whose sequence data is inspected
+  * @return true if obtainQueryForBlast yields a non-empty query for the
+  *         node, false otherwise
+  */
+ final public static boolean isContainsQueryForBlast( final PhylogenyNode node ) {
+     final String query = obtainQueryForBlast( node );
+     if ( ForesterUtil.isEmpty( query ) ) {
+         return false;
+     }
+     return true;
+ }
+
+ final public void ddbjBlast( final String geneName ) {
// Retrieve accession number list which has specified gene name from searchByXMLPath of ARSA. Please click here for details of ARSA.
/*target: Sequence length is between 300bp and 1000bp.
Feature key is CDS.
public static final String VECTOR_PROPERTY_REF = "vector:index=";
public static final String VECTOR_PROPERTY_TYPE = "xsd:decimal";
public static final String UNIPROT_TAX_PROVIDER = "uniprot";
+ public static final String SEQ_TYPE_RNA = "rna"; // sequence type label; registered in SEQUENCE_TYPES by the static initializer
+ public static final String SEQ_TYPE_DNA = "dna"; // sequence type label; registered in SEQUENCE_TYPES by the static initializer
+ public static final String SEQ_TYPE_PROTEIN = "protein"; // sequence type label; registered in SEQUENCE_TYPES by the static initializer
static {
- SEQUENCE_TYPES.add( "rna" );
- SEQUENCE_TYPES.add( "protein" );
- SEQUENCE_TYPES.add( "dna" );
+ SEQUENCE_TYPES.add( SEQ_TYPE_RNA );
+ SEQUENCE_TYPES.add( SEQ_TYPE_PROTEIN );
+ SEQUENCE_TYPES.add( SEQ_TYPE_DNA );
TAXONOMY_RANKS_LIST.add( "domain" );
TAXONOMY_RANKS_LIST.add( "superkingdom" );
TAXONOMY_RANKS_LIST.add( "kingdom" );
}
}
+ /**
+  * Heuristically decides whether a molecular sequence is an amino acid
+  * sequence, based solely on which letters occur in it (case-insensitive).
+  * <p>
+  * The sequence is taken to be amino acid as soon as it contains one of the
+  * letters r, d, e, q, h, k, w, s, m, p, v, f, i or l. The letters f, i and
+  * l (Phe, Ile, Leu) are included because they are amino acid codes that do
+  * not occur in IUPAC nucleotide notation.
+  * <p>
+  * NOTE: several of the tested letters (r, d, h, k, w, s, m, v) are also
+  * IUPAC nucleotide ambiguity codes, so a nucleotide sequence that uses
+  * ambiguity codes is reported as amino acid by this heuristic.
+  *
+  * @param s
+  *            the sequence to inspect; may be null or empty
+  * @return true if at least one of the indicator letters occurs in s, false
+  *         otherwise (including for a null or empty sequence)
+  */
+ public static boolean seqIsLikelyToBeAa( final String s ) {
+     if ( s == null ) {
+         return false;
+     }
+     final String aa_indicators = "rdeqhkwsmpvfil";
+     for( int i = 0; i < s.length(); ++i ) {
+         // Character.toLowerCase is locale-independent, unlike String.toLowerCase().
+         final char c = Character.toLowerCase( s.charAt( i ) );
+         if ( aa_indicators.indexOf( c ) > -1 ) {
+             return true;
+         }
+     }
+     return false;
+ }
+
/**
* This calculates a color. If value is equal to min the returned color is
* minColor, if value is equal to max the returned color is maxColor,