work on: add ability to perform GSDI in applets
[jalview.git] / forester / java / src / org / forester / archaeopteryx / AptxUtil.java
index 627e152..89a0cda 100644 (file)
@@ -36,10 +36,12 @@ import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.io.UnsupportedEncodingException;
 import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Method;
 import java.net.URI;
 import java.net.URL;
+import java.net.URLEncoder;
 import java.text.ParseException;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -73,6 +75,7 @@ import org.forester.io.parsers.tol.TolParser;
 import org.forester.io.parsers.util.ParserUtils;
 import org.forester.phylogeny.Phylogeny;
 import org.forester.phylogeny.PhylogenyMethods;
+import org.forester.phylogeny.PhylogenyMethods.DESCENDANT_SORT_PRIORITY;
 import org.forester.phylogeny.PhylogenyNode;
 import org.forester.phylogeny.data.Accession;
 import org.forester.phylogeny.data.BranchColor;
@@ -83,17 +86,13 @@ import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
 import org.forester.phylogeny.iterators.PreorderTreeIterator;
 import org.forester.util.AsciiHistogram;
 import org.forester.util.DescriptiveStatistics;
+import org.forester.util.ForesterConstants;
 import org.forester.util.ForesterUtil;
+import org.forester.util.SequenceIdParser;
 import org.forester.ws.seqdb.UniProtTaxonomy;
 
 public final class AptxUtil {
 
-    final static String           UNIPROT_KB                     = "http://www.uniprot.org/uniprot/";
-    final static Pattern          UNIPROT_KB_PATTERN_1             = Pattern
-            .compile( "\\bsp|tr.\\S([A-Z0-9]{5,6})\\b" );
-
-    final static Pattern          UNIPROT_KB_PATTERN_2             = Pattern
-                                                                         .compile( "\\b[A-Z0-9]{5,6}_[A-Z9][A-Z]{2}[A-Z0-9]{2}|RAT|PIG|PEA\\b" );
     private final static Pattern  seq_identifier_pattern_1       = Pattern
                                                                          .compile( "^([A-Za-z]{2,5})[|=:]([0-9A-Za-z_\\.]{5,40})\\s*$" );
     private final static Pattern  seq_identifier_pattern_2       = Pattern
@@ -104,6 +103,69 @@ public final class AptxUtil {
         Arrays.sort( AVAILABLE_FONT_FAMILIES_SORTED );
     }
 
+    public final static String createUriForSeqWeb( final PhylogenyNode node,
+                                                   final Configuration conf,
+                                                   final TreePanel tp ) {
+        String uri_str = null;
+        final String upkb = ForesterUtil.extractUniProtKbProteinSeqIdentifier( node );
+        if ( !ForesterUtil.isEmpty( upkb ) ) {
+            try {
+                uri_str = ForesterUtil.UNIPROT_KB + URLEncoder.encode( upkb, ForesterConstants.UTF8 );
+            }
+            catch ( final UnsupportedEncodingException e ) {
+                showErrorMessage( tp, e.toString() );
+                e.printStackTrace();
+            }
+        }
+        if ( ForesterUtil.isEmpty( uri_str ) ) {
+            final String v = ForesterUtil.extractGenbankAccessor( node );
+            if ( !ForesterUtil.isEmpty( v ) ) {
+                try {
+                    if ( SequenceIdParser.isProtein( v ) ) {
+                        uri_str = ForesterUtil.NCBI_PROTEIN + URLEncoder.encode( v, ForesterConstants.UTF8 );
+                    }
+                    else {
+                        uri_str = ForesterUtil.NCBI_NUCCORE + URLEncoder.encode( v, ForesterConstants.UTF8 );
+                    }
+                }
+                catch ( final UnsupportedEncodingException e ) {
+                    showErrorMessage( tp, e.toString() );
+                    e.printStackTrace();
+                }
+            }
+        }
+        if ( ForesterUtil.isEmpty( uri_str ) ) {
+            final String v = ForesterUtil.extractRefSeqAccessorAccessor( node );
+            if ( !ForesterUtil.isEmpty( v ) ) {
+                try {
+                    if ( SequenceIdParser.isProtein( v ) ) {
+                        uri_str = ForesterUtil.NCBI_PROTEIN + URLEncoder.encode( v, ForesterConstants.UTF8 );
+                    }
+                    else {
+                        uri_str = ForesterUtil.NCBI_NUCCORE + URLEncoder.encode( v, ForesterConstants.UTF8 );
+                    }
+                }
+                catch ( final UnsupportedEncodingException e ) {
+                    showErrorMessage( tp, e.toString() );
+                    e.printStackTrace();
+                }
+            }
+        }
+        if ( ForesterUtil.isEmpty( uri_str ) ) {
+            final String v = ForesterUtil.extractGInumber( node );
+            if ( !ForesterUtil.isEmpty( v ) ) {
+                try {
+                    uri_str = ForesterUtil.NCBI_GI + URLEncoder.encode( v, ForesterConstants.UTF8 );
+                }
+                catch ( final UnsupportedEncodingException e ) {
+                    showErrorMessage( tp, e.toString() );
+                    e.printStackTrace();
+                }
+            }
+        }
+        return uri_str;
+    }
+
     public static MaskFormatter createMaskFormatter( final String s ) {
         MaskFormatter formatter = null;
         try {
@@ -966,8 +1028,9 @@ public final class AptxUtil {
                                                      final boolean phyloxml_validate_against_xsd,
                                                      final boolean replace_underscores,
                                                      final boolean internal_numbers_are_confidences,
-                                                     final TAXONOMY_EXTRACTION taxonomy_extraction )
-            throws FileNotFoundException, IOException {
+                                                     final TAXONOMY_EXTRACTION taxonomy_extraction,
+                                                     final boolean midpoint_reroot ) throws FileNotFoundException,
+            IOException {
         final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
         final PhylogenyParser parser;
         boolean nhx_or_nexus = false;
@@ -996,6 +1059,12 @@ public final class AptxUtil {
                 PhylogenyMethods.transferInternalNodeNamesToConfidence( phy );
             }
         }
+        if ( midpoint_reroot ) {
+            for( final Phylogeny phy : phys ) {
+                PhylogenyMethods.midpointRoot( phy );
+                PhylogenyMethods.orderAppearance( phy.getRoot(), true, true, DESCENDANT_SORT_PRIORITY.NODE_NAME );
+            }
+        }
         return phys;
     }