in progress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Wed, 27 Aug 2014 23:42:15 +0000 (23:42 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Wed, 27 Aug 2014 23:42:15 +0000 (23:42 +0000)
forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java
forester/java/src/org/forester/test/Test.java
forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java
forester/java/src/org/forester/ws/seqdb/SequenceDatabaseEntry.java
forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java
forester/java/src/org/forester/ws/seqdb/UniProtEntry.java

index 4617a8f..c90adb5 100644 (file)
@@ -292,7 +292,7 @@ public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, P
                         final Matcher link_m = LINK_TAXA_PATTERN.matcher( line );
                         if ( link_m.lookingAt() ) {
                             final String link = link_m.group( 1 );
-                            System.out.println( "link taxa:" + link );
+                            //System.out.println( "link taxa:" + link );
                         }
                     }
                     else if ( line_lc.startsWith( end ) || line_lc.startsWith( endblock ) ) {
@@ -398,14 +398,14 @@ public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, P
                         final Matcher link_m = LINK_TAXA_PATTERN.matcher( line );
                         if ( link_m.lookingAt() ) {
                             final String link = link_m.group( 1 );
-                            System.out.println( "link taxa:" + link );
+                            //System.out.println( "link taxa:" + link );
                         }
                     }
                     else {
                         final Matcher datatype_matcher = DATATYPE_PATTERN.matcher( line_lc );
                         if ( datatype_matcher.find() ) {
                             _datatype = datatype_matcher.group( 1 );
-                            System.out.println( _datatype );
+                            //System.out.println( _datatype );
                         }
                         else {
                             if ( ( _datatype != null )
@@ -430,7 +430,7 @@ public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, P
                                         s = BasicSequence.createRnaSequence( id, seq );
                                     }
                                     _seqs.put( id, s );
-                                    System.out.println( s );
+                                    //System.out.println( s );
                                 }
                             }
                         }
index ba18142..35b8600 100644 (file)
@@ -139,7 +139,7 @@ public final class Test {
     private final static String  PATH_TO_TEST_DATA         = System.getProperty( "user.dir" )
                                                                    + ForesterUtil.getFileSeparator() + "test_data"
                                                                    + ForesterUtil.getFileSeparator();
-    private final static boolean PERFORM_DB_TESTS          = false;
+    private final static boolean PERFORM_DB_TESTS          = true;
     private static final boolean PERFORM_WEB_TREE_ACCESS   = true;
     private static final String  PHYLOXML_LOCAL_XSD        = PATH_TO_RESOURCES + "phyloxml_schema/"
                                                                    + ForesterConstants.PHYLO_XML_VERSION + "/"
@@ -12695,6 +12695,14 @@ public final class Test {
             if ( !entry.getTaxonomyIdentifier().equals( "9986" ) ) {
                 return false;
             }
+            if ( !entry
+                    .getMolecularSequence()
+                    .getMolecularSequenceAsString()
+                    .startsWith( "MALLHSARVLSGVASAFHPGLAAAASARASSWWAHVEMGPPDPILGVTEAYKRDTNSKKMNLGVGAYRDDNGKPYVLPSVRKAEAQIAAKGLDKEYLPIGGLAEFCRASAELALGENSEV" )
+                    || !entry.getMolecularSequence().getMolecularSequenceAsString().endsWith( "LAHAIHQVTK" ) ) {
+                System.out.println( entry.getMolecularSequence().getMolecularSequenceAsString() );
+                return false;
+            }
         }
         catch ( final IOException e ) {
             System.out.println();
index 00c52fc..df739ed 100644 (file)
@@ -34,6 +34,7 @@ import java.util.regex.Pattern;
 import org.forester.go.GoTerm;
 import org.forester.phylogeny.data.Accession;
 import org.forester.phylogeny.data.Annotation;
+import org.forester.sequence.MolecularSequence;
 import org.forester.util.ForesterUtil;
 
 public final class EbiDbEntry implements SequenceDatabaseEntry {
@@ -675,4 +676,10 @@ public final class EbiDbEntry implements SequenceDatabaseEntry {
         }
         sb.append( s.trim() );
     }
+
+    @Override
+    public MolecularSequence getMolecularSequence() {
+        // TODO: placeholder implementation -- always returns null (no molecular sequence is supplied by this class yet).
+        return null;
+    }
 }
index 808da38..d1cd293 100644 (file)
@@ -30,6 +30,7 @@ import java.util.SortedSet;
 import org.forester.go.GoTerm;
 import org.forester.phylogeny.data.Accession;
 import org.forester.phylogeny.data.Annotation;
+import org.forester.sequence.MolecularSequence;
 
 public interface SequenceDatabaseEntry {
 
@@ -58,4 +59,6 @@ public interface SequenceDatabaseEntry {
     public String getMap();
 
     public String getChromosome();
+
+    public MolecularSequence getMolecularSequence();
 }
\ No newline at end of file
index ed6387e..cd0ba0e 100644 (file)
@@ -49,6 +49,7 @@ import org.forester.phylogeny.data.Identifier;
 import org.forester.phylogeny.data.Sequence;
 import org.forester.phylogeny.data.Taxonomy;
 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.sequence.MolecularSequence.TYPE;
 import org.forester.util.ForesterUtil;
 import org.forester.util.SequenceAccessionTools;
 
@@ -380,6 +381,19 @@ public final class SequenceDbWsTools {
                     // Eat this exception.
                 }
             }
+            if ( ( db_entry.getMolecularSequence() != null )
+                    && !ForesterUtil.isEmpty( db_entry.getMolecularSequence().getMolecularSequenceAsString() ) ) {
+                seq.setMolecularSequence( db_entry.getMolecularSequence().getMolecularSequenceAsString() );
+                if ( db_entry.getMolecularSequence().getType() == TYPE.AA ) {
+                    seq.setType( "protein" );
+                }
+                else if ( db_entry.getMolecularSequence().getType() == TYPE.DNA ) {
+                    seq.setType( "dna" );
+                }
+                else if ( db_entry.getMolecularSequence().getType() == TYPE.RNA ) {
+                    seq.setType( "rna" );
+                }
+            }
             if ( ( db_entry.getGoTerms() != null ) && !db_entry.getGoTerms().isEmpty() ) {
                 for( final GoTerm go : db_entry.getGoTerms() ) {
                     final Annotation ann = new Annotation( go.getGoId().getId() );
index 00f4b67..fa70bf3 100644 (file)
@@ -36,6 +36,8 @@ import org.forester.go.GoNameSpace;
 import org.forester.go.GoTerm;
 import org.forester.phylogeny.data.Accession;
 import org.forester.phylogeny.data.Annotation;
+import org.forester.sequence.BasicSequence;
+import org.forester.sequence.MolecularSequence;
 import org.forester.util.ForesterUtil;
 
 public final class UniProtEntry implements SequenceDatabaseEntry {
@@ -59,6 +61,7 @@ public final class UniProtEntry implements SequenceDatabaseEntry {
     private String               _os_scientific_name;
     private String               _symbol;
     private String               _tax_id;
+    private MolecularSequence    _mol_seq;
 
     private UniProtEntry() {
     }
@@ -142,6 +145,10 @@ public final class UniProtEntry implements SequenceDatabaseEntry {
         }
     }
 
+    private void setMolecularSequence( final MolecularSequence mol_seq ) { // stores the value handed back by getMolecularSequence()
+        _mol_seq = mol_seq; // plain overwrite: no "== null" guard here, unlike setGeneName
+    }
+
     private void setGeneName( final String gene_name ) {
         if ( _gene_name == null ) {
             _gene_name = gene_name;
@@ -172,6 +179,9 @@ public final class UniProtEntry implements SequenceDatabaseEntry {
 
     public static SequenceDatabaseEntry createInstanceFromPlainText( final List<String> lines ) {
         final UniProtEntry e = new UniProtEntry();
+        boolean saw_sq = false;
+        final StringBuffer sq_buffer = new StringBuffer();
+        boolean is_aa = false;
         for( final String line : lines ) {
             //System.out.println( line );
             if ( line.startsWith( "AC" ) ) {
@@ -286,6 +296,18 @@ public final class UniProtEntry implements SequenceDatabaseEntry {
                     e.setTaxId( SequenceDbWsTools.extractFromTo( line, "NCBI_TaxID=", ";" ) );
                 }
             }
+            else if ( line.startsWith( "SQ" ) ) {
+                saw_sq = true;
+                if ( line.contains( "AA;" ) ) {
+                    is_aa = true;
+                }
+            }
+            else if ( saw_sq && line.startsWith( " " ) ) {
+                sq_buffer.append( line.replaceAll( "\\s+", "" ) );
+            }
+        }
+        if ( ( sq_buffer.length() > 0 ) && is_aa ) {
+            e.setMolecularSequence( BasicSequence.createAaSequence( e.getAccession(), sq_buffer.toString() ) );
         }
         return e;
     }
@@ -304,4 +326,9 @@ public final class UniProtEntry implements SequenceDatabaseEntry {
     public String getChromosome() {
         return null;
     }
+
+    @Override
+    public MolecularSequence getMolecularSequence() { // may be null: in the visible hunks _mol_seq has no initializer and is set only via setMolecularSequence
+        return _mol_seq;
+    }
 }