From: cmzmasek@gmail.com Date: Wed, 27 Aug 2014 23:42:15 +0000 (+0000) Subject: in progress X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=3de7dbc5956ac31ed06741b197a2f0d15608487d;p=jalview.git in progress --- diff --git a/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java b/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java index 4617a8f..c90adb5 100644 --- a/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java +++ b/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java @@ -292,7 +292,7 @@ public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, P final Matcher link_m = LINK_TAXA_PATTERN.matcher( line ); if ( link_m.lookingAt() ) { final String link = link_m.group( 1 ); - System.out.println( "link taxa:" + link ); + //System.out.println( "link taxa:" + link ); } } else if ( line_lc.startsWith( end ) || line_lc.startsWith( endblock ) ) { @@ -398,14 +398,14 @@ public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, P final Matcher link_m = LINK_TAXA_PATTERN.matcher( line ); if ( link_m.lookingAt() ) { final String link = link_m.group( 1 ); - System.out.println( "link taxa:" + link ); + //System.out.println( "link taxa:" + link ); } } else { final Matcher datatype_matcher = DATATYPE_PATTERN.matcher( line_lc ); if ( datatype_matcher.find() ) { _datatype = datatype_matcher.group( 1 ); - System.out.println( _datatype ); + //System.out.println( _datatype ); } else { if ( ( _datatype != null ) @@ -430,7 +430,7 @@ public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, P s = BasicSequence.createRnaSequence( id, seq ); } _seqs.put( id, s ); - System.out.println( s ); + //System.out.println( s ); } } } diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index ba18142..35b8600 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -139,7 +139,7 @@ public final class Test { private final static String PATH_TO_TEST_DATA = System.getProperty( "user.dir" ) + ForesterUtil.getFileSeparator() + "test_data" + ForesterUtil.getFileSeparator(); - private final static boolean PERFORM_DB_TESTS = false; + private final static boolean PERFORM_DB_TESTS = true; private static final boolean PERFORM_WEB_TREE_ACCESS = true; private static final String PHYLOXML_LOCAL_XSD = PATH_TO_RESOURCES + "phyloxml_schema/" + ForesterConstants.PHYLO_XML_VERSION + "/" @@ -12695,6 +12695,14 @@ public final class Test { if ( !entry.getTaxonomyIdentifier().equals( "9986" ) ) { return false; } + if ( !entry + .getMolecularSequence() + .getMolecularSequenceAsString() + .startsWith( "MALLHSARVLSGVASAFHPGLAAAASARASSWWAHVEMGPPDPILGVTEAYKRDTNSKKMNLGVGAYRDDNGKPYVLPSVRKAEAQIAAKGLDKEYLPIGGLAEFCRASAELALGENSEV" ) + || !entry.getMolecularSequence().getMolecularSequenceAsString().endsWith( "LAHAIHQVTK" ) ) { + System.out.println( entry.getMolecularSequence().getMolecularSequenceAsString() ); + return false; + } } catch ( final IOException e ) { System.out.println(); diff --git a/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java b/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java index 00c52fc..df739ed 100644 --- a/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java +++ b/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java @@ -34,6 +34,7 @@ import java.util.regex.Pattern; import org.forester.go.GoTerm; import org.forester.phylogeny.data.Accession; import org.forester.phylogeny.data.Annotation; +import org.forester.sequence.MolecularSequence; import org.forester.util.ForesterUtil; public final class EbiDbEntry implements SequenceDatabaseEntry { @@ -675,4 +676,10 @@ public final class EbiDbEntry implements SequenceDatabaseEntry { } sb.append( s.trim() ); } + + @Override + public MolecularSequence getMolecularSequence() { + // TODO Auto-generated method stub + return null; + } } diff --git a/forester/java/src/org/forester/ws/seqdb/SequenceDatabaseEntry.java b/forester/java/src/org/forester/ws/seqdb/SequenceDatabaseEntry.java index 808da38..d1cd293 100644 --- a/forester/java/src/org/forester/ws/seqdb/SequenceDatabaseEntry.java +++ b/forester/java/src/org/forester/ws/seqdb/SequenceDatabaseEntry.java @@ -30,6 +30,7 @@ import java.util.SortedSet; import org.forester.go.GoTerm; import org.forester.phylogeny.data.Accession; import org.forester.phylogeny.data.Annotation; +import org.forester.sequence.MolecularSequence; public interface SequenceDatabaseEntry { @@ -58,4 +59,6 @@ public interface SequenceDatabaseEntry { public String getMap(); public String getChromosome(); + + public MolecularSequence getMolecularSequence(); } \ No newline at end of file diff --git a/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java b/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java index ed6387e..cd0ba0e 100644 --- a/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java +++ b/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java @@ -49,6 +49,7 @@ import org.forester.phylogeny.data.Identifier; import org.forester.phylogeny.data.Sequence; import org.forester.phylogeny.data.Taxonomy; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; +import org.forester.sequence.MolecularSequence.TYPE; import org.forester.util.ForesterUtil; import org.forester.util.SequenceAccessionTools; @@ -380,6 +381,19 @@ public final class SequenceDbWsTools { // Eat this exception. } } + if ( ( db_entry.getMolecularSequence() != null ) + && !ForesterUtil.isEmpty( db_entry.getMolecularSequence().getMolecularSequenceAsString() ) ) { + seq.setMolecularSequence( db_entry.getMolecularSequence().getMolecularSequenceAsString() ); + if ( db_entry.getMolecularSequence().getType() == TYPE.AA ) { + seq.setType( "protein" ); + } + else if ( db_entry.getMolecularSequence().getType() == TYPE.DNA ) { + seq.setType( "dna" ); + } + else if ( db_entry.getMolecularSequence().getType() == TYPE.RNA ) { + seq.setType( "rna" ); + } + } if ( ( db_entry.getGoTerms() != null ) && !db_entry.getGoTerms().isEmpty() ) { for( final GoTerm go : db_entry.getGoTerms() ) { final Annotation ann = new Annotation( go.getGoId().getId() ); diff --git a/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java b/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java index 00f4b67..fa70bf3 100644 --- a/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java +++ b/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java @@ -36,6 +36,8 @@ import org.forester.go.GoNameSpace; import org.forester.go.GoTerm; import org.forester.phylogeny.data.Accession; import org.forester.phylogeny.data.Annotation; +import org.forester.sequence.BasicSequence; +import org.forester.sequence.MolecularSequence; import org.forester.util.ForesterUtil; public final class UniProtEntry implements SequenceDatabaseEntry { @@ -59,6 +61,7 @@ public final class UniProtEntry implements SequenceDatabaseEntry { private String _os_scientific_name; private String _symbol; private String _tax_id; + private MolecularSequence _mol_seq; private UniProtEntry() { } @@ -142,6 +145,10 @@ public final class UniProtEntry implements SequenceDatabaseEntry { } } + private void setMolecularSequence( final MolecularSequence mol_seq ) { + _mol_seq = mol_seq; + } + private void setGeneName( final String gene_name ) { if ( _gene_name == null ) { _gene_name = gene_name; @@ -172,6 +179,9 @@ public final class UniProtEntry implements SequenceDatabaseEntry { public static SequenceDatabaseEntry createInstanceFromPlainText( final List lines ) { final UniProtEntry e = new UniProtEntry(); + boolean saw_sq = false; + final StringBuffer sq_buffer = new StringBuffer(); + boolean is_aa = false; for( final String line : lines ) { //System.out.println( line ); if ( line.startsWith( "AC" ) ) { @@ -286,6 +296,18 @@ public final class UniProtEntry implements SequenceDatabaseEntry { e.setTaxId( SequenceDbWsTools.extractFromTo( line, "NCBI_TaxID=", ";" ) ); } } + else if ( line.startsWith( "SQ" ) ) { + saw_sq = true; + if ( line.contains( "AA;" ) ) { + is_aa = true; + } + } + else if ( saw_sq && line.startsWith( " " ) ) { + sq_buffer.append( line.replaceAll( "\\s+", "" ) ); + } + } + if ( ( sq_buffer.length() > 0 ) && is_aa ) { + e.setMolecularSequence( BasicSequence.createAaSequence( e.getAccession(), sq_buffer.toString() ) ); } return e; } @@ -304,4 +326,9 @@ public final class UniProtEntry implements SequenceDatabaseEntry { public String getChromosome() { return null; } + + @Override + public MolecularSequence getMolecularSequence() { + return _mol_seq; + } }