final Matcher link_m = LINK_TAXA_PATTERN.matcher( line );
if ( link_m.lookingAt() ) {
final String link = link_m.group( 1 );
- System.out.println( "link taxa:" + link );
+ //System.out.println( "link taxa:" + link );
}
}
else if ( line_lc.startsWith( end ) || line_lc.startsWith( endblock ) ) {
final Matcher link_m = LINK_TAXA_PATTERN.matcher( line );
if ( link_m.lookingAt() ) {
final String link = link_m.group( 1 );
- System.out.println( "link taxa:" + link );
+ //System.out.println( "link taxa:" + link );
}
}
else {
final Matcher datatype_matcher = DATATYPE_PATTERN.matcher( line_lc );
if ( datatype_matcher.find() ) {
_datatype = datatype_matcher.group( 1 );
- System.out.println( _datatype );
+ //System.out.println( _datatype );
}
else {
if ( ( _datatype != null )
s = BasicSequence.createRnaSequence( id, seq );
}
_seqs.put( id, s );
- System.out.println( s );
+ //System.out.println( s );
}
}
}
private final static String PATH_TO_TEST_DATA = System.getProperty( "user.dir" )
+ ForesterUtil.getFileSeparator() + "test_data"
+ ForesterUtil.getFileSeparator();
- private final static boolean PERFORM_DB_TESTS = false;
+ private final static boolean PERFORM_DB_TESTS = true;
private static final boolean PERFORM_WEB_TREE_ACCESS = true;
private static final String PHYLOXML_LOCAL_XSD = PATH_TO_RESOURCES + "phyloxml_schema/"
+ ForesterConstants.PHYLO_XML_VERSION + "/"
if ( !entry.getTaxonomyIdentifier().equals( "9986" ) ) {
return false;
}
+ if ( !entry
+ .getMolecularSequence()
+ .getMolecularSequenceAsString()
+ .startsWith( "MALLHSARVLSGVASAFHPGLAAAASARASSWWAHVEMGPPDPILGVTEAYKRDTNSKKMNLGVGAYRDDNGKPYVLPSVRKAEAQIAAKGLDKEYLPIGGLAEFCRASAELALGENSEV" )
+ || !entry.getMolecularSequence().getMolecularSequenceAsString().endsWith( "LAHAIHQVTK" ) ) {
+ System.out.println( entry.getMolecularSequence().getMolecularSequenceAsString() );
+ return false;
+ }
}
catch ( final IOException e ) {
System.out.println();
import org.forester.go.GoTerm;
import org.forester.phylogeny.data.Accession;
import org.forester.phylogeny.data.Annotation;
+import org.forester.sequence.MolecularSequence;
import org.forester.util.ForesterUtil;
public final class EbiDbEntry implements SequenceDatabaseEntry {
}
sb.append( s.trim() );
}
+
+ @Override
+ public MolecularSequence getMolecularSequence() {
+ // TODO: stub -- no molecular sequence is provided for EBI entries here; always returns null, so callers must null-check.
+ return null;
+ }
}
import org.forester.go.GoTerm;
import org.forester.phylogeny.data.Accession;
import org.forester.phylogeny.data.Annotation;
+import org.forester.sequence.MolecularSequence;
public interface SequenceDatabaseEntry {
public String getMap();
public String getChromosome();
+
+ public MolecularSequence getMolecularSequence(); // may return null when an implementation has no sequence for the entry
}
\ No newline at end of file
import org.forester.phylogeny.data.Sequence;
import org.forester.phylogeny.data.Taxonomy;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.sequence.MolecularSequence.TYPE;
import org.forester.util.ForesterUtil;
import org.forester.util.SequenceAccessionTools;
// Eat this exception.
}
}
+ if ( ( db_entry.getMolecularSequence() != null )
+ && !ForesterUtil.isEmpty( db_entry.getMolecularSequence().getMolecularSequenceAsString() ) ) {
+ seq.setMolecularSequence( db_entry.getMolecularSequence().getMolecularSequenceAsString() );
+ if ( db_entry.getMolecularSequence().getType() == TYPE.AA ) {
+ seq.setType( "protein" );
+ }
+ else if ( db_entry.getMolecularSequence().getType() == TYPE.DNA ) {
+ seq.setType( "dna" );
+ }
+ else if ( db_entry.getMolecularSequence().getType() == TYPE.RNA ) {
+ seq.setType( "rna" );
+ }
+ }
if ( ( db_entry.getGoTerms() != null ) && !db_entry.getGoTerms().isEmpty() ) {
for( final GoTerm go : db_entry.getGoTerms() ) {
final Annotation ann = new Annotation( go.getGoId().getId() );
import org.forester.go.GoTerm;
import org.forester.phylogeny.data.Accession;
import org.forester.phylogeny.data.Annotation;
+import org.forester.sequence.BasicSequence;
+import org.forester.sequence.MolecularSequence;
import org.forester.util.ForesterUtil;
public final class UniProtEntry implements SequenceDatabaseEntry {
private String _os_scientific_name;
private String _symbol;
private String _tax_id;
+ private MolecularSequence _mol_seq;
private UniProtEntry() {
}
}
}
+ private void setMolecularSequence( final MolecularSequence mol_seq ) {
+ _mol_seq = mol_seq; // stored as-is (no null check, no copy); read back through getMolecularSequence()
+ }
+
private void setGeneName( final String gene_name ) {
if ( _gene_name == null ) {
_gene_name = gene_name;
public static SequenceDatabaseEntry createInstanceFromPlainText( final List<String> lines ) {
final UniProtEntry e = new UniProtEntry();
+ boolean saw_sq = false;
+ final StringBuffer sq_buffer = new StringBuffer();
+ boolean is_aa = false;
for( final String line : lines ) {
//System.out.println( line );
if ( line.startsWith( "AC" ) ) {
e.setTaxId( SequenceDbWsTools.extractFromTo( line, "NCBI_TaxID=", ";" ) );
}
}
+ else if ( line.startsWith( "SQ" ) ) {
+ saw_sq = true;
+ if ( line.contains( "AA;" ) ) {
+ is_aa = true;
+ }
+ }
+ else if ( saw_sq && line.startsWith( " " ) ) {
+ sq_buffer.append( line.replaceAll( "\\s+", "" ) );
+ }
+ }
+ if ( ( sq_buffer.length() > 0 ) && is_aa ) {
+ e.setMolecularSequence( BasicSequence.createAaSequence( e.getAccession(), sq_buffer.toString() ) );
}
return e;
}
public String getChromosome() {
// Always returns null in this implementation.
return null;
}
+
+ @Override
+ public MolecularSequence getMolecularSequence() {
+ return _mol_seq; // null unless setMolecularSequence() was called; createInstanceFromPlainText only does so for a non-empty SQ section flagged "AA;"
+ }
}