X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fws%2Fseqdb%2FUniProtEntry.java;h=a3ea1e383c86bc63957998785640a78e510e0540;hb=10297bd8b8a4b4ab198a17a42fc6ff24ae2ed49b;hp=00f4b67ebc064dcdd537728b811c7e478668bc16;hpb=3b5c2fab212d221196ed1734ee9c623f45310bb7;p=jalview.git diff --git a/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java b/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java index 00f4b67..a3ea1e3 100644 --- a/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java +++ b/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java @@ -36,6 +36,8 @@ import org.forester.go.GoNameSpace; import org.forester.go.GoTerm; import org.forester.phylogeny.data.Accession; import org.forester.phylogeny.data.Annotation; +import org.forester.sequence.BasicSequence; +import org.forester.sequence.MolecularSequence; import org.forester.util.ForesterUtil; public final class UniProtEntry implements SequenceDatabaseEntry { @@ -51,6 +53,7 @@ public final class UniProtEntry implements SequenceDatabaseEntry { public final static Pattern PDB_PATTERN = Pattern.compile( "PDB;\\s+([0-9A-Z]{4});\\s+([^;]+)" ); public final static Pattern PharmGKB_PATTERN = Pattern.compile( "PharmGKB;\\s+([0-9A-Z]+);" ); public final static Pattern Reactome_PATTERN = Pattern.compile( "Reactome;\\s+([0-9A-Z]+);\\s+([^\\.]+)" ); + public final static Pattern HGNC_PATTERN = Pattern.compile( "HGNC;\\s+HGNC:(\\d+);" ); private String _ac; private SortedSet _cross_references; private String _gene_name; @@ -59,6 +62,7 @@ public final class UniProtEntry implements SequenceDatabaseEntry { private String _os_scientific_name; private String _symbol; private String _tax_id; + private MolecularSequence _mol_seq; private UniProtEntry() { } @@ -119,7 +123,7 @@ public final class UniProtEntry implements SequenceDatabaseEntry { && ForesterUtil.isEmpty( getTaxonomyScientificName() ) && ForesterUtil.isEmpty( getSequenceSymbol() ) && ForesterUtil.isEmpty( getGeneName() ) && ForesterUtil.isEmpty( getTaxonomyIdentifier() ) && ForesterUtil.isEmpty( getSequenceSymbol() ) && ( ( getGoTerms() == null ) || getGoTerms().isEmpty() ) && ( ( getCrossReferences() == null ) || getCrossReferences() - .isEmpty() ) ); + .isEmpty() ) ); } private void addCrossReference( final Accession accession ) { @@ -142,6 +146,10 @@ public final class UniProtEntry implements SequenceDatabaseEntry { } } + private void setMolecularSequence( final MolecularSequence mol_seq ) { + _mol_seq = mol_seq; + } + private void setGeneName( final String gene_name ) { if ( _gene_name == null ) { _gene_name = gene_name; @@ -172,6 +180,9 @@ public final class UniProtEntry implements SequenceDatabaseEntry { public static SequenceDatabaseEntry createInstanceFromPlainText( final List lines ) { final UniProtEntry e = new UniProtEntry(); + boolean saw_sq = false; + final StringBuffer sq_buffer = new StringBuffer(); + boolean is_aa = false; for( final String line : lines ) { //System.out.println( line ); if ( line.startsWith( "AC" ) ) { @@ -272,6 +283,12 @@ public final class UniProtEntry implements SequenceDatabaseEntry { e.addCrossReference( new Accession( m.group( 1 ), "Reactome", m.group( 2 ) ) ); } } + else if ( line.indexOf( "HGNC;" ) > 0 ) { + final Matcher m = HGNC_PATTERN.matcher( line ); + if ( m.find() ) { + e.addCrossReference( new Accession( m.group( 1 ), "HGNC" ) ); + } + } } else if ( line.startsWith( "OS" ) ) { if ( line.indexOf( "(" ) > 0 ) { @@ -286,6 +303,18 @@ public final class UniProtEntry implements SequenceDatabaseEntry { e.setTaxId( SequenceDbWsTools.extractFromTo( line, "NCBI_TaxID=", ";" ) ); } } + else if ( line.startsWith( "SQ" ) ) { + saw_sq = true; + if ( line.contains( "AA;" ) ) { + is_aa = true; + } + } + else if ( saw_sq && line.startsWith( " " ) ) { + sq_buffer.append( line.replaceAll( "\\s+", "" ) ); + } + } + if ( ( sq_buffer.length() > 0 ) && is_aa ) { + e.setMolecularSequence( BasicSequence.createAaSequence( e.getAccession(), sq_buffer.toString() ) ); } return e; } @@ -304,4 +333,9 @@ public final class UniProtEntry implements SequenceDatabaseEntry { public String getChromosome() { return null; } + + @Override + public MolecularSequence getMolecularSequence() { + return _mol_seq; + } }