+ /**
+  * Stores the given value in the {@code _symbol} field, replacing whatever
+  * was held there before. The value is assigned as-is, without any check.
+  *
+  * @param symbol the value to assign to {@code _symbol}
+  */
+ private void setSequenceSymbol( final String symbol ) {
+     this._symbol = symbol;
+ }
+
+ /**
+  * Assigns the given value to the {@code _tax_id} field, but only while that
+  * field is still {@code null}; once a non-null value has been stored, later
+  * calls leave it untouched.
+  *
+  * @param tax_id the value to assign if {@code _tax_id} is currently {@code null}
+  */
+ private void setTaxId( final String tax_id ) {
+     // A value is already present: keep it and ignore this call.
+     if ( _tax_id != null ) {
+         return;
+     }
+     _tax_id = tax_id;
+ }
+
+ public static SequenceDatabaseEntry createInstanceFromPlainText( final List<String> lines ) {
+ final UniProtEntry e = new UniProtEntry();
+ for( final String line : lines ) {
+ //System.out.println( line );
+ if ( line.startsWith( "AC" ) ) {
+ e.setAc( SequenceDbWsTools.extractFromTo( line, "AC", ";" ) );
+ }
+ else if ( line.startsWith( "DE" ) && ForesterUtil.isEmpty( e.getSequenceName() ) ) {
+ if ( ( line.indexOf( "RecName:" ) > 0 ) && ( line.indexOf( "Full=" ) > 0 ) ) {
+ e.setSequenceName( SequenceDbWsTools.extractFromTo( line, "Full=", ";" ) );
+ }
+ else if ( ( line.indexOf( "SubName:" ) > 0 ) && ( line.indexOf( "Full=" ) > 0 ) ) {
+ e.setSequenceName( SequenceDbWsTools.extractFromTo( line, "Full=", ";" ) );
+ }
+ }
+ else if ( line.startsWith( "DE" ) && ForesterUtil.isEmpty( e.getSequenceSymbol() ) ) {
+ if ( line.indexOf( "Short=" ) > 0 ) {
+ e.setSequenceSymbol( SequenceDbWsTools.extractFromTo( line, "Short=", ";" ) );
+ }
+ }
+ else if ( line.startsWith( "GN" ) && ForesterUtil.isEmpty( e.getGeneName() ) ) {
+ if ( line.indexOf( "Name=" ) > 0 ) {
+ e.setGeneName( SequenceDbWsTools.extractFromTo( line, "Name=", ";" ) );
+ }
+ }
+ else if ( line.startsWith( "DR" ) ) {
+ if ( line.indexOf( "GO;" ) > 0 ) {
+ final Matcher m = GO_PATTERN.matcher( line );
+ if ( m.find() ) {
+ final String id = m.group( 1 );
+ final String ns_str = m.group( 2 );
+ final String desc = m.group( 3 );
+ String gns = GoNameSpace.BIOLOGICAL_PROCESS_STR;
+ if ( ns_str.equals( "F" ) ) {
+ gns = GoNameSpace.MOLECULAR_FUNCTION_STR;
+ }
+ else if ( ns_str.equals( "C" ) ) {
+ gns = GoNameSpace.CELLULAR_COMPONENT_STR;
+ }
+ e.addGoTerm( new BasicGoTerm( id, desc, gns, false ) );
+ }
+ }
+ else if ( line.indexOf( "PDB;" ) > 0 ) {
+ final Matcher m = PDB_PATTERN.matcher( line );
+ if ( m.find() ) {
+ e.addCrossReference( new Accession( m.group( 1 ), "PDB", m.group( 2 ) ) );
+ }
+ }
+ else if ( line.indexOf( "KEGG;" ) > 0 ) {
+ final Matcher m = KEGG_PATTERN.matcher( line );
+ if ( m.find() ) {
+ e.addCrossReference( new Accession( m.group( 1 ), "KEGG" ) );
+ }
+ }
+ else if ( line.indexOf( "CTD;" ) > 0 ) {
+ final Matcher m = CTD_PATTERN.matcher( line );
+ if ( m.find() ) {
+ e.addCrossReference( new Accession( m.group( 1 ), "CTD" ) );
+ }
+ }
+ else if ( line.indexOf( "MIM;" ) > 0 ) {
+ final Matcher m = MIM_PATTERN.matcher( line );
+ if ( m.find() ) {
+ e.addCrossReference( new Accession( m.group( 1 ), "MIM" ) );
+ }
+ }
+ else if ( line.indexOf( "Orphanet;" ) > 0 ) {
+ final Matcher m = Orphanet_PATTERN.matcher( line );
+ if ( m.find() ) {
+ e.addCrossReference( new Accession( m.group( 1 ), "Orphanet", m.group( 2 ) ) );
+ }
+ }
+ else if ( line.indexOf( "PharmGKB;" ) > 0 ) {
+ final Matcher m = PharmGKB_PATTERN.matcher( line );
+ if ( m.find() ) {
+ e.addCrossReference( new Accession( m.group( 1 ), "PharmGKB" ) );
+ }
+ }
+ else if ( line.indexOf( "BindingDB;" ) > 0 ) {
+ final Matcher m = BindingDB_PATTERN.matcher( line );
+ if ( m.find() ) {
+ e.addCrossReference( new Accession( m.group( 1 ), "BindingDB" ) );
+ }
+ }
+ else if ( line.indexOf( "DrugBank;" ) > 0 ) {
+ final Matcher m = DrugBank_PATTERN.matcher( line );
+ if ( m.find() ) {
+ e.addCrossReference( new Accession( m.group( 1 ), "DrugBank", m.group( 2 ) ) );
+ }
+ }
+ else if ( line.indexOf( "NextBio;" ) > 0 ) {
+ final Matcher m = NextBio_PATTERN.matcher( line );
+ if ( m.find() ) {
+ e.addCrossReference( new Accession( m.group( 1 ), "NextBio" ) );
+ }
+ }
+ else if ( line.indexOf( "Reactome;" ) > 0 ) {
+ final Matcher m = Reactome_PATTERN.matcher( line );
+ if ( m.find() ) {
+ e.addCrossReference( new Accession( m.group( 1 ), "Reactome", m.group( 2 ) ) );
+ }
+ }
+ }
+ else if ( line.startsWith( "OS" ) ) {
+ if ( line.indexOf( "(" ) > 0 ) {
+ e.setOsScientificName( SequenceDbWsTools.extractFromTo( line, "OS", "(" ) );
+ }
+ else {
+ e.setOsScientificName( SequenceDbWsTools.extractFromTo( line, "OS", "." ) );
+ }
+ }
+ else if ( line.startsWith( "OX" ) ) {
+ if ( line.indexOf( "NCBI_TaxID=" ) > 0 ) {
+ e.setTaxId( SequenceDbWsTools.extractFromTo( line, "NCBI_TaxID=", ";" ) );
+ }
+ }
+ }
+ return e;