From ea7f74cc42770ce2f0dbb6745cb221eabf98e181 Mon Sep 17 00:00:00 2001 From: jprocter Date: Tue, 14 Jun 2005 15:44:03 +0000 Subject: [PATCH] Arbitrary decision to keep friendly id name when reading in uniprot headered fastA files. --- src/jalview/io/FastaFile.java | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/jalview/io/FastaFile.java b/src/jalview/io/FastaFile.java index 7e7e06b..adfa967 100755 --- a/src/jalview/io/FastaFile.java +++ b/src/jalview/io/FastaFile.java @@ -53,6 +53,9 @@ public class FastaFile extends AlignFile { while ((line = nextLine()) != null) { if (line.length() > 0) { // Do we have an id line? + // JBPNote - this code needs to be standardised to EBI/whatever for the + // >dbref/dbref/dbref|refid1|refid2|refid3 'human-readable' style of naming (should it really exist) + if (line.substring(0, 1).equals(">")) { if (count != 0) { if (sstart != 0) { @@ -73,17 +76,18 @@ public class FastaFile extends AlignFile { id = id.substring(1); com.stevesoft.pat.Regex dbId = new com.stevesoft.pat.Regex( - "[A-Za-z-]+/[A-Za-z-]+\\|(\\w+)\\|(.+)"); - + "[A-Za-z-]+/?[A-Za-z-]+\\|(\\w+)\\|(.+)"); + // JBPNote At the moment - we don't get rid of the friendly names but this + // behaviour is probably wrong in the long run. if (dbId.search(id)) { String dbid = dbId.stringMatched(1); String idname = dbId.stringMatched(2); - - if ((idname.length() > 0) && - (idname.indexOf("_") > -1)) { - id = idname; // just use friendly name // JBPNote: we may lose uniprot standardised ID here. - } else { - id = dbid; // use dbid to ensure sensible queries + if ( (idname.length() > 0) && + (idname.indexOf("_") > -1)) { + id = idname; // use the friendly name - apparently no dbid + } else + if (dbid.length()>1) { + id = dbid; // ignore the friendly name - we lose uniprot accession ID otherwise } } -- 1.7.10.2