4 package jalview.ws.dbsources;
\r
7 import java.io.FileReader;
\r
8 import java.io.IOException;
\r
9 import java.util.Enumeration;
\r
10 import java.util.Hashtable;
\r
11 import java.util.Vector;
\r
13 import org.exolab.castor.xml.Unmarshaller;
\r
15 import com.stevesoft.pat.Regex;
\r
17 import jalview.datamodel.Alignment;
\r
18 import jalview.datamodel.AlignmentI;
\r
19 import jalview.datamodel.DBRefEntry;
\r
20 import jalview.datamodel.DBRefSource;
\r
21 import jalview.datamodel.PDBEntry;
\r
22 import jalview.datamodel.SequenceFeature;
\r
23 import jalview.datamodel.SequenceI;
\r
24 import jalview.datamodel.UniprotEntry;
\r
25 import jalview.datamodel.UniprotFile;
\r
26 import jalview.io.FormatAdapter;
\r
27 import jalview.io.IdentifyFile;
\r
28 import jalview.ws.DBRefFetcher;
\r
29 import jalview.ws.ebi.EBIFetchClient;
\r
30 import jalview.ws.seqfetcher.DbSourceProxy;
\r
31 import jalview.ws.seqfetcher.DbSourceProxyImpl;
\r
37 public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
\r
41 addDbSourceProperty(DBRefSource.SEQDB, DBRefSource.SEQDB);
\r
42 addDbSourceProperty(DBRefSource.PROTSEQDB);
\r
43 addDbSourceProperty(DBRefSource.MULTIACC, new Integer(50));
\r
49 * @see jalview.ws.seqfetcher.DbSourceProxy#getAccessionSeparator()
\r
51 public String getAccessionSeparator()
\r
59 * @see jalview.ws.seqfetcher.DbSourceProxy#getAccessionValidator()
\r
61 public Regex getAccessionValidator()
\r
69 * @see jalview.ws.seqfetcher.DbSourceProxy#getDbSource()
\r
71 public String getDbSource()
\r
73 return DBRefSource.UNIPROT;
\r
79 * @see jalview.ws.seqfetcher.DbSourceProxy#getDbVersion()
\r
81 public String getDbVersion()
\r
83 return "0"; // we really don't know what version we're on.
\r
86 private EBIFetchClient ebi = null;
\r
88 public Vector getUniprotEntries(File file)
\r
90 UniprotFile uni = new UniprotFile();
\r
93 // 1. Load the mapping information from the file
\r
94 org.exolab.castor.mapping.Mapping map = new org.exolab.castor.mapping.Mapping(uni.getClass().getClassLoader());
\r
95 java.net.URL url = getClass().getResource("/uniprot_mapping.xml");
\r
96 map.loadMapping(url);
\r
98 // 2. Unmarshal the data
\r
99 Unmarshaller unmar = new Unmarshaller(uni);
\r
100 unmar.setIgnoreExtraElements(true);
\r
101 unmar.setMapping(map);
\r
103 uni = (UniprotFile) unmar.unmarshal(new FileReader(file));
\r
105 catch (Exception e)
\r
107 System.out.println("Error getUniprotEntries() " + e);
\r
110 return uni.getUniprotEntries();
\r
116 * @see jalview.ws.seqfetcher.DbSourceProxy#getSequenceRecords(java.lang.String)
\r
118 public AlignmentI getSequenceRecords(String queries) throws Exception
\r
124 ebi = new EBIFetchClient();
\r
125 StringBuffer result=new StringBuffer();
\r
126 File file = ebi.fetchDataAsFile("uniprot:" + queries, "uniprotxml", null);
\r
127 Vector entries = getUniprotEntries(file);
\r
129 if (entries != null)
\r
131 // First, make the new sequences
\r
132 Enumeration en = entries.elements();
\r
133 while (en.hasMoreElements())
\r
135 UniprotEntry entry = (UniprotEntry) en.nextElement();
\r
137 StringBuffer name = new StringBuffer(">UniProt/Swiss-Prot");
\r
138 Enumeration en2 = entry.getAccession().elements();
\r
139 while (en2.hasMoreElements())
\r
142 name.append(en2.nextElement());
\r
144 en2 = entry.getName().elements();
\r
145 while (en2.hasMoreElements())
\r
148 name.append(en2.nextElement());
\r
151 if (entry.getProtein() != null)
\r
153 name.append(" " + entry.getProtein().getName().elementAt(0));
\r
156 result.append(name + "\n"
\r
157 + entry.getUniprotSequence().getContent() + "\n");
\r
161 // Then read in the features and apply them to the dataset
\r
162 al = parseResult(result.toString());
\r
165 // Decorate the alignment with database entries.
\r
166 addUniprotXrefs(al, entries);
\r
173 } catch (Exception e)
\r
181 * add an ordered set of UniprotEntry objects to an ordered set of sequences.
\r
184 * a sequence of n sequences
\r
186 * a sequence of n uniprot entries to be analysed.
\r
188 public void addUniprotXrefs(Alignment al, Vector entries)
\r
190 for (int i = 0; i < entries.size(); i++)
\r
192 UniprotEntry entry = (UniprotEntry) entries.elementAt(i);
\r
193 Enumeration e = entry.getDbReference().elements();
\r
194 Vector onlyPdbEntries = new Vector();
\r
195 while (e.hasMoreElements())
\r
197 PDBEntry pdb = (PDBEntry) e.nextElement();
\r
198 if (!pdb.getType().equals("PDB"))
\r
203 onlyPdbEntries.addElement(pdb);
\r
205 SequenceI sq = al.getSequenceAt(i);
\r
206 while (sq.getDatasetSequence()!=null)
\r
208 sq = sq.getDatasetSequence();
\r
211 Enumeration en2 = entry.getAccession().elements();
\r
212 while (en2.hasMoreElements())
\r
214 // we always add as uniprot if we retrieved from uniprot or uniprot name
\r
216 new DBRefEntry(DBRefSource.UNIPROT, getDbVersion(), en2.nextElement()
\r
219 sq.setPDBId(onlyPdbEntries);
\r
220 if (entry.getFeature() != null)
\r
222 e = entry.getFeature().elements();
\r
223 while (e.hasMoreElements())
\r
225 SequenceFeature sf = (SequenceFeature) e.nextElement();
\r
226 sf.setFeatureGroup("Uniprot");
\r
227 sq.addSequenceFeature(sf);
\r
236 * @see jalview.ws.seqfetcher.DbSourceProxy#isValidReference(java.lang.String)
\r
238 public boolean isValidReference(String accession)
\r
243 * return LDHA_CHICK uniprot entry
\r
245 public String getTestQuery()
\r
249 public String getDbName()
\r
251 return getDbSource();
\r