2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.ws.sifts;
24 import java.io.FileInputStream;
25 import java.io.IOException;
26 import java.io.InputStream;
27 import java.io.PrintStream;
29 import java.net.URLConnection;
30 import java.util.ArrayList;
31 import java.util.Arrays;
32 import java.util.Collection;
33 import java.util.Collections;
34 import java.util.HashMap;
35 import java.util.HashSet;
36 import java.util.List;
39 import java.util.TreeMap;
40 import java.util.zip.GZIPInputStream;
42 import javax.xml.bind.JAXBContext;
43 import javax.xml.bind.JAXBElement;
44 import javax.xml.bind.Unmarshaller;
45 import javax.xml.stream.XMLInputFactory;
46 import javax.xml.stream.XMLStreamReader;
48 import jalview.analysis.AlignSeq;
49 import jalview.analysis.scoremodels.ScoreMatrix;
50 import jalview.analysis.scoremodels.ScoreModels;
51 import jalview.api.DBRefEntryI;
52 import jalview.api.SiftsClientI;
53 import jalview.datamodel.DBRefEntry;
54 import jalview.datamodel.DBRefSource;
55 import jalview.datamodel.SequenceI;
56 import jalview.io.StructureFile;
57 import jalview.schemes.ResidueProperties;
58 import jalview.structure.StructureMapping;
59 import jalview.util.Comparison;
60 import jalview.util.DBRefUtils;
61 import jalview.util.Format;
62 import jalview.util.Platform;
63 import jalview.xml.binding.sifts.Entry;
64 import jalview.xml.binding.sifts.Entry.Entity;
65 import jalview.xml.binding.sifts.Entry.Entity.Segment;
66 import jalview.xml.binding.sifts.Entry.Entity.Segment.ListMapRegion.MapRegion;
67 import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue;
68 import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.CrossRefDb;
69 import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.ResidueDetail;
71 import mc_view.PDBChain;
73 public class SiftsClient implements SiftsClientI
76 * for use in mocking out file fetch for tests only
77 * - reset to null after testing!
// Test hook: when non-null, getSiftsFile returns this file instead of using the cache or downloading.
79 private static File mockSiftsFile;
// Parsed SIFTS record that all entity/segment/residue lookups in this client read from.
81 private Entry siftsEntry;
// Structure file whose chain is looked up (findChain) when atom positions are populated.
83 private StructureFile pdb;
// Label used in mapping output: the PDB id, or "pdbId|chain" when a chain is supplied.
87 private String structId;
// Coordinate system of the sequence being mapped; set to PDB when the source DBRef is a PDB reference.
89 private CoordinateSys seqCoordSys = CoordinateSys.UNIPROT;
92 * PDB sequence position to sequence coordinate mapping as derived from SIFTS
93 * record for the identified SeqCoordSys Used for lift-over from sequence
94 * derived from PDB (with first extracted PDBRESNUM as 'start' to the sequence
95 * being annotated with PDB data
97 private jalview.datamodel.Mapping seqFromPdbMapping;
// Sentinel meaning "no value assigned" for sequence/structure positions.
99 public static final int UNASSIGNED = Integer.MIN_VALUE;
// Slot 0 of each mapping int[]: the PDB residue number.
101 private static final int PDB_RES_POS = 0;
// Slot 1 of each mapping int[]: the atom index, filled in by populateAtomPositions.
103 private static final int PDB_ATOM_POS = 1;
// Slot 2 of each mapping int[]: written by processSegments, read back as the PDB-side position in getGreedyMapping.
105 private static final int PDBE_POS = 2;
// Annotation value that marks a residue as not observed (compared ignoring case).
107 private static final String NOT_OBSERVED = "Not_Observed";
// Base location the "<pdbid>.xml.gz" file name is appended to when downloading.
// NOTE(review): plain http is used for the download — confirm whether https is available.
109 private static final String SIFTS_FTP_BASE_URL = "http://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/";
111 private final static String NEWLINE = System.lineSeparator();
// Values for the asFile argument of downloadSifts: stream the data vs. write it to the download directory.
113 private static final boolean GET_STREAM = false;
114 private static final boolean CACHE_FILE = true;
// Accession id of the source DBRef chosen by the most recent getGreedyMapping call.
116 private String curSourceDBRef;
// Lower-cased accession ids of all DBRefs of the sequence mapped most recently.
118 private HashSet<String> curDBRefAccessionIdsString;
// NOTE(review): presumably selects file caching over streaming in the constructor — its use is not visible here; confirm.
120 private boolean doCache = false;
122 private enum CoordinateSys
124 UNIPROT("UniProt"), PDB("PDBresnum"), PDBe("PDBe");
127 private CoordinateSys(String name)
132 public String getName()
138 private enum ResidueDetailType
140 NAME_SEC_STRUCTURE("nameSecondaryStructure"),
141 CODE_SEC_STRUCTURE("codeSecondaryStructure"), ANNOTATION("Annotation");
144 private ResidueDetailType(String code)
149 public String getCode()
156 * Fetch SIFTs file for the given PDBfile and construct an instance of
160 * @throws SiftsException
// Builds a client for one structure: records the structure's id, then loads
// the matching SIFTS entry.
// Two loading paths appear below: a cached/downloaded file (getSiftsFile +
// parseSIFTs) and a direct download stream (parseSIFTSStreamFor).
// NOTE(review): the condition choosing between the two paths is not visible
// here — presumably the doCache flag; confirm.
// Throws SiftsException when the SIFTS data cannot be fetched or parsed.
162 public SiftsClient(StructureFile pdb) throws SiftsException
165 this.pdbId = pdb.getId();
// file-based path: locate (or download) the gzipped XML, then parse it
167 File siftsFile = getSiftsFile(pdbId);
168 siftsEntry = parseSIFTs(siftsFile);
// stream-based path: parse directly from the download, no file written
170 siftsEntry = parseSIFTSStreamFor(pdbId);
175 * A more streamlined version of SIFT reading that allows for streaming of the data.
179 * @throws SiftsException
181 private static Entry parseSIFTSStreamFor(String pdbId) throws SiftsException
185 InputStream is = (InputStream) downloadSifts(pdbId, GET_STREAM);
186 return parseSIFTs(is);
187 } catch (Exception e)
189 throw new SiftsException(e.getMessage());
194 * Parse the given SIFTs File and return a JAXB POJO of parsed data
197 * - the GZipped SIFTs XML file to parse
200 * if a problem occurs while parsing the SIFTs XML
// Parses a gzipped SIFTS XML file from disk by opening it as a stream and
// delegating to parseSIFTs(InputStream).
// Any failure (opening the file, decompressing, unmarshalling) is reported as
// a SiftsException carrying only the message of the original exception.
202 private Entry parseSIFTs(File siftFile) throws SiftsException
// the file stream is closed automatically when the try block exits
204 try (InputStream in = new FileInputStream(siftFile)) {
205 return parseSIFTs(in);
206 } catch (Exception e)
209 throw new SiftsException(e.getMessage());
213 private static Entry parseSIFTs(InputStream in) throws Exception {
214 try (GZIPInputStream gzis = new GZIPInputStream(in);)
216 // System.out.println("File : " + siftFile.getAbsolutePath());
217 JAXBContext jc = JAXBContext.newInstance("jalview.xml.binding.sifts");
218 XMLStreamReader streamReader = XMLInputFactory.newInstance()
219 .createXMLStreamReader(gzis);
220 Unmarshaller um = jc.createUnmarshaller();
221 JAXBElement<Entry> jbe = um.unmarshal(streamReader, Entry.class);
222 return jbe.getValue();
227 * Get a SIFTs XML file for a given PDB Id from Cache or download from FTP
228 * repository if not found in cache
231 * @return SIFTs XML file
232 * @throws SiftsException
// Returns the gzipped SIFTS XML file for a PDB id, preferring (in order) the
// mock file set for tests, a file already present in the SIFTS download
// directory, and finally a fresh download.
// Throws SiftsException when a required download fails.
234 public static File getSiftsFile(String pdbId) throws SiftsException
237 * return mocked file if it has been set
239 if (mockSiftsFile != null)
241 return mockSiftsFile;
// cache location: <download directory><lower-cased pdb id>.xml.gz
244 String siftsFileName = SiftsSettings.getSiftDownloadDirectory()
245 + pdbId.toLowerCase() + ".xml.gz";
246 File siftsFile = new File(siftsFileName);
247 if (siftsFile.exists())
249 // The line below is required for unit testing... don't comment it out!!!
250 System.out.println(">>> SIFTS File already downloaded for " + pdbId);
// stale cache entry: try to refresh it, keeping the old copy as a fallback
252 if (Platform.isFileOlderThanThreshold(siftsFile,
253 SiftsSettings.getCacheThresholdInDays()))
255 File oldSiftsFile = new File(siftsFileName + "_old");
// NOTE(review): the boolean result of renameTo is ignored, so a failed rename goes unnoticed
256 siftsFile.renameTo(oldSiftsFile);
// refresh succeeded: the backup copy is no longer needed
259 siftsFile = downloadSiftsFile(pdbId);
260 oldSiftsFile.delete();
262 } catch (IOException e)
// refresh failed: put the backup back under the original name and use it
265 oldSiftsFile.renameTo(siftsFile);
266 return new File(siftsFileName);
// no cached copy: download one; here a failure is fatal for the caller
276 siftsFile = downloadSiftsFile(pdbId);
277 } catch (IOException e)
279 throw new SiftsException(e.getMessage());
285 * Download a SIFTs XML file for a given PDB Id from an FTP repository
288 * @return downloaded SIFTs XML file
289 * @throws SiftsException
290 * @throws IOException
292 public static File downloadSiftsFile(String pdbId)
293 throws SiftsException, IOException
295 return (File) downloadSifts(pdbId, CACHE_FILE);
299 * Download SIFTs XML with the option to cache a file or to get a stream.
304 * @throws IOException
// Fetches "<pdbid>.xml.gz" from SIFTS_FTP_BASE_URL. The asFile argument
// selects between writing the data into the SIFTS download directory and
// handing the data back as a stream; callers cast the Object result to File
// or InputStream accordingly.
// Throws IOException when the connection or the transfer fails.
306 private static Object downloadSifts(String pdbId, boolean asFile) throws IOException
// normalise the id: lower case, with any ".cif" removed
308 pdbId = pdbId.toLowerCase();
309 if (pdbId.contains(".cif"))
311 pdbId = pdbId.replace(".cif", "");
313 String siftFile = pdbId + ".xml.gz";
315 File downloadTo = null;
// target file inside the download directory, which is created if it is missing
318 downloadTo = new File(
319 SiftsSettings.getSiftDownloadDirectory() + siftFile);
320 File siftsDownloadDir = new File(SiftsSettings.getSiftDownloadDirectory());
321 if (!siftsDownloadDir.exists())
323 siftsDownloadDir.mkdirs();
326 String siftsFileFTPURL = SIFTS_FTP_BASE_URL + siftFile;
327 URL url = new URL(siftsFileFTPURL);
328 URLConnection conn = url.openConnection();
329 InputStream is = conn.getInputStream();
332 // This is MUCH more efficient in JavaScript, as we already have the bytes
333 Platform.streamToFile(is, downloadTo);
339 * Delete the SIFTs file for the given PDB Id in the local SIFTs download
343 * @return true if the file was deleted or doesn't exist
345 public static boolean deleteSiftsFileByPDBId(String pdbId)
347 File siftsFile = new File(SiftsSettings.getSiftDownloadDirectory()
348 + pdbId.toLowerCase() + ".xml.gz");
349 if (siftsFile.exists())
351 return siftsFile.delete();
357 * Get a valid SIFTs DBRef for the given sequence in the current SIFTs entry
360 * - the target sequence for the operation
361 * @return a valid DBRefEntry that is SIFTs compatible
363 * if no valid source DBRefEntry was found for the given sequences
365 public DBRefEntryI getValidSourceDBRef(SequenceI seq)
366 throws SiftsException
368 List<DBRefEntry> dbRefs = seq.getPrimaryDBRefs();
369 if (dbRefs == null || dbRefs.size() < 1)
371 throw new SiftsException(
372 "Source DBRef could not be determined. DBRefs might not have been retrieved.");
375 for (DBRefEntry dbRef : dbRefs)
377 if (dbRef == null || dbRef.getAccessionId() == null
378 || dbRef.getSource() == null)
382 String canonicalSource = DBRefUtils
383 .getCanonicalName(dbRef.getSource());
384 if (isValidDBRefEntry(dbRef)
385 && (canonicalSource.equalsIgnoreCase(DBRefSource.UNIPROT)
386 || canonicalSource.equalsIgnoreCase(DBRefSource.PDB)))
391 throw new SiftsException("Could not get source DB Ref");
395 * Check that the DBRef Entry is properly populated and is available in this
396 * SiftsClient instance
399 * - DBRefEntry to validate
400 * @return true if validation is successful, otherwise false.
402 boolean isValidDBRefEntry(DBRefEntryI entry)
404 return entry != null && entry.getAccessionId() != null
405 && isFoundInSiftsEntry(entry.getAccessionId());
// Collects the database accession ids referenced by the map regions of every
// segment of every entity in the current SIFTS entry. Ids are stored
// lower-cased so that later lookups can ignore case.
409 public HashSet<String> getAllMappingAccession()
411 HashSet<String> accessions = new HashSet<String>();
412 List<Entity> entities = siftsEntry.getEntity();
413 for (Entity entity : entities)
415 List<Segment> segments = entity.getSegment();
416 for (Segment segment : segments)
418 List<MapRegion> mapRegions = segment.getListMapRegion()
420 for (MapRegion mapRegion : mapRegions)
// lower-case once here; isFoundInSiftsEntry lower-cases its argument to match
423 .add(mapRegion.getDb().getDbAccessionId().toLowerCase());
// Builds a StructureMapping between a sequence and one chain of the
// structure using the SIFTS record.
// The mapping itself is computed by getGreedyMapping on the root dataset
// sequence; the report that getGreedyMapping prints is captured as text and
// stored in the returned StructureMapping together with seqFromPdbMapping.
// Throws SiftsException when no mapping can be derived.
431 public StructureMapping getSiftsStructureMapping(SequenceI seq,
432 String pdbFile, String chain) throws SiftsException
// remember the sequence as passed in; the mapping is attached to it
434 SequenceI aseq = seq;
// walk up to the root dataset sequence, which is what gets mapped
435 while (seq.getDatasetSequence() != null)
437 seq = seq.getDatasetSequence();
439 structId = (chain == null) ? pdbId : pdbId + "|" + chain;
440 System.out.println("Getting SIFTS mapping for " + structId + ": seq "
443 final StringBuilder mappingDetails = new StringBuilder(128);
// PrintStream whose print(String)/println() append to mappingDetails
// instead of writing to the console
444 PrintStream ps = new PrintStream(System.out)
447 public void print(String x)
449 mappingDetails.append(x);
453 public void println()
455 mappingDetails.append(NEWLINE);
458 HashMap<Integer, int[]> mapping = getGreedyMapping(chain, seq, ps);
460 String mappingOutput = mappingDetails.toString();
461 StructureMapping siftsMapping = new StructureMapping(aseq, pdbFile,
462 pdbId, chain, mapping, mappingOutput, seqFromPdbMapping);
// Computes the residue-level mapping between a sequence and the SIFTS entity
// that best matches the given chain id.
// Keys of the returned map are sequence positions; each value is an int[]
// indexed by PDB_RES_POS, PDB_ATOM_POS and PDBE_POS.
// Side effects: updates seqCoordSys, curSourceDBRef,
// curDBRefAccessionIdsString and seqFromPdbMapping, and prints a
// human-readable report of the mapping to the supplied stream.
// Throws SiftsException when no entity or source DBRef is found, or when
// the resulting mapping is empty.
468 public HashMap<Integer, int[]> getGreedyMapping(String entityId,
469 SequenceI seq, java.io.PrintStream os) throws SiftsException
471 List<Integer> omitNonObserved = new ArrayList<>();
472 int nonObservedShiftIndex = 0,pdbeNonObserved=0;
473 // System.out.println("Generating mappings for : " + entityId);
474 Entity entity = null;
475 entity = getEntityById(entityId);
// sequence text with gap characters removed
476 String originalSeq = AlignSeq.extractGaps(
477 jalview.util.Comparison.GapChars, seq.getSequenceAsString());
478 HashMap<Integer, int[]> mapping = new HashMap<Integer, int[]>();
479 DBRefEntryI sourceDBRef;
480 sourceDBRef = getValidSourceDBRef(seq);
481 // TODO ensure sequence start/end is in the same coordinate system and
482 // consistent with the chosen sourceDBRef
484 // set sequence coordinate system - default value is UniProt
485 if (sourceDBRef.getSource().equalsIgnoreCase(DBRefSource.PDB))
487 seqCoordSys = CoordinateSys.PDB;
// gather every accession of the sequence (lower-cased) plus the chosen source
490 HashSet<String> dbRefAccessionIdsString = new HashSet<String>();
491 for (DBRefEntry dbref : seq.getDBRefs())
493 dbRefAccessionIdsString.add(dbref.getAccessionId().toLowerCase());
495 dbRefAccessionIdsString.add(sourceDBRef.getAccessionId().toLowerCase());
// published as fields so isAccessionMatched can consult them during processSegments
497 curDBRefAccessionIdsString = dbRefAccessionIdsString;
498 curSourceDBRef = sourceDBRef.getAccessionId();
// sequence position -> residue letter of the structure, in ascending position order
500 TreeMap<Integer, String> resNumMap = new TreeMap<Integer, String>();
501 List<Segment> segments = entity.getSegment();
502 SegmentHelperPojo shp = new SegmentHelperPojo(seq, mapping, resNumMap,
503 omitNonObserved, nonObservedShiftIndex,pdbeNonObserved);
// fills mapping, resNumMap and omitNonObserved in place
504 processSegments(segments, shp);
// fills the PDB_ATOM_POS slot of each mapping entry
507 populateAtomPositions(entityId, mapping);
508 } catch (Exception e)
// only UniProt-numbered sequences get gap padding for unmapped positions
512 if (seqCoordSys == CoordinateSys.UNIPROT)
514 padWithGaps(resNumMap, omitNonObserved);
516 int seqStart = UNASSIGNED;
517 int seqEnd = UNASSIGNED;
518 int pdbStart = UNASSIGNED;
519 int pdbEnd = UNASSIGNED;
521 if (mapping.isEmpty())
523 throw new SiftsException("SIFTS mapping failed");
525 // also construct a mapping object between the seq-coord sys and the PDB seq's coord sys
// NOTE(review): the keys come from a HashMap; taking the last element as seqEnd
// assumes they are sorted first — the sort is not visible here, confirm.
527 Integer[] keys = mapping.keySet().toArray(new Integer[0]);
530 seqEnd = keys[keys.length - 1];
// contiguous [start, end] ranges on the sequence side (from) and the PDB side (to)
531 List<int[]> from=new ArrayList<>(),to=new ArrayList<>();
532 int[]_cfrom=null,_cto=null;
533 String matchedSeq = originalSeq;
534 if (seqStart != UNASSIGNED) // fixme! seqStart can map to -1 for a pdb sequence that starts <-1
538 int pdbpos = mapping.get(seqps)[PDBE_POS];
539 if (pdbpos == UNASSIGNED)
541 // not correct - pdbpos might be -1, but leave it for now
// a new sequence-side range starts whenever the position is not the successor of the current range's end
544 if (_cfrom==null || seqps!=_cfrom[1]+1)
546 _cfrom = new int[] { seqps,seqps};
548 _cto = null; // discontinuity
// likewise for the PDB side
552 if (_cto==null || pdbpos!=1+_cto[1])
554 _cto = new int[] { pdbpos,pdbpos};
// flatten the range lists into [start0, end0, start1, end1, ...] arrays
560 _cfrom = new int[from.size() * 2];
561 _cto = new int[to.size() * 2];
563 for (int[] range : from)
565 _cfrom[p++] = range[0];
566 _cfrom[p++] = range[1];
570 for (int[] range : to)
572 _cto[p++] = range[0];
573 _cto[p++] = range[1];
577 seqFromPdbMapping = new jalview.datamodel.Mapping(null, _cto, _cfrom,
// PDB residue numbers at the two ends of the mapped sequence range
580 pdbStart = mapping.get(seqStart)[PDB_RES_POS];
581 pdbEnd = mapping.get(seqEnd)[PDB_RES_POS];
// cut the part of the ungapped sequence that corresponds to the mapped range
582 int orignalSeqStart = seq.getStart();
583 if (orignalSeqStart >= 1)
585 int subSeqStart = (seqStart >= orignalSeqStart)
586 ? seqStart - orignalSeqStart
588 int subSeqEnd = seqEnd - (orignalSeqStart - 1);
// clamp the end to the sequence length
589 subSeqEnd = originalSeq.length() < subSeqEnd ? originalSeq.length()
591 matchedSeq = originalSeq.substring(subSeqStart, subSeqEnd);
595 matchedSeq = originalSeq.substring(1, originalSeq.length());
// structure-side residue string, in sequence-position order
599 StringBuilder targetStrucSeqs = new StringBuilder();
600 for (String res : resNumMap.values())
602 targetStrucSeqs.append(res);
// bundle everything needed for the textual mapping report
607 MappingOutputPojo mop = new MappingOutputPojo();
608 mop.setSeqStart(seqStart);
609 mop.setSeqEnd(seqEnd);
610 mop.setSeqName(seq.getName());
611 mop.setSeqResidue(matchedSeq);
613 mop.setStrStart(pdbStart);
614 mop.setStrEnd(pdbEnd);
615 mop.setStrName(structId);
616 mop.setStrResidue(targetStrucSeqs.toString());
// getMappingOutput can itself raise SiftsException (low percentage identity)
619 os.print(getMappingOutput(mop).toString());
// Walks every residue of the given segments and records, for each residue
// that can be placed on the sequence, an entry in the helper's mapping
// (sequence position -> int[] of PDB residue number / atom slot / PDB-side
// position) and in its resNumMap (sequence position -> one-letter residue
// code). Non-observed residues are tracked through omitNonObserved and the
// two counters.
// All results are written into the collections held by shp; nothing is
// returned.
625 void processSegments(List<Segment> segments, SegmentHelperPojo shp)
627 SequenceI seq = shp.getSeq();
628 HashMap<Integer, int[]> mapping = shp.getMapping();
629 TreeMap<Integer, String> resNumMap = shp.getResNumMap();
630 List<Integer> omitNonObserved = shp.getOmitNonObserved();
// the counters are copied into locals; increments below do not flow back into shp here
631 int nonObservedShiftIndex = shp.getNonObservedShiftIndex();
632 int pdbeNonObservedCount = shp.getPdbeNonObserved();
633 int firstPDBResNum = UNASSIGNED;
634 for (Segment segment : segments)
636 // System.out.println("Mapping segments : " + segment.getSegId() + "\\"s
638 List<Residue> residues = segment.getListResidue().getResidue();
639 for (Residue residue : residues)
641 boolean isObserved = isResidueObserved(residue);
// the residue's own number, parsed from the leading digits of its dbResNum
642 int pdbeIndex = Platform.getLeadingIntegerValue(residue.getDbResNum(),
644 int currSeqIndex = UNASSIGNED;
// scan the cross-references for the PDB numbering and for the position in
// the sequence's coordinate system
645 List<CrossRefDb> cRefDbs = residue.getCrossRefDb();
646 CrossRefDb pdbRefDb = null;
647 for (CrossRefDb cRefDb : cRefDbs)
649 if (cRefDb.getDbSource().equalsIgnoreCase(DBRefSource.PDB))
652 if (firstPDBResNum == UNASSIGNED)
654 firstPDBResNum = Platform.getLeadingIntegerValue(cRefDb.getDbResNum(),
661 // after we find the first observed residue we just increment
// the cross-reference must use the sequence's coordinate system and the chosen accession
666 if (cRefDb.getDbCoordSys().equalsIgnoreCase(seqCoordSys.getName())
667 && isAccessionMatched(cRefDb.getDbAccessionId()))
669 currSeqIndex = Platform.getLeadingIntegerValue(cRefDb.getDbResNum(),
671 if (pdbRefDb != null)
673 break;// exit loop if pdb and uniprot are already found
679 ++pdbeNonObservedCount;
681 if (seqCoordSys == seqCoordSys.PDB) // FIXME: is seqCoordSys ever PDBe
684 // if the sequence has a primary reference to the PDB, then we are
685 // dealing with a sequence extracted directly from the PDB. In that
686 // case, numbering is PDBe - non-observed residues
687 currSeqIndex = seq.getStart() - 1 + pdbeIndex;
691 if (seqCoordSys != CoordinateSys.UNIPROT) // FIXME: PDB or PDBe only
694 // mapping to PDB or PDBe so we need to bookkeep for the
697 omitNonObserved.add(currSeqIndex);
698 ++nonObservedShiftIndex;
701 if (currSeqIndex == UNASSIGNED)
703 // change in logic - unobserved residues with no currSeqIndex
704 // corresponding are still counted in both nonObservedShiftIndex and
708 // if (currSeqIndex >= seq.getStart() && currSeqIndex <= seqlength) //
// residue number: the PDB cross-reference's if one was found, otherwise the residue's own
718 int resNum = (pdbRefDb == null)
719 ? Platform.getLeadingIntegerValue(residue.getDbResNum(),
721 : Platform.getLeadingIntegerValue(pdbRefDb.getDbResNum(),
// one-letter code of the canonical form of the residue name
726 char resCharCode = ResidueProperties
727 .getSingleCharacterCode(ResidueProperties
728 .getCanonicalAminoAcid(residue.getDbResName()));
729 resNumMap.put(currSeqIndex, String.valueOf(resCharCode));
// slots: [PDB_RES_POS]=resNum, [PDB_ATOM_POS]=unset until populateAtomPositions,
// [PDBE_POS]=firstPDBResNum for observed residues, otherwise UNASSIGNED
731 int[] mappingcols = new int[] { Integer.valueOf(resNum),
732 UNASSIGNED, isObserved ? firstPDBResNum : UNASSIGNED };
// the key is shifted down by the number of non-observed residues counted so far
734 mapping.put(currSeqIndex - nonObservedShiftIndex, mappingcols);
744 * Target chain to populate mapping of its atom positions.
746 * Two dimension array of residue index versus atom position
747 * @throws IllegalArgumentException
748 * Thrown if chainId or mapping is null
749 * @throws SiftsException
751 void populateAtomPositions(String chainId, Map<Integer, int[]> mapping)
752 throws IllegalArgumentException, SiftsException
756 PDBChain chain = pdb.findChain(chainId);
758 if (chain == null || mapping == null)
760 throw new IllegalArgumentException(
761 "Chain id or mapping must not be null.");
763 for (int[] map : mapping.values())
765 if (map[PDB_RES_POS] != UNASSIGNED)
767 map[PDB_ATOM_POS] = getAtomIndex(map[PDB_RES_POS], chain.atoms);
770 } catch (NullPointerException e)
772 throw new SiftsException(e.getMessage());
773 } catch (Exception e)
775 throw new SiftsException(e.getMessage());
781 * @param residueIndex
782 * The residue index used for the search
784 * A collection of Atom to search
785 * @return atom position for the given residue index
// Finds the atom index recorded for a residue number: scans the atoms in
// iteration order and returns the atomIndex of the first atom whose
// resNumber equals residueIndex.
// NOTE(review): the IllegalArgumentException below is presumably guarded by
// a null check on atoms, and the result for "no match" is not visible here —
// confirm both.
787 int getAtomIndex(int residueIndex, Collection<Atom> atoms)
791 throw new IllegalArgumentException(
792 "atoms collection must not be null!");
// linear scan; the first atom of the matching residue wins
794 for (Atom atom : atoms)
796 if (atom.resNumber == residueIndex)
798 return atom.atomIndex;
805 * Checks if the residue instance is marked 'Not_observed' or not
// Decides whether a residue is observed by inspecting its "Annotation"
// residue details for the NOT_OBSERVED marker (compared ignoring case).
// NOTE(review): the return statements are not visible here; presumably a
// residue without annotations, or without the marker, counts as observed —
// confirm.
810 private boolean isResidueObserved(Residue residue)
812 Set<String> annotations = getResidueAnnotaitons(residue,
813 ResidueDetailType.ANNOTATION);
// getResidueAnnotaitons always returns a set, so only the isEmpty() half can be true
814 if (annotations == null || annotations.isEmpty())
818 for (String annotation : annotations)
820 if (annotation.equalsIgnoreCase(NOT_OBSERVED))
829 * Get annotation String for a given residue and annotation type
835 private Set<String> getResidueAnnotaitons(Residue residue,
836 ResidueDetailType type)
838 HashSet<String> foundAnnotations = new HashSet<String>();
839 List<ResidueDetail> resDetails = residue.getResidueDetail();
840 for (ResidueDetail resDetail : resDetails)
842 if (resDetail.getProperty().equalsIgnoreCase(type.getCode()))
844 foundAnnotations.add(resDetail.getContent());
847 return foundAnnotations;
851 public boolean isAccessionMatched(String accession)
853 boolean isStrictMatch = true;
854 return isStrictMatch ? curSourceDBRef.equalsIgnoreCase(accession)
855 : curDBRefAccessionIdsString.contains(accession.toLowerCase());
858 private boolean isFoundInSiftsEntry(String accessionId)
860 Set<String> siftsDBRefs = getAllMappingAccession();
861 return accessionId != null
862 && siftsDBRefs.contains(accessionId.toLowerCase());
866 * Pad omitted residue positions in PDB sequence with gaps
870 void padWithGaps(Map<Integer, String> resNumMap,
871 List<Integer> omitNonObserved)
873 if (resNumMap == null || resNumMap.isEmpty())
877 Integer[] keys = resNumMap.keySet().toArray(new Integer[0]);
878 // Arrays.sort(keys);
879 int firstIndex = keys[0];
880 int lastIndex = keys[keys.length - 1];
881 // System.out.println("Min value " + firstIndex);
882 // System.out.println("Max value " + lastIndex);
883 for (int x = firstIndex; x <= lastIndex; x++)
885 if (!resNumMap.containsKey(x) && !omitNonObserved.contains(x))
887 resNumMap.put(x, "-");
893 public Entity getEntityById(String id) throws SiftsException
895 // Determines an entity to process by performing a heuristic matching of all
896 // Entities with the given chainId and choosing the best matching Entity
897 Entity entity = getEntityByMostOptimalMatchedId(id);
902 throw new SiftsException("Entity " + id + " not found");
906 * This method was added because EntityId is NOT always equal to ChainId.
907 * Hence, it provides the logic to greedily detect the "true" Entity for a
908 * given chainId where discrepancies exist.
// Scores every entity of the SIFTS entry by the percentage of its PDB
// cross-referenced residues whose chain id equals chainId (ignoring case),
// then looks up the entity with the highest score.
913 public Entity getEntityByMostOptimalMatchedId(String chainId)
915 // System.out.println("---> advanced greedy entityId matching block
917 List<Entity> entities = siftsEntry.getEntity();
// one score record per entity
918 SiftsEntitySortPojo[] sPojo = new SiftsEntitySortPojo[entities.size()];
920 for (Entity entity : entities)
922 sPojo[count] = new SiftsEntitySortPojo();
923 sPojo[count].entityId = entity.getEntityId();
925 List<Segment> segments = entity.getSegment();
926 for (Segment segment : segments)
928 List<Residue> residues = segment.getListResidue().getResidue();
929 for (Residue residue : residues)
931 List<CrossRefDb> cRefDbs = residue.getCrossRefDb();
932 for (CrossRefDb cRefDb : cRefDbs)
// only PDB cross-references take part in the score
934 if (!cRefDb.getDbSource().equalsIgnoreCase("PDB"))
938 ++sPojo[count].resCount;
939 if (cRefDb.getDbChainId().equalsIgnoreCase(chainId))
941 ++sPojo[count].chainIdFreq;
// integer percentage of PDB cross-references that carry the requested chain id
// NOTE(review): divides by resCount — an entity with no PDB cross-references
// would raise ArithmeticException here; confirm this cannot occur.
946 sPojo[count].pid = (100 * sPojo[count].chainIdFreq)
947 / sPojo[count].resCount;
// highest percentage first
// NOTE(review): sPojo[0] below assumes the entry has at least one entity
950 Arrays.sort(sPojo, Collections.reverseOrder());
951 // System.out.println("highest matched entity : " + sPojo[0].entityId);
952 // System.out.println("highest matched pid : " + sPojo[0].pid);
954 if (sPojo[0].entityId != null)
// a best score below 1% is singled out
956 if (sPojo[0].pid < 1)
// find the entity object whose id equals the best-scoring id
960 for (Entity entity : entities)
962 if (!entity.getEntityId().equalsIgnoreCase(sPojo[0].entityId))
972 private class SiftsEntitySortPojo
973 implements Comparable<SiftsEntitySortPojo>
975 public String entityId;
977 public int chainIdFreq;
984 public int compareTo(SiftsEntitySortPojo o)
986 return this.pid - o.pid;
990 private class SegmentHelperPojo
992 private SequenceI seq;
994 private HashMap<Integer, int[]> mapping;
996 private TreeMap<Integer, String> resNumMap;
998 private List<Integer> omitNonObserved;
1000 private int nonObservedShiftIndex;
1003 * count of number of 'not observed' positions in the PDB record's SEQRES
1004 * (total number of residues with coordinates == length(SEQRES) -
1007 private int pdbeNonObserved;
1009 public SegmentHelperPojo(SequenceI seq, HashMap<Integer, int[]> mapping,
1010 TreeMap<Integer, String> resNumMap,
1011 List<Integer> omitNonObserved, int nonObservedShiftIndex,
1012 int pdbeNonObserved)
1015 setMapping(mapping);
1016 setResNumMap(resNumMap);
1017 setOmitNonObserved(omitNonObserved);
1018 setNonObservedShiftIndex(nonObservedShiftIndex);
1019 setPdbeNonObserved(pdbeNonObserved);
1023 public void setPdbeNonObserved(int pdbeNonObserved2)
1025 this.pdbeNonObserved = pdbeNonObserved2;
1028 public int getPdbeNonObserved()
1030 return pdbeNonObserved;
1032 public SequenceI getSeq()
1037 public void setSeq(SequenceI seq)
1042 public HashMap<Integer, int[]> getMapping()
1047 public void setMapping(HashMap<Integer, int[]> mapping)
1049 this.mapping = mapping;
1052 public TreeMap<Integer, String> getResNumMap()
1057 public void setResNumMap(TreeMap<Integer, String> resNumMap)
1059 this.resNumMap = resNumMap;
1062 public List<Integer> getOmitNonObserved()
1064 return omitNonObserved;
1067 public void setOmitNonObserved(List<Integer> omitNonObserved)
1069 this.omitNonObserved = omitNonObserved;
1072 public int getNonObservedShiftIndex()
1074 return nonObservedShiftIndex;
1077 public void setNonObservedShiftIndex(int nonObservedShiftIndex)
1079 this.nonObservedShiftIndex = nonObservedShiftIndex;
// Renders a human-readable report of a sequence-to-structure mapping: a
// header with both names and ranges, the two residue strings laid out in
// chunks with a line of match symbols between them, and closing statistics
// (alignment length and percentage identity).
// Throws SiftsException when the percentage identity falls below
// SiftsSettings.getFailSafePIDThreshold().
1085 public StringBuilder getMappingOutput(MappingOutputPojo mp)
1086 throws SiftsException
1088 String seqRes = mp.getSeqResidue();
1089 String seqName = mp.getSeqName();
1090 int sStart = mp.getSeqStart();
1091 int sEnd = mp.getSeqEnd();
1093 String strRes = mp.getStrResidue();
1094 String strName = mp.getStrName();
1095 int pdbStart = mp.getStrStart();
1096 int pdbEnd = mp.getStrEnd();
1098 String type = mp.getType();
// width of the name column: the longer of the two names
1100 int maxid = (seqName.length() >= strName.length()) ? seqName.length()
// residues per output line, so that name column plus residues fit in 72 columns
// NOTE(review): names of 71+ characters make len zero or negative, and the
// chunk computation below divides by len — confirm names are always shorter.
1102 int len = 72 - maxid - 1;
// number of chunks = ceiling(seqRes.length() / len)
1104 int nochunks = ((seqRes.length()) / len)
1105 + ((seqRes.length()) % len > 0 ? 1 : 0);
1107 StringBuilder output = new StringBuilder(512);
1108 output.append(NEWLINE);
1109 output.append("Sequence \u27f7 Structure mapping details")
1111 output.append("Method: SIFTS");
1112 output.append(NEWLINE).append(NEWLINE);
// header: "<seqName> : start - end Maps to" / "<structId> : start - end"
1114 output.append(new Format("%" + maxid + "s").form(seqName));
1115 output.append(" : ");
1116 output.append(String.valueOf(sStart));
1117 output.append(" - ");
1118 output.append(String.valueOf(sEnd));
1119 output.append(" Maps to ");
1120 output.append(NEWLINE);
// note: the second header line prints the structId field, not strName
1121 output.append(new Format("%" + maxid + "s").form(structId));
1122 output.append(" : ");
1123 output.append(String.valueOf(pdbStart));
1124 output.append(" - ");
1125 output.append(String.valueOf(pdbEnd));
1126 output.append(NEWLINE).append(NEWLINE);
1128 ScoreMatrix pam250 = ScoreModels.getInstance().getPam250();
// running count of identical, non-gap positions; feeds the percentage identity
1129 int matchedSeqCount = 0;
1130 for (int j = 0; j < nochunks; j++)
1132 // Print the first aligned sequence
1133 output.append(new Format("%" + (maxid) + "s").form(seqName))
1136 for (int i = 0; i < len; i++)
1138 if ((i + (j * len)) < seqRes.length())
1140 output.append(seqRes.charAt(i + (j * len)));
1144 output.append(NEWLINE);
1145 output.append(new Format("%" + (maxid) + "s").form(" ")).append(" ");
1148 * Print out the match symbols:
1149 * | for exact match (ignoring case)
1150 * . if PAM250 score is positive
1153 for (int i = 0; i < len; i++)
1157 if ((i + (j * len)) < seqRes.length())
1159 char c1 = seqRes.charAt(i + (j * len));
// strRes may be shorter than seqRes; the IndexOutOfBoundsException this can
// raise is caught further down
1160 char c2 = strRes.charAt(i + (j * len));
1161 boolean sameChar = Comparison.isSameResidue(c1, c2, false);
1162 if (sameChar && !Comparison.isGap(c1))
// the PAM250 comparison applies only when the mapping type is "pep"
1167 else if (type.equals("pep"))
1169 if (pam250.getPairwiseScore(c1, c2) > 0)
1183 } catch (IndexOutOfBoundsException e)
1188 // Now print the second aligned sequence
1189 output = output.append(NEWLINE);
1190 output = output.append(new Format("%" + (maxid) + "s").form(strName))
1192 for (int i = 0; i < len; i++)
1194 if ((i + (j * len)) < strRes.length())
1196 output.append(strRes.charAt(i + (j * len)));
1199 output.append(NEWLINE).append(NEWLINE);
// percentage identity relative to the full sequence residue string
1201 float pid = (float) matchedSeqCount / seqRes.length() * 100;
1202 if (pid < SiftsSettings.getFailSafePIDThreshold())
1204 throw new SiftsException(">>> Low PID detected for SIFTs mapping...");
1206 output.append("Length of alignment = " + seqRes.length())
1208 output.append(new Format("Percentage ID = %2.2f").form(pid));
1213 public int getEntityCount()
1215 return siftsEntry.getEntity().size();
1219 public String getDbAccessionId()
1221 return siftsEntry.getDbAccessionId();
1225 public String getDbCoordSys()
1227 return siftsEntry.getDbCoordSys();
1231 public String getDbSource()
1233 return siftsEntry.getDbSource();
1237 public String getDbVersion()
1239 return siftsEntry.getDbVersion();
1242 public static void setMockSiftsFile(File file)
1244 mockSiftsFile = file;