X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fws%2Fsifts%2FSiftsClient.java;h=53a2b8c9c20a3ff8fa1c1f75223c2b7a3693f59e;hb=refs%2Fheads%2Ffeatures%2FJAL-2136_phyre2_integration_updated;hp=6e7b9880284ebbf81dca2b24c8fe4e7645042512;hpb=2c582d859f37cd06681f9f47a5e6eea47ac0b9ef;p=jalview.git diff --git a/src/jalview/ws/sifts/SiftsClient.java b/src/jalview/ws/sifts/SiftsClient.java index 6e7b988..53a2b8c 100644 --- a/src/jalview/ws/sifts/SiftsClient.java +++ b/src/jalview/ws/sifts/SiftsClient.java @@ -21,13 +21,19 @@ package jalview.ws.sifts; import jalview.analysis.AlignSeq; +import jalview.analysis.scoremodels.ScoreMatrix; +import jalview.analysis.scoremodels.ScoreModels; import jalview.api.DBRefEntryI; import jalview.api.SiftsClientI; import jalview.datamodel.DBRefEntry; import jalview.datamodel.DBRefSource; import jalview.datamodel.SequenceI; +import jalview.io.StructureFile; import jalview.schemes.ResidueProperties; import jalview.structure.StructureMapping; +import jalview.structures.models.MappingOutputModel; +import jalview.util.Comparison; +import jalview.util.DBRefUtils; import jalview.util.Format; import jalview.xml.binding.sifts.Entry; import jalview.xml.binding.sifts.Entry.Entity; @@ -36,11 +42,9 @@ import jalview.xml.binding.sifts.Entry.Entity.Segment.ListMapRegion.MapRegion; import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue; import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.CrossRefDb; import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.ResidueDetail; -import jalview.xml.binding.sifts.Entry.ListDB.Db; import java.io.File; import java.io.FileInputStream; -import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; @@ -58,48 +62,48 @@ import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; +import java.util.Set; import java.util.TreeMap; import java.util.zip.GZIPInputStream; import javax.xml.bind.JAXBContext; -import javax.xml.bind.JAXBException; import javax.xml.bind.Unmarshaller; -import javax.xml.stream.FactoryConfigurationError; import javax.xml.stream.XMLInputFactory; -import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; import MCview.Atom; import MCview.PDBChain; -import MCview.PDBfile; public class SiftsClient implements SiftsClientI { + /* + * for use in mocking out file fetch for tests only + * - reset to null after testing! + */ + private static File mockSiftsFile; + + private static final int UNASSIGNED = StructureMapping.UNASSIGNED; // -1 + + private static final int PDB_RES_POS = StructureMapping.PDB_RES_NUM_INDEX; // 0 + private Entry siftsEntry; - private PDBfile pdb; + private StructureFile structureFile; private String pdbId; private String structId; - private String segStartEnd; - private CoordinateSys seqCoordSys = CoordinateSys.UNIPROT; private static final int BUFFER_SIZE = 4096; - public static final int UNASSIGNED = -1; - - private static final int PDB_RES_POS = 0; - private static final int PDB_ATOM_POS = 1; - private static final String NOT_FOUND = "Not_Found"; - private static final String NOT_OBSERVED = "Not_Observed"; - private static final String SIFTS_FTP_BASE_URL = "ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/"; + private static final String SIFTS_FTP_BASE_URL = "http://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/"; private final static String NEWLINE = System.lineSeparator(); @@ -107,7 +111,7 @@ public class SiftsClient implements SiftsClientI private HashSet curDBRefAccessionIdsString; - public enum CoordinateSys + private enum CoordinateSys { UNIPROT("UniProt"), PDB("PDBresnum"), PDBe("PDBe"); private String name; @@ -123,7 +127,7 @@ public class SiftsClient implements SiftsClientI } }; - public enum ResidueDetailType + private enum ResidueDetailType { NAME_SEC_STRUCTURE("nameSecondaryStructure"), CODE_SEC_STRUCTURE( "codeSecondaryStructure"), ANNOTATION("Annotation"); @@ -147,31 +151,15 @@ public class SiftsClient implements SiftsClientI * @param pdbId * @throws SiftsException */ - public SiftsClient(PDBfile pdb) throws SiftsException + public SiftsClient(StructureFile structureFile) throws SiftsException { - this.pdb = pdb; - this.pdbId = pdb.id; + this.structureFile = structureFile; + this.pdbId = structureFile.getId(); File siftsFile = getSiftsFile(pdbId); siftsEntry = parseSIFTs(siftsFile); } /** - * Construct an instance of SiftsClient using the supplied SIFTs file. Note: - * The SIFTs file should correspond to the PDB Id in PDBfile instance - * - * @param pdbId - * @param siftsFile - * @throws SiftsException - * @throws Exception - */ - public SiftsClient(PDBfile pdb, File siftsFile) throws SiftsException - { - this.pdb = pdb; - this.pdbId = pdb.id; - siftsEntry = parseSIFTs(siftsFile); - } - - /** * Parse the given SIFTs File and return a JAXB POJO of parsed data * * @param siftFile @@ -191,23 +179,7 @@ public class SiftsClient implements SiftsClientI .createXMLStreamReader(gzis); Unmarshaller um = jc.createUnmarshaller(); return (Entry) um.unmarshal(streamReader); - } catch (JAXBException e) - { - e.printStackTrace(); - throw new SiftsException(e.getMessage()); - } catch (FileNotFoundException e) - { - e.printStackTrace(); - throw new SiftsException(e.getMessage()); - } catch (XMLStreamException e) - { - e.printStackTrace(); - throw new SiftsException(e.getMessage()); - } catch (FactoryConfigurationError e) - { - e.printStackTrace(); - throw new SiftsException(e.getMessage()); - } catch (IOException e) + } catch (Exception e) { e.printStackTrace(); throw new SiftsException(e.getMessage()); @@ -224,8 +196,17 @@ public class SiftsClient implements SiftsClientI */ public static File getSiftsFile(String pdbId) throws SiftsException { - File siftsFile = new File(SiftsSettings.getSiftDownloadDirectory() - + pdbId.toLowerCase() + ".xml.gz"); + /* + * return mocked file if it has been set + */ + if (mockSiftsFile != null) + { + return mockSiftsFile; + } + + String siftsFileName = SiftsSettings.getSiftDownloadDirectory() + + pdbId.toLowerCase() + ".xml.gz"; + File siftsFile = new File(siftsFileName); if (siftsFile.exists()) { // The line below is required for unit testing... don't comment it out!!! @@ -234,12 +215,32 @@ public class SiftsClient implements SiftsClientI if (isFileOlderThanThreshold(siftsFile, SiftsSettings.getCacheThresholdInDays())) { - // System.out.println("Downloaded file is out of date, hence re-downloading..."); - siftsFile = downloadSiftsFile(pdbId.toLowerCase()); + File oldSiftsFile = new File(siftsFileName + "_old"); + siftsFile.renameTo(oldSiftsFile); + try + { + siftsFile = downloadSiftsFile(pdbId.toLowerCase()); + oldSiftsFile.delete(); + return siftsFile; + } catch (IOException e) + { + e.printStackTrace(); + oldSiftsFile.renameTo(siftsFile); + return new File(siftsFileName); + } } - return siftsFile; + else + { + return siftsFile; + } + } + try + { + siftsFile = downloadSiftsFile(pdbId.toLowerCase()); + } catch (IOException e) + { + throw new SiftsException(e.getMessage()); } - siftsFile = downloadSiftsFile(pdbId.toLowerCase()); return siftsFile; } @@ -277,8 +278,10 @@ public class SiftsClient implements SiftsClientI * @param pdbId * @return downloaded SIFTs XML file * @throws SiftsException + * @throws IOException */ - public static File downloadSiftsFile(String pdbId) throws SiftsException + public static File downloadSiftsFile(String pdbId) throws SiftsException, + IOException { if (pdbId.contains(".cif")) { @@ -294,27 +297,23 @@ public class SiftsClient implements SiftsClientI { siftsDownloadDir.mkdirs(); } - try + // System.out.println(">> Download ftp url : " + siftsFileFTPURL); + // long now = System.currentTimeMillis(); + URL url = new URL(siftsFileFTPURL); + URLConnection conn = url.openConnection(); + InputStream inputStream = conn.getInputStream(); + FileOutputStream outputStream = new FileOutputStream( + downloadedSiftsFile); + byte[] buffer = new byte[BUFFER_SIZE]; + int bytesRead = -1; + while ((bytesRead = inputStream.read(buffer)) != -1) { - // System.out.println(">> Download ftp url : " + siftsFileFTPURL); - URL url = new URL(siftsFileFTPURL); - URLConnection conn = url.openConnection(); - InputStream inputStream = conn.getInputStream(); - FileOutputStream outputStream = new FileOutputStream( - downloadedSiftsFile); - byte[] buffer = new byte[BUFFER_SIZE]; - int bytesRead = -1; - while ((bytesRead = inputStream.read(buffer)) != -1) - { - outputStream.write(buffer, 0, bytesRead); - } - outputStream.close(); - inputStream.close(); - // System.out.println(">>> File downloaded : " + downloadedSiftsFile); - } catch (IOException ex) - { - throw new SiftsException(ex.getMessage()); + outputStream.write(buffer, 0, bytesRead); } + outputStream.close(); + inputStream.close(); +// System.out.println(">>> File downloaded : " + downloadedSiftsFile +// + " took " + (System.currentTimeMillis() - now) + "ms"); return new File(downloadedSiftsFile); } @@ -348,39 +347,29 @@ public class SiftsClient implements SiftsClientI public DBRefEntryI getValidSourceDBRef(SequenceI seq) throws SiftsException { - DBRefEntryI sourceDBRef = null; - sourceDBRef = seq.getSourceDBRef(); - if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef)) + List dbRefs = seq.getPrimaryDBRefs(); + if (dbRefs == null || dbRefs.size() < 1) { - return sourceDBRef; + throw new SiftsException( + "Source DBRef could not be determined. DBRefs might not have been retrieved."); } - else + + for (DBRefEntry dbRef : dbRefs) { - DBRefEntry[] dbRefs = seq.getDBRefs(); - if (dbRefs == null || dbRefs.length < 1) + if (dbRef == null || dbRef.getAccessionId() == null + || dbRef.getSource() == null) { - throw new SiftsException("Could not get source DB Ref"); + continue; } - - for (DBRefEntryI dbRef : dbRefs) + String canonicalSource = DBRefUtils.getCanonicalName(dbRef + .getSource()); + if (isValidDBRefEntry(dbRef) + && (canonicalSource.equalsIgnoreCase(DBRefSource.UNIPROT) || canonicalSource + .equalsIgnoreCase(DBRefSource.PDB))) { - if (dbRef == null || dbRef.getAccessionId() == null - || dbRef.getSource() == null) - { - continue; - } - if (isFoundInSiftsEntry(dbRef.getAccessionId()) - && (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT) || dbRef - .getSource().equalsIgnoreCase(DBRefSource.PDB))) - { - return dbRef; - } + return dbRef; } } - if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef)) - { - return sourceDBRef; - } throw new SiftsException("Could not get source DB Ref"); } @@ -392,7 +381,7 @@ public class SiftsClient implements SiftsClientI * - DBRefEntry to validate * @return true validation is successful otherwise false is returned. */ - private boolean isValidDBRefEntry(DBRefEntryI entry) + boolean isValidDBRefEntry(DBRefEntryI entry) { return entry != null && entry.getAccessionId() != null && isFoundInSiftsEntry(entry.getAccessionId()); @@ -412,7 +401,8 @@ public class SiftsClient implements SiftsClientI .getMapRegion(); for (MapRegion mapRegion : mapRegions) { - accessions.add(mapRegion.getDb().getDbAccessionId()); + accessions + .add(mapRegion.getDb().getDbAccessionId().toLowerCase()); } } } @@ -424,8 +414,8 @@ public class SiftsClient implements SiftsClientI String pdbFile, String chain) throws SiftsException { structId = (chain == null) ? pdbId : pdbId + "|" + chain; - System.out.println("Getting mapping for: " + pdbId + "|" + chain - + " : seq- " + seq.getName()); + System.out.println("Getting SIFTS mapping for " + structId + ": seq " + + seq.getName()); final StringBuilder mappingDetails = new StringBuilder(128); PrintStream ps = new PrintStream(System.out) @@ -454,21 +444,18 @@ public class SiftsClient implements SiftsClientI public HashMap getGreedyMapping(String entityId, SequenceI seq, java.io.PrintStream os) throws SiftsException { - ArrayList omitNonObserved = new ArrayList(); + List omitNonObserved = new ArrayList(); int nonObservedShiftIndex = 0; - System.out.println("Generating mappings for : " + entityId); + // System.out.println("Generating mappings for : " + entityId); Entity entity = null; entity = getEntityById(entityId); String originalSeq = AlignSeq.extractGaps( jalview.util.Comparison.GapChars, seq.getSequenceAsString()); HashMap mapping = new HashMap(); - DBRefEntryI sourceDBRef = seq.getSourceDBRef(); - if (sourceDBRef == null) - { - sourceDBRef = getValidSourceDBRef(seq); - // TODO ensure sequence start/end is in the same coordinate system and - // consistent with the choosen sourceDBRef - } + DBRefEntryI sourceDBRef; + sourceDBRef = getValidSourceDBRef(seq); + // TODO ensure sequence start/end is in the same coordinate system and + // consistent with the choosen sourceDBRef // set sequence coordinate system - default value is UniProt if (sourceDBRef.getSource().equalsIgnoreCase(DBRefSource.PDB)) @@ -488,81 +475,9 @@ public class SiftsClient implements SiftsClientI TreeMap resNumMap = new TreeMap(); List segments = entity.getSegment(); - for (Segment segment : segments) - { - segStartEnd = segment.getStart() + " - " + segment.getEnd(); - System.out.println("Mappging segments : " + segment.getSegId() + "\\" - + segStartEnd); - List residues = segment.getListResidue().getResidue(); - for (Residue residue : residues) - { - int currSeqIndex = UNASSIGNED; - List cRefDbs = residue.getCrossRefDb(); - CrossRefDb pdbRefDb = null; - for (CrossRefDb cRefDb : cRefDbs) - { - if (cRefDb.getDbSource().equalsIgnoreCase(DBRefSource.PDB)) - { - pdbRefDb = cRefDb; - } - if (cRefDb.getDbCoordSys() - .equalsIgnoreCase(seqCoordSys.getName()) - && isAccessionMatched(cRefDb.getDbAccessionId())) - { - String resNumIndexString = cRefDb.getDbResNum() - .equalsIgnoreCase("None") ? String.valueOf(UNASSIGNED) - : cRefDb.getDbResNum(); - try - { - currSeqIndex = Integer.valueOf(resNumIndexString); - } catch (NumberFormatException nfe) - { - currSeqIndex = Integer.valueOf(resNumIndexString - .split("[a-zA-Z]")[0]); - } - if (pdbRefDb != null) - { - break;// exit loop if pdb and uniprot are already found - } - } - } - if (currSeqIndex == UNASSIGNED) - { - continue; - } - if (currSeqIndex >= seq.getStart() && currSeqIndex <= seq.getEnd()) - { - int resNum; - try - { - resNum = (pdbRefDb == null) ? Integer.valueOf(residue - .getDbResNum()) : Integer.valueOf(pdbRefDb - .getDbResNum()); - } catch (NumberFormatException nfe) - { - resNum = (pdbRefDb == null) ? Integer.valueOf(residue - .getDbResNum()) : Integer.valueOf(pdbRefDb - .getDbResNum().split("[a-zA-Z]")[0]); - } - - if (isResidueObserved(residue) - || seqCoordSys == CoordinateSys.UNIPROT) - { - char resCharCode = ResidueProperties - .getSingleCharacterCode(ResidueProperties - .getCanonicalAminoAcid(residue.getDbResName())); - resNumMap.put(currSeqIndex, String.valueOf(resCharCode)); - } - else - { - omitNonObserved.add(currSeqIndex); - ++nonObservedShiftIndex; - } - mapping.put(currSeqIndex - nonObservedShiftIndex, new int[] { - Integer.valueOf(resNum), UNASSIGNED }); - } - } - } + SegmentHelperPojo shp = new SegmentHelperPojo(seq, mapping, resNumMap, + omitNonObserved, nonObservedShiftIndex); + processSegments(segments, shp); try { populateAtomPositions(entityId, mapping); @@ -579,12 +494,13 @@ public class SiftsClient implements SiftsClientI int pdbStart = UNASSIGNED; int pdbEnd = UNASSIGNED; - Integer[] keys = mapping.keySet().toArray(new Integer[0]); - Arrays.sort(keys); - if (keys.length < 1) + if (mapping.isEmpty()) { - throw new SiftsException(">>> Empty SIFTS mapping generated!!"); + throw new SiftsException("SIFTS mapping failed"); } + + Integer[] keys = mapping.keySet().toArray(new Integer[0]); + Arrays.sort(keys); seqStart = keys[0]; seqEnd = keys[keys.length - 1]; @@ -617,23 +533,115 @@ public class SiftsClient implements SiftsClientI if (os != null) { - MappingOutputPojo mop = new MappingOutputPojo(); - mop.setSeqStart(pdbStart); - mop.setSeqEnd(pdbEnd); + MappingOutputModel mop = new MappingOutputModel(); + mop.setSeqStart(seqStart); + mop.setSeqEnd(seqEnd); mop.setSeqName(seq.getName()); mop.setSeqResidue(matchedSeq); - mop.setStrStart(seqStart); - mop.setStrEnd(seqEnd); + mop.setStrStart(pdbStart); + mop.setStrEnd(pdbEnd); mop.setStrName(structId); mop.setStrResidue(targetStrucSeqs.toString()); mop.setType("pep"); os.print(getMappingOutput(mop).toString()); + os.println(); } return mapping; } + void processSegments(List segments, SegmentHelperPojo shp) + { + SequenceI seq = shp.getSeq(); + HashMap mapping = shp.getMapping(); + TreeMap resNumMap = shp.getResNumMap(); + List omitNonObserved = shp.getOmitNonObserved(); + int nonObservedShiftIndex = shp.getNonObservedShiftIndex(); + for (Segment segment : segments) + { + // System.out.println("Mapping segments : " + segment.getSegId() + "\\"s + // + segStartEnd); + List residues = segment.getListResidue().getResidue(); + for (Residue residue : residues) + { + int currSeqIndex = UNASSIGNED; + List cRefDbs = residue.getCrossRefDb(); + CrossRefDb pdbRefDb = null; + for (CrossRefDb cRefDb : cRefDbs) + { + if (cRefDb.getDbSource().equalsIgnoreCase(DBRefSource.PDB)) + { + pdbRefDb = cRefDb; + } + if (cRefDb.getDbCoordSys() + .equalsIgnoreCase(seqCoordSys.getName()) + && isAccessionMatched(cRefDb.getDbAccessionId())) + { + currSeqIndex = getLeadingIntegerValue( + cRefDb.getDbResNum(), UNASSIGNED); + if (pdbRefDb != null) + { + break;// exit loop if pdb and uniprot are already found + } + } + } + if (currSeqIndex == UNASSIGNED) + { + continue; + } + if (currSeqIndex >= seq.getStart() && currSeqIndex <= seq.getEnd()) + { + + int resNum = (pdbRefDb == null) ? getLeadingIntegerValue( + residue.getDbResNum(), UNASSIGNED) + : getLeadingIntegerValue(pdbRefDb.getDbResNum(), + UNASSIGNED); + + if (isResidueObserved(residue) + || seqCoordSys == CoordinateSys.UNIPROT) + { + char resCharCode = ResidueProperties + .getSingleCharacterCode(ResidueProperties + .getCanonicalAminoAcid(residue.getDbResName())); + resNumMap.put(currSeqIndex, String.valueOf(resCharCode)); + } + else + { + omitNonObserved.add(currSeqIndex); + ++nonObservedShiftIndex; + } + mapping.put(currSeqIndex - nonObservedShiftIndex, new int[] { + Integer.valueOf(resNum), UNASSIGNED }); + } + } + } + } + + /** + * Get the leading integer part of a string that begins with an integer. + * + * @param input + * - the string input to process + * @param failValue + * - value returned if unsuccessful + * @return + */ + static int getLeadingIntegerValue(String input, int failValue) + { + if (input == null) + { + return failValue; + } + String[] parts = input.split("(?=\\D)(?<=\\d)"); + if (parts != null && parts.length > 0 && parts[0].matches("[0-9]+")) + { + return Integer.valueOf(parts[0]); + } + return failValue; + } + + /** * * @param chainId @@ -642,22 +650,33 @@ public class SiftsClient implements SiftsClientI * Two dimension array of residue index versus atom position * @throws IllegalArgumentException * Thrown if chainId or mapping is null + * @throws SiftsException */ - void populateAtomPositions(String chainId, - HashMap mapping) throws IllegalArgumentException + void populateAtomPositions(String chainId, Map mapping) + throws IllegalArgumentException, SiftsException { - PDBChain chain = pdb.findChain(chainId); - if (chain == null || mapping == null) - { - throw new IllegalArgumentException( - "Chain id or mapping must not be null."); - } - for (int[] map : mapping.values()) + try { - if (map[PDB_RES_POS] != UNASSIGNED) + PDBChain chain = structureFile.findChain(chainId); + + if (chain == null || mapping == null) + { + throw new IllegalArgumentException( + "Chain id or mapping must not be null."); + } + for (int[] map : mapping.values()) { - map[PDB_ATOM_POS] = getAtomIndex(map[PDB_RES_POS], chain.atoms); + if (map[PDB_RES_POS] != UNASSIGNED) + { + map[PDB_ATOM_POS] = getAtomIndex(map[PDB_RES_POS], chain.atoms); + } } + } catch (NullPointerException e) + { + throw new SiftsException(e.getMessage()); + } catch (Exception e) + { + throw new SiftsException(e.getMessage()); } } @@ -694,7 +713,7 @@ public class SiftsClient implements SiftsClientI */ private boolean isResidueObserved(Residue residue) { - HashSet annotations = getResidueAnnotaitons(residue, + Set annotations = getResidueAnnotaitons(residue, ResidueDetailType.ANNOTATION); if (annotations == null || annotations.isEmpty()) { @@ -717,7 +736,7 @@ public class SiftsClient implements SiftsClientI * @param type * @return */ - private HashSet getResidueAnnotaitons(Residue residue, + private Set getResidueAnnotaitons(Residue residue, ResidueDetailType type) { HashSet foundAnnotations = new HashSet(); @@ -742,8 +761,9 @@ public class SiftsClient implements SiftsClientI private boolean isFoundInSiftsEntry(String accessionId) { + Set siftsDBRefs = getAllMappingAccession(); return accessionId != null - && getAllMappingAccession().contains(accessionId); + && siftsDBRefs.contains(accessionId.toLowerCase()); } /** @@ -751,19 +771,19 @@ public class SiftsClient implements SiftsClientI * * @param resNumMap */ - void padWithGaps(TreeMap resNumMap, - ArrayList omitNonObserved) + void padWithGaps(Map resNumMap, + List omitNonObserved) { if (resNumMap == null || resNumMap.isEmpty()) { return; } Integer[] keys = resNumMap.keySet().toArray(new Integer[0]); - Arrays.sort(keys); + // Arrays.sort(keys); int firstIndex = keys[0]; int lastIndex = keys[keys.length - 1]; - System.out.println("Min value " + firstIndex); - System.out.println("Max value " + lastIndex); + // System.out.println("Min value " + firstIndex); + // System.out.println("Max value " + lastIndex); for (int x = firstIndex; x <= lastIndex; x++) { if (!resNumMap.containsKey(x) && !omitNonObserved.contains(x)) @@ -773,33 +793,11 @@ public class SiftsClient implements SiftsClientI } } - - @Override public Entity getEntityById(String id) throws SiftsException { - // Sometimes SIFTS mappings are wrongly swapped between different chains of - // a PDB entry. This results to wrong mappings being generated. The boolean - // flag 'isGetEntityIdDirectly, determines whether an entity to process is - // determined by a greedy heuristic search or by just matching the Chain Id - // directly against the entity Id tag. Setting the default value to 'false' - // utilise the heuristic search which always produces correct mappings but - // less optimised processing, where as changing the value to 'true' - // optimises performance but might result to incorrect mapping in some cases - // where SIFTS mappings are wrongly swapped between different chains. - boolean isGetEntityIdDirectly = false; - if (isGetEntityIdDirectly) - { - List entities = siftsEntry.getEntity(); - for (Entity entity : entities) - { - if (!entity.getEntityId().equalsIgnoreCase(id)) - { - continue; - } - return entity; - } - } + // Determines an entity to process by performing a heuristic matching of all + // Entities with the given chainId and choosing the best matching Entity Entity entity = getEntityByMostOptimalMatchedId(id); if (entity != null) { @@ -848,15 +846,20 @@ public class SiftsClient implements SiftsClientI } } } - sPojo[count].pid = 100 * (sPojo[count].chainIdFreq / sPojo[count].resCount); + sPojo[count].pid = (100 * sPojo[count].chainIdFreq) + / sPojo[count].resCount; ++count; } Arrays.sort(sPojo, Collections.reverseOrder()); - System.out.println("highest matched entity : " + sPojo[0].entityId); - System.out.println("highest matched pid : " + sPojo[0].pid); + // System.out.println("highest matched entity : " + sPojo[0].entityId); + // System.out.println("highest matched pid : " + sPojo[0].pid); if (sPojo[0].entityId != null) { + if (sPojo[0].pid < 1) + { + return null; + } for (Entity entity : entities) { if (!entity.getEntityId().equalsIgnoreCase(sPojo[0].entityId)) @@ -869,7 +872,7 @@ public class SiftsClient implements SiftsClientI return null; } - public class SiftsEntitySortPojo implements + private class SiftsEntitySortPojo implements Comparable { public String entityId; @@ -887,22 +890,83 @@ public class SiftsClient implements SiftsClientI } } - @Override - public String[] getEntryDBs() + private class SegmentHelperPojo { - System.out.println("\nListing DB entries..."); - List availDbs = new ArrayList(); - List dbs = siftsEntry.getListDB().getDb(); - for (Db db : dbs) + private SequenceI seq; + + private HashMap mapping; + + private TreeMap resNumMap; + + private List omitNonObserved; + + private int nonObservedShiftIndex; + + public SegmentHelperPojo(SequenceI seq, + HashMap mapping, + TreeMap resNumMap, + List omitNonObserved, int nonObservedShiftIndex) + { + setSeq(seq); + setMapping(mapping); + setResNumMap(resNumMap); + setOmitNonObserved(omitNonObserved); + setNonObservedShiftIndex(nonObservedShiftIndex); + } + + public SequenceI getSeq() + { + return seq; + } + + public void setSeq(SequenceI seq) + { + this.seq = seq; + } + + public HashMap getMapping() + { + return mapping; + } + + public void setMapping(HashMap mapping) + { + this.mapping = mapping; + } + + public TreeMap getResNumMap() + { + return resNumMap; + } + + public void setResNumMap(TreeMap resNumMap) + { + this.resNumMap = resNumMap; + } + + public List getOmitNonObserved() + { + return omitNonObserved; + } + + public void setOmitNonObserved(List omitNonObserved) { - availDbs.add(db.getDbSource()); - System.out.println(db.getDbSource() + " | " + db.getDbCoordSys()); + this.omitNonObserved = omitNonObserved; + } + + public int getNonObservedShiftIndex() + { + return nonObservedShiftIndex; + } + + public void setNonObservedShiftIndex(int nonObservedShiftIndex) + { + this.nonObservedShiftIndex = nonObservedShiftIndex; } - return availDbs.toArray(new String[0]); } @Override - public StringBuffer getMappingOutput(MappingOutputPojo mp) + public StringBuilder getMappingOutput(MappingOutputModel mp) throws SiftsException { String seqRes = mp.getSeqResidue(); @@ -924,9 +988,10 @@ public class SiftsClient implements SiftsClientI int nochunks = ((seqRes.length()) / len) + ((seqRes.length()) % len > 0 ? 1 : 0); // output mappings - StringBuffer output = new StringBuffer(); + StringBuilder output = new StringBuilder(512); output.append(NEWLINE); - output.append("Sequence ⟷ Structure mapping details").append(NEWLINE); + output.append("Sequence \u27f7 Structure mapping details").append( + NEWLINE); output.append("Method: SIFTS"); output.append(NEWLINE).append(NEWLINE); @@ -944,6 +1009,7 @@ public class SiftsClient implements SiftsClientI output.append(String.valueOf(pdbEnd)); output.append(NEWLINE).append(NEWLINE); + ScoreMatrix pam250 = ScoreModels.getInstance().getPam250(); int matchedSeqCount = 0; for (int j = 0; j < nochunks; j++) { @@ -962,25 +1028,29 @@ public class SiftsClient implements SiftsClientI output.append(NEWLINE); output.append(new Format("%" + (maxid) + "s").form(" ")).append(" "); - // Print out the matching chars + /* + * Print out the match symbols: + * | for exact match (ignoring case) + * . if PAM250 score is positive + * else a space + */ for (int i = 0; i < len; i++) { try { if ((i + (j * len)) < seqRes.length()) { - if (seqRes.charAt(i + (j * len)) == strRes - .charAt(i + (j * len)) - && !jalview.util.Comparison.isGap(seqRes.charAt(i - + (j * len)))) + char c1 = seqRes.charAt(i + (j * len)); + char c2 = strRes.charAt(i + (j * len)); + boolean sameChar = Comparison.isSameResidue(c1, c2, false); + if (sameChar && !Comparison.isGap(c1)) { matchedSeqCount++; output.append("|"); } else if (type.equals("pep")) { - if (ResidueProperties.getPAM250(seqRes.charAt(i + (j * len)), - strRes.charAt(i + (j * len))) > 0) + if (pam250.getPairwiseScore(c1, c2) > 0) { output.append("."); } @@ -1020,7 +1090,6 @@ public class SiftsClient implements SiftsClientI output.append("Length of alignment = " + seqRes.length()).append( NEWLINE); output.append(new Format("Percentage ID = %2.2f").form(pid)); - output.append(NEWLINE); return output; } @@ -1043,12 +1112,6 @@ public class SiftsClient implements SiftsClientI } @Override - public String getDbEvidence() - { - return siftsEntry.getDbEvidence(); - } - - @Override public String getDbSource() { return siftsEntry.getDbSource(); @@ -1060,4 +1123,10 @@ public class SiftsClient implements SiftsClientI return siftsEntry.getDbVersion(); } + public static void setMockSiftsFile(File file) + { + mockSiftsFile = file; + } + + }