/*
* Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
* Copyright (C) $$Year-Rel$$ The Jalview Authors
*
* This file is part of Jalview.
*
* Jalview is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* Jalview is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Jalview. If not, see <http://www.gnu.org/licenses/>.
* The Jalview Authors are detailed in the 'AUTHORS' file.
*/
package jalview.io;
import jalview.analysis.AlignSeq;
import jalview.api.DBRefEntryI;
import jalview.api.SiftsClientI;
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.SequenceI;
import jalview.schemes.ResidueProperties;
import jalview.structure.StructureMapping;
import jalview.util.Format;
import jalview.xml.binding.sifts.Entry;
import jalview.xml.binding.sifts.Entry.Entity;
import jalview.xml.binding.sifts.Entry.Entity.Segment;
import jalview.xml.binding.sifts.Entry.Entity.Segment.ListMapRegion.MapRegion;
import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue;
import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.CrossRefDb;
import jalview.xml.binding.sifts.Entry.EntryDetail;
import jalview.xml.binding.sifts.Entry.ListDB.Db;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.zip.GZIPInputStream;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Unmarshaller;
import javax.xml.stream.FactoryConfigurationError;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
public class SiftsClient implements SiftsClientI
{
/**
 * JAXB representation of the parsed SIFTS XML. Assigned by the constructors;
 * it stays null when fetching or parsing failed, because the constructors
 * only print the exception.
 */
private Entry siftsEntry;
/** PDB id this client was constructed for. */
private String pdbId;
/** Size in bytes of the copy buffer used when downloading a SIFTS file. */
private static final int BUFFER_SIZE = 4096;
/** URL prefix to which "pdbid.xml.gz" is appended to locate a SIFTS file. */
private static final String SIFTS_FTP_BASE_URL = "ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/";
/**
 * Fallback download directory: ".sifts_downloads" below the "user.home"
 * system property, terminated with a file separator.
 */
public static final String DEFAULT_SIFTS_DOWNLOAD_DIR = System
.getProperty("user.home")
+ File.separatorChar
+ ".sifts_downloads" + File.separatorChar;
/**
 * Directory in which SIFTS files are stored, obtained from
 * jalview.bin.Cache.getDefault under the key "sifts_download_dir" with
 * DEFAULT_SIFTS_DOWNLOAD_DIR as the default argument. File paths are built
 * by plain string concatenation, so the value has to end with a separator.
 * NOTE(review): presumably a user preference lookup - confirm against Cache.
 */
public static final String SIFTS_DOWNLOAD_DIR = jalview.bin.Cache
.getDefault("sifts_download_dir", DEFAULT_SIFTS_DOWNLOAD_DIR);
/** Platform line separator used when assembling mapping output. */
private final static String NEWLINE = System.lineSeparator();
/**
 * Creates a client for the given PDB id, locating (and if necessary
 * downloading) the matching SIFTS file and parsing it.
 * <p>
 * Failures are reported on the error stream only; the instance is still
 * created, but without a parsed SIFTS entry.
 *
 * @param pdbId
 *          the PDB id whose SIFTS record should be loaded
 */
public SiftsClient(String pdbId)
{
  this.pdbId = pdbId;
  try
  {
    siftsEntry = parseSIFTs(getSiftsFile(pdbId));
  } catch (Exception failure)
  {
    failure.printStackTrace();
  }
}
/**
 * Creates a client from an already available SIFTS file. The caller is
 * responsible for supplying a file that belongs to the given PDB id.
 * <p>
 * Parse failures are reported on the error stream only; the instance is
 * still created, but without a parsed SIFTS entry.
 *
 * @param pdbId
 *          the PDB id the file is expected to describe
 * @param siftsFile
 *          the SIFTS file to parse
 */
public SiftsClient(String pdbId, File siftsFile)
{
  this.pdbId = pdbId;
  try
  {
    siftsEntry = parseSIFTs(siftsFile);
  } catch (Exception failure)
  {
    failure.printStackTrace();
  }
}
/**
 * Parse the given SIFTs file and return the JAXB object tree built from it.
 * <p>
 * The file and gzip streams are always closed, whether parsing succeeds or
 * not. Any parsing problem is reported as an Exception carrying the
 * underlying failure as its cause.
 *
 * @param siftFile
 *          the GZipped SIFTs XML file to parse
 * @return the unmarshalled SIFTs entry
 * @throws Exception
 *           if the file cannot be read, is not valid gzip data, or cannot be
 *           unmarshalled as SIFTs XML
 */
private Entry parseSIFTs(File siftFile) throws Exception
{
  System.out.println("File : " + siftFile.getAbsolutePath());
  try (InputStream in = new FileInputStream(siftFile);
          GZIPInputStream gzis = new GZIPInputStream(in))
  {
    JAXBContext jc = JAXBContext.newInstance("jalview.xml.binding.sifts");
    XMLInputFactory factory = XMLInputFactory.newInstance();
    // The file usually arrives over unauthenticated FTP, so do not let it
    // pull in DTDs or external entities (XXE).
    factory.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
    factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES,
            Boolean.FALSE);
    XMLStreamReader streamReader = factory.createXMLStreamReader(gzis);
    Unmarshaller um = jc.createUnmarshaller();
    return (Entry) um.unmarshal(streamReader);
  } catch (JAXBException | XMLStreamException | FactoryConfigurationError
          | IOException e)
  {
    // keep the root cause attached instead of printing it and losing it
    throw new Exception("Error parsing siftFile", e);
  }
}
/**
 * Get the SIFTs XML file for a given PDB Id, using the copy in the local
 * download directory when a non-empty one exists and downloading it
 * otherwise.
 *
 * @param pdbId
 *          the PDB id (any letter case); must not be null
 * @return the gzipped SIFTs XML file
 */
public static File getSiftsFile(String pdbId)
{
  // Locale.ROOT keeps "I" -> "i" even under locales with special casing
  // rules (e.g. Turkish), which would otherwise break the file name and URL.
  String id = pdbId.toLowerCase(java.util.Locale.ROOT);
  File siftsFile = new File(SIFTS_DOWNLOAD_DIR + id + ".xml.gz");
  // An empty file is the residue of a failed download, not a usable cache
  // entry, so it is fetched again.
  if (siftsFile.isFile() && siftsFile.length() > 0)
  {
    // TODO it may be worth performing a timestamp age check to determine if a
    // new SIFTs file should be re-downloaded as SIFTs entries are usually
    // updated weekly
    System.out.println(">>> SIFTS File already downloaded for " + pdbId);
    return siftsFile;
  }
  return downloadSiftsFile(id);
}
/**
 * Download the SIFTs XML file for a given PDB Id into the local SIFTs
 * download directory, creating the directory if needed.
 * <p>
 * Both streams are closed in every case. When the transfer fails after the
 * target file has been opened, the incomplete file is removed so that it is
 * not mistaken for a valid download later. Failures are printed, not
 * thrown; the returned file then may not exist.
 *
 * @param pdbId
 *          the PDB id, in the letter case used by the server file names
 * @return the location of the downloaded SIFTs XML file
 */
public static File downloadSiftsFile(String pdbId)
{
  String siftFile = pdbId + ".xml.gz";
  String siftsFileFTPURL = SIFTS_FTP_BASE_URL + siftFile;
  String downloadedSiftsFile = SIFTS_DOWNLOAD_DIR + siftFile;
  File siftsDownloadDir = new File(SIFTS_DOWNLOAD_DIR);
  if (!siftsDownloadDir.exists())
  {
    siftsDownloadDir.mkdirs();
  }
  File target = new File(downloadedSiftsFile);
  // true while the target has been opened for writing but not completed
  boolean incomplete = false;
  try
  {
    System.out.println(">> Download ftp url : " + siftsFileFTPURL);
    URLConnection conn = new URL(siftsFileFTPURL).openConnection();
    try (InputStream inputStream = conn.getInputStream();
            FileOutputStream outputStream = new FileOutputStream(target))
    {
      incomplete = true;
      byte[] buffer = new byte[BUFFER_SIZE];
      int bytesRead;
      while ((bytesRead = inputStream.read(buffer)) != -1)
      {
        outputStream.write(buffer, 0, bytesRead);
      }
    }
    incomplete = false;
    System.out.println(">>> File downloaded : " + downloadedSiftsFile);
  } catch (IOException ex)
  {
    ex.printStackTrace();
    // the streams are already closed here, so the file can be removed
    if (incomplete && !target.delete())
    {
      System.err.println("Could not remove incomplete SIFTS download "
              + downloadedSiftsFile);
    }
  }
  return target;
}
/**
 * Delete the SIFTs file for the given PDB Id from the local SIFTs download
 * directory.
 *
 * @param pdbId
 *          the PDB id (any letter case); must not be null
 * @return true if the file was deleted or did not exist, false if it exists
 *         but could not be deleted
 */
public static boolean deleteSiftsFileByPDBId(String pdbId)
{
  // Locale.ROOT avoids locale specific casing (e.g. Turkish dotless i)
  // producing a file name that never matches the stored one.
  File siftsFile = new File(SIFTS_DOWNLOAD_DIR
          + pdbId.toLowerCase(java.util.Locale.ROOT) + ".xml.gz");
  return !siftsFile.exists() || siftsFile.delete();
}
/**
 * Get a database reference for the given sequence that is present in the
 * current SIFTs entry.
 * <p>
 * The sequence's source DBRef is used when its accession occurs in the SIFTs
 * entry. Otherwise the sequence's DBRefs are searched (after fetching them
 * with DBRefFetcher if the sequence has none) for the first "uniprot" or
 * "pdb" reference whose accession occurs in the SIFTs entry.
 *
 * @param seq
 *          the target sequence for the operation
 * @return a DBRefEntry whose accession is mapped in the SIFTs entry
 * @throws Exception
 *           if no such DBRefEntry was found for the given sequence
 */
public DBRefEntryI getValidSourceDBRef(SequenceI seq) throws Exception
{
  DBRefEntryI sourceDBRef = seq.getSourceDBRef();
  if (isValidDBRefEntry(sourceDBRef))
  {
    return sourceDBRef;
  }

  DBRefEntry[] dbRefs = seq.getDBRefs();
  if (dbRefs == null || dbRefs.length < 1)
  {
    // nothing attached yet: try to retrieve references for the sequence
    final SequenceI[] seqs = new SequenceI[] { seq };
    new jalview.ws.DBRefFetcher(seqs, null, null, null, false)
            .fetchDBRefs(true);
    dbRefs = seq.getDBRefs();
  }
  if (dbRefs == null || dbRefs.length < 1)
  {
    throw new Exception("Could not get source DB Ref");
  }

  // collect the accessions once instead of once per candidate reference
  HashSet<String> accessions = getAllMappingAccession();
  for (DBRefEntryI dbRef : dbRefs)
  {
    if (dbRef == null || dbRef.getAccessionId() == null
            || dbRef.getSource() == null)
    {
      continue;
    }
    boolean supportedSource = dbRef.getSource().equalsIgnoreCase("uniprot")
            || dbRef.getSource().equalsIgnoreCase("pdb");
    if (supportedSource && accessions.contains(dbRef.getAccessionId()))
    {
      return dbRef;
    }
  }
  throw new Exception("Could not get source DB Ref");
}
/**
 * Tells whether a DBRef entry can be used with the instantiated SIFTs
 * entry: it must exist, carry an accession id, and that accession must
 * occur in the SIFTs entry.
 *
 * @param entry
 *          the DBRefEntry to validate, may be null
 * @return true if the entry passes all checks, false otherwise
 */
private boolean isValidDBRefEntry(DBRefEntryI entry)
{
  if (entry == null)
  {
    return false;
  }
  String accession = entry.getAccessionId();
  if (accession == null)
  {
    return false;
  }
  // a check on entry.getStartRes() > 0 was considered here but is not applied
  return isFoundInSiftsEntry(accession);
}
/**
 * Collects the database accession ids of every map region, across all
 * segments of all entities of the SIFTs entry.
 *
 * @return the set of accession ids referenced by the entry's map regions
 */
@Override
public HashSet<String> getAllMappingAccession()
{
  HashSet<String> accessions = new HashSet<String>();
  List<Entity> entities = siftsEntry.getEntity();
  for (Entity entity : entities)
  {
    List<Segment> segments = entity.getSegment();
    for (Segment segment : segments)
    {
      List<MapRegion> mapRegions = segment.getListMapRegion()
              .getMapRegion();
      for (MapRegion mapRegion : mapRegions)
      {
        accessions.add(mapRegion.getDb().getDbAccessionId());
      }
    }
  }
  return accessions;
}
/**
 * Builds a position-by-position mapping between the ungapped sequence and
 * the residues of one SIFTs entity.
 * <p>
 * The result has one row per ungapped sequence position. Column 1 holds
 * consecutive numbers starting at {@code seq.getStart() - 1}. Column 0 holds
 * the residue number of the entity residue whose cross reference to the
 * sequence's source accession carries the number in column 1, or 0 when no
 * such residue exists.
 * <p>
 * A textual summary is also produced: the sequence is laid out against a
 * mask holding 'x' for every mapped position and '-' for every unmapped
 * one. Problems while producing that summary are printed and do not affect
 * the returned mapping.
 *
 * @param entityId
 *          id of the SIFTs entity to map against
 * @param seq
 *          the sequence to map
 * @param os
 *          stream receiving the textual summary, may be null
 * @return the mapping table described above
 * @throws Exception
 *           if the entity does not exist or no usable source database
 *           reference can be determined for the sequence
 */
@Override
public int[][] getGreedyMapping(String entityId, SequenceI seq,
        java.io.PrintStream os) throws Exception
{
  System.out.println("Generating mappings for : " + entityId);
  Entity entity = getEntityById(entityId);
  String seqStr = AlignSeq.extractGaps(jalview.util.Comparison.GapChars,
          seq.getSequenceAsString());
  int seqLength = seqStr.length();
  int[][] mapping = new int[seqLength][2];
  // mapped[i] is set once a structure residue was found for position i
  boolean[] mapped = new boolean[seqLength];

  DBRefEntryI sourceDBRef = seq.getSourceDBRef();
  if (sourceDBRef == null)
  {
    // throws when nothing usable exists, so the reference is non-null below
    sourceDBRef = getValidSourceDBRef(seq);
    // TODO update sequence start/end with sourceDBRef start/end
  }
  String crossRefAccessionId = sourceDBRef.getAccessionId();

  int firstRefPos = seq.getStart() - 1;
  for (int i = 0; i < seqLength; i++)
  {
    mapping[i][1] = firstRefPos + i;
  }

  List<Segment> segments = entity.getSegment();
  for (Segment segment : segments)
  {
    System.out.println("Mappging segments : " + segment.getSegId() + "\\"
            + segment.getStart() + "-" + segment.getEnd());
    List<Residue> residues = segment.getListResidue().getResidue();
    for (Residue residue : residues)
    {
      // -1 marks "no cross reference to the source accession"
      int refDbResNum = -1;
      List<CrossRefDb> cRefDbs = residue.getCrossRefDb();
      for (CrossRefDb cRefDb : cRefDbs)
      {
        if (cRefDb.getDbAccessionId().equalsIgnoreCase(
                crossRefAccessionId))
        {
          refDbResNum = Integer.valueOf(cRefDb.getDbResNum());
        }
      }
      if (refDbResNum == -1)
      {
        continue;
      }
      // column 1 is consecutive, so the row is found by subtraction rather
      // than by scanning the whole table
      int row = refDbResNum - firstRefPos;
      if (row >= 0 && row < seqLength)
      {
        mapping[row][0] = Integer.valueOf(residue.getDbResNum());
        mapped[row] = true;
      }
    }
  }

  // Generate visual mapping output: one mask character per position
  StringBuilder mask = new StringBuilder(seqLength);
  for (boolean positionMapped : mapped)
  {
    mask.append(positionMapped ? 'x' : '-');
  }
  String mappedStrucSeq = mask.toString();
  try
  {
    System.out.println(">>>> seq: " + seqStr + "\nlength "
            + seqStr.length());
    System.out.println(">>>> pdb: " + mappedStrucSeq + "\nlength "
            + mappedStrucSeq.length());
    // NOTE(review): 3 chunks only cover the start of long sequences
    String printedMapping = getMappingOutput(seqStr, mappedStrucSeq,
            "seqAccession", "strucAccession", "pep", 3).toString();
    if (os != null)
    {
      os.print(printedMapping);
    }
    System.out.println();
  } catch (Exception ex)
  {
    ex.printStackTrace();
  }
  return mapping;
}
/**
 * Tells whether the given accession id is referenced by any map region of
 * the SIFTs entry.
 *
 * @param accessionId
 *          the accession id to look up, may be null
 * @return true if the id is non-null and present in the entry
 */
@Override
public boolean isFoundInSiftsEntry(String accessionId)
{
  if (accessionId == null)
  {
    return false;
  }
  return getAllMappingAccession().contains(accessionId);
}
/**
 * Creates a StructureMapping for the given sequence and chain from the
 * greedy SIFTs mapping.
 * <p>
 * If the greedy mapping fails, the exception is printed and the
 * StructureMapping is created with a null mapping table. The textual
 * mapping report is collected while mapping but is currently not handed to
 * the StructureMapping, which receives null for it.
 *
 * @param seq
 *          the sequence to map
 * @param pdbFile
 *          the structure file passed on to the StructureMapping
 * @param chain
 *          the chain id, also used as the SIFTs entity id
 * @return the new StructureMapping
 */
@Override
public StructureMapping getSiftsStructureMapping(SequenceI seq,
        String pdbFile, String chain)
{
  System.out.println("Getting mapping for: " + pdbId + "|" + chain
          + " : seq- " + seq.getName());

  final StringBuilder report = new StringBuilder(128);
  // stream that diverts printed strings and empty lines into the report
  PrintStream reportStream = new PrintStream(System.out)
  {
    @Override
    public void print(String text)
    {
      report.append(text);
    }

    @Override
    public void println()
    {
      report.append(NEWLINE);
    }
  };

  int[][] residueMapping;
  try
  {
    residueMapping = getGreedyMapping(chain, seq, reportStream);
  } catch (Exception failure)
  {
    failure.printStackTrace();
    residueMapping = null;
  }

  // the collected report (report.toString()) is deliberately not passed on
  String mappingOutput = null;
  return new StructureMapping(seq, pdbFile, pdbId, chain, residueMapping,
          mappingOutput);
}
/**
 * Finds the entity of the SIFTs entry with the given id, ignoring letter
 * case.
 *
 * @param id
 *          the entity id to look for
 * @return the first entity whose id matches
 * @throws Exception
 *           if the entry contains no entity with that id
 */
@Override
public Entity getEntityById(String id) throws Exception
{
  List<Entity> entities = siftsEntry.getEntity();
  for (Entity entity : entities)
  {
    if (entity.getEntityId().equalsIgnoreCase(id))
    {
      return entity;
    }
  }
  throw new Exception("Entity " + id + " not found");
}
/**
 * Lists the databases declared in the SIFTs entry, printing each one's
 * source and coordinate system to standard output.
 *
 * @return the source name of each declared database, in entry order; an
 *         empty array when the entry declares none
 */
@Override
public String[] getEntryDBs()
{
  System.out.println("\nListing DB entries...");
  List<Db> dbs = siftsEntry.getListDB().getDb();
  String[] sources = new String[dbs.size()];
  int next = 0;
  for (Db db : dbs)
  {
    System.out.println(db.getDbSource() + " | " + db.getDbCoordSys());
    sources[next++] = db.getDbSource();
  }
  return sources;
}
/**
 * Prints every entry detail of the SIFTs entry to standard output: its
 * content, database source, property and string representation.
 */
@Override
public void getEntryDetails()
{
  List<EntryDetail> eds = siftsEntry.getEntryDetail();
  for (EntryDetail ed : eds)
  {
    System.out.println("Entry Details: " + ed.getContent() + " "
            + ed.getDbSource() + " " + ed.getProperty() + " "
            + ed.toString());
  }
}
/**
 * Lays out two aligned strings in chunks, one above the other, with a row
 * of match markers in between, and prints the result together with the
 * percentage identity to standard output.
 * <p>
 * Each chunk consists of the id and characters of the first string, a
 * marker row ('|' for identical non-gap characters, '.' for a positive
 * PAM250 score when type is "pep", a space otherwise), and the id and
 * characters of the second string. Ids are padded to the longer of the two
 * so that all three rows line up. Positions beyond the end of the second
 * string are treated as mismatches.
 *
 * @param astr1
 *          first aligned string
 * @param astr2
 *          second aligned string
 * @param s1id
 *          label for the first string
 * @param s2id
 *          label for the second string
 * @param type
 *          "pep" to enable PAM250 based similarity markers
 * @param nochunks
 *          number of chunks to print
 * @return the formatted output
 */
@Override
public StringBuffer getMappingOutput(String astr1, String astr2, String s1id,
        String s2id, String type, int nochunks)
{
  // pad to the longer id, otherwise the rows are shifted against each other
  int maxid = Math.max(s1id.length(), s2id.length());
  int len = 72 - maxid - 1;
  boolean peptide = "pep".equals(type);
  StringBuffer output = new StringBuffer();
  // output mappings
  float pid = 0;
  for (int j = 0; j < nochunks; j++)
  {
    int chunkStart = j * len;
    int chunkEnd = chunkStart + len;

    // Print the first aligned sequence
    output.append(new Format("%" + (maxid) + "s").form(s1id)).append(" ");
    for (int pos = chunkStart; pos < chunkEnd && pos < astr1.length(); pos++)
    {
      output.append(astr1.charAt(pos));
    }
    output.append(NEWLINE);
    output.append(new Format("%" + (maxid) + "s").form(" ")).append(" ");

    // Print out the matching chars
    for (int pos = chunkStart; pos < chunkEnd && pos < astr1.length(); pos++)
    {
      if (pos >= astr2.length())
      {
        // nothing to compare against in the shorter second string
        output.append(" ");
        continue;
      }
      char c1 = astr1.charAt(pos);
      char c2 = astr2.charAt(pos);
      if (c1 == c2 && !jalview.util.Comparison.isGap(c1))
      {
        pid++;
        output.append("|");
      }
      else if (peptide && ResidueProperties.getPAM250(c1, c2) > 0)
      {
        output.append(".");
      }
      else
      {
        output.append(" ");
      }
    }

    // Now print the second aligned sequence
    output.append(NEWLINE);
    output.append(new Format("%" + (maxid) + "s").form(s2id)).append(" ");
    for (int pos = chunkStart; pos < chunkEnd && pos < astr2.length(); pos++)
    {
      output.append(astr2.charAt(pos));
    }
    output.append(NEWLINE).append(NEWLINE);
  }
  // avoid 0/0 (NaN) for an empty first string
  pid = astr1.isEmpty() ? 0 : pid / (astr1.length()) * 100;
  System.out.println(output);
  System.out.println(pid);
  // TODO return output & pid
  return output;
}
/**
 * Returns the number of entities listed in the parsed SIFTs entry.
 *
 * @return the size of the entry's entity list
 */
@Override
public int getEntityCount()
{
return siftsEntry.getEntity().size();
}
/**
 * Returns the database accession id recorded on the parsed SIFTs entry.
 *
 * @return the value of the entry's getDbAccessionId()
 */
@Override
public String getDbAccessionId()
{
return siftsEntry.getDbAccessionId();
}
/**
 * Returns the database coordinate system recorded on the parsed SIFTs entry.
 *
 * @return the value of the entry's getDbCoordSys()
 */
@Override
public String getDbCoordSys()
{
return siftsEntry.getDbCoordSys();
}
/**
 * Returns the database evidence recorded on the parsed SIFTs entry.
 *
 * @return the value of the entry's getDbEvidence()
 */
@Override
public String getDbEvidence()
{
return siftsEntry.getDbEvidence();
}
/**
 * Returns the database source recorded on the parsed SIFTs entry.
 *
 * @return the value of the entry's getDbSource()
 */
@Override
public String getDbSource()
{
return siftsEntry.getDbSource();
}
/**
 * Returns the database version recorded on the parsed SIFTs entry.
 *
 * @return the value of the entry's getDbVersion()
 */
@Override
public String getDbVersion()
{
return siftsEntry.getDbVersion();
}
}