import jalview.bin.Cache;
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
+import jalview.datamodel.GeneLociI;
import jalview.datamodel.SequenceI;
+import jalview.ext.ensembl.EnsemblInfo;
+import jalview.ext.ensembl.EnsemblMap;
import jalview.io.gff.SequenceOntologyI;
import jalview.structure.StructureSelectionManager;
+import jalview.util.DBRefUtils;
+import jalview.util.MapList;
+import jalview.util.MappingUtils;
import jalview.util.MessageManager;
import jalview.ws.SequenceFetcher;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
-
-import javax.swing.JOptionPane;
+import java.util.Map;
+import java.util.Set;
/**
* Factory constructor and runnable for discovering and displaying
private SequenceI[] sel;
- private boolean _odna;
+ private final boolean _odna;
private String source;
- List<AlignmentViewPanel> xrefViews = new ArrayList<AlignmentViewPanel>();
+ List<AlignmentViewPanel> xrefViews = new ArrayList<>();
- public List<jalview.api.AlignmentViewPanel> getXrefViews()
+ List<AlignmentViewPanel> getXrefViews()
{
return xrefViews;
}
{
return;
}
+
+ /*
+ * try to look up chromosomal coordinates for nucleotide
+ * sequences (if not already retrieved)
+ */
+ findGeneLoci(xrefs.getSequences());
+
/*
* get display scheme (if any) to apply to features
*/
if (Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true))
{
- boolean copyAlignmentIsAligned = false;
- if (dna)
- {
- copyAlignment = AlignmentUtils.makeCdsAlignment(sel, dataset,
- xrefsAlignment.getSequencesArray());
- if (copyAlignment.getHeight() == 0)
- {
- JvOptionPane.showMessageDialog(alignFrame,
- MessageManager.getString("label.cant_map_cds"),
- MessageManager.getString("label.operation_failed"),
- JvOptionPane.OK_OPTION);
- System.err.println("Failed to make CDS alignment");
- }
-
- /*
- * pending getting Embl transcripts to 'align',
- * we are only doing this for Ensembl
- */
- // TODO proper criteria for 'can align as cdna'
- if (DBRefSource.ENSEMBL.equalsIgnoreCase(source)
- || AlignmentUtils.looksLikeEnsembl(alignment))
- {
- copyAlignment.alignAs(alignment);
- copyAlignmentIsAligned = true;
- }
- }
- else
+ copyAlignment = copyAlignmentForSplitFrame(alignment, dataset, dna,
+ xrefs, xrefsAlignment);
+ if (copyAlignment == null)
{
- copyAlignment = AlignmentUtils.makeCopyAlignment(sel,
- xrefs.getSequencesArray(), dataset);
- }
- copyAlignment
- .setGapCharacter(alignFrame.viewport.getGapCharacter());
-
- StructureSelectionManager ssm = StructureSelectionManager
- .getStructureSelectionManager(Desktop.instance);
-
- /*
- * register any new mappings for sequence mouseover etc
- * (will not duplicate any previously registered mappings)
- */
- ssm.registerMappings(dataset.getCodonFrames());
-
- if (copyAlignment.getHeight() <= 0)
- {
- System.err.println(
- "No Sequences generated for xRef type " + source);
- return;
- }
- /*
- * align protein to dna
- */
- if (dna && copyAlignmentIsAligned)
- {
- xrefsAlignment.alignAs(copyAlignment);
- }
- else
- {
- /*
- * align cdna to protein - currently only if
- * fetching and aligning Ensembl transcripts!
- */
- // TODO: generalise for other sources of locus/transcript/cds data
- if (dna && DBRefSource.ENSEMBL.equalsIgnoreCase(source))
- {
- copyAlignment.alignAs(xrefsAlignment);
- }
+ return; // failed
}
}
+
/*
* build AlignFrame(s) according to available alignment data
*/
xrefViews.add(newFrame.alignPanel);
return; // via finally clause
}
+
AlignFrame copyThis = new AlignFrame(copyAlignment,
AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
copyThis.setTitle(alignFrame.getTitle());
}
/**
+ * Tries to add chromosomal coordinates to any nucleotide sequence which does
+ * not already have them. Coordinates are retrieved from Ensembl given an
+ * Ensembl identifier, either on the sequence itself or on a peptide sequence
+ * it has a reference to.
+ *
+ * <pre>
+ * Example (human):
+ * - fetch EMBLCDS cross-references for Uniprot entry P30419
+ * - the EMBL sequences do not have xrefs to Ensembl
+ * - the Uniprot entry has xrefs to
+ * ENSP00000258960, ENSP00000468424, ENST00000258960, ENST00000592782
+ * - either of the transcript ids can be used to retrieve gene loci e.g.
+ * http://rest.ensembl.org/map/cds/ENST00000592782/1..100000
+ * Example (invertebrate):
+ * - fetch EMBLCDS cross-references for Uniprot entry Q43517 (FER1_SOLLC)
+ * - the Uniprot entry has an xref to ENSEMBLPLANTS Solyc10g044520.1.1
+ * - can retrieve gene loci with
+ * http://rest.ensemblgenomes.org/map/cds/Solyc10g044520.1.1/1..100000
+ * </pre>
+ *
+ * @param sequences
+ */
+ public static void findGeneLoci(List<SequenceI> sequences)
+ {
+ Map<DBRefEntry, GeneLociI> retrievedLoci = new HashMap<>();
+ for (SequenceI seq : sequences)
+ {
+ findGeneLoci(seq, retrievedLoci);
+ }
+ }
+
+ /**
+ * Tres to find chromosomal coordinates for the sequence, by searching its
+ * direct and indirect cross-references for Ensembl. If the loci have already
+ * been retrieved, just reads them out of the map of retrievedLoci; this is
+ * the case of an alternative transcript for the same protein. Otherwise calls
+ * a REST service to retrieve the loci, and if successful, adds them to the
+ * sequence and to the retrievedLoci.
+ *
+ * @param seq
+ * @param retrievedLoci
+ */
+ static void findGeneLoci(SequenceI seq,
+ Map<DBRefEntry, GeneLociI> retrievedLoci)
+ {
+ /*
+ * don't replace any existing chromosomal coordinates
+ */
+ if (seq == null || seq.isProtein() || seq.getGeneLoci() != null
+ || seq.getDBRefs() == null)
+ {
+ return;
+ }
+
+ Set<String> ensemblDivisions = new EnsemblInfo().getDivisions();
+
+ /*
+ * first look for direct dbrefs from sequence to Ensembl
+ */
+ String[] divisionsArray = ensemblDivisions
+ .toArray(new String[ensemblDivisions.size()]);
+ DBRefEntry[] seqRefs = seq.getDBRefs();
+ DBRefEntry[] directEnsemblRefs = DBRefUtils.selectRefs(seqRefs,
+ divisionsArray);
+ if (directEnsemblRefs != null)
+ {
+ for (DBRefEntry ensemblRef : directEnsemblRefs)
+ {
+ if (fetchGeneLoci(seq, ensemblRef, retrievedLoci))
+ {
+ return;
+ }
+ }
+ }
+
+ /*
+ * else look for indirect dbrefs from sequence to Ensembl
+ */
+ for (DBRefEntry dbref : seq.getDBRefs())
+ {
+ if (dbref.getMap() != null && dbref.getMap().getTo() != null)
+ {
+ DBRefEntry[] dbrefs = dbref.getMap().getTo().getDBRefs();
+ DBRefEntry[] indirectEnsemblRefs = DBRefUtils.selectRefs(dbrefs,
+ divisionsArray);
+ if (indirectEnsemblRefs != null)
+ {
+ for (DBRefEntry ensemblRef : indirectEnsemblRefs)
+ {
+ if (fetchGeneLoci(seq, ensemblRef, retrievedLoci))
+ {
+ return;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * Retrieves chromosomal coordinates for the Ensembl (or EnsemblGenomes)
+ * identifier in dbref. If successful, and the sequence length matches gene
+ * loci length, then add it to the sequence, and to the retrievedLoci map.
+ * Answers true if successful, else false.
+ *
+ * @param seq
+ * @param dbref
+ * @param retrievedLoci
+ * @return
+ */
+ static boolean fetchGeneLoci(SequenceI seq, DBRefEntry dbref,
+ Map<DBRefEntry, GeneLociI> retrievedLoci)
+ {
+ String accession = dbref.getAccessionId();
+ String division = dbref.getSource();
+
+ /*
+ * hack: ignore cross-references to Ensembl protein ids
+ * (can't fetch chromosomal mapping for these)
+ * todo: is there an equivalent in EnsemblGenomes?
+ */
+ if (accession.startsWith("ENSP"))
+ {
+ return false;
+ }
+ EnsemblMap mapper = new EnsemblMap();
+
+ /*
+ * try CDS mapping first
+ */
+ GeneLociI geneLoci = mapper.getCdsMapping(division, accession, 1,
+ seq.getLength());
+ if (geneLoci != null)
+ {
+ MapList map = geneLoci.getMap();
+ int mappedFromLength = MappingUtils.getLength(map.getFromRanges());
+ if (mappedFromLength == seq.getLength())
+ {
+ seq.setGeneLoci(geneLoci.getSpeciesId(), geneLoci.getAssemblyId(),
+ geneLoci.getChromosomeId(), geneLoci.getMap());
+ retrievedLoci.put(dbref, geneLoci);
+ return true;
+ }
+ }
+
+ /*
+ * else try CDNA mapping
+ */
+ geneLoci = mapper.getCdnaMapping(division, accession, 1,
+ seq.getLength());
+ if (geneLoci != null)
+ {
+ MapList map = geneLoci.getMap();
+ int mappedFromLength = MappingUtils.getLength(map.getFromRanges());
+ if (mappedFromLength == seq.getLength())
+ {
+ seq.setGeneLoci(geneLoci.getSpeciesId(), geneLoci.getAssemblyId(),
+ geneLoci.getChromosomeId(), geneLoci.getMap());
+ retrievedLoci.put(dbref, geneLoci);
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ /**
+ * @param alignment
+ * @param dataset
+ * @param dna
+ * @param xrefs
+ * @param xrefsAlignment
+ * @return
+ */
+ protected AlignmentI copyAlignmentForSplitFrame(AlignmentI alignment,
+ AlignmentI dataset, boolean dna, AlignmentI xrefs,
+ AlignmentI xrefsAlignment)
+ {
+ AlignmentI copyAlignment;
+ boolean copyAlignmentIsAligned = false;
+ if (dna)
+ {
+ copyAlignment = AlignmentUtils.makeCdsAlignment(sel, dataset,
+ xrefsAlignment.getSequencesArray());
+ if (copyAlignment.getHeight() == 0)
+ {
+ JvOptionPane.showMessageDialog(alignFrame,
+ MessageManager.getString("label.cant_map_cds"),
+ MessageManager.getString("label.operation_failed"),
+ JvOptionPane.OK_OPTION);
+ System.err.println("Failed to make CDS alignment");
+ return null;
+ }
+
+ /*
+ * pending getting Embl transcripts to 'align',
+ * we are only doing this for Ensembl
+ */
+ // TODO proper criteria for 'can align as cdna'
+ if (DBRefSource.ENSEMBL.equalsIgnoreCase(source)
+ || AlignmentUtils.looksLikeEnsembl(alignment))
+ {
+ copyAlignment.alignAs(alignment);
+ copyAlignmentIsAligned = true;
+ }
+ }
+ else
+ {
+ copyAlignment = AlignmentUtils.makeCopyAlignment(sel,
+ xrefs.getSequencesArray(), dataset);
+ }
+ copyAlignment
+ .setGapCharacter(alignFrame.viewport.getGapCharacter());
+
+ StructureSelectionManager ssm = StructureSelectionManager
+ .getStructureSelectionManager(Desktop.instance);
+
+ /*
+ * register any new mappings for sequence mouseover etc
+ * (will not duplicate any previously registered mappings)
+ */
+ ssm.registerMappings(dataset.getCodonFrames());
+
+ if (copyAlignment.getHeight() <= 0)
+ {
+ System.err.println(
+ "No Sequences generated for xRef type " + source);
+ return null;
+ }
+
+ /*
+ * align protein to dna
+ */
+ if (dna && copyAlignmentIsAligned)
+ {
+ xrefsAlignment.alignAs(copyAlignment);
+ }
+ else
+ {
+ /*
+ * align cdna to protein - currently only if
+ * fetching and aligning Ensembl transcripts!
+ */
+ // TODO: generalise for other sources of locus/transcript/cds data
+ if (dna && DBRefSource.ENSEMBL.equalsIgnoreCase(source))
+ {
+ copyAlignment.alignAs(xrefsAlignment);
+ }
+ }
+
+ return copyAlignment;
+ }
+
+ /**
* Makes an alignment containing the given sequences, and adds them to the
* given dataset, which is also set as the dataset for the new alignment
*
return al;
}
- public CrossRefAction(AlignFrame alignFrame, SequenceI[] sel,
- boolean _odna, String source)
+ /**
+ * Constructor
+ *
+ * @param af
+ * @param seqs
+ * @param fromDna
+ * @param dbSource
+ */
+ CrossRefAction(AlignFrame af, SequenceI[] seqs, boolean fromDna,
+ String dbSource)
{
- this.alignFrame = alignFrame;
- this.sel = sel;
- this._odna = _odna;
- this.source = source;
+ this.alignFrame = af;
+ this.sel = seqs;
+ this._odna = fromDna;
+ this.source = dbSource;
}
- public static CrossRefAction showProductsFor(final SequenceI[] sel,
- final boolean _odna, final String source,
+ public static CrossRefAction getHandlerFor(final SequenceI[] sel,
+ final boolean fromDna, final String source,
final AlignFrame alignFrame)
{
- return new CrossRefAction(alignFrame, sel, _odna, source);
+ return new CrossRefAction(alignFrame, sel, fromDna, source);
}
}
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
+import java.util.Map;
+
+import junit.extensions.PA;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
// . codonframes
//
//
- HashMap<String, String> dbtoviewBit = new HashMap<>();
+ Map<String, String> dbtoviewBit = new HashMap<>();
List<String> keyseq = new ArrayList<>();
- HashMap<String, File> savedProjects = new HashMap<>();
+ Map<String, File> savedProjects = new HashMap<>();
for (String[] did : new String[][] { { "UNIPROT", "P00338" } })
{
if (pass2 == 0)
{ // retrieve and show cross-refs in this thread
- cra = new CrossRefAction(af, seqs, dna, db);
+ cra = CrossRefAction.getHandlerFor(seqs, dna, db, af);
cra.run();
- if (cra.getXrefViews().size() == 0)
+ cra_views = (List<AlignmentViewPanel>) PA.getValue(cra,
+ "xrefViews");
+ if (cra_views.size() == 0)
{
failedXrefMenuItems.add("No crossrefs retrieved for "
+ first + " -> " + db);
continue;
}
- cra_views = cra.getXrefViews();
assertNucleotide(cra_views.get(0),
"Nucleotide panel included proteins for " + first
+ " -> " + db);
if (pass3 == 0)
{
-
SequenceI[] xrseqs = avp.getAlignment()
.getSequencesArray();
AlignFrame nextaf = Desktop.getAlignFrameFor(avp
.getAlignViewport());
- cra = new CrossRefAction(nextaf, xrseqs, avp
- .getAlignViewport().isNucleotide(), xrefdb);
+ cra = CrossRefAction.getHandlerFor(xrseqs, avp
+ .getAlignViewport().isNucleotide(), xrefdb,
+ nextaf);
cra.run();
- if (cra.getXrefViews().size() == 0)
+ cra_views2 = (List<AlignmentViewPanel>) PA.getValue(
+ cra, "xrefViews");
+ if (cra_views2.size() == 0)
{
failedXrefMenuItems
.add("No crossrefs retrieved for '"
+ " via '" + nextaf.getTitle() + "'");
continue;
}
- cra_views2 = cra.getXrefViews();
assertNucleotide(cra_views2.get(0),
"Nucleotide panel included proteins for '"
+ nextxref + "' to " + xrefdb
* viewpanel needs to be called with a distinct xrefpath to ensure
* each one's strings are compared)
*/
- private void stringify(HashMap<String, String> dbtoviewBit,
- HashMap<String, File> savedProjects, String xrefpath,
+ private void stringify(Map<String, String> dbtoviewBit,
+ Map<String, File> savedProjects, String xrefpath,
AlignmentViewPanel avp)
{
if (savedProjects != null)