label.show_sequence_features = Show Sequence Features
label.nucleotide = Nucleotide
label.protein = Protein
+label.nucleotides = Nucleotides
+label.proteins = Proteins
label.to_new_alignment = To New Alignment
label.to_this_alignment = Add To This Alignment
label.apply_colour_to_all_groups = Apply Colour To All Groups
/**
*
- * @param dna
- * @param seqs
- * @return
- */
- public static Alignment findXrefSequences(SequenceI[] seqs, boolean dna,
- String source)
- {
- return findXrefSequences(seqs, dna, source, null);
- }
-
- /**
- *
* @param seqs
* sequences whose xrefs are being retrieved
* @param dna
* true if sequences are nucleotide
* @param source
- * @param dataset
- * alignment to search for product sequences.
+ * @param al
+ * alignment to search for cross-referenced sequences (and possibly
+ * add to)
+ * @param addedPeers
+ * a list of sequences to add to if 'peers' to the original sequences
+ * are found e.g. alternative protein products for a protein's gene
* @return products (as dataset sequences)
*/
public static Alignment findXrefSequences(SequenceI[] seqs, boolean dna,
- String source, AlignmentI dataset)
+ String source, AlignmentI al, List<SequenceI> addedPeers)
{
+ AlignmentI dataset = al.getDataset() == null ? al : al.getDataset();
List<SequenceI> rseqs = new ArrayList<SequenceI>();
AlignedCodonFrame cf = new AlignedCodonFrame();
for (SequenceI seq : seqs)
int sf = map.getMap().getToLowest();
int st = map.getMap().getToHighest();
SequenceI mappedrg = ms.getSubSequence(sf, st);
- SequenceI loc = dss.getSubSequence(sf, st);
+ // SequenceI loc = dss.getSubSequence(sf, st);
if (mappedrg.getLength() > 0
- && mappedrg.getSequenceAsString().equals(
- loc.getSequenceAsString()))
+ && ms.getSequenceAsString().equals(
+ dss.getSequenceAsString()))
+ // && mappedrg.getSequenceAsString().equals(
+ // loc.getSequenceAsString()))
{
String msg = "Mapping updated from "
+ ms.getName()
for (SequenceFeature feat : sfs)
{
/*
- * we override the equality test here (but not
- * elsewhere) to ignore Parent attribute
+ * we override SequenceFeature.equals here (but
+ * not elsewhere) to ignore Parent attribute
* TODO not quite working yet!
*/
if (!copiedFeatures
cf.addMap(retrieved[rs].getDatasetSequence(),
dss, map.getMap());
}
+ else
+ {
+ addedPeers.add(map.getTo());
+ cf.addMap(retrieved[rs].getDatasetSequence(),
+ map.getTo(), map.getMap());
+ }
} catch (Exception e)
{
System.err
Alignment ral = null;
if (rseqs.size() > 0)
{
- SequenceI[] rsqs = new SequenceI[rseqs.size()];
- rseqs.toArray(rsqs);
- ral = new Alignment(rsqs);
+ ral = new Alignment(rseqs.toArray(new SequenceI[rseqs.size()]));
if (cf != null && !cf.isEmpty())
{
ral.addCodonFrame(cf);
{
for (SequenceI sourceAligned : al.getSequences())
{
- if (ssm.mapping.to == sourceAligned.getDatasetSequence())
+ if (ssm.mapping.to == sourceAligned.getDatasetSequence()
+ || ssm.mapping.to == sourceAligned)
{
return sourceAligned;
}
public class EnsemblCdna extends EnsemblSeqProxy
{
- // TODO modify to accept other species e.g. ENSMUSPnnn
+ /*
+ * accepts ENST or ENSG with 11 digits
+ * or ENSMUST or similar for other species
+ * or CCDSnnnnn.nn with at least 3 digits
+ */
private static final Regex ACCESSION_REGEX = new Regex(
- "(ENST|ENSG|CCDS)[0-9.]{3,}$");
+ "(ENS([A-Z]{3}|)[TG][0-9]{11}$)" + "|" + "(CCDS[0-9.]{3,}$)");
/*
* fetch exon features on genomic sequence (to identify the cdna regions)
import jalview.io.gff.SequenceOntologyFactory;
import jalview.io.gff.SequenceOntologyI;
import jalview.util.MapList;
+import jalview.util.StringUtils;
-import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
{
private static final String GENE_PREFIX = "gene:";
- // TODO modify to accept other species e.g. ENSMUSGnnn
- private static final Regex ACCESSION_REGEX = new Regex(
- "(ENSG|ENST)[0-9]{11}$");
+ /*
+ * accepts anything as we will attempt lookup of gene or
+ * transcript id or gene name
+ */
+ private static final Regex ACCESSION_REGEX = new Regex(".*");
private static final EnsemblFeatureType[] FEATURES_TO_FETCH = {
EnsemblFeatureType.gene, EnsemblFeatureType.transcript,
}
/**
- * Builds an alignment of all transcripts for the requested gene:
+ * Returns an alignment containing the gene(s) for the given gene or
+ * transcript identifier, or external identifier (e.g. Uniprot id). If given a
+ * gene name or external identifier, returns any related gene sequences found
+ * for model organisms. If only a single gene is queried for, then its
+ * transcripts are also retrieved and added to the alignment. <br>
+ * Method:
* <ul>
+ * <li>resolves a transcript identifier by looking up its parent gene id</li>
+ * <li>resolves an external identifier by looking up xref-ed gene ids</li>
* <li>fetches the gene sequence</li>
* <li>fetches features on the sequence</li>
* <li>identifies "transcript" features whose Parent is the requested gene</li>
* <li>aligns each transcript against the gene sequence based on the position
* mappings</li>
* </ul>
+ *
+ * @param query
+ * one or more identifiers separated by a space
+ * @return an alignment containing one or more genes, and possibly
+ * transcripts, or null
*/
@Override
public AlignmentI getSequenceRecords(String query) throws Exception
{
- List<String> transcriptsWanted = null;
+ // todo: tidy up handling of one or multiple accession ids
+ String[] queries = query.split(getAccessionSeparator());
+ /*
+ * if given a transcript id, look up its gene parent
+ */
if (isTranscriptIdentifier(query))
{
- transcriptsWanted = Arrays.asList(query
- .split(getAccessionSeparator()));
- query = getGeneForTranscript(query);
+ // we are assuming all transcripts have the same gene parent here
+ query = new EnsemblLookup().getParent(queries[0]);
if (query == null)
{
return null;
}
}
+ /*
+ * if given a gene or other external name, lookup and fetch
+ * the corresponding gene for all model organisms
+ */
+ if (!isGeneIdentifier(query))
+ {
+ List<String> geneIds = new EnsemblSymbol().getIds(query);
+ if (geneIds.isEmpty())
+ {
+ return null;
+ }
+ String theIds = StringUtils.listToDelimitedString(geneIds,
+ getAccessionSeparator());
+ return getSequenceRecords(theIds);
+ }
+
AlignmentI al = super.getSequenceRecords(query);
- if (al.getHeight() > 0)
+
+ /*
+ * if we retrieved a single gene, get its transcripts as well
+ * NOTE(review): the null check assumes the superclass fetch can return
+ * null when nothing was retrieved - confirm against EnsemblSeqProxy
+ */
+ if (al != null && al.getHeight() == 1)
{
- getTranscripts(al, query, transcriptsWanted);
+ getTranscripts(al, query);
}
return al;
}
/**
- * Gets the parent gene identifier for a given transcript identifier, by
- * retrieving 'transcript' features overlapping the transcript, and finding
- * the Parent property of the feature whose id is the given identifier.
+ * Attempts to get Ensembl stable identifiers for model organisms for a gene
+ * name by calling the xrefs symbol REST service to resolve the gene name.
*
* @param query
* @return
*/
- protected String getGeneForTranscript(String transcriptId)
+ protected String getGeneIdentifiersForName(String query)
{
- String geneId = null;
-
- /*
- * reduce multiple transcripts (e.g. from Uniprot x-ref) to the first
- * one only as representative (they should all have the same gene)
- */
- transcriptId = transcriptId.split(getAccessionSeparator())[0];
-
- try
+ List<String> ids = new EnsemblSymbol().getIds(query);
+ if (ids != null)
{
- EnsemblFeatureType[] geneFeature = new EnsemblFeatureType[] { EnsemblFeatureType.transcript };
- AlignmentI al = new EnsemblFeatures().getSequenceRecords(
- transcriptId, geneFeature);
- if (al != null && al.getHeight() > 0)
+ for (String id : ids)
{
- SequenceFeature[] sfs = al.getSequenceAt(0).getSequenceFeatures();
- if (sfs != null)
+ if (isGeneIdentifier(id))
{
- for (SequenceFeature sf : sfs)
- {
- if (transcriptId.equals(getTranscriptId(sf)))
- {
- String parent = (String) sf.getValue(PARENT);
- if (parent != null && parent.startsWith(GENE_PREFIX))
- {
- geneId = parent.substring(5);
- }
- break;
- }
- }
+ return id;
}
}
- return geneId;
- } catch (IOException e)
- {
- System.err.println("Error retrieving gene id for " + transcriptId
- + ": " + e.getMessage());
- return null;
}
+ return null;
}
/**
*
* @param al
* @param accId
- * @param transcriptsWanted
- * optional list of transcript ids to filter by
* @throws Exception
*/
- protected void getTranscripts(AlignmentI al, String accId,
- List<String> transcriptsWanted)
+ protected void getTranscripts(AlignmentI al, String accId)
throws Exception
{
SequenceI gene = al.getSequenceAt(0);
List<SequenceFeature> transcriptFeatures = getTranscriptFeatures(accId,
- gene, transcriptsWanted);
+ gene);
for (SequenceFeature transcriptFeature : transcriptFeatures)
{
transcript.getDatasetSequence(), mapping, parentId);
/*
+ * fetch and save cross-references
+ */
+ super.getCrossReferences(transcript);
+
+ /*
* and finally fetch the protein product and save as a cross-reference
*/
new EnsemblCdna().addProteinProduct(transcript);
*
* @param accId
* @param geneSequence
- * @param transcriptsWanted
- * optional list of ids to filter on
* @return
*/
protected List<SequenceFeature> getTranscriptFeatures(String accId,
- SequenceI geneSequence, List<String> transcriptsWanted)
+ SequenceI geneSequence)
{
List<SequenceFeature> transcriptFeatures = new ArrayList<SequenceFeature>();
{
if (isTranscript(sf.getType()))
{
- if (transcriptsWanted != null)
- {
- String transcriptId = (String) sf.getValue("transcript_id");
- if (!transcriptsWanted.contains(transcriptId))
- {
- // continue;
- }
- }
String parent = (String) sf.getValue(PARENT);
if (parentIdentifier.equals(parent))
{
--- /dev/null
+package jalview.ext.ensembl;
+
+import jalview.datamodel.AlignmentI;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.Arrays;
+import java.util.List;
+
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+import org.json.simple.parser.ParseException;
+
+public class EnsemblLookup extends EnsemblRestClient
+{
+
+ @Override
+ public String getDbName()
+ {
+ return "ENSEMBL";
+ }
+
+ @Override
+ public AlignmentI getSequenceRecords(String queries) throws Exception
+ {
+ return null;
+ }
+
+ @Override
+ protected URL getUrl(List<String> ids) throws MalformedURLException
+ {
+ String identifier = ids.get(0);
+ return getUrl(identifier);
+ }
+
+ /**
+ * @param identifier
+ * @return
+ */
+ protected URL getUrl(String identifier)
+ {
+ String url = ENSEMBL_REST + "/lookup/id/" + identifier
+ + "?content-type=application/json";
+ try
+ {
+ return new URL(url);
+ } catch (MalformedURLException e)
+ {
+ return null;
+ }
+ }
+
+ @Override
+ protected boolean useGetRequest()
+ {
+ return true;
+ }
+
+ @Override
+ protected String getRequestMimeType(boolean multipleIds)
+ {
+ return "application/json";
+ }
+
+ @Override
+ protected String getResponseMimeType()
+ {
+ return "application/json";
+ }
+
+ /**
+ * Calls the Ensembl lookup REST endpoint and retrieves the 'Parent' for the
+ * given identifier, or null if not found
+ *
+ * @param identifier
+ * @return
+ */
+ public String getParent(String identifier)
+ {
+ List<String> ids = Arrays.asList(new String[] { identifier });
+
+ BufferedReader br = null;
+ try
+ {
+ URL url = getUrl(identifier);
+ if (url != null)
+ {
+ br = getHttpResponse(url, ids);
+ }
+ return (parseResponse(br));
+ } catch (IOException e)
+ {
+ // ignore
+ return null;
+ } finally
+ {
+ if (br != null)
+ {
+ try
+ {
+ br.close();
+ } catch (IOException e)
+ {
+ // ignore
+ }
+ }
+ }
+ }
+
+ /**
+ * Parses "Parent" from the JSON response and returns the value, or null if
+ * not found
+ *
+ * @param br
+ * @return
+ * @throws IOException
+ */
+ protected String parseResponse(BufferedReader br) throws IOException
+ {
+ String parent = null;
+ JSONParser jp = new JSONParser();
+ try
+ {
+ JSONObject val = (JSONObject) jp.parse(br);
+ parent = val.get("Parent").toString();
+ } catch (ParseException e)
+ {
+ // ignore
+ }
+ return parent;
+ }
+
+}
public class EnsemblProtein extends EnsemblSeqProxy
{
- // TODO modify to accept other species e.g. ENSMUSPnnn
+ /*
+ * accepts ENSP with 11 digits
+ * or ENSMUSP or similar for other species
+ * or CCDSnnnnn.nn with at least 3 digits
+ */
private static final Regex ACCESSION_REGEX = new Regex(
- "(ENSP|CCDS)[0-9.]{3,}$");
+ "(ENS([A-Z]{3}|)P[0-9]{11}$)" + "|" + "(CCDS[0-9.]{3,}$)");
private static final List<String> CROSSREFS = Arrays.asList(new String[] {
"PDB", "Uniprot/SPTREMBL", "Uniprot/SWISSPROT" });
import javax.ws.rs.HttpMethod;
+import com.stevesoft.pat.Regex;
+
/**
* Base class for Ensembl REST service clients
*
private final static long RETEST_INTERVAL = 10000L; // 10 seconds
+ private static final Regex TRANSCRIPT_REGEX = new Regex(
+ "(ENS)([A-Z]{3}|)T[0-9]{11}$");
+
+ private static final Regex GENE_REGEX = new Regex(
+ "(ENS)([A-Z]{3}|)G[0-9]{11}$");
+
private static boolean ensemblRestAvailable = false;
private static long lastCheck = -1;
protected volatile boolean inProgress = false;
+  /**
+   * Answers true if the (non-null) query contains a match for the Ensembl
+   * transcript identifier pattern, else false
+   * 
+   * @param query
+   * @return
+   */
+  public static boolean isTranscriptIdentifier(String query)
+  {
+    if (query == null)
+    {
+      return false;
+    }
+    return TRANSCRIPT_REGEX.search(query);
+  }
+
+  /**
+   * Answers true if the (non-null) query contains a match for the Ensembl
+   * gene identifier pattern, else false
+   * 
+   * @param query
+   * @return
+   */
+  public static boolean isGeneIdentifier(String query)
+  {
+    if (query == null)
+    {
+      return false;
+    }
+    return GENE_REGEX.search(query);
+  }
+
@Override
public boolean queryInProgress()
{
import java.util.List;
import java.util.Map.Entry;
-import com.stevesoft.pat.Regex;
-
/**
* Base class for Ensembl sequence fetchers
*
*/
public abstract class EnsemblSeqProxy extends EnsemblRestClient
{
- // TODO modify to accept other species e.g. ENSMUSTnnn
- private static final Regex TRANSCRIPT_REGEX = new Regex(
- "(ENST)[0-9]{11}$");
-
private static final List<String> CROSS_REFERENCES = Arrays
- .asList(new String[] { "CCDS" });
+ .asList(new String[] { "CCDS", "Uniprot/SWISSPROT" });
protected static final String CONSEQUENCE_TYPE = "consequence_type";
}
}
+ if (alignment == null)
+ {
+ return null;
+ }
+
/*
* fetch and transfer genomic sequence features,
* fetch protein product and add as cross-reference
int mappedDnaLength = getCdsRanges(dnaSeq, ranges);
int proteinLength = proteinSeq.getLength();
- List<int[]> proteinRange = new ArrayList<int[]>();
int proteinStart = 1;
/*
proteinStart = 2;
proteinLength--;
}
- proteinRange.add(new int[] { proteinStart, proteinLength });
+ List<int[]> proteinRange = new ArrayList<int[]>();
/*
* dna length should map to protein (or protein plus stop codon)
*/
int codesForResidues = mappedDnaLength / 3;
- if (codesForResidues == proteinLength
- || codesForResidues == (proteinLength + 1))
+ if (codesForResidues == (proteinLength + 1))
+ {
+ MappingUtils.unmapStopCodon(ranges, mappedDnaLength);
+ codesForResidues--;
+ }
+ if (codesForResidues == proteinLength)
{
+ proteinRange.add(new int[] { proteinStart, proteinLength });
return new MapList(ranges, proteinRange, 3, 1);
}
return null;
{
return 0;
}
+ SequenceOntologyI so = SequenceOntologyFactory.getInstance();
int mappedDnaLength = 0;
for (SequenceFeature sf : sfs)
{
/*
* process a CDS feature (or a sub-type of CDS)
*/
- if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
- SequenceOntologyI.CDS))
+ if (so.isA(sf.getType(), SequenceOntologyI.CDS))
{
int phase = 0;
try {
*/
int begin = sf.getBegin();
int end = sf.getEnd();
- if (ranges.isEmpty() && phase > 0)
+ if (ranges.isEmpty())
{
begin += phase;
if (begin > end)
|| SequenceOntologyFactory.getInstance().isA(featureType,
SequenceOntologyI.TRANSCRIPT);
}
-
- public static boolean isTranscriptIdentifier(String query)
- {
- return query == null ? false : TRANSCRIPT_REGEX.search(query);
- }
}
*/
abstract class EnsemblSequenceFetcher extends DbSourceProxyImpl
{
- // TODO modify to accept other species e.g. ENSMUSTnnn
+ /*
+ * accepts ENSG/T/E/P with 11 digits
+ * or ENSMUSP or similar for other species
+ * or CCDSnnnnn.nn with at least 3 digits
+ */
private static final Regex ACCESSION_REGEX = new Regex(
- "(ENSP|ENST|ENSG|CCDS)[0-9.]{3,}$");
+ "(ENS([A-Z]{3}|)[GTEP]{1}[0-9]{11}$)" + "|" + "(CCDS[0-9.]{3,}$)");
/*
* possible values for the 'feature' parameter of the /overlap REST service
--- /dev/null
+package jalview.ext.ensembl;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.json.simple.JSONArray;
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+import org.json.simple.parser.ParseException;
+
+public class EnsemblSymbol extends EnsemblXref
+{
+ /**
+ * Returns the first "id" value in gene identifier format from the JSON
+ * response, or null if none found
+ *
+ * @param br
+ * @return
+ * @throws IOException
+ */
+ protected String parseResponse(BufferedReader br)
+ throws IOException
+ {
+ JSONParser jp = new JSONParser();
+ String result = null;
+ try
+ {
+ JSONArray responses = (JSONArray) jp.parse(br);
+ Iterator rvals = responses.iterator();
+ while (rvals.hasNext())
+ {
+ JSONObject val = (JSONObject) rvals.next();
+ String id = val.get("id").toString();
+ if (id != null && isGeneIdentifier(id))
+ {
+ result = id;
+ break;
+ }
+ }
+ } catch (ParseException e)
+ {
+ // ignore
+ }
+ return result;
+ }
+
+ protected URL getUrl(String id, Species species)
+ {
+ String url = ENSEMBL_REST + "/xrefs/symbol/" + species.toString() + "/"
+ + id
+ + "?content-type=application/json";
+ try
+ {
+ return new URL(url);
+ } catch (MalformedURLException e)
+ {
+ return null;
+ }
+ }
+
+ /**
+ * Calls the Ensembl xrefs REST 'symbol' endpoint and retrieves any gene ids
+ * for the given identifier, for any known model organisms
+ *
+ * @param identifier
+ * @return
+ */
+ public List<String> getIds(String identifier)
+ {
+ List<String> result = new ArrayList<String>();
+ List<String> ids = new ArrayList<String>();
+ ids.add(identifier);
+
+ String[] queries = identifier.split(getAccessionSeparator());
+ BufferedReader br = null;
+ try
+ {
+ for (String query : queries)
+ {
+ for (Species taxon : Species.values())
+ {
+ if (taxon.isModelOrganism())
+ {
+ URL url = getUrl(query, taxon);
+ if (url != null)
+ {
+ br = getHttpResponse(url, ids);
+ }
+ String geneId = parseResponse(br);
+ if (geneId != null)
+ {
+ result.add(geneId);
+ }
+ }
+ }
+ }
+ } catch (IOException e)
+ {
+ // ignore
+ } finally
+ {
+ if (br != null)
+ {
+ try
+ {
+ br.close();
+ } catch (IOException e)
+ {
+ // ignore
+ }
+ }
+ }
+ return result;
+ }
+
+}
* service
*
* @author gmcarstairs
- *
+ * @see <a href="http://rest.ensembl.org/documentation/info/xref_id">http://rest.ensembl.org/documentation/info/xref_id</a>
*/
class EnsemblXref extends EnsemblRestClient
{
@Override
protected URL getUrl(List<String> ids) throws MalformedURLException
{
- // TODO Auto-generated method stub
- return null;
+ return getUrl(ids.get(0));
}
@Override
--- /dev/null
+package jalview.ext.ensembl;
+
+/**
+ * Selected species identifiers used by Ensembl, each flagged as being (or not
+ * being) a model organism
+ * 
+ * @author gmcarstairs
+ * @see <a href="http://rest.ensembl.org/info/species?content-type=text/xml">http://rest.ensembl.org/info/species?content-type=text/xml</a>
+ */
+enum Species
+{
+  /*
+   * using any suitably readable alias as the enum name; these are all
+   * valid species parameters to Ensembl REST services where applicable
+   */
+  human(true), mouse(true), s_cerevisiae(true), cow(false), pig(false),
+  rat(true), celegans(true), sheep(false), horse(false), gorilla(false),
+  rabbit(false), gibbon(false), dog(false), orangutan(false),
+  xenopus(true), chimpanzee(false), cat(false), zebrafish(true),
+  chicken(true), dmelanogaster(true);
+
+  /*
+   * set once on construction; enum constants are shared so this
+   * should never be reassigned
+   */
+  final boolean modelOrganism;
+
+  private Species(boolean model)
+  {
+    this.modelOrganism = model;
+  }
+
+  /**
+   * Answers true if this species was declared as a model organism
+   * 
+   * @return
+   */
+  boolean isModelOrganism()
+  {
+    return modelOrganism;
+  }
+}
rnahelicesColour.setEnabled(av.getAlignment().hasRNAStructure());
rnahelicesColour
.setSelected(av.getGlobalColourScheme() instanceof jalview.schemes.RNAHelicesColour);
- setShowProductsEnabled();
+
+ showProducts.setEnabled(canShowProducts());
+
updateEditMenuBar();
}
}
}
- /*
- * public void vamsasStore_actionPerformed(ActionEvent e) { JalviewFileChooser
- * chooser = new JalviewFileChooser(jalview.bin.Cache.
- * getProperty("LAST_DIRECTORY"));
- *
- * chooser.setFileView(new JalviewFileView()); chooser.setDialogTitle("Export
- * to Vamsas file"); chooser.setToolTipText("Export");
- *
- * int value = chooser.showSaveDialog(this);
- *
- * if (value == JalviewFileChooser.APPROVE_OPTION) {
- * jalview.io.VamsasDatastore vs = new jalview.io.VamsasDatastore(viewport);
- * //vs.store(chooser.getSelectedFile().getAbsolutePath() ); vs.storeJalview(
- * chooser.getSelectedFile().getAbsolutePath(), this); } }
- */
/**
- * prototype of an automatically enabled/disabled analysis function
+ * Searches selected sequences for xRef products and builds the Show
+ * Cross-References menu (formerly called Show Products)
*
+ * @return true if Show Cross-references menu should be enabled.
*/
- protected void setShowProductsEnabled()
+ public boolean canShowProducts()
{
SequenceI[] selection = viewport.getSequenceSelection();
- if (canShowProducts(selection, viewport.getSelectionGroup() != null,
- viewport.getAlignment().getDataset()))
- {
- showProducts.setEnabled(true);
-
- }
- else
- {
- showProducts.setEnabled(false);
- }
- }
-
- /**
- * search selection for sequence xRef products and build the show products
- * menu.
- *
- * @param selection
- * @param dataset
- * @return true if showProducts menu should be enabled.
- */
- public boolean canShowProducts(SequenceI[] selection,
- boolean isRegionSelection, Alignment dataset)
- {
+ AlignmentI dataset = viewport.getAlignment().getDataset();
boolean showp = false;
try
{
showProducts.removeAll();
final boolean dna = viewport.getAlignment().isNucleotide();
- final Alignment ds = dataset;
String[] ptypes = (selection == null || selection.length == 0) ? null
: CrossRef.findSequenceXrefTypes(dna, selection, dataset);
- // Object[] prods =
- // CrossRef.buildXProductsList(viewport.getAlignment().isNucleotide(),
- // selection, dataset, true);
- final SequenceI[] sel = selection;
+
for (int t = 0; ptypes != null && t < ptypes.length; t++)
{
showp = true;
- final boolean isRegSel = isRegionSelection;
final AlignFrame af = this;
final String source = ptypes[t];
JMenuItem xtype = new JMenuItem(ptypes[t]);
@Override
public void actionPerformed(ActionEvent e)
{
- // TODO: new thread for this call with vis-delay
- af.showProductsFor(af.viewport.getSequenceSelection(),
- isRegSel, dna, source);
+ showProductsFor(af.viewport.getSequenceSelection(), dna, source);
}
});
} catch (Exception e)
{
jalview.bin.Cache.log
- .warn("canTranslate threw an exception - please report to help@jalview.org",
+ .warn("canShowProducts threw an exception - please report to help@jalview.org",
e);
return false;
}
return showp;
}
- protected void showProductsFor(final SequenceI[] sel,
- final boolean isRegSel, final boolean dna, final String source)
+ protected void showProductsFor(final SequenceI[] sel, final boolean dna,
+ final String source)
{
Runnable foo = new Runnable()
{
new Object[] { source }), sttime);
try
{
- // update our local dataset reference
- Alignment ds = AlignFrame.this.getViewport().getAlignment()
- .getDataset();
- Alignment prods = CrossRef
- .findXrefSequences(sel, dna, source, ds);
- if (prods != null)
+ /*
+ * 'peer' sequences are any to add to this alignment, for example
+ * alternative protein products for my protein's gene
+ */
+ List<SequenceI> addedPeers = new ArrayList<SequenceI>();
+ AlignmentI alignment = AlignFrame.this.getViewport().getAlignment();
+ Alignment xrefs = CrossRef.findXrefSequences(sel, dna, source,
+ alignment, addedPeers);
+ if (xrefs != null)
{
- SequenceI[] sprods = new SequenceI[prods.getHeight()];
- for (int s = 0; s < sprods.length; s++)
- {
- sprods[s] = (prods.getSequenceAt(s)).deriveSequence();
- if (ds.getSequences() == null
- || !ds.getSequences().contains(
- sprods[s].getDatasetSequence()))
- {
- ds.addSequence(sprods[s].getDatasetSequence());
- }
- sprods[s].updatePDBIds();
- }
- Alignment al = new Alignment(sprods);
- al.setDataset(ds);
+ Alignment al = makeCrossReferencesAlignment(
+ alignment.getDataset(), xrefs);
/*
* Copy dna-to-protein mappings to new alignment
// TODO 1: no mappings are set up for EMBL product
// TODO 2: if they were, should add them to protein alignment, not
// dna
- List<AlignedCodonFrame> cf = prods.getCodonFrames();
- for (AlignedCodonFrame acf : cf)
- {
- al.addCodonFrame(acf);
- }
+ // List<AlignedCodonFrame> cf = xrefs.getCodonFrames();
+ // for (AlignedCodonFrame acf : cf)
+ // {
+ // al.addCodonFrame(acf);
+ // }
AlignFrame newFrame = new AlignFrame(al, DEFAULT_WIDTH,
DEFAULT_HEIGHT);
- String newtitle = "" + (dna ? "Proteins" : "Nucleotides")
- + " for " + (isRegSel ? "selected region of " : "")
- + getTitle();
+ String newtitle = String.format("%s %s %s",
+ MessageManager.getString(dna ? "label.proteins"
+ : "label.nucleotides"), MessageManager
+ .getString("label.for"), getTitle());
newFrame.setTitle(newtitle);
boolean asSplitFrame = Cache.getDefault(
AlignmentI copyAlignment = null;
final SequenceI[] sequenceSelection = AlignFrame.this.viewport
.getSequenceSelection();
+ List<AlignedCodonFrame> cf = xrefs.getCodonFrames();
if (dna)
{
copyAlignment = AlignmentUtils.makeCdsAlignment(
sequenceSelection, cf);
al.getCodonFrames().clear();
al.getCodonFrames().addAll(cf);
- final StructureSelectionManager ssm = StructureSelectionManager
- .getStructureSelectionManager(Desktop.instance);
- ssm.registerMappings(cf);
}
else
{
copyAlignment = new Alignment(new Alignment(
sequenceSelection));
+ copyAlignment.getCodonFrames().addAll(cf);
}
+ StructureSelectionManager ssm = StructureSelectionManager
+ .getStructureSelectionManager(Desktop.instance);
+ ssm.registerMappings(cf);
+
+ /*
+ * add in any extra 'peer' sequences discovered
+ * (e.g. alternative protein products)
+ */
+ for (SequenceI peer : addedPeers)
+ {
+ copyAlignment.addSequence(peer);
+ }
+
+ /*
+ * align protein to dna
+ */
+ // TODO needs debugging
+ // if (dna)
+ // {
+ // al.alignAs(copyAlignment);
+ // }
+ // else
+ // {
+ // copyAlignment.alignAs(al);
+ // }
+
AlignFrame copyThis = new AlignFrame(copyAlignment,
AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
copyThis.setTitle(AlignFrame.this.getTitle());
- // SplitFrame with dna above, protein below
+
boolean showSequenceFeatures = viewport
.isShowSequenceFeatures();
newFrame.setShowSeqFeatures(showSequenceFeatures);
String linkedTitle = MessageManager
.getString("label.linked_view_title");
Desktop.addInternalFrame(sf, linkedTitle, -1, -1);
+ sf.adjustDivider();
}
else
{
new Object[] { source }), sttime);
}
+      /**
+       * Makes a new alignment of sequences derived from the given
+       * cross-referenced sequences. The dataset sequence of each derived
+       * sequence is added to the dataset if it is not already in it, and the
+       * dataset is set on the new alignment.
+       * 
+       * @param dataset
+       *          the dataset to add to and to set on the new alignment
+       * @param prods
+       *          alignment holding the cross-referenced sequences
+       * @return the new alignment of derived sequences
+       */
+      protected Alignment makeCrossReferencesAlignment(Alignment dataset,
+              Alignment prods)
+      {
+        SequenceI[] sprods = new SequenceI[prods.getHeight()];
+        for (int s = 0; s < sprods.length; s++)
+        {
+          sprods[s] = (prods.getSequenceAt(s)).deriveSequence();
+          SequenceI dss = sprods[s].getDatasetSequence();
+          if (dataset.getSequences() == null
+                  || !dataset.getSequences().contains(dss))
+          {
+            dataset.addSequence(dss);
+          }
+          sprods[s].updatePDBIds();
+        }
+        Alignment al = new Alignment(sprods);
+        al.setDataset(dataset);
+        return al;
+      }
+
};
Thread frunner = new Thread(foo);
frunner.start();
boolean rfound = false;
for (int r = 0; r < rs.length; r++)
{
- if (rs[r] != null
- && (found = DBRefUtils.searchRefs(
- rs[r].getDBRefs(), dbr)) != null
- && found.length > 0)
+ if (rs[r] != null)
{
- rfound = true;
- rs[r] = null;
+ found = DBRefUtils.searchRefs(rs[r].getDBRefs(), accId);
+ if (found != null && found.length > 0)
+ {
+ rfound = true;
+ rs[r] = null;
+ }
}
}
if (!rfound)
*/
public class SplitFrame extends GSplitFrame implements SplitContainerI
{
+ private static final int WINDOWS_INSETS_WIDTH = 28; // tbc
+
+ private static final int MAC_INSETS_WIDTH = 28;
+
+ private static final int WINDOWS_INSETS_HEIGHT = 50; // tbc
+
+ private static final int MAC_INSETS_HEIGHT = 50;
+ private static final int DESKTOP_DECORATORS_HEIGHT = 65;
private static final long serialVersionUID = 1L;
public SplitFrame(GAlignFrame top, GAlignFrame bottom)
* estimate width and height of SplitFrame; this.getInsets() doesn't seem to
* give the full additional size (a few pixels short)
*/
- int widthFudge = Platform.isAMac() ? 28 : 28; // Windows tbc
- int heightFudge = Platform.isAMac() ? 50 : 50; // tbc
+ int widthFudge = Platform.isAMac() ? MAC_INSETS_WIDTH
+ : WINDOWS_INSETS_WIDTH;
+ int heightFudge = Platform.isAMac() ? MAC_INSETS_HEIGHT
+ : WINDOWS_INSETS_HEIGHT;
int width = ((AlignFrame) getTopFrame()).getWidth() + widthFudge;
int height = ((AlignFrame) getTopFrame()).getHeight()
+ ((AlignFrame) getBottomFrame()).getHeight() + DIVIDER_SIZE
{
// allow about 65 pixels for Desktop decorators on Windows
- int newHeight = Math.min(height, Desktop.instance.getHeight() - 65);
+ int newHeight = Math.min(height, Desktop.instance.getHeight()
+ - DESKTOP_DECORATORS_HEIGHT);
if (newHeight != height)
{
int oldDividerLocation = getDividerLocation();
}
/**
+ * Adjust the divider for a sensible split of the real estate (for example,
+ * when many transcripts are shown with a single protein). This should only be
+ * called after the split pane has been laid out (made visible) so it has a
+ * height.
+ */
+ protected void adjustDivider()
+ {
+ final AlignViewport topViewport = ((AlignFrame) getTopFrame()).viewport;
+ final AlignViewport bottomViewport = ((AlignFrame) getBottomFrame()).viewport;
+ final AlignmentI topAlignment = topViewport.getAlignment();
+ final AlignmentI bottomAlignment = bottomViewport.getAlignment();
+ boolean topAnnotations = topViewport.isShowAnnotation();
+ boolean bottomAnnotations = bottomViewport.isShowAnnotation();
+ int topCount = topAlignment.getHeight();
+ int bottomCount = bottomAlignment.getHeight();
+ int topCharHeight = topViewport.getViewStyle().getCharHeight();
+ int bottomCharHeight = bottomViewport.getViewStyle().getCharHeight();
+
+ /*
+ * estimate ratio of (topFrameContent / bottomFrameContent)
+ */
+ int insets = Platform.isAMac() ? MAC_INSETS_HEIGHT
+ : WINDOWS_INSETS_HEIGHT;
+ // allow 3 'rows' for scale, scrollbar, status bar
+ int topHeight = insets + (3 + topCount) * topCharHeight
+ + (topAnnotations ? topViewport.calcPanelHeight() : 0);
+ int bottomHeight = insets + (3 + bottomCount) * bottomCharHeight
+ + (bottomAnnotations ? bottomViewport.calcPanelHeight() : 0);
+ double ratio = ((double) topHeight) / (topHeight + bottomHeight);
+
+ setRelativeDividerLocation(ratio);
+ }
+
+ /**
* Add a listener to tidy up when the frame is closed.
*/
protected void addCloseFrameListener()
String desc = id.substring(space + 1);
seq.setDescription(desc);
- if (desc.startsWith("chromosome"))
- {
- /*
- * parse Ensembl style gene description e.g.
- * chromosome:GRCh38:7:140696688:140721955:1
- */
- String[] tokens = desc.split(":");
- if (tokens.length > 3)
- {
- try
- {
- seq.setStart(Integer.parseInt(tokens[3]));
- } catch (NumberFormatException e)
- {
- // ignore
- }
- }
- }
+ /*
+ * it is tempting to parse Ensembl style gene description e.g.
+ * chromosome:GRCh38:7:140696688:140721955:1 and set the
+ * start position of the sequence, but this causes much confusion
+ * for reverse strand feature locations
+ */
}
else
{
{ "snRNA_gene", "gene" },
{ "miRNA_gene", "gene" },
{ "lincRNA_gene", "gene" },
+ { "rRNA_gene", "gene" },
/*
* transcript sub-types:
{ "snRNA", "transcript" },
{ "miRNA", "transcript" },
{ "lincRNA", "transcript" },
+ { "rRNA", "transcript" },
// there are many more sub-types of ncRNA...
/*
}
/**
+ * Returns an array of those references whose accession id is the same as the
+ * given accession id. Matching is delegated to the comparator-based
+ * searchRefs method using the matchId comparator, so that
+ * <ul>
+ * <li>accession ids must be equal (the comparison is case-sensitive)</li>
+ * <li>database sources are not compared</li>
+ * <li>any mappings held by the references are not compared</li>
+ * </ul>
+ *
+ * @param ref
+ * Set of references to search
+ * @param accId
+ * accession id to match
+ * @return the matching references, as returned by the comparator-based
+ * searchRefs method
+ */
+ public static DBRefEntry[] searchRefs(DBRefEntry[] ref, String accId)
+ {
+ return searchRefs(ref, new DBRefEntry("", "", accId), matchId);
+ }
+
+ /**
* Returns an array of those references that match the given entry, according
* to the given comparator. Returns null if no matches.
*
};
/**
+ * Comparator that matches two references when both have a non-null accession
+ * id and the two accession ids are equal (compared with equals(), so ids
+ * differing only in case do not match). No other property of the references
+ * is compared. A reference with a null accession id never matches.
+ */
+ public static DbRefComp matchId = new DbRefComp()
+ {
+ @Override
+ public boolean matches(DBRefEntry refa, DBRefEntry refb)
+ {
+ if (refa.getAccessionId() != null && refb.getAccessionId() != null
+ && refb.getAccessionId().equals(refa.getAccessionId()))
+ {
+ return true;
+ }
+ return false;
+ }
+ };
+
+ /**
* Parses a DBRefEntry and adds it to the sequence, also a PDBEntry if the
* database is PDB.
* <p>
}
return result;
}
+
+  /**
+   * Removes the last 3 mapped positions (the stop codon) from the given
+   * ranges. The list, and the range arrays it holds, are modified in place:
+   * the range containing the last retained position is truncated, and any
+   * ranges after it are removed from the list.
+   * <p>
+   * Does nothing if ranges is null or mappedLength is less than 3. If
+   * mappedLength is exactly 3, nothing remains once the stop codon is
+   * removed, so the list is emptied.
+   *
+   * @param ranges
+   *          list of [start, end] ranges (inclusive); a range with start
+   *          greater than end is treated as descending (reverse strand)
+   * @param mappedLength
+   *          the number of mapped positions, including the 3 positions to be
+   *          removed
+   */
+  public static void unmapStopCodon(List<int[]> ranges,
+          int mappedLength)
+  {
+    if (ranges == null || mappedLength < 3)
+    {
+      return;
+    }
+    int targetLength = mappedLength - 3;
+    if (targetLength == 0)
+    {
+      /*
+       * the stop codon is all that is mapped; truncating the first range to
+       * zero positions would leave an invalid range such as [5, 4] (or
+       * [7, 8] for a descending range), so just remove everything
+       */
+      ranges.clear();
+      return;
+    }
+    boolean done = false;
+    int mapped = 0;
+    Iterator<int[]> it = ranges.iterator();
+    while (!done && it.hasNext())
+    {
+      int[] range = it.next();
+      int length = Math.abs(range[1] - range[0]) + 1;
+      if (mapped + length == targetLength)
+      {
+        /*
+         * this range ends exactly at the last retained position
+         */
+        done = true;
+      }
+      else if (mapped + length < targetLength)
+      {
+        /*
+         * whole range is retained; keep counting
+         */
+        mapped += length;
+      }
+      else
+      {
+        /*
+         * need just a bit of this range
+         */
+        int needed = targetLength - mapped;
+        int sense = range[1] >= range[0] ? 1 : -1;
+        range[1] = range[0] + (sense * (needed - 1));
+        done = true;
+      }
+    }
+    /*
+     * remove any trailing ranges
+     */
+    while (it.hasNext())
+    {
+      it.next();
+      it.remove();
+    }
+  }
}
import java.util.List;
+import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
sf.setType("CDS");
assertFalse(testee.identifiesSequence(sf, accId));
}
+
+ @Test(groups = "Functional") // which accession id formats EnsemblCdna accepts
+ public void testIsValidReference() throws Exception
+ {
+ EnsemblSequenceFetcher esq = new EnsemblCdna();
+ Assert.assertTrue(esq.isValidReference("CCDS5863.1"));
+ Assert.assertTrue(esq.isValidReference("ENST00000288602"));
+ Assert.assertTrue(esq.isValidReference("ENSG00000288602"));
+ Assert.assertFalse(esq.isValidReference("ENSP00000288602")); // ENSP id is rejected
+ Assert.assertFalse(esq.isValidReference("ENST0000288602")); // one digit short
+ // non-human species have a 3 character identifier (here MUS) included:
+ Assert.assertTrue(esq.isValidReference("ENSMUSG00000099398"));
+ }
}
import java.util.List;
+import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
assertFalse(testee.identifiesSequence(sf, accId));
}
+ @Test(groups = "Functional") // which accession id formats EnsemblCds accepts
+ public void testIsValidReference() throws Exception
+ {
+ EnsemblSequenceFetcher esq = new EnsemblCds();
+ Assert.assertTrue(esq.isValidReference("CCDS5863.1"));
+ Assert.assertTrue(esq.isValidReference("ENST00000288602"));
+ Assert.assertTrue(esq.isValidReference("ENSG00000288602"));
+ Assert.assertTrue(esq.isValidReference("ENSP00000288602")); // ENSP id is accepted
+ Assert.assertFalse(esq.isValidReference("ENST0000288602")); // one digit short
+ // non-human species have a 3 character identifier (here MUS) included:
+ Assert.assertTrue(esq.isValidReference("ENSMUSG00000099398"));
+ }
+
}
import jalview.io.gff.SequenceOntologyLite;
import jalview.util.MapList;
-import java.util.Arrays;
import java.util.List;
import org.testng.annotations.AfterClass;
* with no filter
*/
List<SequenceFeature> features = testee.getTranscriptFeatures(geneId,
- genomic, null);
+ genomic);
assertEquals(3, features.size());
assertSame(sf1, features.get(0));
assertSame(sf2, features.get(1));
assertSame(sf3, features.get(2));
-
- /*
- * with filter
- */
- List<String> ids = Arrays.asList(new String[] { "transcript2",
- "transcript3" });
- features = testee.getTranscriptFeatures(geneId, genomic, ids);
- assertEquals(2, features.size());
- assertSame(sf2, features.get(0));
- assertSame(sf3, features.get(1));
}
/**
Assert.assertTrue(esq.isValidReference("ENSP00000288602"));
Assert.assertFalse(esq.isValidReference("ENST00000288602"));
Assert.assertFalse(esq.isValidReference("ENSG00000288602"));
+ // non-human species having a 3 character identifier included:
+ Assert.assertTrue(esq.isValidReference("ENSMUSP00000099398"));
}
@Test(groups = "Functional")
assertFalse(EnsemblSeqProxy.isTranscriptIdentifier(""));
assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("ENSG00000012345"));
assertTrue(EnsemblSeqProxy.isTranscriptIdentifier("ENST00000012345"));
+ assertTrue(EnsemblSeqProxy.isTranscriptIdentifier("ENSMUST00000012345"));
assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("enst00000012345"));
assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("ENST000000123456"));
assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("ENST0000001234"));
}
+
+ @Test(groups = "Functional") // which strings are recognised as gene identifiers
+ public void testIsGeneIdentifier()
+ {
+ assertFalse(EnsemblSeqProxy.isGeneIdentifier(null)); // null is rejected, not an error
+ assertFalse(EnsemblSeqProxy.isGeneIdentifier("")); // empty string
+ assertFalse(EnsemblSeqProxy.isGeneIdentifier("ENST00000012345")); // ENST, not ENSG
+ assertTrue(EnsemblSeqProxy.isGeneIdentifier("ENSG00000012345")); // ENSG + 11 digits
+ assertTrue(EnsemblSeqProxy.isGeneIdentifier("ENSMUSG00000012345")); // with 3 character species identifier
+ assertFalse(EnsemblSeqProxy.isGeneIdentifier("ensg00000012345")); // lower case prefix
+ assertFalse(EnsemblSeqProxy.isGeneIdentifier("ENSG000000123456")); // 12 digits - too long
+ assertFalse(EnsemblSeqProxy.isGeneIdentifier("ENSG0000001234")); // 10 digits - too short
+ }
}
\ No newline at end of file
assertSame(ref1, matches[0]);
assertSame(ref2, matches[1]);
}
+
+ /**
+ * Test the method that searches for matching references based on accession id
+ * only. References are expected to match whatever their database source, and
+ * whether or not they hold a mapping, provided the accession id is an exact
+ * (case-sensitive) match. Matches are expected in the same order as in the
+ * array searched.
+ */
+ @Test(groups = { "Functional" })
+ public void testSearchRefs_accessionid()
+ {
+
+ DBRefEntry ref1 = new DBRefEntry("Uniprot", "1", "A1234"); // matches
+ DBRefEntry ref2 = new DBRefEntry("embl", "1", "A1234"); // matches
+ // constructor does not upper-case accession id, so "a1234" differs from "A1234"
+ DBRefEntry ref3 = new DBRefEntry("EMBL", "1", "a1234"); // no match
+ DBRefEntry ref4 = new DBRefEntry("EMBLCDS", "1", "A1235"); // no match
+ // ref5 matches although it has a mapping - the mapping is ignored
+ DBRefEntry ref5 = new DBRefEntry("EMBL", "1", "A1234");
+ ref5.setMap(new Mapping(new MapList(new int[] { 1, 1 }, new int[] { 1,
+ 1 }, 1, 1)));
+
+ DBRefEntry[] matches = DBRefUtils.searchRefs(new DBRefEntry[] { ref1,
+ ref2, ref3, ref4, ref5 }, "A1234");
+ assertEquals(3, matches.length);
+ assertSame(ref1, matches[0]);
+ assertSame(ref2, matches[1]);
+ assertSame(ref5, matches[2]);
+ }
}
import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertSame;
import static org.testng.AssertJUnit.assertTrue;
+import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
import jalview.api.AlignViewportI;
import jalview.commands.EditCommand;
assertEquals("[0, 3]", Arrays.toString(hidden.get(0)));
assertEquals("[5, 10]", Arrays.toString(hidden.get(1)));
}
+
+ /**
+ * Tests for the method that removes the trailing stop codon from a mapping
+ * range i.e. the last 3 positions (whether split or not). In each case the
+ * ranges supplied have a total length equal to the mapped length argument,
+ * and the test checks that the range holding the last retained position is
+ * truncated and that any ranges lying wholly within the stop codon are
+ * removed from the list.
+ */
+ @Test(groups = { "Functional" })
+ public void testUnmapStopCodon()
+ {
+ List<int[]> ranges = new ArrayList<int[]>();
+
+ // simple case, forward strand: positions 12, 13, 14 are removed
+ ranges.add(new int[] { 1, 3 });
+ ranges.add(new int[] { 9, 14 });
+ MappingUtils.unmapStopCodon(ranges, 9);
+ assertEquals(2, ranges.size());
+ assertArrayEquals(new int[] { 1, 3 }, ranges.get(0));
+ assertArrayEquals(new int[] { 9, 11 }, ranges.get(1));
+
+ // split stop codon, forward strand: positions 7, 8 and 10 are removed
+ ranges.clear();
+ ranges.add(new int[] { 1, 8 });
+ ranges.add(new int[] { 10, 10 });
+ MappingUtils.unmapStopCodon(ranges, 9);
+ assertEquals(1, ranges.size());
+ assertArrayEquals(new int[] { 1, 6 }, ranges.get(0));
+
+ // very split stop codon, forward strand: positions 6, 8 and 10 are removed
+ ranges.clear();
+ ranges.add(new int[] { 1, 1 });
+ ranges.add(new int[] { 3, 4 });
+ ranges.add(new int[] { 6, 6 });
+ ranges.add(new int[] { 8, 8 });
+ ranges.add(new int[] { 10, 10 });
+ MappingUtils.unmapStopCodon(ranges, 6);
+ assertEquals(2, ranges.size());
+ assertArrayEquals(new int[] { 1, 1 }, ranges.get(0));
+ assertArrayEquals(new int[] { 3, 4 }, ranges.get(1));
+
+ // simple case, reverse strand: positions 3, 2, 1 are removed
+ ranges.clear();
+ ranges.add(new int[] { 12, 10 });
+ ranges.add(new int[] { 6, 1 });
+ MappingUtils.unmapStopCodon(ranges, 9);
+ assertEquals(2, ranges.size());
+ assertArrayEquals(new int[] { 12, 10 }, ranges.get(0));
+ assertArrayEquals(new int[] { 6, 4 }, ranges.get(1));
+
+ // split stop codon, reverse strand: positions 6, 4 and 3 are removed
+ ranges.clear();
+ ranges.add(new int[] { 12, 6 });
+ ranges.add(new int[] { 4, 3 });
+ MappingUtils.unmapStopCodon(ranges, 9);
+ assertEquals(1, ranges.size());
+ assertArrayEquals(new int[] { 12, 7 }, ranges.get(0));
+ }
}
import jalview.ws.seqfetcher.ASequenceFetcher;
import jalview.ws.seqfetcher.DbSourceProxy;
+import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
import java.util.Vector;
System.out.println("Type: " + types[t]);
SequenceI[] prod = jalview.analysis.CrossRef
.findXrefSequences(al.getSequencesArray(), dna,
- types[t]).getSequencesArray();
+ types[t], null, new ArrayList<SequenceI>())
+ .getSequencesArray();
System.out.println("Found "
+ ((prod == null) ? "no" : "" + prod.length)
+ " products");
// sequences.
SequenceI[] seqs = al.getSequencesArray();
Alignment prodal = jalview.analysis.CrossRef.findXrefSequences(
- seqs, dna, null, ds);
+ seqs, dna, null, ds, new ArrayList<SequenceI>());
System.out.println("Found "
+ ((prodal == null) ? "no" : "" + prodal.getHeight())
+ " products");
assertEquals("Expected local reference map to be 3 nucleotides", dr[0]
.getMap().getWidth(), 3);
AlignmentI sprods = CrossRef.findXrefSequences(
- alsq.getSequencesArray(), true, dr[0].getSource(),
- alsq.getDataset());
+ alsq.getSequencesArray(), true, dr[0].getSource(), alsq,
+ new ArrayList<SequenceI>());
assertNotNull(
"Couldn't recover cross reference sequence from dataset. Was it ever added ?",
sprods);