public class DBRefEntry implements DBRefEntryI
{
- String source = "", version = "", accessionId = "";
+ /*
+ * the mapping to chromosome (genome) is held as an instance with
+ * source = speciesId
+ * version = assemblyId
+ * accessionId = "chromosome:" + chromosomeId
+ * map = mapping from sequence to reference assembly
+ */
+ public static final String CHROMOSOME = "chromosome";
+
+ String source = "";
+
+ String version = "";
+
+ String accessionId = "";
/**
* maps from associated sequence to the database sequence's coordinate system
}
return true;
}
+
+ /**
+ * Mappings to chromosome are held with accessionId as "chromosome:id"
+ *
+ * @return
+ */
+ public boolean isChromosome()
+ {
+ return accessionId != null && accessionId.startsWith(CHROMOSOME + ":");
+ }
}
+++ /dev/null
-package jalview.datamodel;
-
-import jalview.util.MapList;
-
-/**
- * A data bean to model one or more contiguous regions on one chromosome
- */
-public class GeneLoci
-{
- /*
- * an identifier for the species
- */
- public final String species;
-
- /*
- * an identifier for a genome assembly, e.g. GRCh38
- */
- public final String assembly;
-
- /*
- * a chromosome identifier, e.g. "5" or "X"
- */
- public final String chromosome;
-
- /*
- * mapping from sequence positions to chromosome locations;
- * any regions with start > end are on the reverse strand
- */
- public final MapList mapping;
-
- /**
- * Constructor
- *
- * @param taxon
- * @param ref
- * @param chrId
- * @param map
- */
- public GeneLoci(String taxon, String ref, String chrId, MapList map)
- {
- species = taxon;
- assembly = ref;
- chromosome = chrId;
- mapping = map;
- }
-}
--- /dev/null
+package jalview.datamodel;
+
+import jalview.util.MapList;
+
+/**
+ * An interface to model one or more contiguous regions on one chromosome
+ */
+public interface GeneLociI
+{
+ /**
+ * Answers the species identifier
+ *
+ * @return
+ */
+ String getSpeciesId();
+
+ /**
+ * Answers the reference assembly identifier
+ *
+ * @return
+ */
+ String getAssemblyId();
+
+ /**
+ * Answers the chromosome identifier e.g. "2", "Y", "II"
+ *
+ * @return
+ */
+ String getChromosomeId();
+
+ /**
+ * Answers the mapping from sequence to chromosome loci. For a reverse strand
+ * mapping, the chromosomal ranges will have start > end.
+ *
+ * @return
+ */
+ MapList getMap();
+}
*/
private int changeCount;
- private GeneLoci geneLoci;
-
/**
* Creates a new Sequence object.
*
public void setDescription(String desc)
{
this.description = desc;
- parseDescription();
- }
-
- /**
- * Parses and saves fields of an Ensembl-style description e.g.
- * chromosome:GRCh38:17:45051610:45109016:1
- */
- protected void parseDescription()
- {
- if (description == null)
- {
- return;
- }
- String[] tokens = description.split(":");
- if (tokens.length == 6 && "chromosome".equals(tokens[0])) {
- String ref = tokens[1];
- String chrom = tokens[2];
- try {
- int chStart = Integer.parseInt(tokens[3]);
- int chEnd = Integer.parseInt(tokens[4]);
- boolean forwardStrand = "1".equals(tokens[5]);
- String species = ""; // dunno yet!
- int[] from = new int[] { start, end };
- int[] to = new int[] { forwardStrand ? chStart : chEnd,
- forwardStrand ? chEnd : chStart };
- MapList map = new MapList(from, to, 1, 1);
- GeneLoci gl = new GeneLoci(species, ref, chrom, map);
- setGeneLoci(gl);
- } catch (NumberFormatException e)
- {
- System.err.println("Bad integers in description " + description);
- }
- }
}
@Override
- public void setGeneLoci(GeneLoci gl)
+ public void setGeneLoci(String speciesId, String assemblyId,
+ String chromosomeId, MapList map)
{
- geneLoci = gl;
+ addDBRef(new DBRefEntry(speciesId, assemblyId, DBRefEntry.CHROMOSOME
+ + ":" + chromosomeId, new Mapping(map)));
}
/**
* @return
*/
@Override
- public GeneLoci getGeneLoci()
+ public GeneLociI getGeneLoci()
{
- return geneLoci;
+ DBRefEntry[] refs = getDBRefs();
+ if (refs != null)
+ {
+ for (final DBRefEntry ref : refs)
+ {
+ if (ref.isChromosome())
+ {
+ return new GeneLociI()
+ {
+ @Override
+ public String getSpeciesId()
+ {
+ return ref.getSource();
+ }
+
+ @Override
+ public String getAssemblyId()
+ {
+ return ref.getVersion();
+ }
+
+ @Override
+ public String getChromosomeId()
+ {
+ // strip of "chromosome:" prefix to chrId
+ return ref.getAccessionId().substring(
+ DBRefEntry.CHROMOSOME.length() + 1);
+ }
+
+ @Override
+ public MapList getMap()
+ {
+ return ref.getMap().getMap();
+ }
+ };
+ }
+ }
+ }
+ return null;
}
/**
package jalview.datamodel;
import jalview.datamodel.features.SequenceFeaturesI;
+import jalview.util.MapList;
import java.util.BitSet;
import java.util.List;
*/
public int replace(char c1, char c2);
- public abstract GeneLoci getGeneLoci();
+ GeneLociI getGeneLoci();
- public abstract void setGeneLoci(GeneLoci gl);
+ void setGeneLoci(String speciesId, String assemblyId,
+ String chromosomeId, MapList map);
}
import jalview.api.FeatureColourI;
import jalview.api.FeatureSettingsModelI;
import jalview.datamodel.AlignmentI;
-import jalview.datamodel.GeneLoci;
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.GeneLociI;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
{
continue;
}
+
+ parseChromosomeLocations(geneAlignment);
+
if (geneAlignment.getHeight() == 1)
{
getTranscripts(geneAlignment, geneId);
}
/**
+ * Parses and saves fields of an Ensembl-style description e.g.
+ * chromosome:GRCh38:17:45051610:45109016:1
+ *
+ * @param alignment
+ */
+ private void parseChromosomeLocations(AlignmentI alignment)
+ {
+ for (SequenceI seq : alignment.getSequences())
+ {
+ String description = seq.getDescription();
+ if (description == null)
+ {
+ continue;
+ }
+ String[] tokens = description.split(":");
+ if (tokens.length == 6 && tokens[0].startsWith(DBRefEntry.CHROMOSOME))
+ {
+ String ref = tokens[1];
+ String chrom = tokens[2];
+ try
+ {
+ int chStart = Integer.parseInt(tokens[3]);
+ int chEnd = Integer.parseInt(tokens[4]);
+ boolean forwardStrand = "1".equals(tokens[5]);
+ String species = ""; // dunno yet!
+ int[] from = new int[] { seq.getStart(), seq.getEnd() };
+ int[] to = new int[] { forwardStrand ? chStart : chEnd,
+ forwardStrand ? chEnd : chStart };
+ MapList map = new MapList(from, to, 1, 1);
+ seq.setGeneLoci(species, ref, chrom, map);
+ } catch (NumberFormatException e)
+ {
+ System.err.println("Bad integers in description " + description);
+ }
+ }
+ }
+ }
+
+ /**
* Converts a query, which may contain one or more gene or transcript
* identifiers, into a non-redundant list of gene identifiers.
*
protected void mapTranscriptToChromosome(SequenceI transcript,
SequenceI gene, MapList mapping)
{
- GeneLoci loci = gene.getGeneLoci();
+ GeneLociI loci = gene.getGeneLoci();
if (loci == null)
{
return;
}
- /*
- * patch to ensure gene to chromosome mapping is complete
- * (in case created before gene length was known)
- */
- MapList geneMapping = loci.mapping;
- if (geneMapping.getFromRanges().get(0)[1] == 0)
- {
- geneMapping.getFromRanges().get(0)[0] = gene.getStart();
- geneMapping.getFromRanges().get(0)[1] = gene.getEnd();
- }
+ MapList geneMapping = loci.getMap();
List<int[]> exons = mapping.getFromRanges();
List<int[]> transcriptLoci = new ArrayList<>();
List<int[]> transcriptRange = Arrays.asList(new int[] {
transcript.getStart(), transcript.getEnd() });
MapList mapList = new MapList(transcriptRange, transcriptLoci, 1, 1);
- GeneLoci gl = new GeneLoci(loci.species, loci.assembly,
- loci.chromosome, mapList);
- transcript.setGeneLoci(gl);
+ transcript.setGeneLoci(loci.getSpeciesId(), loci.getAssemblyId(),
+ loci.getChromosomeId(), mapList);
}
/**
import jalview.api.AlignViewControllerGuiI;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefEntry;
-import jalview.datamodel.GeneLoci;
+import jalview.datamodel.GeneLociI;
import jalview.datamodel.Mapping;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
boolean isVcfRefGrch37)
{
int count = 0;
- GeneLoci seqCoords = seq.getGeneLoci();
+ GeneLociI seqCoords = seq.getGeneLoci();
if (seqCoords == null)
{
return 0;
}
- List<int[]> seqChromosomalContigs = seqCoords.mapping.getToRanges();
+ List<int[]> seqChromosomalContigs = seqCoords.getMap().getToRanges();
for (int[] range : seqChromosomalContigs)
{
count += addVcfVariants(seq, reader, range, isVcfRefGrch37);
protected int addVcfVariants(SequenceI seq, VCFReader reader,
int[] range, boolean isVcfRefGrch37)
{
- GeneLoci seqCoords = seq.getGeneLoci();
+ GeneLociI seqCoords = seq.getGeneLoci();
- String chromosome = seqCoords.chromosome;
- String seqRef = seqCoords.assembly;
- String species = seqCoords.species;
+ String chromosome = seqCoords.getChromosomeId();
+ String seqRef = seqCoords.getAssemblyId();
+ String species = seqCoords.getSpeciesId();
// TODO handle species properly
if ("".equals(species))
* (convert a reverse strand range to forwards)
*/
int count = 0;
- MapList mapping = seqCoords.mapping;
+ MapList mapping = seqCoords.getMap();
int fromLocus = Math.min(range[0], range[1]);
int toLocus = Math.max(range[0], range[1]);
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefEntry;
-import jalview.datamodel.GeneLoci;
import jalview.datamodel.Mapping;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
* verify variant feature(s) computed and added to protein
* first codon AGC varies to ACC giving S/T
*/
- SequenceI peptide = al.getSequenceAt(1)
- .getDBRefs()[0].getMap().getTo();
+ DBRefEntry[] dbRefs = al.getSequenceAt(1).getDBRefs();
+ SequenceI peptide = null;
+ for (DBRefEntry dbref : dbRefs)
+ {
+ if (dbref.getMap().getMap().getFromRatio() == 3)
+ {
+ peptide = dbref.getMap().getTo();
+ }
+ }
List<SequenceFeature> proteinFeatures = peptide.getSequenceFeatures();
assertEquals(proteinFeatures.size(), 1);
sf = proteinFeatures.get(0);
SequenceI gene1 = alignment.getSequenceAt(0);
int[] to = new int[] { 45051610, 45051634 };
int[] from = new int[] { gene1.getStart(), gene1.getEnd() };
- gene1.setGeneLoci(new GeneLoci("human", "GRCh38", "17", new MapList(
- from, to, 1, 1)));
+ gene1.setGeneLoci("human", "GRCh38", "17", new MapList(from, to, 1, 1));
/*
* map 'transcript1' to chromosome via 'gene1'
to = new int[] { 45051612, 45051619, 45051624, 45051633 };
SequenceI transcript1 = alignment.getSequenceAt(1);
from = new int[] { transcript1.getStart(), transcript1.getEnd() };
- transcript1.setGeneLoci(new GeneLoci("human", "GRCh38", "17",
- new MapList(from, to, 1, 1)));
+ transcript1.setGeneLoci("human", "GRCh38", "17", new MapList(from, to,
+ 1, 1));
/*
* map gene2 to chromosome reverse strand
SequenceI gene2 = alignment.getSequenceAt(2);
to = new int[] { 45051634, 45051610 };
from = new int[] { gene2.getStart(), gene2.getEnd() };
- gene2.setGeneLoci(new GeneLoci("human", "GRCh38", "17", new MapList(
- from, to, 1, 1)));
+ gene2.setGeneLoci("human", "GRCh38", "17", new MapList(from, to, 1, 1));
/*
* map 'transcript2' to chromosome via 'gene2'
to = new int[] { 45051633, 45051624, 45051619, 45051612 };
SequenceI transcript2 = alignment.getSequenceAt(3);
from = new int[] { transcript2.getStart(), transcript2.getEnd() };
- transcript2.setGeneLoci(new GeneLoci("human", "GRCh38", "17",
- new MapList(from, to, 1, 1)));
+ transcript2.setGeneLoci("human", "GRCh38", "17", new MapList(from, to,
+ 1, 1));
/*
* add a protein product as a DBRef on transcript1
* verify variant feature(s) computed and added to protein
* last codon GCT varies to GGT giving A/G in the last peptide position
*/
- SequenceI peptide = al.getSequenceAt(3).getDBRefs()[0].getMap().getTo();
+ DBRefEntry[] dbRefs = al.getSequenceAt(3).getDBRefs();
+ SequenceI peptide = null;
+ for (DBRefEntry dbref : dbRefs)
+ {
+ if (dbref.getMap().getMap().getFromRatio() == 3)
+ {
+ peptide = dbref.getMap().getTo();
+ }
+ }
List<SequenceFeature> proteinFeatures = peptide.getSequenceFeatures();
assertEquals(proteinFeatures.size(), 1);
sf = proteinFeatures.get(0);