exception.resource_not_be_found = The requested resource could not be found
exception.pdb_server_error = There seems to be an error from the PDB server
exception.pdb_server_unreachable = Jalview is unable to reach the PDBe Solr server. \nPlease ensure that you are connected to the internet and try again.
+ label.nw_mapping = Needleman & Wunsch Alignment
+ label.sifts_mapping = SIFTs Mapping
-label.mapping_method = Sequence \u27f7 Structure mapping method
++label.mapping_method = Sequence \u27f7 Structure mapping method
* mapped protein sequences
* @return
*/
- protected static List<SequenceI> makeExonSequences(SequenceI dnaSeq,
- AlignedCodonFrame mapping, AlignedCodonFrame newMapping)
+ protected static List<SequenceI> makeCdsSequences(SequenceI dnaSeq,
+ AlignedCodonFrame mapping, AlignedCodonFrame newMappings)
{
- List<SequenceI> exonSequences = new ArrayList<SequenceI>();
+ List<SequenceI> cdsSequences = new ArrayList<SequenceI>();
List<Mapping> seqMappings = mapping.getMappingsForSequence(dnaSeq);
- final char[] dna = dnaSeq.getSequence();
+
for (Mapping seqMapping : seqMappings)
{
- StringBuilder newSequence = new StringBuilder(dnaSeq.getLength());
+ SequenceI cds = makeCdsSequence(dnaSeq, seqMapping);
+ cdsSequences.add(cds);
/*
- * Get the codon regions as { [2, 5], [7, 12], [14, 14] etc }
+ * add new mappings, from dna to cds, and from cds to peptide
*/
- final List<int[]> dnaExonRanges = seqMapping.getMap().getFromRanges();
- for (int[] range : dnaExonRanges)
+ MapList dnaToCds = addCdsMappings(dnaSeq, cds, seqMapping,
+ newMappings);
+
+ /*
+ * transfer any features on dna that overlap the CDS
+ */
+ transferFeatures(dnaSeq, cds, dnaToCds, "CDS" /* SequenceOntology.CDS */);
+ }
+ return cdsSequences;
+ }
+
+ /**
+ * Transfers any co-located features on 'fromSeq' to 'toSeq', adjusting the
+ * feature start/end ranges, optionally omitting specified feature types.
+ *
+ * @param fromSeq
+ * @param toSeq
+ * @param mapping
+ * the mapping from 'fromSeq' to 'toSeq'
+ * @param omitting
+ */
+ protected static void transferFeatures(SequenceI fromSeq,
+ SequenceI toSeq, MapList mapping, String... omitting)
+ {
+ SequenceI copyTo = toSeq;
+ while (copyTo.getDatasetSequence() != null)
+ {
+ copyTo = copyTo.getDatasetSequence();
+ }
+
+ SequenceFeature[] sfs = fromSeq.getSequenceFeatures();
+ if (sfs != null)
+ {
+ for (SequenceFeature sf : sfs)
{
- for (int pos = range[0]; pos <= range[1]; pos++)
+ String type = sf.getType();
+ boolean omit = false;
+ for (String toOmit : omitting)
+ {
+ if (type.equals(toOmit))
+ {
+ omit = true;
+ }
+ }
+ if (omit)
+ {
+ continue;
+ }
+
+ /*
+ * locate the mapped range - null if either start or end is
+ * not mapped (no partial overlaps are calculated)
+ */
+ int[] mappedTo = mapping.locateInTo(sf.getBegin(), sf.getEnd());
+ if (mappedTo != null)
{
- newSequence.append(dna[pos - 1]);
+ SequenceFeature copy = new SequenceFeature(sf);
+ copy.setBegin(Math.min(mappedTo[0], mappedTo[1]));
+ copy.setEnd(Math.max(mappedTo[0], mappedTo[1]));
+ copyTo.addSequenceFeature(copy);
}
}
+ }
+ }
- SequenceI exon = new Sequence(dnaSeq.getName(),
- newSequence.toString());
+ /**
+ * Creates and adds mappings
+ * <ul>
+ * <li>from cds to peptide</li>
+ * <li>from dna to cds</li>
+ * </ul>
+ * and returns the dna-to-cds mapping
+ *
+ * @param dnaSeq
+ * @param cdsSeq
+ * @param dnaMapping
+ * @param newMappings
+ * @return
+ */
+ protected static MapList addCdsMappings(SequenceI dnaSeq,
+ SequenceI cdsSeq,
+ Mapping dnaMapping, AlignedCodonFrame newMappings)
+ {
+ cdsSeq.createDatasetSequence();
- /*
- * Locate any xrefs to CDS database on the protein product and attach to
- * the CDS sequence. Also add as a sub-token of the sequence name.
- */
- // default to "CDS" if we can't locate an actual gene id
- String cdsAccId = FeatureProperties
- .getCodingFeature(DBRefSource.EMBL);
- DBRefEntry[] cdsRefs = DBRefUtils.selectRefs(seqMapping.getTo()
- .getDBRefs(), DBRefSource.CODINGDBS);
- if (cdsRefs != null)
+ /*
+ * CDS to peptide is just a contiguous 3:1 mapping, with
+ * the peptide ranges taken unchanged from the dna mapping
+ */
+ List<int[]> cdsRanges = new ArrayList<int[]>();
+ cdsRanges.add(new int[] { 1, cdsSeq.getLength() });
+ MapList cdsToPeptide = new MapList(cdsRanges, dnaMapping.getMap()
+ .getToRanges(), 3, 1);
+ newMappings.addMap(cdsSeq.getDatasetSequence(), dnaMapping.getTo(),
+ cdsToPeptide);
+
+ /*
+ * dna 'from' ranges map 1:1 to the contiguous extracted CDS
+ */
+ MapList dnaToCds = new MapList(
+ dnaMapping.getMap().getFromRanges(), cdsRanges, 1, 1);
+ newMappings.addMap(dnaSeq, cdsSeq.getDatasetSequence(), dnaToCds);
+ return dnaToCds;
+ }
+
+ /**
+ * Makes and returns a CDS-only sequence, where the CDS regions are identified
+ * as the 'from' ranges of the mapping on the dna.
+ *
+ * @param dnaSeq
+ * nucleotide sequence
+ * @param seqMapping
+ * mappings from CDS regions of nucleotide
+ * @return
+ */
+ protected static SequenceI makeCdsSequence(SequenceI dnaSeq,
+ Mapping seqMapping)
+ {
+ StringBuilder newSequence = new StringBuilder(dnaSeq.getLength());
+ final char[] dna = dnaSeq.getSequence();
+ int offset = dnaSeq.getStart() - 1;
+
+ /*
+ * Get the codon regions as { [2, 5], [7, 12], [14, 14] etc }
+ */
+ final List<int[]> dnaCdsRanges = seqMapping.getMap().getFromRanges();
+ for (int[] range : dnaCdsRanges)
+ {
+ // TODO handle reverse mapping as well (range[1] < range[0])
+ for (int pos = range[0]; pos <= range[1]; pos++)
{
- for (DBRefEntry cdsRef : cdsRefs)
- {
- exon.addDBRef(new DBRefEntry(cdsRef));
- cdsAccId = cdsRef.getAccessionId();
- }
+ newSequence.append(dna[pos - offset - 1]);
}
- exon.setName(exon.getName() + "|" + cdsAccId);
- exon.createDatasetSequence();
+ }
- /*
- * Build new mappings - from the same protein regions, but now to
- * contiguous exons
- */
- List<int[]> exonRange = new ArrayList<int[]>();
- exonRange.add(new int[] { 1, newSequence.length() });
- MapList map = new MapList(exonRange, seqMapping.getMap()
- .getToRanges(), 3, 1);
- newMapping.addMap(exon.getDatasetSequence(), seqMapping.getTo(), map);
- MapList cdsToDnaMap = new MapList(dnaExonRanges, exonRange, 1, 1);
- newMapping.addMap(dnaSeq, exon.getDatasetSequence(), cdsToDnaMap);
-
- exonSequences.add(exon);
+ SequenceI cds = new Sequence(dnaSeq.getName(),
+ newSequence.toString());
+
+ transferDbRefs(seqMapping.getTo(), cds);
+
+ return cds;
+ }
+
+ /**
+ * Locate any xrefs to CDS databases on the protein product and attach to the
+ * CDS sequence. Also add as a sub-token of the sequence name.
+ *
+ * @param from
+ * @param to
+ */
+ protected static void transferDbRefs(SequenceI from, SequenceI to)
+ {
+ String cdsAccId = FeatureProperties.getCodingFeature(DBRefSource.EMBL);
- DBRefEntry[] cdsRefs = DBRefUtils.selectRefs(from.getDBRef(),
++ DBRefEntry[] cdsRefs = DBRefUtils.selectRefs(from.getDBRefs(),
+ DBRefSource.CODINGDBS);
+ if (cdsRefs != null)
+ {
+ for (DBRefEntry cdsRef : cdsRefs)
+ {
+ to.addDBRef(new DBRefEntry(cdsRef));
+ cdsAccId = cdsRef.getAccessionId();
+ }
+ }
+ if (!to.getName().contains(cdsAccId))
+ {
+ to.setName(to.getName() + "|" + cdsAccId);
}
- return exonSequences;
}
}
{
dss = dss.getDatasetSequence();
}
- DBRefEntry[] rfs = findXDbRefs(dna, dss.getDBRef());
+ DBRefEntry[] rfs = findXDbRefs(dna, dss.getDBRefs());
- for (int r = 0; rfs != null && r < rfs.length; r++)
+ if (rfs != null)
{
- if (!refs.contains(rfs[r].getSource()))
+ for (DBRefEntry ref : rfs)
{
- refs.add(rfs[r].getSource());
+ if (!refs.contains(ref.getSource()))
+ {
+ refs.add(ref.getSource());
+ }
}
}
if (dataset != null)
{
// search for references to this sequence's direct references.
- DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seq.getDBRef());
- DBRefEntry[] lrfs = CrossRef
- .findXDbRefs(!dna, seqs[s].getDBRefs());
++ DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seq.getDBRefs());
List<SequenceI> rseqs = new ArrayList<SequenceI>();
- CrossRef.searchDatasetXrefs(seqs[s], !dna, lrfs, dataset, rseqs,
+ CrossRef.searchDatasetXrefs(seq, !dna, lrfs, dataset, rseqs,
null); // don't need to specify codon frame for mapping here
for (SequenceI rs : rseqs)
{
- DBRefEntry[] xrs = findXDbRefs(dna, rs.getDBRef());
- DBRefEntry[] xrs = findXDbRefs(dna, rs.getDBRefs()); // not used??
- for (int r = 0; rfs != null && r < rfs.length; r++)
++ DBRefEntry[] xrs = findXDbRefs(dna, rs.getDBRefs());
+ if (xrs != null)
{
- if (!refs.contains(rfs[r].getSource()))
+ for (DBRefEntry ref : xrs)
{
- refs.add(rfs[r].getSource());
+ if (!refs.contains(ref.getSource()))
+ {
+ refs.add(ref.getSource());
+ }
}
}
+ // looks like copy and paste - change rfs to xrs?
+ // for (int r = 0; rfs != null && r < rfs.length; r++)
+ // {
+ // if (!refs.contains(rfs[r].getSource()))
+ // {
+ // refs.add(rfs[r].getSource());
+ // }
+ // }
}
}
}
if ((xrfs == null || xrfs.length == 0) && dataset != null)
{
System.out.println("Attempting to find ds Xrefs refs.");
- DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seqs[s].getDBRefs());
+ // FIXME should be dss not seq here?
- DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seq.getDBRef());
++ DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seq.getDBRefs());
// less ambiguous would be a 'find primary dbRefEntry' method.
// filter for desired source xref here
found = CrossRef.searchDatasetXrefs(dss, !dna, lrfs, dataset,
for (int rs = 0; rs < retrieved.length; rs++)
{
// TODO: examine each sequence for 'redundancy'
- DBRefEntry[] dbr = retrieved[rs].getDBRef();
- jalview.datamodel.DBRefEntry[] dbr = retrieved[rs]
- .getDBRefs();
++ DBRefEntry[] dbr = retrieved[rs].getDBRefs();
if (dbr != null && dbr.length > 0)
{
for (int di = 0; di < dbr.length; di++)
return null;
}
+ public IProgressIndicator getProgressIndicator()
+ {
+ return progressIndicator;
+ }
+
+ public void setProgressIndicator(IProgressIndicator progressIndicator)
+ {
+ this.progressIndicator = progressIndicator;
+ }
+
+ public long getProgressSessionId()
+ {
+ return progressSessionId;
+ }
+
+ public void setProgressSessionId(long progressSessionId)
+ {
+ this.progressSessionId = progressSessionId;
+ }
+
+ public void setProgressBar(String message)
+ {
+ progressIndicator.setProgressBar(message, progressSessionId);
+ }
+
+ public List<AlignedCodonFrame> getSequenceMappings()
+ {
+ return seqmappings;
+ }
++
}
char fromGapChar = mapFrom.getAlignment().getGapCharacter();
-- // FIXME allow for hidden columns
--
/*
* For each mapped column, find the range of columns that residues in that
* column map to.
*/
- for (Object obj : colsel.getSelected())
+ List<SequenceI> fromSequences = mapFrom.getAlignment().getSequences();
+ List<SequenceI> toSequences = mapTo.getAlignment().getSequences();
+
+ for (Integer sel : colsel.getSelected())
{
- int col = ((Integer) obj).intValue();
- int mappedToMin = Integer.MAX_VALUE;
- int mappedToMax = Integer.MIN_VALUE;
+ mapColumn(sel.intValue(), codonFrames, mappedColumns, fromSequences,
+ toSequences, fromGapChar);
+ }
+
+ for (int[] hidden : colsel.getHiddenColumns())
+ {
+ mapHiddenColumns(hidden, codonFrames, mappedColumns, fromSequences,
+ toSequences, fromGapChar);
+ }
+ return mappedColumns;
+ }
+
+ /**
+ * Helper method that maps a [start, end] hidden column range to its mapped
+ * equivalent
+ *
+ * @param hidden
+ * @param mappings
+ * @param mappedColumns
+ * @param fromSequences
+ * @param toSequences
+ * @param fromGapChar
+ */
+ protected static void mapHiddenColumns(int[] hidden,
- Set<AlignedCodonFrame> mappings,
++ List<AlignedCodonFrame> mappings,
+ ColumnSelection mappedColumns, List<SequenceI> fromSequences,
+ List<SequenceI> toSequences, char fromGapChar)
+ {
+ for (int col = hidden[0]; col <= hidden[1]; col++)
+ {
+ int[] mappedTo = findMappedColumns(col, mappings, fromSequences,
+ toSequences, fromGapChar);
/*
- * For each sequence in the 'from' alignment
+ * Add the range of hidden columns to the mapped selection (converting
+ * base 1 to base 0).
*/
- for (SequenceI fromSeq : mapFrom.getAlignment().getSequences())
+ if (mappedTo != null)
{
- /*
- * Ignore gaps (unmapped anyway)
- */
- if (fromSeq.getCharAt(col) == fromGapChar)
- {
- continue;
- }
+ mappedColumns.hideColumns(mappedTo[0] - 1, mappedTo[1] - 1);
+ }
+ }
+ }
+
+ /**
+ * Helper method to map one column selection
+ *
+ * @param col
+ * the column number (base 0)
+ * @param mappings
+ * the sequence mappings
+ * @param mappedColumns
+ * the mapped column selections to add to
+ * @param fromSequences
+ * @param toSequences
+ * @param fromGapChar
+ */
- protected static void mapColumn(int col, Set<AlignedCodonFrame> mappings,
++ protected static void mapColumn(int col,
++ List<AlignedCodonFrame> mappings,
+ ColumnSelection mappedColumns, List<SequenceI> fromSequences,
+ List<SequenceI> toSequences, char fromGapChar)
+ {
+ int[] mappedTo = findMappedColumns(col, mappings, fromSequences,
+ toSequences, fromGapChar);
+
+ /*
+ * Add the range of mapped columns to the mapped selection (converting
+ * base 1 to base 0). Note that this may include intron-only regions which
+ * lie between the start and end ranges of the selection.
+ */
+ if (mappedTo != null)
+ {
+ for (int i = mappedTo[0]; i <= mappedTo[1]; i++)
+ {
+ mappedColumns.addElement(i - 1);
+ }
+ }
+ }
+
+ /**
+ * Helper method to find the range of columns mapped to from one column.
+ * Returns the maximal range of columns mapped to from all sequences in the
+ * source column, or null if no mappings were found.
+ *
+ * @param col
+ * @param mappings
+ * @param fromSequences
+ * @param toSequences
+ * @param fromGapChar
+ * @return
+ */
+ protected static int[] findMappedColumns(int col,
- Set<AlignedCodonFrame> mappings, List<SequenceI> fromSequences,
++ List<AlignedCodonFrame> mappings, List<SequenceI> fromSequences,
+ List<SequenceI> toSequences, char fromGapChar)
+ {
+ int[] mappedTo = new int[] { Integer.MAX_VALUE, Integer.MIN_VALUE };
+ boolean found = false;
+
+ /*
+ * For each sequence in the 'from' alignment
+ */
+ for (SequenceI fromSeq : fromSequences)
+ {
+ /*
+ * Ignore gaps (unmapped anyway)
+ */
+ if (fromSeq.getCharAt(col) == fromGapChar)
+ {
+ continue;
+ }
+
+ /*
+ * Get the residue position and find the mapped position.
+ */
+ int residuePos = fromSeq.findPosition(col);
+ SearchResults sr = buildSearchResults(fromSeq, residuePos,
+ mappings);
+ for (Match m : sr.getResults())
+ {
+ int mappedStartResidue = m.getStart();
+ int mappedEndResidue = m.getEnd();
+ SequenceI mappedSeq = m.getSequence();
/*
- * Get the residue position and find the mapped position.
+ * Locate the aligned sequence whose dataset is mappedSeq. TODO a
+ * datamodel that can do this efficiently.
*/
- int residuePos = fromSeq.findPosition(col);
- SearchResults sr = buildSearchResults(fromSeq, residuePos,
- codonFrames);
- for (Match m : sr.getResults())
+ for (SequenceI toSeq : toSequences)
{
- int mappedStartResidue = m.getStart();
- int mappedEndResidue = m.getEnd();
- SequenceI mappedSeq = m.getSequence();
-
- /*
- * Locate the aligned sequence whose dataset is mappedSeq. TODO a
- * datamodel that can do this efficiently.
- */
- for (SequenceI toSeq : mapTo.getAlignment().getSequences())
+ if (toSeq.getDatasetSequence() == mappedSeq)
{
- if (toSeq.getDatasetSequence() == mappedSeq)
- {
- int mappedStartCol = toSeq.findIndex(mappedStartResidue);
- int mappedEndCol = toSeq.findIndex(mappedEndResidue);
- mappedToMin = Math.min(mappedToMin, mappedStartCol);
- mappedToMax = Math.max(mappedToMax, mappedEndCol);
- // System.out.println(fromSeq.getName() + " mapped to cols "
- // + mappedStartCol + ":" + mappedEndCol);
- break;
- // note: remove break if we ever want to map one to many sequences
- }
+ int mappedStartCol = toSeq.findIndex(mappedStartResidue);
+ int mappedEndCol = toSeq.findIndex(mappedEndResidue);
+ mappedTo[0] = Math.min(mappedTo[0], mappedStartCol);
+ mappedTo[1] = Math.max(mappedTo[1], mappedEndCol);
+ found = true;
+ break;
+ // note: remove break if we ever want to map one to many sequences
}
}
}
*
* @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[])
*/
+ @Override
public AlignmentI getSequenceRecords(String queries) throws Exception
{
- AlignmentI pdbfile = null;
+ AlignmentI pdbAlignment = null;
Vector result = new Vector();
String chain = null;
String id = null;
import java.io.File;
import java.io.FileReader;
import java.io.Reader;
+import java.net.URL;
+ import java.util.ArrayList;
import java.util.Vector;
+import org.exolab.castor.mapping.Mapping;
import org.exolab.castor.xml.Unmarshaller;
import com.stevesoft.pat.Regex;
* @author JimP
*
*/
-public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
+public class Uniprot extends DbSourceProxyImpl
{
--
private static final String BAR_DELIMITER = "|";
- private static final String NEWLINE = "\n";
-
- private static org.exolab.castor.mapping.Mapping map;
++ /*
++ * Castor mapping loaded from uniprot_mapping.xml
++ */
+ private static Mapping map;
/**
* Constructor
mappings.add(acf);
AlignedCodonFrame newMapping = new AlignedCodonFrame();
- List<SequenceI> exons = AlignmentUtils.makeExonSequences(dna1, acf,
+ List<SequenceI> cdsSeqs = AlignmentUtils.makeCdsSequences(dna1, acf,
newMapping);
- assertEquals(1, exons.size());
- SequenceI exon = exons.get(0);
+ assertEquals(1, cdsSeqs.size());
+ SequenceI cdsSeq = cdsSeqs.get(0);
- assertEquals("GGGTTT", exon.getSequenceAsString());
- assertEquals("dna1|A12345", exon.getName());
- assertEquals(1, exon.getDBRefs().length);
- DBRefEntry cdsRef = exon.getDBRefs()[0];
+ assertEquals("GGGTTT", cdsSeq.getSequenceAsString());
+ assertEquals("dna1|A12345", cdsSeq.getName());
- assertEquals(1, cdsSeq.getDBRef().length);
- DBRefEntry cdsRef = cdsSeq.getDBRef()[0];
++ assertEquals(1, cdsSeq.getDBRefs().length);
++ DBRefEntry cdsRef = cdsSeq.getDBRefs()[0];
assertEquals("EMBLCDS", cdsRef.getSource());
assertEquals("2", cdsRef.getVersion());
assertEquals("A12345", cdsRef.getAccessionId());
new SequenceI[] { dna1 }, mappings);
/*
- * Verify we have 3 exon sequences, mapped to pep1/2/3 respectively
+ * Verify we have 3 cds sequences, mapped to pep1/2/3 respectively
*/
- List<SequenceI> exons = exal.getSequences();
- assertEquals(3, exons.size());
-
- SequenceI exon = exons.get(0);
- assertEquals("GGGTTT", exon.getSequenceAsString());
- assertEquals("dna1|A12345", exon.getName());
- assertEquals(1, exon.getDBRefs().length);
- DBRefEntry cdsRef = exon.getDBRefs()[0];
+ List<SequenceI> cds = exal.getSequences();
+ assertEquals(3, cds.size());
+
+ SequenceI cdsSeq = cds.get(0);
+ assertEquals("GGGTTT", cdsSeq.getSequenceAsString());
+ assertEquals("dna1|A12345", cdsSeq.getName());
- assertEquals(1, cdsSeq.getDBRef().length);
- DBRefEntry cdsRef = cdsSeq.getDBRef()[0];
++ assertEquals(1, cdsSeq.getDBRefs().length);
++ DBRefEntry cdsRef = cdsSeq.getDBRefs()[0];
assertEquals("EMBLCDS", cdsRef.getSource());
assertEquals("2", cdsRef.getVersion());
assertEquals("A12345", cdsRef.getAccessionId());
- exon = exons.get(1);
- assertEquals("aaaccc", exon.getSequenceAsString());
- assertEquals("dna1|A12346", exon.getName());
- assertEquals(1, exon.getDBRefs().length);
- cdsRef = exon.getDBRefs()[0];
+ cdsSeq = cds.get(1);
+ assertEquals("aaaccc", cdsSeq.getSequenceAsString());
+ assertEquals("dna1|A12346", cdsSeq.getName());
- assertEquals(1, cdsSeq.getDBRef().length);
- cdsRef = cdsSeq.getDBRef()[0];
++ assertEquals(1, cdsSeq.getDBRefs().length);
++ cdsRef = cdsSeq.getDBRefs()[0];
assertEquals("EMBLCDS", cdsRef.getSource());
assertEquals("3", cdsRef.getVersion());
assertEquals("A12346", cdsRef.getAccessionId());
- exon = exons.get(2);
- assertEquals("aaaTTT", exon.getSequenceAsString());
- assertEquals("dna1|A12347", exon.getName());
- assertEquals(1, exon.getDBRefs().length);
- cdsRef = exon.getDBRefs()[0];
+ cdsSeq = cds.get(2);
+ assertEquals("aaaTTT", cdsSeq.getSequenceAsString());
+ assertEquals("dna1|A12347", cdsSeq.getName());
- assertEquals(1, cdsSeq.getDBRef().length);
- cdsRef = cdsSeq.getDBRef()[0];
++ assertEquals(1, cdsSeq.getDBRefs().length);
++ cdsRef = cdsSeq.getDBRefs()[0];
assertEquals("EMBLCDS", cdsRef.getSource());
assertEquals("4", cdsRef.getVersion());
assertEquals("A12347", cdsRef.getAccessionId());
assertEquals("ABCDEF", derived.getDatasetSequence()
.getSequenceAsString());
}
+
+ @Test(groups = { "Functional" })
+ public void testCopyConstructor_noDataset()
+ {
+ SequenceI seq1 = new Sequence("Seq1", "AB-C.D EF");
+ seq1.setDescription("description");
+ seq1.addAlignmentAnnotation(new AlignmentAnnotation("label", "desc",
+ 1.3d));
+ seq1.addSequenceFeature(new SequenceFeature("type", "desc", 22, 33,
+ 12.4f, "group"));
+ seq1.addPDBId(new PDBEntry("1A70", "B", Type.PDB, "File"));
+ seq1.addDBRef(new DBRefEntry("EMBL", "1.2", "AZ12345"));
+
+ SequenceI copy = new Sequence(seq1);
+
+ assertNull(copy.getDatasetSequence());
+
+ verifyCopiedSequence(seq1, copy);
+
+ // copy has a copy of the DBRefEntry
+ // this is murky - DBrefs are only copied for dataset sequences
+ // where the test for 'dataset sequence' is 'dataset is null'
+ // but that doesn't distinguish it from an aligned sequence
+ // which has not yet generated a dataset sequence
+ // NB getDBRefs looks inside dataset sequence if not null
- DBRefEntry[] dbrefs = copy.getDBRef();
++ DBRefEntry[] dbrefs = copy.getDBRefs();
+ assertEquals(1, dbrefs.length);
- assertFalse(dbrefs[0] == seq1.getDBRef()[0]);
- assertTrue(dbrefs[0].equals(seq1.getDBRef()[0]));
++ assertFalse(dbrefs[0] == seq1.getDBRefs()[0]);
++ assertTrue(dbrefs[0].equals(seq1.getDBRefs()[0]));
+ }
+
+ @Test(groups = { "Functional" })
+ public void testCopyConstructor_withDataset()
+ {
+ SequenceI seq1 = new Sequence("Seq1", "AB-C.D EF");
+ seq1.createDatasetSequence();
+ seq1.setDescription("description");
+ seq1.addAlignmentAnnotation(new AlignmentAnnotation("label", "desc",
+ 1.3d));
+ seq1.addSequenceFeature(new SequenceFeature("type", "desc", 22, 33,
+ 12.4f, "group"));
+ seq1.addPDBId(new PDBEntry("1A70", "B", Type.PDB, "File"));
+ // here we add DBRef to the dataset sequence:
+ seq1.getDatasetSequence().addDBRef(
+ new DBRefEntry("EMBL", "1.2", "AZ12345"));
+
+ SequenceI copy = new Sequence(seq1);
+
+ assertNotNull(copy.getDatasetSequence());
+ assertSame(copy.getDatasetSequence(), seq1.getDatasetSequence());
+
+ verifyCopiedSequence(seq1, copy);
+
+ // getDBRefs looks inside dataset sequence and this is shared,
+ // so holds the same dbref objects
- DBRefEntry[] dbrefs = copy.getDBRef();
++ DBRefEntry[] dbrefs = copy.getDBRefs();
+ assertEquals(1, dbrefs.length);
- assertSame(dbrefs[0], seq1.getDBRef()[0]);
++ assertSame(dbrefs[0], seq1.getDBRefs()[0]);
+ }
+
+ /**
+ * Helper to make assertions about a copied sequence
+ *
+ * @param seq1
+ * @param copy
+ */
+ protected void verifyCopiedSequence(SequenceI seq1, SequenceI copy)
+ {
+ // verify basic properties:
+ assertEquals(copy.getName(), seq1.getName());
+ assertEquals(copy.getDescription(), seq1.getDescription());
+ assertEquals(copy.getStart(), seq1.getStart());
+ assertEquals(copy.getEnd(), seq1.getEnd());
+ assertEquals(copy.getSequenceAsString(), seq1.getSequenceAsString());
+
+ // copy has a copy of the annotation:
+ AlignmentAnnotation[] anns = copy.getAnnotation();
+ assertEquals(1, anns.length);
+ assertFalse(anns[0] == seq1.getAnnotation()[0]);
+ assertEquals(anns[0].label, seq1.getAnnotation()[0].label);
+ assertEquals(anns[0].description, seq1.getAnnotation()[0].description);
+ assertEquals(anns[0].score, seq1.getAnnotation()[0].score);
+
+ // copy has a copy of the sequence feature:
+ SequenceFeature[] sfs = copy.getSequenceFeatures();
+ assertEquals(1, sfs.length);
+ assertFalse(sfs[0] == seq1.getSequenceFeatures()[0]);
+ assertTrue(sfs[0].equals(seq1.getSequenceFeatures()[0]));
+
+ // copy has a copy of the PDB entry
+ Vector<PDBEntry> pdbs = copy.getAllPDBEntries();
+ assertEquals(1, pdbs.size());
+ assertFalse(pdbs.get(0) == seq1.getAllPDBEntries().get(0));
+ assertTrue(pdbs.get(0).equals(seq1.getAllPDBEntries().get(0)));
+ }
}
protected void setupMappedAlignments() throws IOException
{
/*
-- * Set up dna and protein Seq1/2/3 with mappings (held on the protein
-- * viewport). Lower case for introns.
++ * Map (upper-case = coding):
++ * Seq1/10-18 AC-GctGtC-T to Seq1/40 -K-P
++ * Seq2/20-27 Tc-GA-G-T-Tc to Seq2/20-27 L--Q
++ * Seq3/30-38 TtTT-AaCGg- to Seq3/60-61 G--S
*/
AlignmentI cdna = loadAlignment(">Seq1/10-18\nAC-GctGtC-T\n"
+ ">Seq2/20-27\nTc-GA-G-T-Tc\n" + ">Seq3/30-38\nTtTT-AaCGg-\n",
}
/**
+ * Tests for the method that converts a series of [start, end] ranges to
+ * single positions, where the mapping is to a reverse strand i.e. start is
+ * greater than end point mapped to
+ */
+ @Test(groups = { "Functional" })
+ public void testFlattenRanges_reverseStrand()
+ {
+ assertEquals("[4, 3, 2, 1]",
+ Arrays.toString(MappingUtils.flattenRanges(new int[] { 4, 1 })));
+ assertEquals(
+ "[4, 3, 2, 1]",
+ Arrays.toString(MappingUtils.flattenRanges(new int[] { 4, 3, 2,
+ 1 })));
+ assertEquals(
+ "[4, 3, 2, 1]",
+ Arrays.toString(MappingUtils.flattenRanges(new int[] { 4, 4, 3,
+ 3, 2, 2, 1, 1 })));
+ assertEquals(
+ "[12, 9, 8, 7, 4, 3, 2, 1]",
+ Arrays.toString(MappingUtils.flattenRanges(new int[] { 12, 12,
+ 9, 7, 4, 1 })));
+ // forwards and backwards anyone?
+ assertEquals(
+ "[4, 5, 6, 3, 2, 1]",
+ Arrays.toString(MappingUtils.flattenRanges(new int[] { 4, 6, 3,
+ 1 })));
+ // backwards and forwards
+ assertEquals(
+ "[3, 2, 1, 4, 5, 6]",
+ Arrays.toString(MappingUtils.flattenRanges(new int[] { 3, 1, 4,
+ 6 })));
+ // trailing unpaired start position is ignored:
+ assertEquals(
+ "[12, 9, 8, 7, 4, 3, 2]",
+ Arrays.toString(MappingUtils.flattenRanges(new int[] { 12, 12,
+ 9, 7, 4, 2, 1 })));
+ }
++
++ /**
+ * Test mapping a column selection including hidden columns
+ *
+ * @throws IOException
+ */
+ @Test(groups = { "Functional" })
+ public void testMapColumnSelection_hiddenColumns() throws IOException
+ {
+ setupMappedAlignments();
+
- ColumnSelection colsel = new ColumnSelection();
++ ColumnSelection proteinSelection = new ColumnSelection();
+
+ /*
+ * Column 0 in protein picks up Seq2/L, Seq3/G which map to cols 0-4 and 0-3
+ * in dna respectively, overall 0-4
+ */
- colsel.hideColumns(0);
- ColumnSelection cs = MappingUtils.mapColumnSelection(colsel,
++ proteinSelection.hideColumns(0);
++ ColumnSelection dnaSelection = MappingUtils.mapColumnSelection(proteinSelection,
+ proteinView, dnaView);
- assertEquals("[]", cs.getSelected().toString());
- List<int[]> hidden = cs.getHiddenColumns();
++ assertEquals("[]", dnaSelection.getSelected().toString());
++ List<int[]> hidden = dnaSelection.getHiddenColumns();
+ assertEquals(1, hidden.size());
+ assertEquals("[0, 4]", Arrays.toString(hidden.get(0)));
+
+ /*
+ * Column 1 in protein picks up Seq1/K which maps to cols 0-3 in dna
+ */
- colsel.revealAllHiddenColumns();
- colsel.hideColumns(1);
- cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView);
- hidden = cs.getHiddenColumns();
++ proteinSelection.revealAllHiddenColumns();
++ // the unhidden columns are now marked selected!
++ assertEquals("[0]", proteinSelection.getSelected().toString());
++ // deselect these or hideColumns will be expanded to include 0
++ proteinSelection.clear();
++ proteinSelection.hideColumns(1);
++ dnaSelection = MappingUtils.mapColumnSelection(proteinSelection, proteinView, dnaView);
++ hidden = dnaSelection.getHiddenColumns();
+ assertEquals(1, hidden.size());
+ assertEquals("[0, 3]", Arrays.toString(hidden.get(0)));
+
+ /*
+ * Column 2 in protein picks up gaps only - no mapping
+ */
- colsel.revealAllHiddenColumns();
- colsel.clear();
- colsel.hideColumns(2);
- cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView);
- assertTrue(cs.getHiddenColumns().isEmpty());
++ proteinSelection.revealAllHiddenColumns();
++ proteinSelection.clear();
++ proteinSelection.hideColumns(2);
++ dnaSelection = MappingUtils.mapColumnSelection(proteinSelection, proteinView, dnaView);
++ assertTrue(dnaSelection.getHiddenColumns().isEmpty());
+
+ /*
+ * Column 3 in protein picks up Seq1/P, Seq2/Q, Seq3/S which map to columns
+ * 6-9, 6-10, 5-8 respectively, overall to 5-10
+ */
- colsel.revealAllHiddenColumns();
- colsel.clear();
- colsel.hideColumns(3); // 5-10 hidden in dna
- colsel.addElement(1); // 0-3 selected in dna
- cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView);
- assertEquals("[0, 1, 2, 3]", cs.getSelected().toString());
- hidden = cs.getHiddenColumns();
++ proteinSelection.revealAllHiddenColumns();
++ proteinSelection.clear();
++ proteinSelection.hideColumns(3); // 5-10 hidden in dna
++ proteinSelection.addElement(1); // 0-3 selected in dna
++ dnaSelection = MappingUtils.mapColumnSelection(proteinSelection, proteinView, dnaView);
++ assertEquals("[0, 1, 2, 3]", dnaSelection.getSelected().toString());
++ hidden = dnaSelection.getHiddenColumns();
+ assertEquals(1, hidden.size());
+ assertEquals("[5, 10]", Arrays.toString(hidden.get(0)));
+
+ /*
+ * Combine hiding columns 1 and 3 to get discontiguous hidden columns
+ */
- colsel.revealAllHiddenColumns();
- colsel.clear();
- colsel.hideColumns(1);
- colsel.hideColumns(3);
- cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView);
- hidden = cs.getHiddenColumns();
++ proteinSelection.revealAllHiddenColumns();
++ proteinSelection.clear();
++ proteinSelection.hideColumns(1);
++ proteinSelection.hideColumns(3);
++ dnaSelection = MappingUtils.mapColumnSelection(proteinSelection, proteinView, dnaView);
++ hidden = dnaSelection.getHiddenColumns();
+ assertEquals(2, hidden.size());
+ assertEquals("[0, 3]", Arrays.toString(hidden.get(0)));
+ assertEquals("[5, 10]", Arrays.toString(hidden.get(1)));
+ }
}
FeatureProperties.isCodingFeature(embl.getDbSource(),
sfs[0].getType()));
assertEquals(embl.getDbSource(), sfs[0].getFeatureGroup());
- DBRefEntry[] dr = DBRefUtils.selectRefs(seq.getDBRef(),
+ DBRefEntry[] dr = DBRefUtils.selectRefs(seq.getDBRefs(),
- DBRefSource.PROTEINSEQ);
+ new String[] { DBRefSource.UNIPROT, DBRefSource.UNIPROTKB,
+ DBRefSource.EMBLCDSProduct, DBRefSource.ENSEMBL });
assertNotNull(dr);
assertEquals("Expected a single Uniprot cross reference", 1, dr.length);
assertEquals("Expected cross reference map to be one amino acid", dr[0]