* Constructs an alignment consisting of the mapped (CDS) regions in the given
* nucleotide sequences, and updates mappings to match. The CDS sequences are
* added to the original alignment's dataset, which is shared by the new
- * alignment.
+ * alignment. Mappings from nucleotide to CDS, and from CDS to protein, are
+ * added to the alignment dataset.
*
* @param dna
* aligned dna sequences
* @param mappings
- * from dna to protein; these are replaced with new mappings
+ * from dna to protein
* @param al
* @return an alignment whose sequences are the cds-only parts of the dna
* sequences (or null if no mappings are found)
Map<Integer, Map<SequenceI, Character>> map = new TreeMap<Integer, Map<SequenceI, Character>>();
/*
- * report any sequences that have no mapping so can't be realigned
+ * report any sequences that have no mapping so can't be realigned
*/
unmapped.addAll(unaligned.getSequences());
SequenceI fromSeq = mapping.findAlignedSequence(seq, aligned);
if (fromSeq != null)
{
- Mapping seqMap = mapping.getMappingForSequence(seq);
+ Mapping seqMap = mapping.getMappingBetween(fromSeq, seq);
if (addMappedPositions(seq, fromSeq, seqMap, map))
{
unmapped.remove(seq);
static boolean addMappedPositions(SequenceI seq, SequenceI fromSeq,
Mapping seqMap, Map<Integer, Map<SequenceI, Character>> map)
{
+ if (seqMap == null)
+ {
+ return false;
+ }
+
char[] fromChars = fromSeq.getSequence();
int toStart = seq.getStart();
char[] toChars = seq.getSequence();
}
return true;
}
+
+ /**
+ * Answers true if every sequence in the given alignment has a name that
+ * starts with "ENSG" or "ENST", and false as soon as one sequence is found
+ * whose name starts with neither. An alignment with no sequences answers
+ * true, because the loop finds no counter-example.
+ * <p>
+ * Strictly temporary hack until proper criteria for aligning protein to cds
+ * are in place; it exists for the Ensembl {@literal ->} fetch xrefs Uniprot
+ * case. NOTE(review): the original comment was cut off mid-sentence
+ * ("...aligns the Uniprot") - confirm the intended wording.
+ *
+ * @param alignment
+ *          the alignment whose sequence names are inspected
+ * @return true if all sequence names start with "ENSG" or "ENST" (presumably
+ *         Ensembl gene / transcript identifier prefixes - confirm), else
+ *         false
+ */
+ public static boolean looksLikeEnsembl(AlignmentI alignment)
+ {
+ for (SequenceI seq : alignment.getSequences())
+ {
+ // NOTE(review): assumes getName() never returns null - confirm
+ String name = seq.getName();
+ // reject on the first name carrying neither prefix
+ if (!name.startsWith("ENSG") && !name.startsWith("ENST"))
+ {
+ return false;
+ }
+ }
+ // no sequence failed the prefix test (also reached when there are none)
+ return true;
+ }
}