* aligned dna sequences
* @param dataset
* - throws error if not given a dataset
+ * @param products
+ * (optional, may be null) if supplied, restricts results to CDS that
+ * map to one of the specified protein products
* @return an alignment whose sequences are the cds-only parts of the dna
* sequences (or null if no mappings are found)
*/
public static AlignmentI makeCdsAlignment(SequenceI[] dna,
- AlignmentI dataset)
+ AlignmentI dataset, AlignmentI products)
{
if (dataset.getDataset() != null)
{
}
List<SequenceI> cdsSeqs = new ArrayList<SequenceI>();
List<AlignedCodonFrame> mappings = dataset.getCodonFrames();
-
+ HashSet<SequenceI> productSeqs = null;
+ if (products != null)
+ {
+ productSeqs = new HashSet<SequenceI>();
+ for (SequenceI seq : products.getSequences())
+ {
+ productSeqs.add(seq.getDatasetSequence() == null ? seq : seq
+ .getDatasetSequence());
+ }
+ }
/*
* construct CDS sequences from the (cds-to-protein) mappings made earlier;
* the dna mapping's product
*/
SequenceI cdsSeq = null;
+
// TODO better mappings collection data model so we can do
- // a table lookup instead of double loops to find mappings
+ // a direct lookup instead of double loops to find mappings
+
SequenceI proteinProduct = aMapping.getTo();
+
+ /*
+ * skip if not mapped to one of a specified set of proteins
+ */
+ if (productSeqs != null && !productSeqs.contains(proteinProduct))
+ {
+ continue;
+ }
+
for (AlignedCodonFrame acf : MappingUtils
.findMappingsForSequence(proteinProduct, mappings))
{
AlignmentI cds = new Alignment(cdsSeqs.toArray(new SequenceI[cdsSeqs
.size()]));
- cds.setDataset((Alignment) dataset);
+ cds.setDataset(dataset);
return cds;
}