+ clearGeneFeatures(gene);
+ }
+
+ /**
+ * Remove unwanted features (transcript, exon, CDS) from the gene sequence
+ * after we have used them to derive transcripts and transfer features
+ *
+ * @param gene
+ */
+ protected void clearGeneFeatures(SequenceI gene)
+ {
+ /*
+ * Note we include NMD_transcript_variant here because it behaves like
+ * 'transcript' in Ensembl, although strictly speaking it is not
+ * (it is a sub-type of sequence_variant)
+ */
+ String[] soTerms = new String[] {
+ SequenceOntologyI.NMD_TRANSCRIPT_VARIANT,
+ SequenceOntologyI.TRANSCRIPT, SequenceOntologyI.EXON,
+ SequenceOntologyI.CDS };
+ List<SequenceFeature> sfs = gene.getFeatures().getFeaturesByOntology(
+ soTerms);
+ for (SequenceFeature sf : sfs)
+ {
+ gene.deleteFeature(sf);
+ }
+ }
+
+ /**
+ * Constructs a spliced transcript sequence by finding 'exon' features for the
+ * given id (or failing that 'CDS'). Copies features on to the new sequence.
+ * 'Aligns' the new sequence against the gene sequence by padding with gaps,
+ * and adds it to the alignment.
+ *
+ * @param transcriptFeature
+ * @param al
+ * the alignment to which to add the new sequence
+ * @param gene
+ * the parent gene sequence, with features
+ * @return
+ */
+ SequenceI makeTranscript(SequenceFeature transcriptFeature, AlignmentI al,
+ SequenceI gene)
+ {
+ String accId = getTranscriptId(transcriptFeature);
+ if (accId == null)
+ {
+ return null;
+ }
+
+ /*
+ * NB we are mapping from gene sequence (not genome), so do not
+ * need to check for reverse strand (gene and transcript sequences
+ * are in forward sense)
+ */
+
+ /*
+ * make a gene-length sequence filled with gaps
+ * we will fill in the bases for transcript regions
+ */
+ char[] seqChars = new char[gene.getLength()];
+ Arrays.fill(seqChars, al.getGapCharacter());
+
+ /*
+ * look for exon features of the transcript, failing that for CDS
+ * (for example ENSG00000124610 has 1 CDS but no exon features)
+ */
+ String parentId = "transcript:" + accId;
+ List<SequenceFeature> splices = findFeatures(gene,
+ SequenceOntologyI.EXON, parentId);
+ if (splices.isEmpty())
+ {
+ splices = findFeatures(gene, SequenceOntologyI.CDS, parentId);
+ }
+ SequenceFeatures.sortFeatures(splices, true);
+
+ int transcriptLength = 0;
+ final char[] geneChars = gene.getSequence();
+ int offset = gene.getStart(); // to convert to 0-based positions
+ List<int[]> mappedFrom = new ArrayList<int[]>();