+ transcript.createDatasetSequence();
+
+ al.addSequence(transcript);
+
+ /*
+ * transfer features to the new sequence; we use EnsemblCdna to do this,
+ * to filter out unwanted feature types (see method retainFeature)
+ */
+ List<int[]> mapTo = new ArrayList<>();
+ mapTo.add(new int[] { 1, transcriptLength });
+ MapList mapping = new MapList(mappedFrom, mapTo, 1, 1);
+ EnsemblCdna cdna = new EnsemblCdna(getDomain());
+ cdna.transferFeatures(gene.getFeatures().getPositionalFeatures(),
+ transcript.getDatasetSequence(), mapping, parentId);
+
+ mapTranscriptToChromosome(transcript, gene, mapping);
+
+ /*
+ * fetch and save cross-references
+ */
+ cdna.getCrossReferences(transcript);
+
+ /*
+ * and finally fetch the protein product and save as a cross-reference
+ */
+ cdna.addProteinProduct(transcript);
+
+ return transcript;
+ }
+
+ /**
+ * Derives chromosome regions for a transcript from the gene's loci and
+ * records them on the transcript sequence. Does nothing if the gene sequence
+ * has no gene loci.
+ * <p>
+ * Each 'from' range of the supplied mapping is passed to
+ * <code>locateInTo</code> on the gene's loci map; the ranges found are paired
+ * with the transcript's start-end range in a new MapList, which is saved on
+ * the transcript together with the gene's species, assembly and chromosome
+ * ids.
+ *
+ * @param transcript
+ * the sequence on which the derived loci are set
+ * @param gene
+ * the sequence whose gene loci (if any) are used
+ * @param mapping
+ * the mapping from gene to transcript positions
+ */
+ protected void mapTranscriptToChromosome(SequenceI transcript,
+ SequenceI gene, MapList mapping)
+ {
+ GeneLociI geneLoci = gene.getGeneLoci();
+ if (geneLoci != null)
+ {
+ MapList geneToChromosome = geneLoci.getMap();
+
+ /*
+ * look up each mapped gene range in the gene's loci map
+ */
+ List<int[]> chromosomeRanges = new ArrayList<>();
+ for (int[] geneRange : mapping.getFromRanges())
+ {
+ int[] located = geneToChromosome.locateInTo(geneRange[0],
+ geneRange[1]);
+ chromosomeRanges.add(located);
+ }
+
+ /*
+ * the whole transcript (start to end) is the single 'from' range
+ */
+ int[] transcriptSpan = { transcript.getStart(), transcript.getEnd() };
+ List<int[]> transcriptRanges = Arrays.asList(transcriptSpan);
+ MapList transcriptToChromosome = new MapList(transcriptRanges,
+ chromosomeRanges, 1, 1);
+
+ transcript.setGeneLoci(geneLoci.getSpeciesId(),
+ geneLoci.getAssemblyId(), geneLoci.getChromosomeId(),
+ transcriptToChromosome);
+ }
+ }
+
+ /**
+ * Returns the value held by the sequence feature under the JSON_ID key, cast
+ * to a String.
+ * <p>
+ * NOTE(review): this was previously described as the 'transcript_id'
+ * property (or null if absent) - confirm against the value of JSON_ID and the
+ * behaviour of getValue for a missing key.
+ *
+ * @param feature
+ * the feature to query
+ * @return the feature's JSON_ID value as a String
+ */
+ protected String getTranscriptId(SequenceFeature feature)
+ {
+ Object id = feature.getValue(JSON_ID);
+ return (String) id;
+ }
+
+ /**
+ * Returns a list of the transcript features on the sequence whose Parent is
+ * the gene for the accession id.
+ * <p>
+ * Transcript features are those of type "transcript", or any of its sub-types
+ * in the Sequence Ontology e.g. "mRNA", "processed_transcript". We also
+ * include "NMD_transcript_variant", because this type behaves like a
+ * transcript identifier in Ensembl, although strictly speaking it is not in
+ * the SO.
+ *
+ * @param accId
+ * @param geneSequence
+ * @return
+ */
+ protected List<SequenceFeature> getTranscriptFeatures(String accId,
+ SequenceI geneSequence)
+ {
+ List<SequenceFeature> transcriptFeatures = new ArrayList<>();
+
+ String parentIdentifier = accId;
+
+ List<SequenceFeature> sfs = geneSequence.getFeatures()
+ .getFeaturesByOntology(SequenceOntologyI.TRANSCRIPT);
+ sfs.addAll(geneSequence.getFeatures().getPositionalFeatures(
+ SequenceOntologyI.NMD_TRANSCRIPT_VARIANT));
+
+ for (SequenceFeature sf : sfs)