return (forwards ? 1 : -1) * Integer.compare(o1[0], o2[0]);
}
- };
+ }
+
+ /*
+ * genomic sequence with features; either retrieved from the REST overlap
+ * service, or supplied by the caller of getSequenceRecords(String, SequenceI)
+ */
+ private SequenceI genomicSequence;
/**
* Constructor
/*
* get 'dummy' genomic sequence with exon, cds and variation features
*/
- EnsemblOverlap gffFetcher = new EnsemblOverlap();
- EnsemblFeatureType[] features = getFeaturesToFetch();
- AlignmentI geneFeatures = gffFetcher.getSequenceRecords(accId,
- features);
- if (geneFeatures.getHeight() > 0)
+ if (genomicSequence == null)
+ {
+ EnsemblOverlap gffFetcher = new EnsemblOverlap();
+ EnsemblFeatureType[] features = getFeaturesToFetch();
+ AlignmentI geneFeatures = gffFetcher.getSequenceRecords(accId,
+ features);
+ if (geneFeatures.getHeight() > 0)
+ {
+ /*
+ * keep the first retrieved sequence, which holds the genomic features,
+ * so that it can be reused without fetching again
+ */
+ genomicSequence = geneFeatures.getSequenceAt(0);
+ }
+ }
+ if (genomicSequence != null)
{
- /*
- * transfer features to the query sequence
- */
- SequenceI genomicSequence = geneFeatures.getSequenceAt(0);
SequenceI querySeq = alignment.findName(accId);
- transferFeatures(accId, genomicSequence, querySeq);
+ if (transferFeatures(accId, genomicSequence, querySeq))
+ {
- /*
- * fetch and map protein product, and add it as a cross-reference
- * of the retrieved sequence
- */
- addProteinProduct(querySeq);
+ /*
+ * fetch and map protein product, and add it as a cross-reference
+ * of the retrieved sequence
+ */
+ addProteinProduct(querySeq);
+ }
}
} catch (IOException e)
{
}
@Override
- public boolean useGetRequest()
+ protected boolean useGetRequest()
{
return false;
}
@Override
- public String getRequestMimeType()
+ protected String getRequestMimeType()
{
return "application/json";
}
@Override
- public String getResponseMimeType()
+ protected String getResponseMimeType()
{
return "text/x-fasta";
}
* backwards (for negative strand). Aborts and returns null if both positive
* and negative strand are found (this should not normally happen).
*
- * @param sfs
+ * @param sourceSequence
* @param accId
+ * @param start
+ * the start position of the sequence we are mapping to
* @return
*/
- protected MapList getGenomicRanges(SequenceFeature[] sfs, String accId)
+ protected MapList getGenomicRanges(SequenceI sourceSequence,
+ String accId, int start)
{
+ SequenceFeature[] sfs = sourceSequence.getSequenceFeatures();
+ if (sfs == null)
+ {
+ return null;
+ }
+
/*
* generously size for initial number of cds regions
* (worst case titin Q8WZ42 has c. 313 exons)
*/
List<int[]> regions = new ArrayList<int[]>(100);
+ int sourceLength = sourceSequence.getLength();
int mappedLength = 0;
int direction = 1; // forward
boolean directionSet = false;
}
else
{
- regions.add(new int[] { sf.getBegin(), sf.getEnd() });
- }
- mappedLength += Math.abs(sf.getEnd() - sf.getBegin() + 1);
+ regions.add(new int[] { sf.getBegin(), sf.getEnd() });
}
+ mappedLength += Math.abs(sf.getEnd() - sf.getBegin() + 1);
+
+ if (mappedLength >= sourceLength)
+ {
+ /*
+ * stop once the mapped length reaches the source sequence length, as
+ * happens when matching gene features to a gene sequence - only the
+ * 'gene' feature for accId needs to be located
+ */
+ break;
+ }
+ }
}
+ if (regions.isEmpty())
+ {
+ System.out.println("Failed to identify target sequence for " + accId
+ + " from genomic features");
+ return null;
+ }
+
/*
* a final sort is needed since Ensembl returns CDS sorted within source
* (havana / ensembl_havana)
Collections.sort(regions, new RangeSorter(direction == 1));
List<int[]> to = new ArrayList<int[]>();
- to.add(new int[] { 1, mappedLength });
+ to.add(new int[] { start, start + mappedLength - 1 });
return new MapList(regions, to, 1, 1);
}
/**
- * Returns true if the sequence feature identifies positions of the genomic
- * sequence feature which are within the sequence being retrieved.
+ * Returns true if the sequence feature marks positions of the genomic
+ * sequence feature which are within the sequence being retrieved. For
+ * example, an 'exon' feature whose parent is the target transcript marks the
+ * cdna positions of the transcript.
*
* @param sf
* @param accId
protected void transferFeature(SequenceFeature sf,
SequenceI targetSequence, MapList overlap)
{
- String parent = (String) sf.getValue(PARENT);
- if (parent != null && !parent.contains(targetSequence.getName()))
- {
- // this genomic feature belongs to a different transcript
- return;
- }
-
int start = sf.getBegin();
int end = sf.getEnd();
int[] mappedRange = overlap.locateInTo(start, end);
if (mappedRange != null)
{
SequenceFeature copy = new SequenceFeature(sf);
- int offset = targetSequence.getStart() - 1;
- copy.setBegin(offset + Math.min(mappedRange[0], mappedRange[1]));
- copy.setEnd(offset + Math.max(mappedRange[0], mappedRange[1]));
+ copy.setBegin(Math.min(mappedRange[0], mappedRange[1]));
+ copy.setEnd(Math.max(mappedRange[0], mappedRange[1]));
targetSequence.addSequenceFeature(copy);
/*
* @param accessionId
* @param sourceSequence
* @param targetSequence
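+ * @return true if any feature passed retainFeature (whether or not it
+ *         could then be located in the target), else false; also false
+ *         if either sequence is null or no genomic ranges were found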
*/
- protected void transferFeatures(String accessionId,
+ protected boolean transferFeatures(String accessionId,
SequenceI sourceSequence, SequenceI targetSequence)
{
if (sourceSequence == null || targetSequence == null)
{
- return;
+ return false;
}
SequenceFeature[] sfs = sourceSequence.getSequenceFeatures();
- MapList overlap = getGenomicRanges(sfs, accessionId);
+ MapList overlap = getGenomicRanges(sourceSequence, accessionId,
+ targetSequence.getStart());
+ if (overlap == null)
+ {
+ return false;
+ }
final boolean forwardStrand = overlap.isFromForwardStrand();
}
});
+ boolean transferred = false;
for (SequenceFeature sf : sfs)
{
- if (retainFeature(sf.getType()))
+ if (retainFeature(sf, accessionId))
{
transferFeature(sf, targetSequence, overlap);
+ transferred = true;
}
}
+ return transferred;
}
/**
- * Answers true if the feature type is one to attach to the retrieved sequence
+ * Answers true if the feature is one to attach to the retrieved sequence,
+ * that is, if it has no PARENT value or its PARENT value contains the
+ * accession id
*
* @param type
* @return
*/
- protected boolean retainFeature(@SuppressWarnings("unused") String type)
+ protected boolean retainFeature(SequenceFeature sf, String accessionId)
+ {
+ String parent = (String) sf.getValue(PARENT);
+ if (parent != null && !parent.contains(accessionId))
+ {
+ // this genomic feature belongs to a different transcript
+ return false;
+ }
+ return true;
+ }
+
+ @Override
+ public String getDescription()
+ {
+ return "Ensembl " + getSourceEnsemblType().getType()
+ + " sequence with variant features";
+ }
+
+ public AlignmentI getSequenceRecords(String transcriptId,
+ SequenceI geneSeq) throws Exception
{
- return true; // default is to keep all
+ this.genomicSequence = geneSeq;
+ return getSequenceRecords(transcriptId);
}
}