public class EnsemblCdna extends EnsemblSeqProxy
{
- // TODO modify to accept other species e.g. ENSMUSPnnn
+ /*
+ * accepts ENST or ENSG with 11 digits
+ * or ENSMUST or similar for other species
+ * or CCDSnnnnn.nn with at least 3 digits
+ */
private static final Regex ACCESSION_REGEX = new Regex(
- "(ENST|ENSG|CCDS)[0-9.]{3,}$");
+ "(ENS([A-Z]{3}|)[TG][0-9]{11}$)" + "|" + "(CCDS[0-9.]{3,}$)");
/*
* fetch exon features on genomic sequence (to identify the cdna regions)
public class EnsemblProtein extends EnsemblSeqProxy
{
- // TODO modify to accept other species e.g. ENSMUSPnnn
+ /*
+ * accepts ENSP with 11 digits
+ * or ENSMUSP or similar for other species
+ * or CCDSnnnnn.nn with at least 3 digits
+ */
private static final Regex ACCESSION_REGEX = new Regex(
- "(ENSP|CCDS)[0-9.]{3,}$");
+ "(ENS([A-Z]{3}|)P[0-9]{11}$)" + "|" + "(CCDS[0-9.]{3,}$)");
private static final List<String> CROSSREFS = Arrays.asList(new String[] {
"PDB", "Uniprot/SPTREMBL", "Uniprot/SWISSPROT" });
import javax.ws.rs.HttpMethod;
+import com.stevesoft.pat.Regex;
+
/**
* Base class for Ensembl REST service clients
*
private final static long RETEST_INTERVAL = 10000L; // 10 seconds
+ private static final Regex TRANSCRIPT_REGEX = new Regex(
+ "(ENS)([A-Z]{3}|)T[0-9]{11}$");
+
+ private static final Regex GENE_REGEX = new Regex(
+ "(ENS)([A-Z]{3}|)G[0-9]{11}$");
+
private static boolean ensemblRestAvailable = false;
private static long lastCheck = -1;
protected volatile boolean inProgress = false;
+ /**
+  * Answers true if the query is not null and TRANSCRIPT_REGEX finds a match
+  * in it, i.e. "ENS", an optional three upper-case letter code (as in
+  * ENSMUST), "T" and then exactly 11 digits ending the string.
+  * 
+  * @param query
+  *          the identifier to test (may be null)
+  * @return true for a match, false for null or no match
+  */
+ public static boolean isTranscriptIdentifier(String query)
+ {
+   // short-circuit guards the regex search against a null query
+   return query != null && TRANSCRIPT_REGEX.search(query);
+ }
+
+ /**
+  * Answers true if the query is not null and GENE_REGEX finds a match in it,
+  * i.e. "ENS", an optional three upper-case letter code (as in ENSMUSG), "G"
+  * and then exactly 11 digits ending the string.
+  * 
+  * @param query
+  *          the identifier to test (may be null)
+  * @return true for a match, false for null or no match
+  */
+ public static boolean isGeneIdentifier(String query)
+ {
+   // short-circuit guards the regex search against a null query
+   return query != null && GENE_REGEX.search(query);
+ }
+
@Override
public boolean queryInProgress()
{
import java.util.List;
import java.util.Map.Entry;
-import com.stevesoft.pat.Regex;
-
/**
* Base class for Ensembl sequence fetchers
*
*/
public abstract class EnsemblSeqProxy extends EnsemblRestClient
{
- // TODO modify to accept other species e.g. ENSMUSTnnn
- private static final Regex TRANSCRIPT_REGEX = new Regex(
- "(ENST)[0-9]{11}$");
-
private static final List<String> CROSS_REFERENCES = Arrays
- .asList(new String[] { "CCDS" });
+ .asList(new String[] { "CCDS", "Uniprot/SWISSPROT" });
protected static final String CONSEQUENCE_TYPE = "consequence_type";
}
}
+ if (alignment == null)
+ {
+ return null;
+ }
+
/*
* fetch and transfer genomic sequence features,
* fetch protein product and add as cross-reference
int mappedDnaLength = getCdsRanges(dnaSeq, ranges);
int proteinLength = proteinSeq.getLength();
- List<int[]> proteinRange = new ArrayList<int[]>();
int proteinStart = 1;
/*
proteinStart = 2;
proteinLength--;
}
- proteinRange.add(new int[] { proteinStart, proteinLength });
+ List<int[]> proteinRange = new ArrayList<int[]>();
/*
* dna length should map to protein (or protein plus stop codon)
*/
int codesForResidues = mappedDnaLength / 3;
- if (codesForResidues == proteinLength
- || codesForResidues == (proteinLength + 1))
+ if (codesForResidues == (proteinLength + 1))
+ {
+ MappingUtils.unmapStopCodon(ranges, mappedDnaLength);
+ codesForResidues--;
+ }
+ if (codesForResidues == proteinLength)
{
+ proteinRange.add(new int[] { proteinStart, proteinLength });
return new MapList(ranges, proteinRange, 3, 1);
}
return null;
{
return 0;
}
+ SequenceOntologyI so = SequenceOntologyFactory.getInstance();
int mappedDnaLength = 0;
for (SequenceFeature sf : sfs)
{
/*
* process a CDS feature (or a sub-type of CDS)
*/
- if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
- SequenceOntologyI.CDS))
+ if (so.isA(sf.getType(), SequenceOntologyI.CDS))
{
int phase = 0;
try {
*/
int begin = sf.getBegin();
int end = sf.getEnd();
- if (ranges.isEmpty() && phase > 0)
+ if (ranges.isEmpty())
{
begin += phase;
if (begin > end)
|| SequenceOntologyFactory.getInstance().isA(featureType,
SequenceOntologyI.TRANSCRIPT);
}
-
- public static boolean isTranscriptIdentifier(String query)
- {
- return query == null ? false : TRANSCRIPT_REGEX.search(query);
- }
}
*/
abstract class EnsemblSequenceFetcher extends DbSourceProxyImpl
{
- // TODO modify to accept other species e.g. ENSMUSTnnn
+ /*
+ * accepts ENSG/T/E/P with 11 digits
+ * or ENSMUSP or similar for other species
+ * or CCDSnnnnn.nn with at least 3 digits
+ */
private static final Regex ACCESSION_REGEX = new Regex(
- "(ENSP|ENST|ENSG|CCDS)[0-9.]{3,}$");
+ "(ENS([A-Z]{3}|)[GTEP]{1}[0-9]{11}$)" + "|" + "(CCDS[0-9.]{3,}$)");
/*
* possible values for the 'feature' parameter of the /overlap REST service
* service
*
* @author gmcarstairs
- *
+ * @see <a href="http://rest.ensembl.org/documentation/info/xref_id">http://rest.ensembl.org/documentation/info/xref_id</a>
*/
class EnsemblXref extends EnsemblRestClient
{
@Override
protected URL getUrl(List<String> ids) throws MalformedURLException
{
- // TODO Auto-generated method stub
- return null;
+ /*
+  * fail with the declared checked exception, rather than an unchecked
+  * NullPointerException / IndexOutOfBoundsException, if no id is supplied
+  */
+ if (ids == null || ids.isEmpty())
+ {
+   throw new MalformedURLException(
+           "No identifier supplied to build the xref query URL");
+ }
+
+ /*
+  * only the first identifier is used; any further ids in the list are ignored
+  */
+ return getUrl(ids.get(0));
}
@Override
import java.util.List;
+import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
sf.setType("CDS");
assertFalse(testee.identifiesSequence(sf, accId));
}
+
+ @Test(groups = "Functional")
+ public void testIsValidReference() throws Exception
+ {
+ EnsemblSequenceFetcher esq = new EnsemblCdna();
+ Assert.assertTrue(esq.isValidReference("CCDS5863.1"));
+ Assert.assertTrue(esq.isValidReference("ENST00000288602"));
+ Assert.assertTrue(esq.isValidReference("ENSG00000288602"));
+ Assert.assertFalse(esq.isValidReference("ENSP00000288602"));
+ Assert.assertFalse(esq.isValidReference("ENST0000288602"));
+ // non-human species having a 3 character identifier included:
+ Assert.assertTrue(esq.isValidReference("ENSMUSG00000099398"));
+ }
}
import java.util.List;
+import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
assertFalse(testee.identifiesSequence(sf, accId));
}
+ @Test(groups = "Functional")
+ public void testIsValidReference() throws Exception
+ {
+ EnsemblSequenceFetcher esq = new EnsemblCds();
+ Assert.assertTrue(esq.isValidReference("CCDS5863.1"));
+ Assert.assertTrue(esq.isValidReference("ENST00000288602"));
+ Assert.assertTrue(esq.isValidReference("ENSG00000288602"));
+ Assert.assertTrue(esq.isValidReference("ENSP00000288602"));
+ Assert.assertFalse(esq.isValidReference("ENST0000288602"));
+ // non-human species have a 3 character identifier included:
+ Assert.assertTrue(esq.isValidReference("ENSMUSG00000099398"));
+ }
+
}
Assert.assertTrue(esq.isValidReference("ENSP00000288602"));
Assert.assertFalse(esq.isValidReference("ENST00000288602"));
Assert.assertFalse(esq.isValidReference("ENSG00000288602"));
+ // non-human species having a 3 character identifier included:
+ Assert.assertTrue(esq.isValidReference("ENSMUSP00000099398"));
}
@Test(groups = "Functional")
assertFalse(EnsemblSeqProxy.isTranscriptIdentifier(""));
assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("ENSG00000012345"));
assertTrue(EnsemblSeqProxy.isTranscriptIdentifier("ENST00000012345"));
+ assertTrue(EnsemblSeqProxy.isTranscriptIdentifier("ENSMUST00000012345"));
assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("enst00000012345"));
assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("ENST000000123456"));
assertFalse(EnsemblSeqProxy.isTranscriptIdentifier("ENST0000001234"));
}
+
+ @Test(groups = "Functional")
+ public void testIsGeneIdentifier()
+ {
+ assertFalse(EnsemblSeqProxy.isGeneIdentifier(null));
+ assertFalse(EnsemblSeqProxy.isGeneIdentifier(""));
+ assertFalse(EnsemblSeqProxy.isGeneIdentifier("ENST00000012345"));
+ assertTrue(EnsemblSeqProxy.isGeneIdentifier("ENSG00000012345"));
+ assertTrue(EnsemblSeqProxy.isGeneIdentifier("ENSMUSG00000012345"));
+ assertFalse(EnsemblSeqProxy.isGeneIdentifier("ensg00000012345"));
+ assertFalse(EnsemblSeqProxy.isGeneIdentifier("ENSG000000123456"));
+ assertFalse(EnsemblSeqProxy.isGeneIdentifier("ENSG0000001234"));
+ }
}
\ No newline at end of file