X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fext%2Fensembl%2FEnsemblSeqProxy.java;h=b2ebb1ac5dc4db1c833c16def46b67431cc3de16;hb=41bb6b94cc3cba97055e0ec02ac42c52f1483700;hp=dda77d7b82cfc06fe2e7b1171153df7372ff9a68;hpb=85a66ca345b3831b9925bc421084db9c2af3054b;p=jalview.git diff --git a/src/jalview/ext/ensembl/EnsemblSeqProxy.java b/src/jalview/ext/ensembl/EnsemblSeqProxy.java index dda77d7..b2ebb1a 100644 --- a/src/jalview/ext/ensembl/EnsemblSeqProxy.java +++ b/src/jalview/ext/ensembl/EnsemblSeqProxy.java @@ -59,10 +59,6 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient { private static final String ALLELES = "alleles"; - protected static final String PARENT = "Parent"; - - protected static final String ID = "ID"; - protected static final String NAME = "Name"; protected static final String DESCRIPTION = "description"; @@ -137,8 +133,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient // danger: accession separator used as a regex here, a string elsewhere // in this case it is ok (it is just a space), but (e.g.) '\' would not be - List allIds = Arrays.asList(query - .split(getAccessionSeparator())); + List allIds = Arrays + .asList(query.split(getAccessionSeparator())); AlignmentI alignment = null; inProgress = true; @@ -207,7 +203,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient try { /* - * get 'dummy' genomic sequence with exon, cds and variation features + * get 'dummy' genomic sequence with gene, transcript, + * exon, cds and variation features */ SequenceI genomicSequence = null; EnsemblFeatures gffFetcher = new EnsemblFeatures(getDomain()); @@ -223,7 +220,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient /* * transfer features to the query sequence */ - SequenceI querySeq = alignment.findName(accId); + SequenceI querySeq = alignment.findName(accId, true); if (transferFeatures(accId, genomicSequence, querySeq)) { @@ -236,8 +233,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient } } catch (IOException e) { - System.err.println("Error transferring Ensembl features: " - + e.getMessage()); + System.err.println( + "Error transferring Ensembl features: " + e.getMessage()); } } @@ -275,8 +272,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient proteinSeq.createDatasetSequence(); querySeq.createDatasetSequence(); - MapList mapList = AlignmentUtils - .mapCdsToProtein(querySeq, proteinSeq); + MapList mapList = AlignmentUtils.mapCdsToProtein(querySeq, + proteinSeq); if (mapList != null) { // clunky: ensure Uniprot xref if we have one is on mapped sequence @@ -287,9 +284,11 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient getEnsemblDataVersion(), proteinSeq.getName(), map); querySeq.getDatasetSequence().addDBRef(dbr); DBRefEntry[] uprots = DBRefUtils.selectRefs(ds.getDBRefs(), - new String[] { DBRefSource.UNIPROT }); + new String[] + { DBRefSource.UNIPROT }); DBRefEntry[] upxrefs = DBRefUtils.selectRefs(querySeq.getDBRefs(), - new String[] { DBRefSource.UNIPROT }); + new String[] + { DBRefSource.UNIPROT }); if (uprots != null) { for (DBRefEntry up : uprots) @@ -304,8 +303,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient if (upx.size() > 1) { - Cache.log - .warn("Implementation issue - multiple uniprot acc on product sequence."); + Cache.log.warn( + "Implementation issue - multiple uniprot acc on product sequence."); } } else @@ -330,8 +329,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient * copy exon features to protein, compute peptide variants from dna * variants and add as features on the protein sequence ta-da */ - AlignmentUtils - .computeProteinFeatures(querySeq, proteinSeq, mapList); + AlignmentUtils.computeProteinFeatures(querySeq, proteinSeq, + mapList); } } catch (Exception e) { @@ -364,8 +363,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient /* * and add a reference to itself */ - DBRefEntry self = new DBRefEntry(getDbSource(), - getEnsemblDataVersion(), seq.getName()); + DBRefEntry self = new DBRefEntry(getDbSource(), getEnsemblDataVersion(), + seq.getName()); seq.addDBRef(self); } @@ -379,8 +378,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient * @throws JalviewException * @throws IOException */ - protected AlignmentI fetchSequences(List ids, AlignmentI alignment) - throws JalviewException, IOException + protected AlignmentI fetchSequences(List ids, + AlignmentI alignment) throws JalviewException, IOException { if (!isEnsemblAvailable()) { @@ -396,15 +395,15 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient FastaFile fr = new FastaFile(fp); if (fr.hasWarningMessage()) { - System.out.println(String.format( - "Warning when retrieving %d ids %s\n%s", ids.size(), - ids.toString(), fr.getWarningMessage())); + System.out.println( + String.format("Warning when retrieving %d ids %s\n%s", + ids.size(), ids.toString(), fr.getWarningMessage())); } else if (fr.getSeqs().size() != ids.size()) { System.out.println(String.format( - "Only retrieved %d sequences for %d query strings", fr - .getSeqs().size(), ids.size())); + "Only retrieved %d sequences for %d query strings", + fr.getSeqs().size(), ids.size())); } if (fr.getSeqs().size() == 1 && fr.getSeqs().get(0).getLength() == 0) @@ -468,11 +467,28 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient urlstring.append("?type=").append(getSourceEnsemblType().getType()); urlstring.append(("&Accept=text/x-fasta")); + String objectType = getObjectType(); + if (objectType != null) + { + urlstring.append("&").append(OBJECT_TYPE).append("=") + .append(objectType); + } + URL url = new URL(urlstring.toString()); return url; } /** + * Override this method to specify object_type request parameter + * + * @return + */ + protected String getObjectType() + { + return null; + } + + /** * A sequence/id POST request currently allows up to 50 queries * * @see http://rest.ensembl.org/documentation/info/sequence_id_post @@ -536,7 +552,6 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient protected MapList getGenomicRangesFromFeatures(SequenceI sourceSequence, String accId, int start) { - // SequenceFeature[] sfs = sourceSequence.getSequenceFeatures(); List sfs = sourceSequence.getFeatures() .getPositionalFeatures(); if (sfs.isEmpty()) @@ -548,7 +563,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient * generously initial size for number of cds regions * (worst case titin Q8WZ42 has c. 313 exons) */ - List regions = new ArrayList(100); + List regions = new ArrayList<>(100); int mappedLength = 0; int direction = 1; // forward boolean directionSet = false; @@ -567,8 +582,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient if (directionSet && strand != direction) { // abort - mix of forward and backward - System.err.println("Error: forward and backward strand for " - + accId); + System.err.println( + "Error: forward and backward strand for " + accId); return null; } direction = strand; @@ -612,8 +627,9 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient Collections.sort(regions, direction == 1 ? IntRangeComparator.ASCENDING : IntRangeComparator.DESCENDING); - List to = Arrays.asList(new int[] { start, - start + mappedLength - 1 }); + List to = Arrays + .asList(new int[] + { start, start + mappedLength - 1 }); return new MapList(regions, to, 1, 1); } @@ -669,16 +685,15 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient int newBegin = Math.min(mappedRange[0], mappedRange[1]); int newEnd = Math.max(mappedRange[0], mappedRange[1]); SequenceFeature copy = new SequenceFeature(sf, newBegin, newEnd, - group); + group, sf.getScore()); targetSequence.addSequenceFeature(copy); /* * for sequence_variant on reverse strand, have to convert the allele * values to their complements */ - if (!forwardStrand - && SequenceOntologyFactory.getInstance().isA(sf.getType(), - SequenceOntologyI.SEQUENCE_VARIANT)) + if (!forwardStrand && SequenceOntologyFactory.getInstance() + .isA(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT)) { reverseComplementAlleles(copy); } @@ -768,8 +783,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient return false; } - long start = System.currentTimeMillis(); - // SequenceFeature[] sfs = sourceSequence.getSequenceFeatures(); +// long start = System.currentTimeMillis(); List sfs = sourceSequence.getFeatures() .getPositionalFeatures(); MapList mapping = getGenomicRangesFromFeatures(sourceSequence, @@ -781,10 +795,10 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient boolean result = transferFeatures(sfs, targetSequence, mapping, accessionId); - System.out.println("transferFeatures (" + (sfs.size()) + " --> " - + targetSequence.getFeatures().getFeatureCount(true) + ") to " - + targetSequence.getName() + " took " - + (System.currentTimeMillis() - start) + "ms"); +// System.out.println("transferFeatures (" + (sfs.size()) + " --> " +// + targetSequence.getFeatures().getFeatureCount(true) + ") to " +// + targetSequence.getName() + " took " +// + (System.currentTimeMillis() - start) + "ms"); return result; } @@ -848,7 +862,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient { String parent = (String) sf.getValue(PARENT); // using contains to allow for prefix "gene:", "transcript:" etc - if (parent != null && !parent.contains(identifier)) + if (parent != null + && !parent.toUpperCase().contains(identifier.toUpperCase())) { // this genomic feature belongs to a different transcript return false; @@ -876,14 +891,14 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient protected List findFeatures(SequenceI sequence, String term, String parentId) { - List result = new ArrayList(); + List result = new ArrayList<>(); List sfs = sequence.getFeatures() .getFeaturesByOntology(term); for (SequenceFeature sf : sfs) { String parent = (String) sf.getValue(PARENT); - if (parent != null && parent.equals(parentId)) + if (parent != null && parent.equalsIgnoreCase(parentId)) { result.add(sf); }