X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FCrossRef.java;h=4ba7e416e7d115d183d68ce50878e21f1377b892;hb=aba253e57b22ce7d1f4fe376935e42aeb4f6d591;hp=71b0aa0b9adb5cd92d39b5f7908fa3dcc349e4ed;hpb=655b78299307682a4c7a6e5af0ed4618cbc9c924;p=jalview.git diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java index 71b0aa0..4ba7e41 100644 --- a/src/jalview/analysis/CrossRef.java +++ b/src/jalview/analysis/CrossRef.java @@ -24,6 +24,7 @@ import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; +import jalview.datamodel.DBRefSource; import jalview.datamodel.Mapping; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceFeature; @@ -106,6 +107,16 @@ public class CrossRef findXrefSourcesForSequence(seq, dna, sources); } } + sources.remove(DBRefSource.EMBL); // hack to prevent EMBL xrefs resulting in + // redundant datasets + if (dna) + { + sources.remove(DBRefSource.ENSEMBL); // hack to prevent Ensembl and + // EnsemblGenomes xref option shown + // from cdna panel + sources.remove(DBRefSource.ENSEMBLGENOMES); + } + // redundant datasets return sources; } @@ -209,8 +220,7 @@ public class CrossRef rseqs = new ArrayList(); AlignedCodonFrame cf = new AlignedCodonFrame(); - matcher = new SequenceIdMatcher( - dataset.getSequences()); + matcher = new SequenceIdMatcher(dataset.getSequences()); for (SequenceI seq : fromSeqs) { @@ -422,13 +432,16 @@ public class CrossRef if (retrieved != null) { boolean addedXref = false; + List newDsSeqs = new ArrayList(), doNotAdd = new ArrayList(); + for (SequenceI retrievedSequence : retrieved) { // dataset gets contaminated ccwith non-ds sequences. why ??! // try: Ensembl -> Nuc->Ensembl, Nuc->Uniprot-->Protein->EMBL-> SequenceI retrievedDss = retrievedSequence.getDatasetSequence() == null ? retrievedSequence : retrievedSequence.getDatasetSequence(); - addedXref |= importCrossRefSeq(cf, dss, retrievedDss); + addedXref |= importCrossRefSeq(cf, newDsSeqs, doNotAdd, dss, + retrievedDss); } if (!addedXref) { @@ -441,7 +454,17 @@ public class CrossRef // try: Ensembl -> Nuc->Ensembl, Nuc->Uniprot-->Protein->EMBL-> SequenceI retrievedDss = retrievedSequence.getDatasetSequence() == null ? retrievedSequence : retrievedSequence.getDatasetSequence(); - addedXref |= importCrossRefSeq(cf, dss, retrievedDss); + addedXref |= importCrossRefSeq(cf, newDsSeqs, doNotAdd, dss, + retrievedDss); + } + } + for (SequenceI newToSeq : newDsSeqs) + { + if (!doNotAdd.contains(newToSeq) + && dataset.findIndex(newToSeq) == -1) + { + dataset.addSequence(newToSeq); + matcher.add(newToSeq); } } } @@ -494,6 +517,7 @@ public class CrossRef * @return true if retrieveSequence was imported */ private boolean importCrossRefSeq(AlignedCodonFrame cf, + List newDsSeqs, List doNotAdd, SequenceI sourceSequence, SequenceI retrievedSequence) { /** @@ -502,7 +526,6 @@ public class CrossRef */ boolean imported = false; DBRefEntry[] dbr = retrievedSequence.getDBRefs(); - List newDsSeqs = new ArrayList(); if (dbr != null) { for (DBRefEntry dbref : dbr) @@ -574,9 +597,15 @@ public class CrossRef */ for (DBRefEntry ref : toRefs) { + if (dbref.getSrcAccString().equals( + ref.getSrcAccString())) + { + continue; // avoid overwriting the ref on source sequence + } matched.addDBRef(ref); // add or update mapping } } + doNotAdd.add(map.getTo()); map.setTo(matched); /* @@ -600,8 +629,7 @@ public class CrossRef * attribute in equality test; this avoids creating many * otherwise duplicate exon features on genomic sequence */ - SequenceFeature newFeature = new SequenceFeature( - feat) + SequenceFeature newFeature = new SequenceFeature(feat) { @Override public boolean equals(Object o) @@ -634,18 +662,10 @@ public class CrossRef dataset.addSequence(retrievedSequence); matcher.add(retrievedSequence); } - for (SequenceI newToSeq : newDsSeqs) - { - - if (dataset.findIndex(newToSeq) == -1) - { - dataset.addSequence(newToSeq); - matcher.add(newToSeq); - } - } } return imported; } + /** * Sets the inverse sequence mapping in the corresponding dbref of the mapped * to sequence (if any). This is used after fetching a cross-referenced @@ -845,8 +865,8 @@ public class CrossRef MapList mapping = null; SequenceI dsmapFrom = mapFrom.getDatasetSequence() == null ? mapFrom : mapFrom.getDatasetSequence(); - SequenceI dsmapTo = mapTo.getDatasetSequence() == null ? mapTo - : mapTo.getDatasetSequence(); + SequenceI dsmapTo = mapTo.getDatasetSequence() == null ? mapTo : mapTo + .getDatasetSequence(); /* * look for a reverse mapping, if found make its inverse. * Note - we do this on dataset sequences only.