import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
+
import jalview.analysis.SequenceIdMatcher;
-import jalview.bin.Cache;
+import jalview.bin.Console;
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefEntry;
import jalview.util.MapList;
import jalview.util.MappingUtils;
import jalview.util.MessageManager;
+import jalview.util.Platform;
import jalview.ws.ebi.EBIFetchClient;
import jalview.xml.binding.embl.EntryType;
import jalview.xml.binding.embl.EntryType.Feature;
import jalview.xml.binding.embl.ROOT;
import jalview.xml.binding.embl.XrefType;
+import com.stevesoft.pat.Regex;
+
/**
* Provides XML binding and parsing of EMBL or EMBLCDS records retrieved from
* (e.g.) {@code https://www.ebi.ac.uk/ena/data/view/x53828&display=xml}.
*
* @deprecated endpoint withdrawn August 2020 (JAL-3692), use EmblFlatfileSource
*/
-
+@Deprecated
public abstract class EmblXmlSource extends EbiFileRetrievedProxy
{
// TODO: delete class or update this validator for 2.12 style Platform.regex
- private static final Regex ACCESSION_REGEX = new Regex("^[A-Z]+[0-9]+");
+ private static final Regex ACCESSION_REGEX = Platform.newRegex("^[A-Z]+[0-9]+");
+
/*
* JAL-1856 Embl returns this text for query not found
*/
{
return new int[] {};
}
+
try
{
List<int[]> ranges = DnaUtils.parseLocation(location);
return listToArray(ranges);
} catch (ParseException e)
{
- Cache.log.warn(
+ Console.warn(
String.format("Not parsing inexact CDS location %s in ENA %s",
location, accession));
return new int[] {};
}
return getAccessionValidator().search(accession);
}
+
/**
* Truncates (if necessary) the exon intervals to match 3 times the length of
- * the protein; also accepts 3 bases longer (for stop codon not included in
- * protein)
+ * the protein (including truncation for stop codon included in exon)
*
* @param proteinLength
* @param exon
}
int expectedCdsLength = proteinLength * 3;
int exonLength = MappingUtils.getLength(Arrays.asList(exon));
+
/*
- * if exon length matches protein, or is shorter, or longer by the
- * length of a stop codon (3 bases), then leave it unchanged
+ * if exon length matches protein, or is shorter, then leave it unchanged
*/
- if (expectedCdsLength >= exonLength
- || expectedCdsLength == exonLength - 3)
+ if (expectedCdsLength >= exonLength)
{
return exon;
}
+
int origxon[];
int sxpos = -1;
int endxon = 0;
// .println("Truncating final exon interval on region by "
// + (cdspos - cdslength));
}
+
/*
* shrink the final exon - reduce end position if forward
* strand, increase it if reverse
break;
}
}
+
if (sxpos != -1)
{
// and trim the exon interval set if necessary