From: gmungoc Date: Tue, 14 Jun 2016 13:57:59 +0000 (+0100) Subject: JAL-2114 more helpful reporting of parse failures X-Git-Tag: Release_2_10_0~179^2 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=469f115c9d248bcae315c0eb591f357ce0f24bd0;hp=1b464022dd7d919df66738a517b3b46f8126ad98;p=jalview.git JAL-2114 more helpful reporting of parse failures --- diff --git a/src/jalview/datamodel/xdb/embl/EmblEntry.java b/src/jalview/datamodel/xdb/embl/EmblEntry.java index cfe87d9..9a07c36 100644 --- a/src/jalview/datamodel/xdb/embl/EmblEntry.java +++ b/src/jalview/datamodel/xdb/embl/EmblEntry.java @@ -21,6 +21,7 @@ package jalview.datamodel.xdb.embl; import jalview.analysis.SequenceIdMatcher; +import jalview.bin.Cache; import jalview.datamodel.DBRefEntry; import jalview.datamodel.DBRefSource; import jalview.datamodel.FeatureProperties; @@ -34,6 +35,7 @@ import jalview.util.MapList; import jalview.util.MappingUtils; import jalview.util.StringUtils; +import java.text.ParseException; import java.util.Arrays; import java.util.Hashtable; import java.util.List; @@ -564,7 +566,7 @@ public class EmblEntry } /** - * Returns the CDS positions as a list of [start, end, start, end...] + * Returns the CDS positions as a single array of [start, end, start, end...] * positions. If on the reverse strand, these will be in descending order. * * @param feature @@ -576,8 +578,18 @@ public class EmblEntry { return new int[] {}; } - List ranges = DnaUtils.parseLocation(feature.location); - return ranges == null ? new int[] {} : listToArray(ranges); + + try + { + List ranges = DnaUtils.parseLocation(feature.location); + return listToArray(ranges); + } catch (ParseException e) + { + Cache.log.warn(String.format( + "Not parsing inexact CDS location %s in ENA %s", + feature.location, this.accession)); + return new int[] {}; + } } /** diff --git a/src/jalview/util/DnaUtils.java b/src/jalview/util/DnaUtils.java index 9ab4fda..9582e2e 100644 --- a/src/jalview/util/DnaUtils.java +++ b/src/jalview/util/DnaUtils.java @@ -1,5 +1,6 @@ package jalview.util; +import java.text.ParseException; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -9,13 +10,22 @@ public class DnaUtils /** * Parses an ENA/GenBank format location specifier and returns a list of - * [start, end] ranges. Returns null if not able to parse. + * [start, end] ranges. Throws an exception if not able to parse. + *

+ * Currently we do not parse "order()" specifiers, or indeterminate ranges of + * the format "<start..end" or "start..>end" or "start.end" or + * "start^end" * * @param location * @return + * @throws ParseException + * if unable to parse the location (the exception message is the + * location specifier being parsed); we use ParseException in + * preference to the unchecked IllegalArgumentException * @see http://www.insdc.org/files/feature_table.html#3.4 */ public static List parseLocation(String location) + throws ParseException { if (location.startsWith("join(")) { @@ -25,12 +35,9 @@ public class DnaUtils { return parseComplement(location); } - String errorMessage = "Unable to process location specifier: " - + location; if (location.startsWith("order(")) { - System.err.println(errorMessage); - return null; + throw new ParseException(location, 0); } /* @@ -49,8 +56,7 @@ public class DnaUtils /* * could be a location like <1..888 or 1..>888 */ - System.err.println(errorMessage); - return null; + throw new ParseException(location, 0); } } else @@ -58,8 +64,7 @@ public class DnaUtils /* * could be a location like 102.110 or 123^124 */ - System.err.println(errorMessage); - return null; + throw new ParseException(location, 0); } } @@ -68,26 +73,20 @@ public class DnaUtils * * @param location * @return + * @throws ParseException */ - static List parseComplement(String location) + static List parseComplement(String location) throws ParseException { /* * take what is inside complement() */ if (!location.endsWith(")")) { - return null; + throw new ParseException(location, 0); } String toComplement = location.substring("complement(".length(), location.length() - 1); List ranges = parseLocation(toComplement); - if (ranges == null) - { - /* - * something bad in there - */ - return null; - } /* * reverse the order and direction of ranges @@ -107,8 +106,9 @@ public class DnaUtils * * @param location * @return + * @throws ParseException */ - static List parseJoin(String location) + static List parseJoin(String location) throws ParseException { List ranges = new ArrayList(); @@ -117,7 +117,7 @@ public class DnaUtils */ if (!location.endsWith(")")) { - return null; + throw new ParseException(location, 0); } String joinedLocs = location.substring("join(".length(), location.length() - 1); @@ -125,17 +125,7 @@ public class DnaUtils for (String loc : locations) { List range = parseLocation(loc); - if (range == null) - { - /* - * something bad in there - */ - return null; - } - else - { - ranges.addAll(range); - } + ranges.addAll(range); } return ranges; } diff --git a/test/jalview/util/DnaUtilsTest.java b/test/jalview/util/DnaUtilsTest.java index 6623c13..fbc95ad 100644 --- a/test/jalview/util/DnaUtilsTest.java +++ b/test/jalview/util/DnaUtilsTest.java @@ -4,6 +4,7 @@ import static org.testng.AssertJUnit.assertEquals; import static org.testng.AssertJUnit.assertNull; import static org.testng.AssertJUnit.fail; +import java.text.ParseException; import java.util.List; import org.testng.annotations.Test; @@ -13,10 +14,12 @@ public class DnaUtilsTest /** * Tests for parsing an ENA/GenBank location specifier * + * @throws ParseException + * * @see http://www.insdc.org/files/feature_table.html#3.4 */ @Test(groups = { "Functional" }) - public void testParseLocation() + public void testParseLocation() throws ParseException { /* * single locus @@ -89,33 +92,23 @@ public class DnaUtilsTest assertEquals(87064, ranges.get(1)[1]); /* - * beyond 5' or 3' locus - */ - ranges = DnaUtils.parseLocation("<34..126"); - assertEquals(1, ranges.size()); - assertEquals(34, ranges.get(0)[0]); - assertEquals(126, ranges.get(0)[1]); - ranges = DnaUtils.parseLocation("35..>127"); - assertEquals(1, ranges.size()); - assertEquals(35, ranges.get(0)[0]); - assertEquals(127, ranges.get(0)[1]); - - /* * valid things we don't yet handle */ - assertNull(DnaUtils.parseLocation("34.126")); - assertNull(DnaUtils.parseLocation("34^126")); - assertNull(DnaUtils.parseLocation("order(34..126,130..180)")); + checkForParseException("<34..126"); + checkForParseException("35..>126"); + checkForParseException("34.126"); + checkForParseException("34^126"); + checkForParseException("order(34..126,130..180)"); /* * invalid things */ - assertNull(DnaUtils.parseLocation("")); - assertNull(DnaUtils.parseLocation("JOIN(1..2)")); - assertNull(DnaUtils.parseLocation("join(1..2")); - assertNull(DnaUtils.parseLocation("join(1..2(")); - assertNull(DnaUtils.parseLocation("complement(1..2")); - assertNull(DnaUtils.parseLocation("complement(1..2(")); + checkForParseException(""); + checkForParseException("JOIN(1..2)"); + checkForParseException("join(1..2"); + checkForParseException("join(1..2("); + checkForParseException("complement(1..2"); + checkForParseException("complement(1..2("); try { assertNull(DnaUtils.parseLocation(null)); @@ -129,14 +122,29 @@ public class DnaUtilsTest * nested joins are not allowed; just as well since this fails to parse * (splitting tokens by comma fragments the inner join expression) */ - assertNull(DnaUtils - .parseLocation("join(1..2,join(4..5,10..12),18..22)")); + checkForParseException("join(1..2,join(4..5,10..12),18..22)"); /* * complement may not enclose multiple ranges * parsing fails for the same reason */ - assertNull(DnaUtils - .parseLocation("join(complement(36618..36700,4000..4200),86988..87064)")); + checkForParseException("join(complement(36618..36700,4000..4200),86988..87064)"); + } + + /** + * Verifies that a ParseException is thrown when the given location is parsed + * + * @param location + */ + void checkForParseException(String location) + { + try + { + DnaUtils.parseLocation(location); + fail("Expected exception"); + } catch (ParseException e) + { + // expected; + } } }