JAL-3949 - refactor logging from jalview.bin.Cache to jalview.bin.Console
[jalview.git] / src / jalview / ws / dbsources / EmblXmlSource.java
index e114ea9..034ea4f 100644 (file)
  */
 package jalview.ws.dbsources;
 
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.text.ParseException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Hashtable;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import javax.xml.bind.JAXBContext;
+import javax.xml.bind.JAXBElement;
+import javax.xml.bind.JAXBException;
+import javax.xml.stream.FactoryConfigurationError;
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.XMLStreamReader;
+
+import com.stevesoft.pat.Regex;
+
 import jalview.analysis.SequenceIdMatcher;
-import jalview.bin.Cache;
+import jalview.bin.Console;
 import jalview.datamodel.Alignment;
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.DBRefEntry;
@@ -35,41 +57,22 @@ import jalview.util.DBRefUtils;
 import jalview.util.DnaUtils;
 import jalview.util.MapList;
 import jalview.util.MappingUtils;
-import jalview.util.MessageManager;
 import jalview.ws.ebi.EBIFetchClient;
 import jalview.xml.binding.embl.EntryType;
 import jalview.xml.binding.embl.EntryType.Feature;
 import jalview.xml.binding.embl.EntryType.Feature.Qualifier;
+import jalview.xml.binding.embl.ROOT;
 import jalview.xml.binding.embl.XrefType;
 
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.InputStream;
-import java.text.ParseException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Hashtable;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.regex.Pattern;
-
-import javax.xml.bind.JAXBContext;
-import javax.xml.bind.JAXBException;
-import javax.xml.stream.FactoryConfigurationError;
-import javax.xml.stream.XMLInputFactory;
-import javax.xml.stream.XMLStreamException;
-import javax.xml.stream.XMLStreamReader;
-
 public abstract class EmblXmlSource extends EbiFileRetrievedProxy
 {
+  private static final Regex ACCESSION_REGEX = new Regex("^[A-Z]+[0-9]+");
+
   /*
    * JAL-1856 Embl returns this text for query not found
    */
   private static final String EMBL_NOT_FOUND_REPLY = "ERROR 12 No entries found.";
 
-  private static final Pattern SPACE_PATTERN = Pattern.compile(" ");
-
   public EmblXmlSource()
   {
     super();
@@ -94,14 +97,15 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy
     try
     {
       reply = dbFetch.fetchDataAsFile(
-              emprefx.toLowerCase() + ":" + query.trim(), "display=xml",
-              "xml");
+              emprefx.toLowerCase(Locale.ROOT) + ":" + query.trim(),
+              "display=xml", "xml");
     } catch (Exception e)
     {
       stopQuery();
-      throw new Exception(MessageManager.formatMessage(
-              "exception.ebiembl_retrieval_failed_on", new String[]
-              { emprefx.toLowerCase(), query.trim() }), e);
+      throw new Exception(
+              String.format("EBI EMBL XML retrieval failed for %s:%s",
+                      emprefx.toLowerCase(Locale.ROOT), query.trim()),
+              e);
     }
     return getEmblSequenceRecords(emprefx, query, reply);
   }
@@ -183,8 +187,9 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy
       XMLStreamReader streamReader = XMLInputFactory.newInstance()
               .createXMLStreamReader(is);
       javax.xml.bind.Unmarshaller um = jc.createUnmarshaller();
-      jalview.xml.binding.embl.ROOT root = (jalview.xml.binding.embl.ROOT) um
-              .unmarshal(streamReader);
+      JAXBElement<ROOT> rootElement = um.unmarshal(streamReader,
+              ROOT.class);
+      ROOT root = rootElement.getValue();
 
       /*
        * document root contains either "entry" or "entrySet"
@@ -443,9 +448,8 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy
         else
         {
           // final product length truncation check
-          int[] cdsRanges = adjustForProteinLength(translationLength,
-                  exons);
-          dnaToProteinMapping = new Mapping(product, cdsRanges,
+          int[] exons2 = adjustForProteinLength(translationLength, exons);
+          dnaToProteinMapping = new Mapping(product, exons2,
                   new int[]
                   { 1, translationLength }, 3, 1);
           if (product != null)
@@ -564,6 +568,7 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy
               proteinSeq = new Sequence(proteinSeqName,
                       product.getSequenceAsString());
               matcher.add(proteinSeq);
+              proteinSeq.setDescription(product.getDescription());
               peptides.add(proteinSeq);
             }
             dnaToProteinMapping.setTo(proteinSeq);
@@ -617,8 +622,7 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy
               && dnaToProteinMapping.getTo() != null)
       {
         DBRefEntry dnaToEmblProteinRef = new DBRefEntry(
-                DBRefSource.EMBLCDSProduct, sequenceVersion,
-                proteinId);
+                DBRefSource.EMBLCDSProduct, sequenceVersion, proteinId);
         dnaToEmblProteinRef.setMap(dnaToProteinMapping);
         dnaToProteinMapping.setMappedFromId(proteinId);
         dna.addDBRef(dnaToEmblProteinRef);
@@ -647,14 +651,14 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy
     {
       return new int[] {};
     }
-  
+
     try
     {
       List<int[]> ranges = DnaUtils.parseLocation(location);
       return listToArray(ranges);
     } catch (ParseException e)
     {
-      Cache.log.warn(
+      Console.warn(
               String.format("Not parsing inexact CDS location %s in ENA %s",
                       location, accession));
       return new int[] {};
@@ -703,27 +707,51 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy
     SequenceFeature sf = new SequenceFeature(type, desc, begin, end, group);
     if (!vals.isEmpty())
     {
-      StringBuilder sb = new StringBuilder();
-      boolean first = true;
       for (Entry<String, String> val : vals.entrySet())
       {
-        if (!first)
-        {
-          sb.append(";");
-        }
-        sb.append(val.getKey()).append("=").append(val.getValue());
-        first = false;
         sf.setValue(val.getKey(), val.getValue());
       }
-      sf.setAttributes(sb.toString());
     }
     return sf;
   }
 
+  @Override
+  public String getAccessionSeparator() // separator used between accessions
+  {
+    return null; // always null; presumably means "no separator" - TODO confirm
+  }
+
+  @Override
+  public Regex getAccessionValidator() // pattern used to check accession ids
+  {
+    return ACCESSION_REGEX; // shared constant: "^[A-Z]+[0-9]+" (anchored at start only)
+  }
+
+  @Override
+  public String getDbVersion() // version string reported for this source
+  {
+    return "0"; // fixed value; NOTE(review): looks like a placeholder - confirm
+  }
+
+  @Override
+  public int getTier() // tier value reported for this source
+  {
+    return 0; // fixed value; meaning of tier 0 is defined outside this view - TODO confirm
+  }
+
+  @Override
+  public boolean isValidReference(String accession) // true if accession passes the validator
+  {
+    if (accession == null || accession.length() < 2) // null-safe guard; rejects ids under 2 chars
+    {
+      return false;
+    }
+    return getAccessionValidator().search(accession); // outcome of the validator regex search
+  }
+
   /**
    * Truncates (if necessary) the exon intervals to match 3 times the length of
-   * the protein; also accepts 3 bases longer (for stop codon not included in
-   * protein)
+   * the protein (including truncation for stop codon included in exon)
    * 
    * @param proteinLength
    * @param exon
@@ -738,17 +766,15 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy
     }
     int expectedCdsLength = proteinLength * 3;
     int exonLength = MappingUtils.getLength(Arrays.asList(exon));
-  
+
     /*
-     * if exon length matches protein, or is shorter, or longer by the 
-     * length of a stop codon (3 bases), then leave it unchanged
+     * if exon length matches protein, or is shorter, then leave it unchanged
      */
-    if (expectedCdsLength >= exonLength
-            || expectedCdsLength == exonLength - 3)
+    if (expectedCdsLength >= exonLength)
     {
       return exon;
     }
-  
+
     int origxon[];
     int sxpos = -1;
     int endxon = 0;
@@ -768,7 +794,7 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy
           // .println("Truncating final exon interval on region by "
           // + (cdspos - cdslength));
         }
-  
+
         /*
          * shrink the final exon - reduce end position if forward
          * strand, increase it if reverse
@@ -784,7 +810,7 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy
         break;
       }
     }
-  
+
     if (sxpos != -1)
     {
       // and trim the exon interval set if necessary