sequenceId is a string, not an integer - allows generation of unlimited unique sequen...
[jalview.git] / src / jalview / gui / Jalview2XML.java
index 9a56150..5c08fb0 100755 (executable)
@@ -48,8 +48,45 @@ import jalview.structure.StructureSelectionManager;
  */
 public class Jalview2XML
 {
-
-  Hashtable seqRefIds = null;
+  /**
+   * Return the identifier string associated with sq, allocating one the
+   * first time a given sequence is asked for. Lookups go through seqsToIds;
+   * a freshly allocated key is "sq" followed by one more than the number of
+   * entries already held in that map.
+   * 
+   * @param sq
+   *                sequence whose identifier is wanted
+   * @return new or existing unique string for sq
+   */
+  String seqHash(SequenceI sq)
+  {
+    if (seqsToIds == null)
+    {
+      // lookup tables are created on demand
+      initSeqRefs();
+    }
+    if (!seqsToIds.containsKey(sq))
+    {
+      // not seen before: mint the next sequential key and remember it
+      String freshKey = "sq" + (seqsToIds.size() + 1);
+      seqsToIds.put(sq, freshKey);
+      return freshKey;
+    }
+    return (String) seqsToIds.get(sq);
+  }
+  /**
+   * Empty both sequence/id lookup tables. Either table may still be null if
+   * it has never been initialised (both fields start out as null), in which
+   * case it is simply skipped rather than dereferenced.
+   */
+  void clearSeqRefs()
+  {
+    if (seqRefIds != null)
+    {
+      seqRefIds.clear();
+    }
+    if (seqsToIds != null)
+    {
+      seqsToIds.clear();
+    }
+  }
+  /**
+   * Create whichever of the two lookup tables does not exist yet. Tables
+   * that already exist are left untouched, so calling this repeatedly is
+   * harmless.
+   */
+  void initSeqRefs()
+  {
+    if (seqRefIds == null)
+    {
+      seqRefIds = new Hashtable();
+    }
+    if (seqsToIds == null)
+    {
+      seqsToIds = new IdentityHashMap();
+    }
+  }
+  java.util.IdentityHashMap seqsToIds = null; // SequenceI -> id string; filled by seqHash, created on demand by initSeqRefs
+  java.util.Hashtable seqRefIds = null; // id string -> SequenceI; created on demand by initSeqRefs
 
   Vector frefedSequence = null;
   boolean raiseGUI = true; // whether errors are raised in dialog boxes or not
@@ -139,8 +176,8 @@ public class Jalview2XML
 
       //NOTE UTF-8 MUST BE USED FOR WRITING UNICODE CHARS
       ////////////////////////////////////////////////////
-      PrintWriter out = new PrintWriter(new OutputStreamWriter(jout,
-              "UTF-8"));
+      //NOTE ALSO new PrintWriter must be used for each new JarEntry
+      PrintWriter out = null;
 
       Vector shortNames = new Vector();
 
@@ -185,14 +222,23 @@ public class Jalview2XML
           {
             AlignmentPanel apanel = (AlignmentPanel) af.alignPanels
                     .elementAt(ap);
+            String fileName = apSize == 1 ? shortName : ap + shortName;
+            if (!fileName.endsWith(".xml"))
+            {
+              fileName = fileName + ".xml";
+            }
 
-            SaveState(apanel, apSize == 1 ? shortName : ap + shortName,
+            JarEntry entry = new JarEntry(fileName);
+            jout.putNextEntry(entry);
+            out = new PrintWriter(new OutputStreamWriter(jout, "UTF-8"));
+            SaveState(apanel, fileName,
                     jout, out);
           }
         }
       }
-
-      out.close();
+      try { out.flush(); } catch (Exception foo) {};
+      jout.closeEntry();
+      try { jout.flush(); } catch (Exception foo) {};
       jout.close();
     } catch (Exception ex)
     {
@@ -210,17 +256,25 @@ public class Jalview2XML
       int ap, apSize = af.alignPanels.size();
       FileOutputStream fos = new FileOutputStream(jarFile);
       JarOutputStream jout = new JarOutputStream(fos);
-      PrintWriter out = new PrintWriter(new OutputStreamWriter(jout,
-              "UTF-8"));
       for (ap = 0; ap < apSize; ap++)
       {
         AlignmentPanel apanel = (AlignmentPanel) af.alignPanels
                 .elementAt(ap);
-
-        SaveState(apanel, apSize == 1 ? fileName : fileName + ap, jout, out);
+        String jfileName = apSize == 1 ? fileName : fileName + ap;
+        if (!jfileName.endsWith(".xml"))
+        {
+          jfileName = jfileName + ".xml";
+        }
+        JarEntry entry = new JarEntry(jfileName);
+        jout.putNextEntry(entry);
+        PrintWriter out = new PrintWriter(new OutputStreamWriter(jout,
+        "UTF-8"));
+        SaveState(apanel, jfileName, jout, out);
+        try { out.flush(); } catch (Exception foo) {};
+        jout.closeEntry();
       }
 
-      out.close();
+      try { jout.flush(); } catch (Exception foo) {};
       jout.close();
       return true;
     } catch (Exception ex)
@@ -242,11 +296,8 @@ public class Jalview2XML
   public JalviewModel SaveState(AlignmentPanel ap, String fileName,
           JarOutputStream jout, PrintWriter out)
   {
-    if (seqRefIds == null)
-    {
-      seqRefIds = new Hashtable();
-    }
-
+    initSeqRefs();
+    
     Vector userColours = new Vector();
 
     AlignViewport av = ap.av;
@@ -291,22 +342,29 @@ public class Jalview2XML
     JSeq jseq;
 
     //SAVE SEQUENCES
-    int id = 0;
+    String id = "";
     jalview.datamodel.SequenceI jds;
     for (int i = 0; i < jal.getHeight(); i++)
     {
       jds = jal.getSequenceAt(i);
-      id = jds.hashCode();
+      id = seqHash(jds);
 
-      if (seqRefIds.get(id + "") != null)
+      if (seqRefIds.get(id) != null)
       {
-
+        // This happens for two reasons: 1. multiple views are being serialised. 2. the hashCode has collided with another sequence's code. This DOES HAPPEN! (PF00072.15.stk does this)
+        // JBPNote: Uncomment to debug the writing of files that fail to read back in because of ArrayIndexOutOfBoundsExceptions.
+        //System.err.println("vamsasSeq backref: "+id+"");
+        //System.err.println(jds.getName()+" "+jds.getStart()+"-"+jds.getEnd()+" "+jds.getSequenceAsString());
+        //System.err.println("Hashcode: "+seqHash(jds));
+        //SequenceI rsq = (SequenceI) seqRefIds.get(id + "");
+        //System.err.println(rsq.getName()+" "+rsq.getStart()+"-"+rsq.getEnd()+" "+rsq.getSequenceAsString());
+        //System.err.println("Hashcode: "+seqHash(rsq));
       }
       else
       {
         vamsasSeq = createVamsasSequence(id, jds);
         vamsasSet.addSequence(vamsasSeq);
-        seqRefIds.put(id + "", jal.getSequenceAt(i));
+        seqRefIds.put(id, jds);
       }
 
       jseq = new JSeq();
@@ -314,7 +372,7 @@ public class Jalview2XML
       jseq.setEnd(jds.getEnd());
       jseq.setColour(av.getSequenceColour(jds).getRGB());
 
-      jseq.setId(id);
+      jseq.setId(id); // jseq id should be a string not a number
 
       if (av.hasHiddenRows)
       {
@@ -449,6 +507,7 @@ public class Jalview2XML
 
                   DataOutputStream dout = new DataOutputStream(jout);
                   dout.write(data, 0, data.length);
+                  dout.flush();
                   jout.closeEntry();
                 }
               } catch (Exception ex)
@@ -730,7 +789,7 @@ public class Jalview2XML
         {
           jalview.datamodel.Sequence seq = (jalview.datamodel.Sequence) sg
                   .getSequenceAt(s);
-          groups[i].addSeq(seq.hashCode());
+          groups[i].addSeq(seqHash(seq));
         }
       }
 
@@ -929,20 +988,14 @@ public class Jalview2XML
 
     if (out != null)
     {
-      //We may not want to right the object to disk,
+      //We may not want to write the object to disk,
       //eg we can copy the alignViewport to a new view object
       //using save and then load
       try
       {
-        if (!fileName.endsWith(".xml"))
-        {
-          fileName = fileName + ".xml";
-        }
-
-        JarEntry entry = new JarEntry(fileName);
-        jout.putNextEntry(entry);
-
-        object.marshal(out);
+        org.exolab.castor.xml.Marshaller marshaller = new org.exolab.castor.xml.Marshaller(out);
+        marshaller.marshal(object);
+        out.flush();
       } catch (Exception ex)
       {
         ex.printStackTrace();
@@ -951,23 +1004,23 @@ public class Jalview2XML
     return object;
   }
 
-  private Sequence createVamsasSequence(int id, SequenceI jds)
+  private Sequence createVamsasSequence(String id, SequenceI jds) // convenience form: recurse = true, parentseq = null
   {
     return createVamsasSequence(true, id, jds, null);
   }
 
-  private Sequence createVamsasSequence(boolean recurse, int id,
+  private Sequence createVamsasSequence(boolean recurse, String id,
           SequenceI jds, SequenceI parentseq)
   {
     Sequence vamsasSeq = new Sequence();
-    vamsasSeq.setId(id + "");
+    vamsasSeq.setId(id);
     vamsasSeq.setName(jds.getName());
     vamsasSeq.setSequence(jds.getSequenceAsString());
     vamsasSeq.setDescription(jds.getDescription());
     jalview.datamodel.DBRefEntry[] dbrefs = null;
     if (jds.getDatasetSequence() != null)
     {
-      vamsasSeq.setDsseqid(jds.getDatasetSequence().hashCode() + "");
+      vamsasSeq.setDsseqid(seqHash(jds.getDatasetSequence()));
       if (jds.getDatasetSequence().getDBRef() != null)
       {
         dbrefs = jds.getDatasetSequence().getDBRef();
@@ -975,7 +1028,7 @@ public class Jalview2XML
     }
     else
     {
-      vamsasSeq.setDsseqid(id + ""); // so we can tell which sequences really are dataset sequences only
+      vamsasSeq.setDsseqid(id); // so we can tell which sequences really are dataset sequences only
       dbrefs = jds.getDBRef();
     }
     if (dbrefs != null)
@@ -1032,24 +1085,24 @@ public class Jalview2XML
                 && (parentseq != jmp.getTo() || parentseq
                         .getDatasetSequence() != jmp.getTo()))
         {
-          mpc.setSequence(createVamsasSequence(false, jmp.getTo()
-                  .hashCode(), jmp.getTo(), jds));
+          mpc.setSequence(createVamsasSequence(false, seqHash(jmp.getTo())
+                  , jmp.getTo(), jds));
         }
         else
         {
-          long jmpid = 0;
+          String jmpid = "";
           SequenceI ps = null;
           if (parentseq != jmp.getTo()
                   && parentseq.getDatasetSequence() != jmp.getTo())
           {
             // chaining dbref rather than a handshaking one
-            jmpid = (ps = jmp.getTo()).hashCode();
+            jmpid = seqHash(ps = jmp.getTo());
           }
           else
           {
-            jmpid = (ps = parentseq).hashCode();
+            jmpid = seqHash(ps = parentseq);
           }
-          mpc.setDseqFor("" + jmpid);
+          mpc.setDseqFor(jmpid);
           if (!seqRefIds.containsKey(mpc.getDseqFor()))
           {
             jalview.bin.Cache.log.debug("creatign new DseqFor ID");
@@ -1330,6 +1383,7 @@ public class Jalview2XML
       {
         out.println(data);
       }
+      try { out.flush(); } catch (Exception foo) {};
       out.close();
 
       alreadyLoadedPDB.put(pdbId, outFile.getAbsolutePath());
@@ -1364,6 +1418,7 @@ public class Jalview2XML
     boolean multipleView = false;
 
     JSeq[] JSEQ = object.getJalviewModelSequence().getJSeq();
+    int vi=0; // counter in vamsasSeq array
     for (int i = 0; i < JSEQ.length; i++)
     {
       String seqId = JSEQ[i].getId() + "";
@@ -1375,14 +1430,15 @@ public class Jalview2XML
       }
       else
       {
-        jseq = new jalview.datamodel.Sequence(vamsasSeq[i].getName(),
-                vamsasSeq[i].getSequence());
-        jseq.setDescription(vamsasSeq[i].getDescription());
+        jseq = new jalview.datamodel.Sequence(vamsasSeq[vi].getName(),
+                vamsasSeq[vi].getSequence());
+        jseq.setDescription(vamsasSeq[vi].getDescription());
         jseq.setStart(JSEQ[i].getStart());
         jseq.setEnd(JSEQ[i].getEnd());
         jseq.setVamsasId(uniqueSetSuffix + seqId);
-        seqRefIds.put(vamsasSeq[i].getId(), jseq);
+        seqRefIds.put(vamsasSeq[vi].getId()+"", jseq);
         tmpseqs.add(jseq);
+        vi++;
       }
 
       if (JSEQ[i].getHidden())
@@ -1596,9 +1652,14 @@ public class Jalview2XML
                     .getSecondaryStructure() == null || ae[aa]
                     .getSecondaryStructure().length() == 0) ? ' ' : ae[aa]
                     .getSecondaryStructure().charAt(0), ae[aa].getValue()
-
+                    
             );
-
+            // JBPNote: Consider verifying dataflow for IO of secondary structure annotation read from Stockholm files
+            // this was added to try to ensure that 
+            //if (anot[ae[aa].getPosition()].secondaryStructure>' ')
+            //{
+            //  anot[ae[aa].getPosition()].displayCharacter = "";
+            //}
             anot[ae[aa].getPosition()].colour = new java.awt.Color(ae[aa]
                     .getColour());
           }
@@ -2224,7 +2285,7 @@ public class Jalview2XML
         if (sqid == null)
         {
           // make up a new dataset reference for this sequence
-          sqid = "" + dsq.hashCode();
+          sqid = seqHash(dsq);
         }
         dsq.setVamsasId(uniqueSetSuffix + sqid);
         seqRefIds.put(sqid, dsq);
@@ -2382,7 +2443,7 @@ public class Jalview2XML
           djs = (jalview.datamodel.Sequence) seqRefIds.get(sqid);
         } else {
           System.err.println("Warning - making up dataset sequence id for DbRef sequence map reference");
-          sqid = ""+ms.hashCode(); // make up a new hascode for undefined dataset sequence hash (unlikely to happen)
+          sqid = ((Object)ms).toString(); // make up a new hascode for undefined dataset sequence hash (unlikely to happen)
         }
         
         if (djs==null) {
@@ -2415,7 +2476,7 @@ public class Jalview2XML
 
     if (!keepSeqRefs)
     {
-      seqRefIds.clear();
+      clearSeqRefs();
       jm.getJalviewModelSequence().getViewport(0).setSequenceSetId(null);
     }
     else
@@ -2443,4 +2504,16 @@ public class Jalview2XML
 
     return af.alignPanel;
   }
+  /**
+   * Release every sequence reference held in the lookup tables before this
+   * object is collected. Each table is guarded against null because it may
+   * never have been created, and the superclass finalizer is chained from a
+   * finally block so it runs even if clearing fails.
+   * 
+   * @see java.lang.Object#finalize()
+   */
+  protected void finalize() throws Throwable
+  {
+    try
+    {
+      // really make sure we have no buried refs left.
+      if (seqRefIds != null)
+      {
+        seqRefIds.clear();
+      }
+      if (seqsToIds != null)
+      {
+        seqsToIds.clear();
+      }
+      this.seqRefIds = null;
+      this.seqsToIds = null;
+    } finally
+    {
+      // always hand over to the superclass finalizer
+      super.finalize();
+    }
+  }
+  
 }