From e9d517ba16a71d6070fbf38df19b539b66d83425 Mon Sep 17 00:00:00 2001
From: jprocter <Jim Procter>
Date: Thu, 9 Dec 2010 15:59:29 +0000
Subject: [PATCH] initial implementation for JAL-718

---
 src/jalview/io/packed/DataProvider.java       |   61 ++++++
 src/jalview/io/packed/JalviewDataset.java     |  176 ++++++++++++++++++
 src/jalview/io/packed/ParsePackedSet.java     |  247 +++++++++++++++++++++++++
 src/jalview/io/packed/SimpleDataProvider.java |   49 +++++
 4 files changed, 533 insertions(+)
 create mode 100644 src/jalview/io/packed/DataProvider.java
 create mode 100644 src/jalview/io/packed/JalviewDataset.java
 create mode 100644 src/jalview/io/packed/ParsePackedSet.java
 create mode 100644 src/jalview/io/packed/SimpleDataProvider.java
diff --git a/src/jalview/io/packed/DataProvider.java b/src/jalview/io/packed/DataProvider.java
new file mode 100644
index 0000000..964d85f
--- /dev/null
+++ b/src/jalview/io/packed/DataProvider.java
@@ -0,0 +1,61 @@
+package jalview.io.packed;
+
+/**
+ * API for a data provider that can be used with jalview.io.packed.ParsePackedSet.
+ *
+ * @author JimP
+ */
+public interface DataProvider
+{
+  /**
+   * Class of data expected to be provided by a datasource.
+   *
+   * @author JimP
+   */
+  enum JvDataType
+  {
+    /**
+     * Any alignment flatfile recognisable by jalview.io.IdentifyFile.
+     */
+    ALIGNMENT,
+    /**
+     * A Jalview annotation file.
+     */
+    ANNOTATION,
+    /**
+     * A GFF or Jalview features file.
+     */
+    FEATURES,
+    /**
+     * A tree representation understood by the NewickFile parser.
+     */
+    TREE,
+    /**
+     * Any file that provides data that should be associated with a specified sequence.
+     */
+    SEQASSOCATED
+  }
+
+  /**
+   * Data to be parsed according to its type. Each call to getDataSource should
+   * return a new instance of the same data stream, initialised to the
+   * beginning of the chunk of data that is to be parsed.
+   *
+   * @return the source to parse
+   */
+  jalview.io.FileParse getDataSource();
+
+  /**
+   * Association context for the data.
+   *
+   * @return either null or a specific sequence
+   */
+  Object getSequenceTarget();
+
+  /**
+   * Type of data.
+   *
+   * @return the type of data supplied by this provider
+   */
+  JvDataType getType();
+}
\ No newline at end of file
diff --git a/src/jalview/io/packed/JalviewDataset.java b/src/jalview/io/packed/JalviewDataset.java
new file mode 100644
index 0000000..1e06636
--- /dev/null
+++ b/src/jalview/io/packed/JalviewDataset.java
@@ -0,0 +1,176 @@
+package jalview.io.packed;
+
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.SequenceI;
+import jalview.io.NewickFile;
+
+import java.util.ArrayList;
+import java.util.Hashtable;
+import java.util.List;
+
+public class JalviewDataset
+{
+  /** dataset that new data (sequences, alignments) will be added to */
+  AlignmentI parentDataset;
+
+  /** alignment sets in order of addition; the last is the current alignment being worked on */
+  List<AlignmentSet> al;
+
+  /** an alignment and its trees. Inner class: deuniquifyAlignment reads the dataset's seqDetails. */
+  public class AlignmentSet
+  {
+    AlignmentI al;
+    List<jalview.io.NewickFile> trees;
+    AlignmentSet(AlignmentI a)
+    {
+      al = a;
+      trees = new ArrayList<jalview.io.NewickFile>();
+    }
+
+    /**
+     * deuniquify this set's alignment using the enclosing dataset's seqDetails, then rename the
+     * associated nodes of each of its trees. Does nothing when seqDetails is null or empty.
+     */
+    void deuniquifyAlignment()
+    {
+      if (seqDetails == null || seqDetails.size() == 0)
+      {
+        return;
+      }
+      // 1. recover correct names and attributes for each sequence in alignment.
+      /*
+       * TODO: housekeeping w.r.t. recovery of dataset and annotation references
+       * for input sequences, then dataset sequence creation for new sequences
+       * retrieved from service; finally, attempt to de-uniquify to recover
+       * input sequence identity, and try to map back onto dataset. Note:
+       * jalview.analysis.SeqsetUtils.deuniquify(SeqNames, alseqs, true) will
+       * NOT WORK - the returned alignment may contain multiple versions of the
+       * input sequence, each being a subsequence of the original. deuniquify
+       * also removes existing annotation and features added in the previous
+       * step... al.setDataset(dataset); add in new sequences retrieved from
+       * sequence search which are not already in dataset; trigger a
+       * 'fetchDBids' to annotate sequences with database ids...
+       */
+      // jalview.analysis.SeqsetUtils.deuniquifyAndMerge(parentDataset, seqDetails, al,true);
+      jalview.analysis.SeqsetUtils.deuniquify(seqDetails, al.getSequencesArray(), true);
+      // 2. Update names of associated nodes in any trees
+      for (NewickFile nf : trees)
+      {
+        // the following works because all trees have already had node/SequenceI associations created.
+        jalview.analysis.NJTree njt = new jalview.analysis.NJTree(al.getSequencesArray(), nf);
+        // this just updates the displayed leaf name on the tree according to the SequenceIs.
+        njt.renameAssociatedNodes();
+      }
+    }
+  }
+
+  /** current set of feature colours */
+  Hashtable featureColours;
+
+  /** original identity of each sequence in results */
+  Hashtable seqDetails;
+
+  /** create an empty context: no parent dataset, no alignments, empty lookup tables. */
+  public JalviewDataset()
+  {
+    seqDetails = new Hashtable();
+    al = new ArrayList<AlignmentSet>();
+    parentDataset = null;
+    featureColours = new Hashtable();
+  }
+
+  /** context created from an existing dataset, with no parent alignment (see the four argument form). */
+  public JalviewDataset(AlignmentI aldataset, Hashtable fc, Hashtable seqDets)
+  {
+    this(aldataset, fc, seqDets, null);
+  }
+
+  /**
+   * @param aldataset - parent dataset for any new alignment/sequence data (replaced when parentAlignment is given)
+   * @param fc (may be null) feature settings for the alignment where new feature renderstyles are stored
+   * @param seqDets - (may be null) anonymised sequence information created by Sequence uniquifier
+   * @param parentAlignment (may be null) alignment to associate new annotation and trees with; its dataset, or the alignment itself when it has none, becomes the parent dataset
+   */
+  public JalviewDataset(AlignmentI aldataset, Hashtable fc, Hashtable seqDets, AlignmentI parentAlignment)
+  {
+    this();
+    parentDataset = aldataset;
+    if (parentAlignment != null)
+    {
+      AlignmentI ds = parentAlignment.getDataset();
+      parentDataset = (ds == null) ? parentAlignment : ds;
+      if (ds != null)
+      {
+        addAlignment(parentAlignment);
+      }
+    }
+    if (seqDets != null)
+    {
+      seqDetails = seqDets;
+    }
+    if (fc != null)
+    {
+      featureColours = fc;
+    }
+  }
+
+  /** @return true if at least one alignment has been added */
+  public boolean hasAlignments()
+  {
+    return al != null && al.size() > 0;
+  }
+
+  /** @return the most recently added alignment, or null if there is none */
+  public AlignmentI getLastAlignment()
+  {
+    return (al == null || al.size() < 1) ? null : al.get(al.size() - 1).al;
+  }
+
+  /** @return the most recently added alignment set, or null if there is none */
+  public AlignmentSet getLastAlignmentSet()
+  {
+    return (al == null || al.size() < 1) ? null : al.get(al.size() - 1);
+  }
+
+  /** post process (deuniquify) the current alignment and its dependent data, then add newal to the dataset. */
+  public void addAlignment(AlignmentI newal)
+  {
+    if (al == null)
+    {
+      al = new ArrayList<AlignmentSet>();
+    }
+    AlignmentSet last = getLastAlignmentSet();
+    if (last != null)
+    {
+      System.err.println("Deuniquifying last alignment set.");
+      last.deuniquifyAlignment();
+    }
+    al.add(new AlignmentSet(newal));
+  }
+
+  /**
+   * associate a tree with the most recently added alignment.
+   * @throws IllegalStateException if no alignment has been added yet
+   */
+  public void addTreeFromFile(NewickFile nf)
+  {
+    AlignmentSet lal = getLastAlignmentSet();
+    if (lal == null)
+    {
+      throw new IllegalStateException("No alignment to associate tree with.");
+    }
+    lal.trees.add(nf);
+  }
+
+  /** TODO: discover where sequence associated data should be put. Currently always returns false. */
+  public boolean hasSequenceAssoc()
+  {
+    return false;
+  }
+
+  /** TODO: delineate semantics for associating uniquified data with potentially de-uniquified sequence. Currently always returns null. */
+  public SequenceI getLastAssociatedSequence()
+  {
+    return null;
+  }
+}
\ No newline at end of file
diff --git a/src/jalview/io/packed/ParsePackedSet.java b/src/jalview/io/packed/ParsePackedSet.java
new file mode 100644
index 0000000..58892ed
--- /dev/null
+++ b/src/jalview/io/packed/ParsePackedSet.java
@@ -0,0 +1,247 @@
+package jalview.io.packed;
+
+import jalview.datamodel.AlignmentI;
+import jalview.io.AppletFormatAdapter;
+import jalview.io.FileParse;
+import jalview.io.FormatAdapter;
+import jalview.io.IdentifyFile;
+import jalview.io.packed.DataProvider.JvDataType;
+
+import java.io.BufferedReader;
+import java.util.ArrayList;
+import java.util.List;
+
+public class ParsePackedSet
+{
+  /**
+   * return results as a series of jalview.datamodel objects suitable for
+   * display. Sources are handled in iteration order; a problem with one source
+   * is reported on System.err and the remaining sources are still processed.
+   *
+   * @param context
+   *          - context which is updated with new data (created if null)
+   * @param files
+   *          - source data
+   * @return list of data objects added to context
+   * @throws Exception if a step outside the guarded parsing code fails
+   */
+  public Object[] getAlignment(JalviewDataset context,
+          Iterable<DataProvider> files) throws Exception
+  {
+    List<Object> rslt = new ArrayList<Object>();
+    if (context == null)
+    {
+      context = new JalviewDataset();
+    }
+    boolean deuniquify = false;
+    for (DataProvider dta : files)
+    {
+      Exception exerror = null;
+      String errmsg = null;
+      FileParse src = dta.getDataSource();
+      if (dta.getType().equals(DataProvider.JvDataType.ALIGNMENT))
+      {
+        String fmt = null;
+        try
+        {
+          fmt = new IdentifyFile().Identify(src, false);
+        } catch (Exception ex)
+        {
+          exerror = ex;
+          errmsg = "Couldn't identify alignment format.";
+        }
+
+        if (fmt != null)
+        {
+          if (!FormatAdapter.isValidIOFormat(fmt, false))
+          {
+            // NOTE(review): fmt presumably holds IdentifyFile's failure text here - confirm
+            errmsg = fmt;
+          }
+          else
+          {
+            // parse the alignment
+            AlignmentI al = null;
+            try
+            {
+              al = new FormatAdapter().readFromFile(src, fmt);
+            } catch (Exception e)
+            {
+              errmsg = "Failed to parse alignment from result set";
+              exerror = e;
+            }
+            if (al != null)
+            {
+              // deuniquify and construct/merge additional dataset entries if
+              // necessary.
+              context.addAlignment(al);
+              rslt.add(al);
+              deuniquify = true;
+            }
+          }
+        }
+      }
+      if (dta.getType().equals(JvDataType.ANNOTATION))
+      {
+        if (!context.hasAlignments())
+        {
+          errmsg = "No alignment or sequence dataset to associate annotation with.";
+          // could duplicate the dataset reference here as default behaviour for
+          // sequence associated annotation ?
+        }
+        else
+        {
+          try
+          {
+            BufferedReader br;
+            if (src.getReader() instanceof BufferedReader)
+            {
+              br = (BufferedReader) src.getReader();
+            }
+            else
+            {
+              br = new BufferedReader(src.getReader());
+            }
+            new jalview.io.AnnotationFile().parseAnnotationFrom(
+                    context.getLastAlignment(), br);
+          } catch (Exception e)
+          {
+            errmsg = "Failed to parse the annotation file associated with the alignment.";
+            exerror = e;
+          }
+        }
+      }
+      if (dta.getType().equals(JvDataType.SEQASSOCATED))
+      {
+        if (!context.hasSequenceAssoc())
+        {
+          errmsg = "No sequence to associate data with.";
+        }
+        else
+        {
+          errmsg = "parsing of sequence associated data is not implemented";
+        }
+      }
+      if (dta.getType().equals(JvDataType.FEATURES))
+      {
+        try
+        {
+          jalview.io.FeaturesFile ff = new jalview.io.FeaturesFile(src);
+          ff.parse(context.getLastAlignment(), context.featureColours,
+                  false);
+        } catch (Exception e)
+        {
+          errmsg = "Failed to parse the Features file associated with the alignment.";
+          exerror = e;
+        }
+      }
+      if (dta.getType().equals(JvDataType.TREE))
+      {
+        try
+        {
+          jalview.io.NewickFile nf = new jalview.io.NewickFile(src);
+          if (!nf.isValid())
+          {
+            nf.close();
+            errmsg = "Failed to parse the treeFile associated with the result.";
+          }
+          else
+          {
+            // do association to current alignment.
+            context.addTreeFromFile(nf);
+            rslt.add(nf);
+          }
+        } catch (Exception e)
+        {
+          errmsg = "Failed to parse the treeFile associated with the result.";
+          exerror = e;
+        }
+      }
+      if (errmsg != null)
+      {
+        // report the problem: these details used to be collected and dropped
+        System.err.println(exerror == null ? errmsg : errmsg + " (" + exerror + ")");
+      }
+    }
+    if (deuniquify)
+    {
+      context.getLastAlignmentSet().deuniquifyAlignment();
+    }
+    return rslt.toArray();
+  }
+
+  /**
+   * simple command line test. Arguments should be one or more pairs of
+   * &lt;DataProvider.JvDataType&gt; &lt;Filename&gt; arguments. The routine
+   * will attempt to read each source in turn, and report what kind of Jalview
+   * datamodel objects would be created.
+   *
+   * @param args type/filename pairs; a pair with an unknown type is skipped
+   */
+  public static void main(String args[])
+  {
+    // make data providers from the set of keys/files
+    int i = 0;
+    List<DataProvider> dp = new ArrayList<DataProvider>();
+    while ((i + 1) < args.length)
+    {
+      // Locale.ROOT keeps the upper-casing independent of the default locale
+      final String type = args[i++].toUpperCase(java.util.Locale.ROOT);
+      final String file = args[i++];
+      final JvDataType jtype;
+      try
+      {
+        jtype = JvDataType.valueOf(type);
+      } catch (IllegalArgumentException e)
+      {
+        // valueOf throws for an unknown name rather than returning null
+        System.out.println("Couldn't parse source type token '" + type + "'");
+        continue;
+      }
+      final FileParse fp;
+      try
+      {
+        fp = new FileParse(file, AppletFormatAdapter.checkProtocol(file));
+      } catch (Exception e)
+      {
+        System.err.println("Couldn't handle datasource of type " + jtype
+                + " using URI " + file);
+        e.printStackTrace();
+        return;
+      }
+      dp.add(new SimpleDataProvider(jtype, fp, null));
+    }
+    if (i < args.length)
+    {
+      System.out.print("** WARNING\nIgnoring unused arguments:\n");
+      while (i < args.length)
+      {
+        // i must advance here or the loop never terminates
+        System.out.print(" " + args[i++]);
+      }
+      System.out.print("\n");
+    }
+    System.out.println("Now trying to parse set:");
+    Object[] newdm;
+    try
+    {
+      newdm = new ParsePackedSet().getAlignment(new JalviewDataset(), dp);
+    } catch (Exception e)
+    {
+      System.out.println("Test failed for these arguments.\n");
+      e.printStackTrace(System.out);
+      return;
+    }
+    if (newdm != null)
+    {
+      for (Object o : newdm)
+      {
+        System.out.println("Will need to create an " + o.getClass());
+      }
+      // now test uniquify/deuniquify stuff
+      // uniquify alignment and write alignment, annotation, features, and trees
+      // to buffers.
+      // import with deuniquify info, and compare results to input.
+    }
+  }
+}
diff --git a/src/jalview/io/packed/SimpleDataProvider.java b/src/jalview/io/packed/SimpleDataProvider.java
new file mode 100644
index 0000000..6b00ddb
--- /dev/null
+++ b/src/jalview/io/packed/SimpleDataProvider.java
@@ -0,0 +1,49 @@
+package jalview.io.packed;
+
+import jalview.io.FileParse;
+
+/**
+ * Minimal implementation of the DataProvider interface: a FileParse datasource tagged with a
+ * DataProvider.JvDataType content type, with or without an associated external reference object.
+ */
+public class SimpleDataProvider implements DataProvider
+{
+  JvDataType jvtype;
+  FileParse source;
+  Object assocseq;
+
+  /**
+   * Create a SimpleDataProvider.
+   *
+   * @param type content type of the resource accessible via fp
+   * @param fp the datasource
+   * @param assoc external object that fp's content should be associated with (may be null)
+   */
+  public SimpleDataProvider(JvDataType type, FileParse fp, Object assoc)
+  {
+    this.jvtype = type;
+    this.source = fp;
+    this.assocseq = assoc;
+  }
+
+  /** @return the datasource stored in this provider; the stored object itself, not a copy */
+  @Override
+  public FileParse getDataSource()
+  {
+    return this.source;
+  }
+
+  /** @return the associated object stored in this provider, possibly null */
+  @Override
+  public Object getSequenceTarget()
+  {
+    return this.assocseq;
+  }
+
+  /** @return the content type stored in this provider */
+  @Override
+  public JvDataType getType()
+  {
+    return this.jvtype;
+  }
+}
\ No newline at end of file
-- 
1.7.10.2