--- /dev/null
+package jalview.io.packed;
+
+/**
+ * Contract for a source of data that can be handed to
+ * jalview.io.packed.ParsePackedSet.
+ *
+ * @author JimP
+ */
+public interface DataProvider
+{
+  /**
+   * The kind of content a data source is expected to provide.
+   *
+   * @author JimP
+   */
+  enum JvDataType
+  {
+    /** any alignment flatfile recognisable by jalview.io.IdentifyFile */
+    ALIGNMENT,
+
+    /** a jalview annotation file */
+    ANNOTATION,
+
+    /** a GFF or Jalview features file */
+    FEATURES,
+
+    /** a tree representation understood by the NewickFile parser */
+    TREE,
+
+    /**
+     * any file that provides data that should be associated with a specified
+     * sequence. (The spelling of this constant is part of the public API.)
+     */
+    SEQASSOCATED
+  }
+
+  /**
+   * Supplies the data to be parsed according to its type. Each call should
+   * return a new instance of the same data stream, initialised to the
+   * beginning of the chunk of data that is to be parsed.
+   *
+   * @return the data source to read from
+   */
+  jalview.io.FileParse getDataSource();
+
+  /**
+   * Supplies the association context for the data.
+   *
+   * @return either null or a specific sequence
+   */
+  Object getSequenceTarget();
+
+  /**
+   * Reports the type of data this provider supplies.
+   *
+   * @return the data type
+   */
+  JvDataType getType();
+}
\ No newline at end of file
--- /dev/null
+package jalview.io.packed;
+
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.SequenceI;
+import jalview.io.NewickFile;
+
+import java.util.ArrayList;
+import java.util.Hashtable;
+import java.util.List;
+
+/**
+ * Context that accumulates the alignments, trees, feature colours and
+ * original sequence details gathered while a set of data sources is parsed.
+ */
+public class JalviewDataset
+{
+  /**
+   * dataset that new data (sequences, alignments) will be added to
+   */
+  AlignmentI parentDataset;
+
+  /**
+   * alignment sets added so far, in the order they were added; the last entry
+   * is the one currently being worked on.
+   */
+  List<AlignmentSet> al;
+
+  /**
+   * An alignment together with the trees that have been attached to it.
+   */
+  public class AlignmentSet
+  {
+    AlignmentI al;
+
+    List<jalview.io.NewickFile> trees;
+
+    AlignmentSet(AlignmentI a)
+    {
+      al = a;
+      trees = new ArrayList<jalview.io.NewickFile>();
+    }
+
+    /**
+     * deuniquify this set's alignment using the enclosing dataset's
+     * seqDetails, then refresh the leaf names of every attached tree. Does
+     * nothing when there are no sequence details to apply.
+     */
+    void deuniquifyAlignment()
+    {
+      if (seqDetails == null || seqDetails.isEmpty())
+      {
+        // nothing to do
+        return;
+      }
+      // 1. recover correct names and attributes for each sequence in
+      // alignment.
+      /*
+       * TODO: housekeeping w.r.t. recovery of dataset and annotation
+       * references for input sequences, and then dataset sequence creation
+       * for new sequences retrieved from service // finally, attempt to
+       * de-uniquify to recover input sequence identity, and try to map back
+       * onto dataset Note: this
+       * jalview.analysis.SeqsetUtils.deuniquify(SeqNames, alseqs, true); will
+       * NOT WORK - the returned alignment may contain multiple versions of
+       * the input sequence, each being a subsequence of the original.
+       * deuniquify also removes existing annotation and features added in the
+       * previous step... al.setDataset(dataset); // add in new sequences
+       * retrieved from sequence search which are not already in dataset. //
+       * trigger a 'fetchDBids' to annotate sequences with database ids...
+       */
+      // jalview.analysis.SeqsetUtils.deuniquifyAndMerge(parentDataset,
+      // seqDetails, al,true);
+
+      jalview.analysis.SeqsetUtils.deuniquify(seqDetails,
+              al.getSequencesArray(), true);
+      // 2. Update names of associated nodes in any trees
+      for (NewickFile nf : trees)
+      {
+        // the following works because all trees have already had
+        // node/SequenceI associations created.
+        jalview.analysis.NJTree njt = new jalview.analysis.NJTree(
+                al.getSequencesArray(), nf);
+        // this just updates the displayed leaf name on the tree according to
+        // the SequenceIs.
+        njt.renameAssociatedNodes();
+      }
+    }
+  }
+
+  /**
+   * current set of feature colours
+   */
+  Hashtable featureColours;
+
+  /**
+   * original identity of each sequence in results
+   */
+  Hashtable seqDetails;
+
+  /**
+   * create an empty context: no parent dataset, no alignments, and empty
+   * feature colour and sequence detail tables.
+   */
+  public JalviewDataset()
+  {
+    seqDetails = new Hashtable();
+    al = new ArrayList<AlignmentSet>();
+    parentDataset = null;
+    featureColours = new Hashtable();
+  }
+
+  /**
+   * context created from an existing dataset, with no parent alignment.
+   *
+   * @param aldataset
+   *          - parent dataset for any new alignment/sequence data
+   * @param fc
+   *          (may be null) feature settings where new feature renderstyles
+   *          are stored
+   * @param seqDets
+   *          (may be null) anonymised sequence information created by
+   *          Sequence uniquifier
+   */
+  public JalviewDataset(AlignmentI aldataset, Hashtable fc,
+          Hashtable seqDets)
+  {
+    this(aldataset, fc, seqDets, null);
+  }
+
+  /**
+   * context created from an existing dataset and, optionally, an alignment.
+   * When parentAlignment is given it takes precedence over aldataset: its
+   * dataset becomes the parent dataset and the alignment itself is added to
+   * this context; if it has no dataset, the alignment is used as the parent
+   * dataset and is not added.
+   *
+   * @param aldataset
+   *          - parent dataset for any new alignment/sequence data (must not
+   *          be null)
+   * @param fc
+   *          (may be null) feature settings for the alignment where new
+   *          feature renderstyles are stored
+   * @param seqDets
+   *          - (may be null) anonymised sequence information created by
+   *          Sequence uniquifier
+   * @param parentAlignment
+   *          (may be null) alignment to associate new annotation and trees
+   *          with.
+   */
+  public JalviewDataset(AlignmentI aldataset, Hashtable fc,
+          Hashtable seqDets, AlignmentI parentAlignment)
+  {
+    this();
+    parentDataset = aldataset;
+    if (parentAlignment != null)
+    {
+      parentDataset = parentAlignment.getDataset();
+      if (parentDataset == null)
+      {
+        parentDataset = parentAlignment;
+      }
+      else
+      {
+        addAlignment(parentAlignment);
+      }
+    }
+    if (seqDets != null)
+    {
+      seqDetails = seqDets;
+    }
+    if (fc != null)
+    {
+      featureColours = fc;
+    }
+  }
+
+  /**
+   * @return true if at least one alignment has been added to this context
+   */
+  public boolean hasAlignments()
+  {
+    return al != null && al.size() > 0;
+  }
+
+  /**
+   * @return the most recently added alignment, or null if there is none
+   */
+  public AlignmentI getLastAlignment()
+  {
+    AlignmentSet last = getLastAlignmentSet();
+    return (last == null) ? null : last.al;
+  }
+
+  /**
+   * @return the most recently added alignment set, or null if there is none
+   */
+  public AlignmentSet getLastAlignmentSet()
+  {
+    return (al == null || al.size() < 1) ? null : al.get(al.size() - 1);
+  }
+
+  /**
+   * post process (deuniquify) the current alignment and its dependent data,
+   * and then add newal to the dataset.
+   *
+   * @param newal
+   *          alignment that becomes the current alignment
+   */
+  public void addAlignment(AlignmentI newal)
+  {
+    if (al == null)
+    {
+      al = new ArrayList<AlignmentSet>();
+    }
+    AlignmentSet last = getLastAlignmentSet();
+    if (last != null)
+    {
+      System.err.println("Deuniquifying last alignment set.");
+      last.deuniquifyAlignment();
+    }
+    al.add(new AlignmentSet(newal));
+  }
+
+  /**
+   * attach a tree to the most recently added alignment.
+   *
+   * @param nf
+   *          tree to attach
+   * @throws IllegalStateException
+   *           if no alignment has been added to this context yet
+   */
+  public void addTreeFromFile(NewickFile nf)
+  {
+    AlignmentSet lal = getLastAlignmentSet();
+    if (lal == null)
+    {
+      // previously this surfaced as a bare NullPointerException
+      throw new IllegalStateException(
+              "No alignment available to associate the tree with.");
+    }
+    lal.trees.add(nf);
+  }
+
+  /**
+   * @return always false at present (sequence association is not
+   *         implemented)
+   */
+  public boolean hasSequenceAssoc()
+  {
+    // TODO: discover where sequence associated data should be put.
+    return false;
+  }
+
+  /**
+   * @return always null at present (sequence association is not
+   *         implemented)
+   */
+  public SequenceI getLastAssociatedSequence()
+  {
+    // TODO: delineate semantics for associating uniquified data with
+    // potentially de-uniquified sequence.
+    return null;
+  }
+}
\ No newline at end of file
--- /dev/null
+package jalview.io.packed;
+
+import jalview.datamodel.AlignmentI;
+import jalview.io.AppletFormatAdapter;
+import jalview.io.FileParse;
+import jalview.io.FormatAdapter;
+import jalview.io.IdentifyFile;
+import jalview.io.packed.DataProvider.JvDataType;
+
+import java.io.BufferedReader;
+import java.util.ArrayList;
+import java.util.List;
+
+public class ParsePackedSet
+{
+
+ /**
+ * return results as a series of jalview.datamodel objects suitable for
+ * display
+ *
+ * @param context
+ * - context which is updated with new data
+ * @param files
+ * - source data
+ * @return list of data objects added to context
+ * @throws Exception
+ */
+ public Object[] getAlignment(JalviewDataset context,
+ Iterable<DataProvider> files) throws Exception
+ {
+ List<Object> rslt = new ArrayList<Object>();
+ if (context == null)
+ {
+ context = new JalviewDataset();
+ }
+ boolean deuniquify = false;
+ for (DataProvider dta : files)
+ {
+ Exception exerror = null;
+ String errmsg = null;
+ FileParse src = dta.getDataSource();
+ if (dta.getType().equals(DataProvider.JvDataType.ALIGNMENT))
+ {
+ String fmt = null;
+ try
+ {
+ fmt = new IdentifyFile().Identify(src, false);
+ } catch (Exception ex)
+ {
+ exerror = ex;
+ errmsg = "Couldn't identify alignment format.";
+ }
+
+ if (fmt != null)
+ {
+ if (!FormatAdapter.isValidIOFormat(fmt, false))
+ {
+ errmsg = fmt;
+ exerror = null;
+ }
+ else
+ {
+ // parse the alignment
+ AlignmentI al = null;
+ try
+ {
+ al = new FormatAdapter().readFromFile(src, fmt);
+ } catch (Exception e)
+ {
+ errmsg = "Failed to parse alignment from result set";
+ exerror = e;
+ }
+ if (al != null)
+ {
+ // deuniquify and construct/merge additional dataset entries if
+ // necessary.
+ context.addAlignment(al);
+ rslt.add(al);
+ deuniquify = true;
+ }
+ }
+ }
+ }
+ if (dta.getType().equals(JvDataType.ANNOTATION))
+ {
+ if (!context.hasAlignments())
+ {
+ errmsg = "No alignment or sequence dataset to associate annotation with.";
+ // could duplicate the dataset reference here as default behaviour for
+ // sequence associated annotation ?
+ }
+ try
+ {
+ BufferedReader br;
+ if (src.getReader() instanceof BufferedReader)
+ {
+ br = (BufferedReader) src.getReader();
+ }
+ else
+ {
+ br = new BufferedReader(src.getReader());
+ }
+ new jalview.io.AnnotationFile().parseAnnotationFrom(
+ context.getLastAlignment(), br);
+
+ } catch (Exception e)
+ {
+ errmsg = ((errmsg == null) ? "" : errmsg)
+ + "Failed to parse the annotation file associated with the alignment.";
+ exerror = e;
+ }
+ }
+ if (dta.getType().equals(JvDataType.SEQASSOCATED))
+ {
+ if (!context.hasSequenceAssoc())
+ {
+ errmsg = "No sequence to associate data with.";
+
+ }
+ errmsg = "parsing of sequence associated data is not implemented";
+ exerror = new Exception(errmsg);
+ }
+ if (dta.getType().equals(JvDataType.FEATURES))
+ {
+ try
+ {
+ jalview.io.FeaturesFile ff = new jalview.io.FeaturesFile(src);
+ ff.parse(context.getLastAlignment(), context.featureColours,
+ false);
+ } catch (Exception e)
+ {
+ errmsg = ("Failed to parse the Features file associated with the alignment.");
+ exerror = e;
+ }
+ }
+ if (dta.getType().equals(JvDataType.TREE))
+ {
+ try
+ {
+ jalview.io.NewickFile nf = new jalview.io.NewickFile(src);
+ if (!nf.isValid())
+ {
+ nf.close();
+ nf = null;
+ }
+ else
+ {
+ // do association to current alignment.
+
+ context.addTreeFromFile(nf);
+ rslt.add(nf);
+ }
+ } catch (Exception e)
+ {
+ errmsg = ("Failed to parse the treeFile associated with the result.");
+ exerror = e;
+ }
+
+ }
+
+ }
+ if (deuniquify)
+ {
+ context.getLastAlignmentSet().deuniquifyAlignment();
+ }
+ return rslt.toArray();
+ }
+
+ /**
+ * simple command line test. Arguments should be one or more pairs of
+ * <DataProvider.JvDataType> <Filename> arguments. The routine will attempt to
+ * read each source in turn, and report what kind of Jalview datamodel objects
+ * would be created.
+ *
+ * @param args
+ */
+ public static void main(String args[])
+ {
+ // make data providers from the set of keys/files
+ int i = 0;
+ List<DataProvider> dp = new ArrayList<DataProvider>();
+ while ((i + 1) < args.length)
+ {
+ String type = args[i++];
+ final String file = args[i++];
+ final JvDataType jtype = DataProvider.JvDataType.valueOf(type
+ .toUpperCase());
+ if (jtype != null)
+ {
+ final FileParse fp;
+ try
+ {
+ fp = new FileParse(file, AppletFormatAdapter.checkProtocol(file));
+ } catch (Exception e)
+ {
+ System.err.println("Couldn't handle datasource of type " + jtype
+ + " using URI " + file);
+ e.printStackTrace();
+ return;
+ }
+ dp.add(new SimpleDataProvider(jtype, fp, null));
+ }
+ else
+ {
+ System.out.println("Couldn't parse source type token '"
+ + type.toUpperCase() + "'");
+ }
+ }
+ if (i < args.length)
+ {
+ System.out.print("** WARNING\nIgnoring unused arguments:\n");
+ while (i < args.length)
+ {
+ System.out.print(" " + args[i]);
+ }
+ System.out.print("\n");
+
+ }
+ System.out.println("Now trying to parse set:");
+ JalviewDataset context;
+ Object[] newdm;
+ ParsePackedSet pps;
+ try
+ {
+ newdm = (pps = new ParsePackedSet()).getAlignment(
+ context = new JalviewDataset(), dp);
+ } catch (Exception e)
+ {
+ System.out.println("Test failed for these arguments.\n");
+ e.printStackTrace(System.out);
+ return;
+ }
+ if (newdm != null)
+ {
+ for (Object o : newdm)
+ {
+ System.out.println("Will need to create an " + o.getClass());
+ }
+
+ // now test uniquify/deuniquify stuff
+ // uniquify alignment and write alignment, annotation, features, and trees
+ // to buffers.
+ // import with deuniquify info, and compare results to input.
+
+ }
+ }
+}
--- /dev/null
+package jalview.io.packed;
+
+import jalview.io.FileParse;
+
+/**
+ * minimal implementation of the DataProvider interface. Wraps a FileParse
+ * datasource, tags it with one of the DataProvider.JvDataType content types,
+ * and optionally carries some other object as an external reference.
+ * <p>
+ * NOTE(review): DataProvider.getDataSource is documented as returning a new
+ * instance on each call, but this class hands back the same FileParse every
+ * time - confirm that callers only read it once.
+ */
+public class SimpleDataProvider implements DataProvider
+{
+  /** content type of the wrapped datasource */
+  JvDataType jvtype;
+
+  /** the wrapped datasource */
+  FileParse source;
+
+  /** external object the content should be associated with (may be null) */
+  Object assocseq;
+
+  /**
+   * create a SimpleDataProvider
+   *
+   * @param type
+   *          - contents of resource accessible via fp
+   * @param fp
+   *          - datasource
+   * @param assoc
+   *          - external object that fp's content should be associated with
+   *          (may be null)
+   */
+  public SimpleDataProvider(JvDataType type, FileParse fp, Object assoc)
+  {
+    this.jvtype = type;
+    this.source = fp;
+    this.assocseq = assoc;
+  }
+
+  /**
+   * @return the FileParse passed to the constructor
+   */
+  @Override
+  public FileParse getDataSource()
+  {
+    return this.source;
+  }
+
+  /**
+   * @return the associated object passed to the constructor (may be null)
+   */
+  @Override
+  public Object getSequenceTarget()
+  {
+    return this.assocseq;
+  }
+
+  /**
+   * @return the content type passed to the constructor
+   */
+  @Override
+  public JvDataType getType()
+  {
+    return this.jvtype;
+  }
+
+}
\ No newline at end of file