From e9d517ba16a71d6070fbf38df19b539b66d83425 Mon Sep 17 00:00:00 2001
From: jprocter
Date: Thu, 9 Dec 2010 15:59:29 +0000
Subject: [PATCH] initial implementation for JAL-718

---
 src/jalview/io/packed/DataProvider.java       |   61 ++++++
 src/jalview/io/packed/JalviewDataset.java     |  258 ++++++++++++++++++++++++++
 src/jalview/io/packed/ParsePackedSet.java     |  297 ++++++++++++++++++++++++++++++
 src/jalview/io/packed/SimpleDataProvider.java |   51 +++++
 4 files changed, 667 insertions(+)
 create mode 100644 src/jalview/io/packed/DataProvider.java
 create mode 100644 src/jalview/io/packed/JalviewDataset.java
 create mode 100644 src/jalview/io/packed/ParsePackedSet.java
 create mode 100644 src/jalview/io/packed/SimpleDataProvider.java

diff --git a/src/jalview/io/packed/DataProvider.java b/src/jalview/io/packed/DataProvider.java
new file mode 100644
index 0000000..964d85f
--- /dev/null
+++ b/src/jalview/io/packed/DataProvider.java
@@ -0,0 +1,61 @@
+package jalview.io.packed;
+
+/**
+ * API for a data provider that can be used with jalview.io.packed.ParsePackedSet
+ * @author JimP
+ *
+ */
+public interface DataProvider
+{
+  /**
+   * class of data expected to be provided by datasource
+   * @author JimP
+   *
+   */
+  public enum JvDataType
+  {
+    /**
+     * any alignment flatfile recognisable by jalview.io.IdentifyFile
+     */
+    ALIGNMENT,
+    /**
+     * a jalview annotation file
+     */
+    ANNOTATION,
+    /**
+     * a GFF or Jalview features file
+     */
+    FEATURES,
+    /**
+     * a tree representation understood by the NewickFile parser
+     */
+    TREE,
+    /**
+     * any file that provides data that should be associated with a specified sequence.
+     */
+    SEQASSOCATED;
+  }
+
+  /**
+   * data to be parsed according to its type. Each call to getDataSource
+   * should return a new instance of the same data stream initialised to the
+   * beginning of the chunk of data that is to be parsed.
+   *
+   * @return the data source to be parsed
+   */
+  jalview.io.FileParse getDataSource();
+
+  /**
+   * association context for data. Either null or a specific sequence.
+   *
+   * @return null, or the sequence that the data should be associated with
+   */
+  Object getSequenceTarget();
+
+  /**
+   * type of data
+   *
+   * @return the type of data provided by getDataSource
+   */
+  DataProvider.JvDataType getType();
+}
\ No newline at end of file
diff --git a/src/jalview/io/packed/JalviewDataset.java b/src/jalview/io/packed/JalviewDataset.java
new file mode 100644
index 0000000..1e06636
--- /dev/null
+++ b/src/jalview/io/packed/JalviewDataset.java
@@ -0,0 +1,258 @@
+package jalview.io.packed;
+
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.SequenceI;
+import jalview.io.NewickFile;
+
+import java.util.ArrayList;
+import java.util.Hashtable;
+import java.util.List;
+
+/**
+ * context that collects the alignments, trees and feature colours created
+ * whilst a set of data sources is parsed by jalview.io.packed.ParsePackedSet
+ */
+public class JalviewDataset
+{
+  /**
+   * dataset that new data (sequences, alignments) will be added to
+   */
+  AlignmentI parentDataset;
+
+  /**
+   * alignments added so far, in the order they were added. The last entry is
+   * the alignment currently being worked on.
+   */
+  List<AlignmentSet> al;
+
+  /**
+   * an alignment and the trees that have been associated with it
+   */
+  public class AlignmentSet
+  {
+    AlignmentI al;
+
+    List<NewickFile> trees;
+
+    AlignmentSet(AlignmentI a)
+    {
+      al = a;
+      trees = new ArrayList<NewickFile>();
+    }
+
+    /**
+     * deuniquify this alignment using the seqDetails of the enclosing
+     * JalviewDataset, and then rename the associated nodes of each tree held
+     * for it. Does nothing if there are no seqDetails.
+     */
+    void deuniquifyAlignment()
+    {
+      if (seqDetails == null || seqDetails.size() == 0)
+      {
+        // nothing to do
+        return;
+      }
+      // 1. recover correct names and attributes for each sequence in alignment.
+      /*
+       * TODO: housekeeping w.r.t. recovery of dataset and annotation
+       * references for input sequences, and then dataset sequence creation
+       * for new sequences retrieved from service. Finally, attempt to
+       * de-uniquify to recover input sequence identity, and try to map back
+       * onto dataset. Note: this
+       * jalview.analysis.SeqsetUtils.deuniquify(SeqNames, alseqs, true); will
+       * NOT WORK - the returned alignment may contain multiple versions of
+       * the input sequence, each being a subsequence of the original.
+       * deuniquify also removes existing annotation and features added in the
+       * previous step... al.setDataset(dataset); // add in new sequences
+       * retrieved from sequence search which are not already in dataset. //
+       * trigger a 'fetchDBids' to annotate sequences with database ids...
+       * The call that was left commented out at this point was:
+       * jalview.analysis.SeqsetUtils.deuniquifyAndMerge(parentDataset,
+       * seqDetails, al, true);
+       */
+      jalview.analysis.SeqsetUtils.deuniquify(seqDetails,
+              al.getSequencesArray(), true);
+      // 2. Update names of associated nodes in any trees
+      for (NewickFile nf : trees)
+      {
+        // this works because all trees have already had their node/SequenceI
+        // associations created.
+        jalview.analysis.NJTree njt = new jalview.analysis.NJTree(
+                al.getSequencesArray(), nf);
+        // this just updates the displayed leaf names on the tree according to
+        // the SequenceIs.
+        njt.renameAssociatedNodes();
+      }
+    }
+  }
+
+  /**
+   * current set of feature colours
+   */
+  Hashtable featureColours;
+
+  /**
+   * original identity of each sequence in results
+   */
+  Hashtable seqDetails;
+
+  /**
+   * create an empty context: no parent dataset, no alignments, and empty
+   * seqDetails and featureColours tables
+   */
+  public JalviewDataset()
+  {
+    seqDetails = new Hashtable();
+    al = new ArrayList<AlignmentSet>();
+    parentDataset = null;
+    featureColours = new Hashtable();
+  }
+
+  /**
+   * context created from an existing dataset, with no parent alignment.
+   *
+   * @param aldataset
+   *          parent dataset for any new alignment/sequence data
+   * @param fc
+   *          (may be null) feature settings where new feature renderstyles are
+   *          stored
+   * @param seqDets
+   *          (may be null) anonymised sequence information created by Sequence
+   *          uniquifier
+   */
+  public JalviewDataset(AlignmentI aldataset, Hashtable fc, Hashtable seqDets)
+  {
+    this(aldataset, fc, seqDets, null);
+  }
+
+  /**
+   * context created from an existing dataset and, optionally, an existing
+   * alignment.
+   *
+   * @param aldataset
+   *          parent dataset for any new alignment/sequence data (must not be
+   *          null). Only used when parentAlignment is null.
+   * @param fc
+   *          (may be null) feature settings for the alignment where new
+   *          feature renderstyles are stored
+   * @param seqDets
+   *          (may be null) anonymised sequence information created by Sequence
+   *          uniquifier
+   * @param parentAlignment
+   *          (may be null) alignment to associate new annotation and trees
+   *          with.
+   */
+  public JalviewDataset(AlignmentI aldataset, Hashtable fc,
+          Hashtable seqDets, AlignmentI parentAlignment)
+  {
+    this();
+    parentDataset = aldataset;
+    if (parentAlignment != null)
+    {
+      // the alignment's own dataset takes precedence over aldataset
+      parentDataset = parentAlignment.getDataset();
+      if (parentDataset == null)
+      {
+        // no dataset: the alignment itself is used as the parent dataset
+        parentDataset = parentAlignment;
+      }
+      else
+      {
+        addAlignment(parentAlignment);
+      }
+    }
+    if (seqDets != null)
+    {
+      seqDetails = seqDets;
+    }
+    if (fc != null)
+    {
+      featureColours = fc;
+    }
+  }
+
+  /**
+   * @return true if at least one alignment has been added to this context
+   */
+  public boolean hasAlignments()
+  {
+    return al != null && al.size() > 0;
+  }
+
+  /**
+   * @return the most recently added alignment, or null if there are none
+   */
+  public AlignmentI getLastAlignment()
+  {
+    AlignmentSet last = getLastAlignmentSet();
+    return (last == null) ? null : last.al;
+  }
+
+  /**
+   * @return the most recently added alignment set, or null if there are none
+   */
+  public AlignmentSet getLastAlignmentSet()
+  {
+    return hasAlignments() ? al.get(al.size() - 1) : null;
+  }
+
+  /**
+   * post process (deuniquify) the current alignment and its dependent data,
+   * and then add newal to the dataset.
+   *
+   * @param newal
+   *          alignment that becomes the current alignment
+   */
+  public void addAlignment(AlignmentI newal)
+  {
+    if (al == null)
+    {
+      al = new ArrayList<AlignmentSet>();
+    }
+    AlignmentSet last = getLastAlignmentSet();
+    if (last != null)
+    {
+      System.err.println("Deuniquifying last alignment set.");
+      last.deuniquifyAlignment();
+    }
+    al.add(new AlignmentSet(newal));
+  }
+
+  /**
+   * associate a tree with the most recently added alignment.
+   *
+   * @param nf
+   *          tree to add
+   * @throws IllegalStateException
+   *           if no alignment has been added to this context
+   */
+  public void addTreeFromFile(NewickFile nf)
+  {
+    AlignmentSet lal = getLastAlignmentSet();
+    if (lal == null)
+    {
+      throw new IllegalStateException(
+              "No alignment to associate the tree with.");
+    }
+    lal.trees.add(nf);
+  }
+
+  /**
+   * @return always false - sequence associated data is not yet supported
+   */
+  public boolean hasSequenceAssoc()
+  {
+    // TODO: discover where sequence associated data should be put.
+    return false;
+  }
+
+  /**
+   * @return always null - sequence associated data is not yet supported
+   */
+  public SequenceI getLastAssociatedSequence()
+  {
+    // TODO: delineate semantics for associating uniquified data with
+    // potentially de-uniquified sequence.
+    return null;
+  }
+}
\ No newline at end of file
diff --git a/src/jalview/io/packed/ParsePackedSet.java b/src/jalview/io/packed/ParsePackedSet.java
new file mode 100644
index 0000000..58892ed
--- /dev/null
+++ b/src/jalview/io/packed/ParsePackedSet.java
@@ -0,0 +1,297 @@
+package jalview.io.packed;
+
+import jalview.datamodel.AlignmentI;
+import jalview.io.AppletFormatAdapter;
+import jalview.io.FileParse;
+import jalview.io.FormatAdapter;
+import jalview.io.IdentifyFile;
+import jalview.io.packed.DataProvider.JvDataType;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+
+/**
+ * parses a series of DataProvider sources into jalview.datamodel objects, which
+ * are added to a JalviewDataset context
+ */
+public class ParsePackedSet
+{
+
+  /**
+   * return results as a series of jalview.datamodel objects suitable for
+   * display
+   *
+   * @param context
+   *          - context which is updated with new data (a new, empty context is
+   *          created if this is null)
+   * @param files
+   *          - source data
+   * @return list of data objects added to context
+   * @throws Exception
+   *           if a source cannot be parsed or has no alignment to be
+   *           associated with. Sources after the failed one are not parsed.
+   */
+  public Object[] getAlignment(JalviewDataset context,
+          Iterable<DataProvider> files) throws Exception
+  {
+    List<Object> rslt = new ArrayList<Object>();
+    if (context == null)
+    {
+      context = new JalviewDataset();
+    }
+    boolean deuniquify = false;
+    for (DataProvider dta : files)
+    {
+      FileParse src = dta.getDataSource();
+      Object added = null;
+      switch (dta.getType())
+      {
+      case ALIGNMENT:
+        added = parseAlignment(context, src);
+        // only deuniquify at the end if an alignment was really added
+        deuniquify |= (added != null);
+        break;
+      case ANNOTATION:
+        parseAnnotation(context, src);
+        break;
+      case SEQASSOCATED:
+        throw new UnsupportedOperationException(
+                "parsing of sequence associated data is not implemented");
+      case FEATURES:
+        parseFeatures(context, src);
+        break;
+      case TREE:
+        added = parseTree(context, src);
+        break;
+      default:
+        break;
+      }
+      if (added != null)
+      {
+        rslt.add(added);
+      }
+    }
+    if (deuniquify)
+    {
+      context.getLastAlignmentSet().deuniquifyAlignment();
+    }
+    return rslt.toArray();
+  }
+
+  /**
+   * identify the format of src, parse it as an alignment and add the alignment
+   * to context.
+   *
+   * @return the alignment that was added, or null if none was produced
+   * @throws IOException
+   *           if the format cannot be identified or is not a valid input
+   *           format, or if parsing fails
+   */
+  private AlignmentI parseAlignment(JalviewDataset context, FileParse src)
+          throws IOException
+  {
+    String fmt;
+    try
+    {
+      fmt = new IdentifyFile().Identify(src, false);
+    } catch (Exception ex)
+    {
+      throw new IOException("Couldn't identify alignment format.", ex);
+    }
+    if (fmt == null)
+    {
+      return null;
+    }
+    if (!FormatAdapter.isValidIOFormat(fmt, false))
+    {
+      // fmt is not a valid format name, so it is reported as the error
+      throw new IOException(fmt);
+    }
+    AlignmentI al;
+    try
+    {
+      al = new FormatAdapter().readFromFile(src, fmt);
+    } catch (Exception e)
+    {
+      throw new IOException("Failed to parse alignment from result set", e);
+    }
+    if (al != null)
+    {
+      // deuniquify and construct/merge additional dataset entries if
+      // necessary.
+      context.addAlignment(al);
+    }
+    return al;
+  }
+
+  /**
+   * parse src as a jalview annotation file for the last alignment in context.
+   *
+   * @throws IOException
+   *           if context has no alignment, or if parsing fails
+   */
+  private void parseAnnotation(JalviewDataset context, FileParse src)
+          throws IOException
+  {
+    if (!context.hasAlignments())
+    {
+      // could duplicate the dataset reference here as default behaviour for
+      // sequence associated annotation ?
+      throw new IOException(
+              "No alignment or sequence dataset to associate annotation with.");
+    }
+    try
+    {
+      Reader rdr = src.getReader();
+      BufferedReader br = (rdr instanceof BufferedReader) ? (BufferedReader) rdr
+              : new BufferedReader(rdr);
+      new jalview.io.AnnotationFile().parseAnnotationFrom(
+              context.getLastAlignment(), br);
+    } catch (Exception e)
+    {
+      throw new IOException(
+              "Failed to parse the annotation file associated with the alignment.",
+              e);
+    }
+  }
+
+  /**
+   * parse src as a features file for the last alignment in context, using the
+   * feature colours held by context.
+   *
+   * @throws IOException
+   *           if context has no alignment, or if parsing fails
+   */
+  private void parseFeatures(JalviewDataset context, FileParse src)
+          throws IOException
+  {
+    if (!context.hasAlignments())
+    {
+      throw new IOException("No alignment to associate features with.");
+    }
+    try
+    {
+      jalview.io.FeaturesFile ff = new jalview.io.FeaturesFile(src);
+      ff.parse(context.getLastAlignment(), context.featureColours, false);
+    } catch (Exception e)
+    {
+      throw new IOException(
+              "Failed to parse the Features file associated with the alignment.",
+              e);
+    }
+  }
+
+  /**
+   * parse src as a tree and associate it with the last alignment in context.
+   *
+   * @return the tree that was added, or null if src was not a valid tree (in
+   *         which case it is closed and skipped)
+   * @throws IOException
+   *           if parsing fails, or if context has no alignment
+   */
+  private jalview.io.NewickFile parseTree(JalviewDataset context,
+          FileParse src) throws IOException
+  {
+    jalview.io.NewickFile nf;
+    try
+    {
+      nf = new jalview.io.NewickFile(src);
+      if (!nf.isValid())
+      {
+        nf.close();
+        return null;
+      }
+    } catch (Exception e)
+    {
+      throw new IOException(
+              "Failed to parse the treeFile associated with the result.", e);
+    }
+    if (!context.hasAlignments())
+    {
+      throw new IOException("No alignment to associate the tree with.");
+    }
+    // do association to current alignment.
+    context.addTreeFromFile(nf);
+    return nf;
+  }
+
+  /**
+   * simple command line test. Arguments should be one or more pairs of
+   * arguments: a DataProvider.JvDataType name followed by a file name or
+   * URI. The routine will attempt to read each source in turn, and report
+   * what kind of Jalview datamodel objects would be created.
+   *
+   * @param args
+   *          type/source pairs
+   */
+  public static void main(String args[])
+  {
+    // make data providers from the set of keys/files
+    int i = 0;
+    List<DataProvider> dp = new ArrayList<DataProvider>();
+    while ((i + 1) < args.length)
+    {
+      final String type = args[i++].toUpperCase(Locale.ROOT);
+      final String file = args[i++];
+      final JvDataType jtype;
+      try
+      {
+        jtype = DataProvider.JvDataType.valueOf(type);
+      } catch (IllegalArgumentException e)
+      {
+        // valueOf never returns null: unknown names raise this exception
+        System.out.println("Couldn't parse source type token '" + type
+                + "'");
+        continue;
+      }
+      final FileParse fp;
+      try
+      {
+        fp = new FileParse(file, AppletFormatAdapter.checkProtocol(file));
+      } catch (Exception e)
+      {
+        System.err.println("Couldn't handle datasource of type " + jtype
+                + " using URI " + file);
+        e.printStackTrace();
+        return;
+      }
+      dp.add(new SimpleDataProvider(jtype, fp, null));
+    }
+    if (i < args.length)
+    {
+      System.out.print("** WARNING\nIgnoring unused arguments:\n");
+      while (i < args.length)
+      {
+        System.out.print(" " + args[i++]);
+      }
+      System.out.print("\n");
+    }
+    System.out.println("Now trying to parse set:");
+    Object[] newdm;
+    try
+    {
+      newdm = new ParsePackedSet().getAlignment(new JalviewDataset(), dp);
+    } catch (Exception e)
+    {
+      System.out.println("Test failed for these arguments.\n");
+      e.printStackTrace(System.out);
+      return;
+    }
+    if (newdm != null)
+    {
+      for (Object o : newdm)
+      {
+        System.out.println("Will need to create an " + o.getClass());
+      }
+
+      // now test uniquify/deuniquify stuff
+      // uniquify alignment and write alignment, annotation, features, and trees
+      // to buffers.
+      // import with deuniquify info, and compare results to input.
+    }
+  }
+}
diff --git a/src/jalview/io/packed/SimpleDataProvider.java b/src/jalview/io/packed/SimpleDataProvider.java
new file mode 100644
index 0000000..6b00ddb
--- /dev/null
+++ b/src/jalview/io/packed/SimpleDataProvider.java
@@ -0,0 +1,51 @@
+package jalview.io.packed;
+
+import jalview.io.FileParse;
+
+/**
+ * minimal implementation of the DataProvider interface. Allows a FileParse
+ * datasource to be specified as one of the DataProvider.JvDataType content
+ * types, with or without some other associated object as external reference.
+ * Note: getDataSource returns the same FileParse instance on every call.
+ */
+public class SimpleDataProvider implements DataProvider
+{
+  DataProvider.JvDataType jvtype;
+
+  FileParse source;
+
+  Object assocseq;
+
+  /**
+   * create a SimpleDataProvider
+   * @param type - contents of resource accessible via fp
+   * @param fp - datasource
+   * @param assoc - external object that fp's content should be associated with (may be null)
+   */
+  public SimpleDataProvider(DataProvider.JvDataType type, FileParse fp,
+      Object assoc)
+  {
+    jvtype = type;
+    source = fp;
+    assocseq = assoc;
+  }
+
+  @Override
+  public FileParse getDataSource()
+  {
+    return source;
+  }
+
+  @Override
+  public Object getSequenceTarget()
+  {
+    return assocseq;
+  }
+
+  @Override
+  public DataProvider.JvDataType getType()
+  {
+    return jvtype;
+  }
+
+}
\ No newline at end of file
-- 
1.7.10.2