From 91d9dd665cd82447439e23df334a59e93647a8d0 Mon Sep 17 00:00:00 2001 From: jprocter Date: Wed, 7 Dec 2005 16:33:31 +0000 Subject: [PATCH] Sequence uniquify/deuniquify now uses the SequenceIdMatcher to cope with mangled 'safe sequence names', and JPredClient uses these methods. --- src/jalview/analysis/SeqsetUtils.java | 85 +++++++++++++++++++++++++++------ src/jalview/ws/JPredClient.java | 20 ++++---- 2 files changed, 82 insertions(+), 23 deletions(-) diff --git a/src/jalview/analysis/SeqsetUtils.java b/src/jalview/analysis/SeqsetUtils.java index 564e3f1..4432414 100755 --- a/src/jalview/analysis/SeqsetUtils.java +++ b/src/jalview/analysis/SeqsetUtils.java @@ -114,6 +114,14 @@ public class SeqsetUtils return new String("Sequence" + i); } + /** + * Generates a hash of SeqCharacterHash properties for each sequence + * in a sequence set, and optionally renames the sequences to an + * unambiguous 'safe' name. + * @param sequences SequenceI[] + * @param write_names boolean set this to rename each of the sequences to its unique_name(index) name + * @return Hashtable to be passed to @see deuniquify to recover original names (and properties) for renamed sequences + */ public static Hashtable uniquify(SequenceI[] sequences, boolean write_names) { // Generate a safely named sequence set and a hash to recover the sequence names @@ -134,25 +142,72 @@ public class SeqsetUtils return map; } - + /** + * recover unsafe sequence names and original properties for a sequence + * set using a map generated by @see uniquify(sequences,true) + * @param map Hashtable + * @param sequences SequenceI[] + * @return boolean + */ public static boolean deuniquify(Hashtable map, SequenceI[] sequences) { - // recover unsafe sequence names for a sequence set - boolean allfound = true; - - for (int i = 0; i < sequences.length; i++) - { - if (map.containsKey(sequences[i].getName())) - { - Hashtable sqinfo = (Hashtable) map.get(sequences[i].getName()); - SeqCharacterUnhash(sequences[i], sqinfo); - } 
- else - { - allfound = false; + jalview.analysis.SequenceIdMatcher matcher = new SequenceIdMatcher(sequences); + SequenceI msq = null; + Enumeration keys = map.keys(); + Vector unmatched = new Vector(); + for (int i=0, j=sequences.length; i0) { + System.err.println("Did not find matches for :"); + for (Enumeration i = unmatched.elements(); i.hasMoreElements(); System.out.println(((SequenceI) i.nextElement()).getName())) + ; + return false; + } - return allfound; + return true; + } + /** + * returns a subset of the sequenceI sequences, + * including only those that contain at least one residue. + * @param sequences SequenceI[] + * @return SequenceI[] + */ + public static SequenceI[] getNonEmptySequenceSet(SequenceI[] sequences) { + // Identify first row of alignment with residues for prediction + boolean ungapped[] = new boolean[sequences.length]; + int msflen=0; + for (int i=0,j=sequences.length; i