From: jprocter Date: Fri, 21 Jan 2011 17:05:43 +0000 (+0000) Subject: relaxed ID matching parameter JAL-753 X-Git-Tag: Release_2_7~282 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=20c600dc8c628f5a82cc32b1fe026291756f1d8e;p=jalview.git relaxed ID matching parameter JAL-753 --- diff --git a/src/jalview/io/FeaturesFile.java b/src/jalview/io/FeaturesFile.java index 64bb539..34253cb 100755 --- a/src/jalview/io/FeaturesFile.java +++ b/src/jalview/io/FeaturesFile.java @@ -20,6 +20,7 @@ package jalview.io; import java.io.*; import java.util.*; +import jalview.analysis.SequenceIdMatcher; import jalview.datamodel.*; import jalview.schemes.*; import jalview.util.Format; @@ -70,28 +71,60 @@ public class FeaturesFile extends AlignFile } /** - * The Application can render HTML, but the applet will remove HTML tags and - * replace links with %LINK% Both need to read links in HTML however - * - * @throws IOException - * DOCUMENT ME! + * Parse GFF or sequence features file using case-independent matching, discarding URLs + * @param align - alignment/dataset containing sequences that are to be annotated + * @param colours - hashtable to store feature colour definitions + * @param removeHTML - process html strings into plain text + * @return true if features were added */ public boolean parse(AlignmentI align, Hashtable colours, boolean removeHTML) { - return parse(align, colours, null, removeHTML); + return parse(align, colours, null, removeHTML, false); } /** - * The Application can render HTML, but the applet will remove HTML tags and - * replace links with %LINK% Both need to read links in HTML however - * - * @throws IOException - * DOCUMENT ME! + * Parse GFF or sequence features file optionally using case-independent matching, discarding URLs + * @param align - alignment/dataset containing sequences that are to be annotated + * @param colours - hashtable to store feature colour definitions + * @param removeHTML - process html strings into plain text + * @param relaxedIdmatching - when true, ID matches to compound sequence IDs are allowed + * @return true if features were added + */ + public boolean parse(AlignmentI align, + Hashtable colours, boolean removeHTML, boolean relaxedIdMatching) + { + return parse(align, colours, null, removeHTML, relaxedIdMatching); + } + + /** + * Parse GFF or sequence features file optionally using case-independent matching + * @param align - alignment/dataset containing sequences that are to be annotated + * @param colours - hashtable to store feature colour definitions + * @param featureLink - hashtable to store associated URLs + * @param removeHTML - process html strings into plain text + * @return true if features were added */ public boolean parse(AlignmentI align, Hashtable colours, Hashtable featureLink, boolean removeHTML) { + return parse(align, colours, featureLink, removeHTML, false); + } + + /** + /** + * Parse GFF or sequence features file + * @param align - alignment/dataset containing sequences that are to be annotated + * @param colours - hashtable to store feature colour definitions + * @param featureLink - hashtable to store associated URLs + * @param removeHTML - process html strings into plain text + * @param relaxedIdmatching - when true, ID matches to compound sequence IDs are allowed + * @return true if features were added + */ + public boolean parse(AlignmentI align, + Hashtable colours, Hashtable featureLink, boolean removeHTML, boolean relaxedIdmatching) + { + String line = null; try { @@ -369,7 +402,7 @@ public class FeaturesFile extends AlignFile // Still possible this is an old Jalview file, // which does not have type colours at the beginning seqId = token = st.nextToken(); - seq = align.findName(seqId, true); + seq = findName(align, seqId, relaxedIdmatching); if (seq != null) { desc = st.nextToken(); @@ -478,7 +511,7 @@ public class FeaturesFile extends AlignFile if (!token.equals("ID_NOT_SPECIFIED")) { - seq = align.findName(seqId = token, true); + seq = findName(align, seqId = token, relaxedIdmatching); st.nextToken(); } else @@ -548,17 +581,52 @@ public class FeaturesFile extends AlignFile GFFFile = false; } } + resetMatcher(); } catch (Exception ex) { System.out.println(line); System.out.println("Error parsing feature file: " + ex + "\n" + line); ex.printStackTrace(System.err); + resetMatcher(); return false; } return true; } + private AlignmentI lastmatchedAl = null; + + private SequenceIdMatcher matcher = null; + + /** + * clear any temporary handles used to speed up ID matching + */ + private void resetMatcher() + { + lastmatchedAl = null; + matcher = null; + } + + private SequenceI findName(AlignmentI align, String seqId, + boolean relaxedIdMatching) + { + SequenceI match = null; + if (relaxedIdMatching) + { + if (lastmatchedAl != align) + { + matcher = new SequenceIdMatcher( + (lastmatchedAl = align).getSequencesArray()); + } + match = matcher.findIdMatch(seqId); + } + else + { + match = align.findName(seqId, true); + } + return match; + } + public void parseDescriptionHTML(SequenceFeature sf, boolean removeHTML) { if (sf.getDescription() == null) @@ -929,19 +997,18 @@ public class FeaturesFile extends AlignFile out.append(source); out.append("\t"); out.append(next[j].type); - out.append( "\t"); - out.append( next[j].begin ); out.append("\t"); - out.append( - next[j].end); - out.append( "\t"); - out.append( next[j].score); - out.append( "\t"); + out.append(next[j].begin); + out.append("\t"); + out.append(next[j].end); + out.append("\t"); + out.append(next[j].score); + out.append("\t"); if (next[j].getValue("STRAND") != null) { out.append(next[j].getValue("STRAND")); - out.append( "\t"); + out.append("\t"); } else { @@ -956,7 +1023,8 @@ public class FeaturesFile extends AlignFile { out.append("."); } - // TODO: verify/check GFF - should there be a /t here before attribute output ? + // TODO: verify/check GFF - should there be a /t here before attribute + // output ? if (next[j].getValue("ATTRIBUTES") != null) { @@ -989,4 +1057,5 @@ public class FeaturesFile extends AlignFile { return "USE printGFFFormat() or printJalviewFormat()"; } + } diff --git a/src/jalview/io/packed/JalviewDataset.java b/src/jalview/io/packed/JalviewDataset.java index 5e5c446..f4fe55b 100644 --- a/src/jalview/io/packed/JalviewDataset.java +++ b/src/jalview/io/packed/JalviewDataset.java @@ -148,6 +148,8 @@ public class JalviewDataset */ Hashtable seqDetails; + public boolean relaxedIdMatching=false; + public JalviewDataset() { seqDetails = new Hashtable(); diff --git a/src/jalview/io/packed/ParsePackedSet.java b/src/jalview/io/packed/ParsePackedSet.java index 787a88f..97802f3 100644 --- a/src/jalview/io/packed/ParsePackedSet.java +++ b/src/jalview/io/packed/ParsePackedSet.java @@ -133,8 +133,8 @@ public class ParsePackedSet try { jalview.io.FeaturesFile ff = new jalview.io.FeaturesFile(src); - context.updateSetModified(ff.parse(context.getLastAlignment(), - context.featureColours, false)); + context.updateSetModified(ff.parse(context.getLastAlignment(), + context.featureColours, false, context.relaxedIdMatching)); } catch (Exception e) { errmsg = ("Failed to parse the Features file associated with the alignment.");