import jalview.datamodel.AlignmentI;
import jalview.datamodel.Annotation;
import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.DBRefSource;
import jalview.datamodel.Mapping;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.schemes.ResidueProperties;
import jalview.util.Comparison;
+import jalview.util.DBRefUtils;
import jalview.util.Format;
import jalview.util.MessageManager;
public static final Regex DETECT_BRACKETS = new Regex(
"(<|>|\\[|\\]|\\(|\\)|\\{|\\})");
- public static final String RNASS_BRACKETS = "<>[]() {}AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz";
+ // WUSS extended symbols. Avoid ambiguity with protein SS annotations by using NOT_RNASS first.
+ public static final String RNASS_BRACKETS = "<>[](){}AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz";
+
+ // Use the following regex to decide whether a (whole) annotation line is NOT
+ // an RNA SS: it matches only when the line consists solely of E, H, e, h and
+ // other non-brace/non-alpha chars. The excluded letter ranges must skip both
+ // E and H (and e and h), otherwise protein helix annotation ('H') would make
+ // the line look like RNA secondary structure.
+ private static final Regex NOT_RNASS = new Regex(
+     "^[^<>[\\](){}A-DFGI-Za-dfgi-z]*$");
StringBuffer out; // output buffer
if (accAnnotations != null && accAnnotations.containsKey("AC"))
{
- if (dbsource != null)
+ String dbr = (String) accAnnotations.get("AC");
+ if (dbr != null)
{
- String dbr = (String) accAnnotations.get("AC");
- if (dbr != null)
- {
- // we could get very clever here - but for now - just try to
- // guess accession type from source of alignment plus structure
- // of accession
- guessDatabaseFor(seqO, dbr, dbsource);
-
- }
+ // we could get very clever here - but for now - just try to
+ // guess accession type from type of sequence, source of alignment
+ // plus structure of accession
+ guessDatabaseFor(seqO, dbr, dbsource);
}
// else - do what ? add the data anyway and prompt the user to
// specify what references these are ?
treeName = an.stringMatched(2);
treeString = new StringBuffer();
}
+ // TODO: JAL-3532 - this is where GF comments and database references are lost
+ // suggest overriding this method for Stockholm files to catch and properly
+ // process CC, DR etc into multivalued properties
setAlignmentProperty(an.stringMatched(1), an.stringMatched(2));
}
}
st = -1;
}
}
+ if (dbsource == null)
+ {
+ // make up an origin based on whether the sequence looks like it is nucleotide
+ // or protein
+ dbsource = (seqO.isProtein()) ? "PFAM" : "RFAM";
+ }
if (dbsource.equals("PFAM"))
{
seqdb = "UNIPROT";
// convert1 = OPEN_PAREN.replaceAll(annots);
// convert2 = CLOSE_PAREN.replaceAll(convert1);
// annots = convert2;
-
- // DEBUG
- System.out.println(
- "*** parseAnnotationRow called with\n annotation='"
- + annotation + "'\n label='" + label
- + "'\n annots='" + annots + "'");
String type = label;
if (label.contains("_cons"))
if (type.equalsIgnoreCase("secondary structure"))
{
ss = true;
- isrnass = DETECT_BRACKETS.search(annots);
+ isrnass = !NOT_RNASS.search(annots); // sorry about the double negative
+ // here (it's easier for dealing with
+ // other non-alpha-non-brace chars)
}
if (type.equalsIgnoreCase("posterior probability"))
{
return annot;
}
+ private String dbref_to_ac_record(DBRefEntry ref)
+ {
+ return ref.getSource().toString() + " ; "
+ + ref.getAccessionId().toString();
+ }
@Override
public String print(SequenceI[] s, boolean jvSuffix)
{
int maxid = 0;
int in = 0;
Hashtable dataRef = null;
+ boolean isAA = s[in].isProtein();
while ((in < s.length) && (s[in] != null))
{
+
String tmp = printId(s[in], jvSuffix);
max = Math.max(max, s[in].getLength());
}
if (s[in].getDBRefs() != null)
{
- for (int idb = 0; idb < s[in].getDBRefs().length; idb++)
+ if (dataRef == null)
+ {
+ dataRef = new Hashtable();
+ }
+ List<DBRefEntry> primrefs = s[in].getPrimaryDBRefs();
+ if (primrefs.size() >= 1)
+ {
+ dataRef.put(tmp, dbref_to_ac_record(primrefs.get(0)));
+ }
+ else
{
- if (dataRef == null)
+ for (int idb = 0; idb < s[in].getDBRefs().length; idb++)
{
- dataRef = new Hashtable();
+ DBRefEntry dbref = s[in].getDBRefs()[idb];
+ dataRef.put(tmp, dbref_to_ac_record(dbref));
+ // if we put in a uniprot or EMBL record then we're done:
+ if (isAA && DBRefSource.UNIPROT
+ .equals(DBRefUtils.getCanonicalName(dbref.getSource())))
+ {
+ break;
+ }
+ if (!isAA && DBRefSource.EMBL
+ .equals(DBRefUtils.getCanonicalName(dbref.getSource())))
+ {
+ break;
+ }
}
-
- String datAs1 = s[in].getDBRefs()[idb].getSource().toString()
- + " ; "
- + s[in].getDBRefs()[idb].getAccessionId().toString();
- dataRef.put(tmp, datAs1);
}
}
in++;
String type = (String) dataRef.remove(idd);
out.append(new Format("%-" + (maxid - 2) + "s")
.form("#=GS " + idd.toString() + " "));
- if (type.contains("PFAM") || type.contains("RFAM"))
+ if (isAA && type.contains("UNIPROT")
+ || (!isAA && type.contains("EMBL")))
{
out.append(" AC " + type.substring(type.indexOf(";") + 1));
String key = type2id(alAnot[j].label);
boolean isrna = alAnot[j].isValidStruc();
- // bs debug
- System.out.println("SEQUENCE " + i + "/" + s.length + " ISRNA="
- + isrna + ".");
+
if (isrna)
{
// hardwire to secondary structure if there is RNA secondary
{
seq += outputCharacter(key, k, isrna, ann, s[i]);
}
- // bs debug
- System.out.println("APPENDING SEQ: KEY=" + key + " ISRNA=" + isrna
- + ".\n" + "SEQ=" + seq + "\n");
out.append(seq);
out.append(newline);
}
out.append(new Format("%-" + maxid + "s")
.form(printId(s[i], jvSuffix) + " "));
out.append(s[i].getSequenceAsString());
- // bs debug
- System.out.println("ALSO APPENDING " + s[i].getSequenceAsString());
out.append(newline);
i++;
}
{
seq += outputCharacter(key, j, isrna, aa.annotations, null);
}
-
- // bs debug
- System.out.println(
- "PRINTING SEQ: KEY=" + key + " ISRNA=" + isrna + ".\n"
- + "SEQ=" + seq + "\n");
-
out.append(seq);
out.append(newline);
}
String ch = (annot == null)
? ((sequenceI == null) ? "-"
: Character.toString(sequenceI.getCharAt(k)))
- : annot.displayCharacter;
+ : (annot.displayCharacter == null
+ ? String.valueOf(annot.secondaryStructure)
+ : annot.displayCharacter);
+ if (ch == null)
+ {
+ ch = " ";
+ }
if (key != null && key.equals("SS"))
{
char ssannotchar = ' ';
}
if (charset)
{
- if (ssannotchar == ' ' && isrna)
- {
- ssannotchar = '.';
- }
- return ssannotchar;
+ return (ssannotchar == ' ' && isrna) ? '.' : ssannotchar;
}
}