import jalview.datamodel.AlignmentI;
import jalview.datamodel.Annotation;
import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.DBRefSource;
import jalview.datamodel.Mapping;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.schemes.ResidueProperties;
import jalview.util.Comparison;
+import jalview.util.DBRefUtils;
import jalview.util.Format;
import jalview.util.MessageManager;
if (accAnnotations != null && accAnnotations.containsKey("AC"))
{
- if (dbsource != null)
+ String dbr = (String) accAnnotations.get("AC");
+ if (dbr != null)
{
- String dbr = (String) accAnnotations.get("AC");
- if (dbr != null)
- {
- // we could get very clever here - but for now - just try to
- // guess accession type from source of alignment plus structure
- // of accession
- guessDatabaseFor(seqO, dbr, dbsource);
-
- }
+ // we could get very clever here - but for now - just try to guess the
+ // accession type from the type of sequence, the source of the alignment
+ // and the structure of the accession
+ guessDatabaseFor(seqO, dbr, dbsource);
}
// else - do what ? add the data anyway and prompt the user to
// specify what references these are ?
treeName = an.stringMatched(2);
treeString = new StringBuffer();
}
+ // TODO: JAL-3532 - this is where GF comments and database references are
+ // lost. Suggest overriding this method for Stockholm files to catch and
+ // properly process CC, DR etc. into multivalued properties.
setAlignmentProperty(an.stringMatched(1), an.stringMatched(2));
}
}
st = -1;
}
}
+ if (dbsource == null)
+ {
+ // make up an origin based on whether the sequence looks like it is nucleotide
+ // or protein
+ dbsource = (seqO.isProtein()) ? "PFAM" : "RFAM";
+ }
if (dbsource.equals("PFAM"))
{
seqdb = "UNIPROT";
return annot;
}
+ private String dbref_to_ac_record(DBRefEntry ref)
+ {
+ return ref.getSource().toString() + " ; "
+ + ref.getAccessionId().toString();
+ }
@Override
public String print(SequenceI[] s, boolean jvSuffix)
{
int maxid = 0;
int in = 0;
Hashtable dataRef = null;
+ boolean isAA = s[in].isProtein();
while ((in < s.length) && (s[in] != null))
{
+
String tmp = printId(s[in], jvSuffix);
max = Math.max(max, s[in].getLength());
}
if (s[in].getDBRefs() != null)
{
- for (int idb = 0; idb < s[in].getDBRefs().length; idb++)
+ if (dataRef == null)
+ {
+ dataRef = new Hashtable();
+ }
+ List<DBRefEntry> primrefs = s[in].getPrimaryDBRefs();
+ if (primrefs.size() >= 1)
{
- if (dataRef == null)
+ dataRef.put(tmp, dbref_to_ac_record(primrefs.get(0)));
+ }
+ else
+ {
+ for (int idb = 0; idb < s[in].getDBRefs().length; idb++)
{
- dataRef = new Hashtable();
+ DBRefEntry dbref = s[in].getDBRefs()[idb];
+ dataRef.put(tmp, dbref_to_ac_record(dbref));
+ // if we put in a uniprot or EMBL record then we're done:
+ if (isAA && DBRefSource.UNIPROT
+ .equals(DBRefUtils.getCanonicalName(dbref.getSource())))
+ {
+ break;
+ }
+ if (!isAA && DBRefSource.EMBL
+ .equals(DBRefUtils.getCanonicalName(dbref.getSource())))
+ {
+ break;
+ }
}
-
- String datAs1 = s[in].getDBRefs()[idb].getSource().toString()
- + " ; "
- + s[in].getDBRefs()[idb].getAccessionId().toString();
- dataRef.put(tmp, datAs1);
}
}
in++;
String type = (String) dataRef.remove(idd);
out.append(new Format("%-" + (maxid - 2) + "s")
.form("#=GS " + idd.toString() + " "));
- if (type.contains("PFAM") || type.contains("RFAM"))
+ if (isAA && type.contains("UNIPROT")
+ || (!isAA && type.contains("EMBL")))
{
out.append(" AC " + type.substring(type.indexOf(";") + 1));
*/
package jalview.io;
+import static org.testng.Assert.assertTrue;
import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertNotNull;
import static org.testng.AssertJUnit.assertTrue;
import static org.testng.AssertJUnit.fail;
+import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.Annotation;
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.gui.JvOptionPane;
+import jalview.util.DBRefUtils;
import java.io.File;
import java.util.Arrays;
}
/**
+ * JAL-3529 - verify uniprot refs for sequences are output for sequences
+ * retrieved via Pfam
+ */
+ @Test(groups = { "Functional" })
+ public void dbrefOutput() throws Exception
+ {
+ // sequences retrieved in a Pfam domain alignment also have a PFAM database
+ // reference
+ SequenceI sq = new Sequence("FER2_SPIOL", "AASSDDDFFF");
+ sq.addDBRef(new DBRefEntry("UNIPROT", "1", "P00224"));
+ sq.addDBRef(new DBRefEntry("PFAM", "1", "P00224.1"));
+ sq.addDBRef(new DBRefEntry("PFAM", "1", "PF00111"));
+ AppletFormatAdapter af = new AppletFormatAdapter();
+ String toStockholm = af.formatSequences(FileFormat.Stockholm,
+ new Alignment(new SequenceI[]
+ { sq }), false);
+ System.out.println(toStockholm);
+ // bleh - java.util.Regex sucks
+ assertTrue(
+ Pattern.compile(
+ "^#=GS\\s+FER2_SPIOL(/\\d+-\\d+)?\\s+AC\\s+P00224$",
+ Pattern.MULTILINE).matcher(toStockholm)
+ .find(),
+ "Couldn't locate UNIPROT Accession in generated Stockholm file.");
+ AlignmentI fromStockholm = af.readFile(toStockholm,
+ DataSourceType.PASTE, FileFormat.Stockholm);
+ SequenceI importedSeq = fromStockholm.getSequenceAt(0);
+ assertTrue(importedSeq.getDBRefs().length == 1,
+ "Expected just one database reference to be added to sequence.");
+ assertTrue(
+ importedSeq.getDBRefs()[0].getAccessionId().indexOf(" ") == -1,
+ "Spaces were found in accession ID.");
+ List<DBRefEntry> dbrefs = DBRefUtils.searchRefs(importedSeq.getDBRefs(),
+ "P00224");
+ assertTrue(dbrefs.size() == 1,
+ "Couldn't find Uniprot DBRef on re-imported sequence.");
+
+ }
+
+ /**
* test alignment data in given file can be imported, exported and reimported
* with no dataloss
*
* @param f
- * - source datafile (IdentifyFile.identify() should work with it)
+ * - source datafile (IdentifyFile.identify()
+ * should work with it)
* @param ioformat
- * - label for IO class used to write and read back in the data from
- * f
+ * - label for IO class used to write and read
+ * back in the data from f
* @param ignoreFeatures
* @param ignoreRowVisibility
* @param allowNullAnnotations