/*
* Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
* Copyright (C) $$Year-Rel$$ The Jalview Authors
*
* This file is part of Jalview.
*
* Jalview is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* Jalview is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Jalview. If not, see <http://www.gnu.org/licenses/>.
* The Jalview Authors are detailed in the 'AUTHORS' file.
*/
package jalview.analysis;
import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertFalse;
import static org.testng.AssertJUnit.assertNotSame;
import static org.testng.AssertJUnit.assertNull;
import static org.testng.AssertJUnit.assertSame;
import static org.testng.AssertJUnit.assertTrue;
import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.Mapping;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.util.DBRefUtils;
import jalview.util.MapList;
import jalview.ws.SequenceFetcher;
import jalview.ws.SequenceFetcherFactory;
import java.util.ArrayList;
import java.util.List;
import org.testng.annotations.AfterClass;
import org.testng.annotations.Test;
public class CrossRefTest
{
/**
 * Checks that DBRefUtils.selectDbRefs picks the expected entries out of a
 * mixed array of database references, once selecting nucleotide sources and
 * once selecting protein sources. The selected entries are expected to be the
 * same objects as supplied, in the same relative order. The PFAM entry is
 * expected in neither selection, the ENSEMBL entry in both.
 */
@Test(groups = { "Functional" })
public void testFindXDbRefs()
{
  DBRefEntry uniprot = new DBRefEntry("UNIPROT", "1", "A123");
  DBRefEntry trembl = new DBRefEntry("UNIPROTKB/TREMBL", "1", "A123");
  DBRefEntry pdb = new DBRefEntry("pdb", "1", "A123");
  DBRefEntry emblCdsProtein = new DBRefEntry("EMBLCDSPROTEIN", "1", "A123");
  DBRefEntry embl = new DBRefEntry("embl", "1", "A123");
  DBRefEntry emblCds = new DBRefEntry("emblCDS", "1", "A123");
  DBRefEntry geneDb = new DBRefEntry("GeneDB", "1", "A123");
  DBRefEntry pfam = new DBRefEntry("PFAM", "1", "A123");
  // ENSEMBL is a source of either dna or protein sequence data
  DBRefEntry ensembl = new DBRefEntry("ENSEMBL", "1", "A123");

  DBRefEntry[] allRefs = { uniprot, trembl, pdb, emblCdsProtein, embl,
      emblCds, geneDb, pfam, ensembl };

  /*
   * selecting with 'true': just the DNA refs
   */
  DBRefEntry[] expectedDna = { embl, emblCds, geneDb, ensembl };
  DBRefEntry[] selected = DBRefUtils.selectDbRefs(true, allRefs);
  assertEquals(expectedDna.length, selected.length);
  for (int i = 0; i < expectedDna.length; i++)
  {
    assertSame(expectedDna[i], selected[i]);
  }

  /*
   * selecting with 'false': just the protein refs
   */
  DBRefEntry[] expectedProtein = { uniprot, trembl, pdb, emblCdsProtein,
      ensembl };
  selected = DBRefUtils.selectDbRefs(false, allRefs);
  assertEquals(expectedProtein.length, selected.length);
  for (int i = 0; i < expectedProtein.length; i++)
  {
    assertSame(expectedProtein[i], selected[i]);
  }
}
/**
 * Test the method that finds a sequence's "product" xref source databases,
 * which may be direct (dbrefs on the sequence), or indirect (dbrefs on
 * sequences which share a dbref with the sequence)
 */
@Test(groups = { "Functional" })
public void testFindXrefSourcesForSequence_proteinToDna()
{
  SequenceI seq = new Sequence("Seq1", "MGKYQARLSS");
  // typed list: the sources collected are database names
  List<String> sources = new ArrayList<String>();
  AlignmentI al = new Alignment(new SequenceI[] {});

  /*
   * first with no dbrefs to search
   */
  CrossRef.findXrefSourcesForSequence(seq, false, al, sources);
  assertTrue(sources.isEmpty());

  /*
   * add some dbrefs to sequence
   */
  // protein db is not a candidate for findXrefSources
  seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
  // dna coding databases are
  seq.addDBRef(new DBRefEntry("EMBL", "0", "E2345"));
  // a second EMBL xref should not result in a duplicate
  seq.addDBRef(new DBRefEntry("EMBL", "0", "E2346"));
  seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347"));
  seq.addDBRef(new DBRefEntry("GENEDB", "0", "E2348"));
  seq.addDBRef(new DBRefEntry("ENSEMBL", "0", "E2349"));
  // ENSEMBLGENOMES is not expected among the sources found (see TODO below)
  seq.addDBRef(new DBRefEntry("ENSEMBLGENOMES", "0", "E2350"));
  CrossRef.findXrefSourcesForSequence(seq, false, al, sources);
  assertEquals(4, sources.size());
  assertEquals("[EMBL, EMBLCDS, GENEDB, ENSEMBL]", sources.toString());

  /*
   * add a sequence to the alignment which has a dbref to UNIPROT|A1234
   * and others to dna coding databases
   */
  sources.clear();
  seq.setDBRefs(null);
  seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
  seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347"));
  SequenceI seq2 = new Sequence("Seq2", "MGKYQARLSS");
  seq2.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
  seq2.addDBRef(new DBRefEntry("EMBL", "0", "E2345"));
  seq2.addDBRef(new DBRefEntry("GENEDB", "0", "E2348"));
  // TODO include ENSEMBLGENOMES in DBRefSource.DNACODINGDBS ?
  al.addSequence(seq2);
  CrossRef.findXrefSourcesForSequence(seq, false, al, sources);
  // direct source (EMBLCDS) first, then those found via seq2
  assertEquals(3, sources.size());
  assertEquals("[EMBLCDS, EMBL, GENEDB]", sources.toString());
}
/**
 * Test for finding 'product' sequences when only an indirect xref exists:
 * the nucleotide sequence has no dbref to the requested source, but a
 * peptide sequence in the alignment, with which it shares a nucleotide
 * dbref, does
 */
@Test(groups = { "Functional" })
public void testFindXrefSequences_indirectDbrefToProtein()
{
  /*
   * nucleotide sequence with dbref EMBL|AF039662 only
   */
  SequenceI nucleotide = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
  nucleotide.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));

  /*
   * peptide sequence with dbrefs EMBL|AF039662 and UNIPROT|Q9ZTS2
   */
  SequenceI peptide = new Sequence("Q9ZTS2", "MASVSATMISTS");
  peptide.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
  peptide.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));

  SequenceI[] alignedSeqs = { nucleotide, peptide };
  AlignmentI dataset = new Alignment(alignedSeqs);

  /*
   * Ask for the UNIPROT xrefs of the nucleotide sequence: it has no
   * UNIPROT dbref itself, but the peptide carrying the matching
   * nucleotide dbref does, so the peptide is the expected result
   */
  SequenceI[] selection = { nucleotide };
  Alignment products = CrossRef.findXrefSequences(selection, true,
          "UNIPROT", dataset);
  assertEquals(1, products.getHeight());
  assertSame(peptide, products.getSequenceAt(0));
}
/**
 * Test for finding 'product' sequences when only an indirect xref exists:
 * the peptide sequence has no dbref to the requested source, but a
 * nucleotide sequence in the alignment, with which it shares a protein
 * dbref, does
 */
@Test(groups = { "Functional" })
public void testFindXrefSequences_indirectDbrefToNucleotide()
{
  /*
   * peptide sequence with dbref UNIPROT|Q9ZTS2 only
   */
  SequenceI peptide = new Sequence("Q9ZTS2", "MASVSATMISTS");
  peptide.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));

  /*
   * nucleotide sequence with dbrefs EMBL|AF039662 and UNIPROT|Q9ZTS2
   */
  SequenceI nucleotide = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
  nucleotide.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
  nucleotide.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));

  SequenceI[] alignedSeqs = { nucleotide, peptide };
  AlignmentI dataset = new Alignment(alignedSeqs);

  /*
   * Ask for the EMBL xrefs of the peptide sequence: it has no EMBL dbref
   * itself, but the nucleotide carrying the matching peptide dbref does,
   * so the nucleotide is the expected result
   */
  SequenceI[] selection = { peptide };
  Alignment products = CrossRef.findXrefSequences(selection, false, "EMBL",
          dataset);
  assertEquals(1, products.getHeight());
  assertSame(nucleotide, products.getSequenceAt(0));
}
/**
 * Test for finding 'product' sequences when the selected sequence has no
 * dbref to the desired source, and no other sequence in the alignment
 * provides an indirect reference; a null result is expected
 */
@Test(groups = { "Functional" })
public void testFindXrefSequences_noDbrefs()
{
  /*
   * two nucleotide sequences; only the first has a (UNIPROT) dbref
   */
  SequenceI dnaWithRef = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
  dnaWithRef.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
  SequenceI dnaWithoutRef = new Sequence("AJ307031", "AAACCCTTT");

  SequenceI[] alignedSeqs = { dnaWithRef, dnaWithoutRef };
  AlignmentI dataset = new Alignment(alignedSeqs);

  /*
   * Ask for the UNIPROT xrefs of the sequence without dbrefs: it has no
   * direct dbref, and the other sequence (which has a UNIPROT dbref) is
   * not expected to be matched to it, so nothing is found
   */
  SequenceI[] selection = { dnaWithoutRef };
  Alignment products = CrossRef.findXrefSequences(selection, true,
          "UNIPROT", dataset);
  assertNull(products);
}
/**
 * Tests for the method that searches an alignment (with one sequence
 * excluded) for protein/nucleotide sequences with a given cross-reference
 */
@Test(groups = { "Functional" })
public void testSearchDataset()
{
  /*
   * nucleotide sequence with UNIPROT AND EMBL dbref
   * peptide sequence with UNIPROT dbref
   */
  SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
  dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
  dna1.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
  SequenceI pep1 = new Sequence("Q9ZTS2", "MLAVSRGQ");
  pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
  AlignmentI al = new Alignment(new SequenceI[] { dna1, pep1 });
  // typed list: the search collects the matching sequences here
  List<SequenceI> result = new ArrayList<SequenceI>();

  /*
   * first search for a dbref nowhere on the alignment:
   */
  DBRefEntry dbref = new DBRefEntry("UNIPROT", "0", "P30419");
  boolean found = CrossRef.searchDataset(dna1, dbref, al, result, null,
          true, true);
  assertFalse(found);
  assertTrue(result.isEmpty());

  // TODO we are setting direct=true here but it is set to
  // false in Jalview code...

  /*
   * search for a protein sequence with dbref UNIPROT:Q9ZTS2
   * (dna1 is the sequence searched from, so pep1 is the expected match)
   */
  dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
  found = CrossRef.searchDataset(dna1, dbref, al, result, null, true,
          true);
  assertTrue(found);
  assertEquals(1, result.size());
  assertSame(pep1, result.get(0));

  /*
   * search for a nucleotide sequence with dbref UNIPROT:Q9ZTS2
   * (pep1 is the sequence searched from, so dna1 is the expected match)
   */
  result.clear();
  dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
  found = CrossRef.searchDataset(pep1, dbref, al, result, null, true,
          false);
  assertTrue(found);
  assertEquals(1, result.size());
  assertSame(dna1, result.get(0));
}
/**
 * Test for finding 'product' sequences when the selected sequence has dbrefs
 * that each carry a mapping to a sequence; copies of the mapped sequences
 * are the expected result
 */
@Test(groups = { "Functional" })
public void testFindXrefSequences_fromDbRefMap()
{
  /*
   * first peptide, with one dbref and one sequence feature
   */
  SequenceI firstPeptide = new Sequence("Q9ZTS2", "MALFQRSV");
  firstPeptide.addDBRef(new DBRefEntry("Pfam", "0", "PF00111"));
  SequenceFeature firstFeature = new SequenceFeature("type", "desc", 12,
          14, 1f, "group");
  firstPeptide.addSequenceFeature(firstFeature);

  /*
   * second peptide, likewise with one dbref and one sequence feature
   */
  SequenceI secondPeptide = new Sequence("P30419", "MTRRSQIF");
  secondPeptide.addDBRef(new DBRefEntry("PDB", "0", "3JTK"));
  SequenceFeature secondFeature = new SequenceFeature("type2", "desc2", 13,
          15, 12f, "group2");
  secondPeptide.addSequenceFeature(secondFeature);

  /*
   * nucleotide sequence (to go in the alignment)
   */
  SequenceI nucleotide = new Sequence("AF039662", "GGGGCAGCACAAGAAC");

  /*
   * give the nucleotide two UNIPROT dbrefs, each holding a mapping to one
   * of the peptides (a separate MapList is built for each mapping)
   */
  MapList toFirst = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, 3,
          1);
  DBRefEntry firstRef = new DBRefEntry("UNIPROT", "0", "Q9ZTS2",
          new Mapping(firstPeptide, toFirst));
  nucleotide.addDBRef(firstRef);

  MapList toSecond = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 },
          3, 1);
  DBRefEntry secondRef = new DBRefEntry("UNIPROT", "0", "P30419",
          new Mapping(secondPeptide, toSecond));
  nucleotide.addDBRef(secondRef);

  /*
   * find UNIPROT xrefs for the nucleotide sequence - the mapped
   * sequences should be picked up
   */
  SequenceI[] selection = { nucleotide };
  AlignmentI dataset = new Alignment(new SequenceI[] { nucleotide });
  Alignment products = CrossRef.findXrefSequences(selection, true,
          "UNIPROT", dataset);
  assertEquals(2, products.getHeight());

  /*
   * the result should hold copies of the mapped sequences, including
   * copies of their dbrefs and features, in dbref order
   */
  checkCopySequence(firstPeptide, products.getSequenceAt(0));
  checkCopySequence(secondPeptide, products.getSequenceAt(1));
}
/**
 * Helper method to assert that seq2 looks like a copy of seq1: a different
 * object with the same name, start, end and sequence string, and with equal
 * dbrefs and sequence features. Where dbrefs (or features) are present, the
 * first entry of each sequence is also checked to be a distinct object, as
 * evidence that the entries were copied rather than shared.
 * <p>
 * A null dbrefs or features array on seq1 is tolerated: the arrays are
 * still compared, but the distinct-object check is skipped rather than
 * failing with a NullPointerException.
 *
 * @param seq1
 *          the original sequence
 * @param seq2
 *          the sequence expected to be a copy of seq1
 */
private void checkCopySequence(SequenceI seq1, SequenceI seq2)
{
  assertNotSame(seq1, seq2);
  assertEquals(seq1.getName(), seq2.getName());
  assertEquals(seq1.getStart(), seq2.getStart());
  assertEquals(seq1.getEnd(), seq2.getEnd());
  assertEquals(seq1.getSequenceAsString(), seq2.getSequenceAsString());

  /*
   * compare dbrefs
   */
  DBRefEntry[] refs1 = seq1.getDBRefs();
  DBRefEntry[] refs2 = seq2.getDBRefs();
  assertArrayEquals(refs1, refs2);
  // check one to verify a copy, not the same object
  // (refs2 has the same length as refs1 once the arrays compare equal)
  if (refs1 != null && refs1.length > 0)
  {
    assertNotSame(refs1[0], refs2[0]);
  }

  /*
   * compare features
   */
  SequenceFeature[] features1 = seq1.getSequenceFeatures();
  SequenceFeature[] features2 = seq2.getSequenceFeatures();
  assertArrayEquals(features1, features2);
  if (features1 != null && features1.length > 0)
  {
    assertNotSame(features1[0], features2[0]);
  }
}
/**
 * Test for finding 'product' sequences for the case where the selected
 * sequence has a dbref with no mapping, triggering a fetch from database.
 * The database fetch is replaced by a mock fetcher that reports every source
 * as fetchable and always returns the same two peptide sequences.
 */
@Test(groups = { "Functional" })
public void testFindXrefSequences_withFetch()
{
  SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
  dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
  dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P30419"));
  dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P00314"));
  final SequenceI pep1 = new Sequence("Q9ZTS2", "MYQLIRSSW");
  final SequenceI pep2 = new Sequence("P00314", "MRKLLAASG");

  SequenceFetcher mockFetcher = new SequenceFetcher()
  {
    @Override
    public boolean isFetchable(String source)
    {
      return true;
    }

    // NOTE(review): parameter left as a raw List to match the signature as
    // written here - confirm the superclass declaration before adding a
    // type argument
    @Override
    public SequenceI[] getSequences(List refs, boolean dna)
    {
      return new SequenceI[] { pep1, pep2 };
    }
  };
  SequenceFetcherFactory.setSequenceFetcher(mockFetcher);

  try
  {
    /*
     * find UNIPROT xrefs for nucleotide sequence
     */
    AlignmentI al = new Alignment(new SequenceI[] { dna1 });
    Alignment xrefs = CrossRef.findXrefSequences(new SequenceI[] { dna1 },
            true, "UNIPROT", al);
    assertEquals(2, xrefs.getHeight());
    assertSame(pep1, xrefs.getSequenceAt(0));
    assertSame(pep2, xrefs.getSequenceAt(1));
  } finally
  {
    /*
     * the fetcher is held by the factory, so clear the mock even if an
     * assertion fails, to keep it from being picked up by other tests
     */
    SequenceFetcherFactory.setSequenceFetcher(null);
  }
}
/**
 * Resets the sequence fetcher held by SequenceFetcherFactory to null once
 * the tests in this class have run, so that a mock fetcher installed by a
 * test is not left in place.
 * <p>
 * alwaysRun is set so that the reset still happens when an earlier method
 * failed or was skipped, and when the run is restricted to test groups
 * (this method declares no group of its own).
 */
@AfterClass(alwaysRun = true)
public void tearDown()
{
  SequenceFetcherFactory.setSequenceFetcher(null);
}
}