<p>
<strong>Principal Component Analysis</strong>
</p>
- <p>A principal component analysis can be performed via the
- <a href="calculations.html">calculations dialog</a> which is accessed by selecting <strong>Calculate→Calculate
- Tree or PCA...</strong>.</p>
+ <p>
+ A principal component analysis can be performed via the <a
+ href="calculations.html">calculations dialog</a> which is accessed
+ by selecting <strong>Calculate→Calculate Tree or
+ PCA...</strong>.
+ </p>
<p>This calculation creates a spatial representation of the
similarities within a selected group, or all of the sequences in an
alignment. After the calculation finishes, a 3D viewer displays the
calculation are given in the <strong><em>Change
Parameters</em></strong> menu.
</p>
- <p>
- <em>PCA Calculation modes</em><br /> The default Jalview
- calculation mode (indicated when <em><strong>Jalview
- PCA Calculation</strong></em> is ticked in the <strong><em>Change
- Parameters</em></strong> menu) is to perform a PCA on a matrix where elements
- in the upper diagonal give the sum of scores for mutating in one
- direction, and the lower diagonal is the sum of scores for mutating
- in the other. For protein substitution models like BLOSUM62, this
- gives an asymmetric matrix, and a different PCA to a matrix produced
- with the method described in the paper by G. Casari, C. Sander and
- A. Valencia. Structural Biology volume 2, no. 2, February 1995 (<a
- href="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=pubmed&dopt=Abstract&list_uids=7749921">pubmed</a>)
- and implemented at the SeqSpace server at the EBI. This method
- preconditions the matrix by multiplying it with its transpose, and
- can be employed in the PCA viewer by unchecking the <strong><em>Jalview
- PCA Calculation</em></strong> option in the <strong><em>Change
- Parameters</em></strong> menu.
- </p>
<img src="pcaviewer.gif">
<p>
<strong>The PCA Viewer</strong>
added to the Jalview desktop in v2.7.</em> <em>The Reset button
and Change Parameters menu were added in Jalview 2.8.</em> <em>Support
for PAM250 based PCA was added in Jalview 2.8.1.</em>
+ </p>
+ <p>
+ <strong>Reproducing PCA calculations performed with older
+ Jalview releases</strong><br /> Jalview 2.10.2 included a revised PCA
+ implementation which treated gaps and non-standard residues in the
+ same way as a matrix produced with the method described in the paper
+ by G. Casari, C. Sander and A. Valencia. Structural Biology volume
+ 2, no. 2, February 1995 (<a
+ href="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=pubmed&dopt=Abstract&list_uids=7749921">pubmed</a>)
+ and implemented at the SeqSpace server at the EBI. To reproduce
+ calculations performed with earlier Jalview releases it is necessary
+ to execute the following Groovy script:
+ </p>
+ <pre>
+ jalview.analysis.scoremodels.ScoreMatrix.scoreGapAsAny=true
+ jalview.analysis.scoremodels.ScoreModels.instance.BLOSUM62.@matrix[4][1]=3
+ </pre>
+ <p>
+ This script enables the legacy PCA mode, in which gaps were treated as
+ 'X', and modifies the BLOSUM62 matrix so that it is asymmetric for
+ mutations between C and R (this was a typo in the original Jalview
+ BLOSUM62 matrix which was fixed in 2.10.2).
+ </p>
</body>
</html>
redundant alignments
</li>
<li>
- <!-- JAL-2365 -->Cannot configure feature colours with
- lightGray or darkGray via features file
+ <!-- JAL-2365,JAL-2642 -->Cannot configure feature colours with
+ lightGray or darkGray via features file (but can specify lightgray)
</li>
<li>
<!-- JAL-2421 -->Overview window visible region moves
*/
package jalview.ws.dbsources;
-import jalview.datamodel.AlignmentI;
-import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
-import jalview.io.DataSourceType;
-import jalview.io.FileFormat;
-import jalview.io.FormatAdapter;
import com.stevesoft.pat.Regex;
/*
* (non-Javadoc)
*
- * @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[])
- */
- @Override
- public AlignmentI getSequenceRecords(String queries) throws Exception
- {
- // TODO: this is not a perfect implementation. We need to be able to add
- // individual references to each sequence in each family alignment that's
- // retrieved.
- startQuery();
- AlignmentI rcds = new FormatAdapter().readFile(getXFAMURL()
- + queries.trim().toUpperCase(), DataSourceType.URL,
- FileFormat.Stockholm);
- for (int s = 0, sNum = rcds.getHeight(); s < sNum; s++)
- {
- rcds.getSequenceAt(s).addDBRef(new DBRefEntry(DBRefSource.PFAM,
- // getDbSource(),
- getDbVersion(), queries.trim().toUpperCase()));
- if (!getDbSource().equals(DBRefSource.PFAM))
- { // add the specific ref too
- rcds.getSequenceAt(s).addDBRef(
- new DBRefEntry(getDbSource(), getDbVersion(), queries
- .trim().toUpperCase()));
- }
- }
- stopQuery();
- return rcds;
- }
-
- /*
- * (non-Javadoc)
- *
* @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
*/
@Override
@Override
protected String getXFAMURL()
{
- return "http://pfam.xfam.org/family/alignment/download/format?alnType=full&format=stockholm&order=t&case=l&gaps=default&entry=";
+ return "http://pfam.xfam.org/family/";
+
}
+ @Override
+ public String getXFAMURLSUFFIX()
+ {
+ return "/alignment/full";
+ }
/*
* (non-Javadoc)
*
@Override
protected String getXFAMURL()
{
- return "http://pfam.xfam.org/family/alignment/download/format?alnType=seed&format=stockholm&order=t&case=l&gaps=default&entry=";
+ return "http://pfam.xfam.org/family/";
}
+ @Override
+ public String getXFAMURLSUFFIX()
+ {
+ return "/alignment/seed";
+ }
/*
* (non-Javadoc)
*
@Override
protected String getXFAMURL()
{
- return "http://rfam.xfam.org/family/alignment/download/format?alnType=full&nseLabels=0&format=stockholm&acc=";
+ return "http://rfam.xfam.org/family/";
}
+ @Override
+ public String getXFAMURLSUFFIX()
+ {
+ return "/alignment/full";
+ }
/*
* (non-Javadoc)
*
protected String getXFAMURL()
{
return "http://rfam.xfam.org/family/";
- // Janelia Farms url
- // "http://rfam.janelia.org/cgi-bin/getalignment?type=seed&fmt=stockholm&acc=";
}
@Override
public String getXFAMURLSUFFIX()
{
- return "/alignment";
+ // to download gzipped file add '?gzip=1'
+ return "/alignment/stockholm";
}
/*
*/
package jalview.ws.dbsources;
+import jalview.bin.Cache;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefEntry;
import jalview.io.DataSourceType;
// retrieved.
startQuery();
// TODO: trap HTTP 404 exceptions and return null
- AlignmentI rcds = new FormatAdapter().readFile(getXFAMURL()
- + queries.trim().toUpperCase() + getXFAMURLSUFFIX(),
+ String xfamUrl = getXFAMURL()
+ + queries.trim().toUpperCase() + getXFAMURLSUFFIX();
+
+ if (Cache.log != null)
+ {
+ Cache.log.debug("XFAM URL for retrieval is: " + xfamUrl);
+ }
+
+ AlignmentI rcds = new FormatAdapter().readFile(xfamUrl ,
DataSourceType.URL, FileFormat.Stockholm);
+
for (int s = 0, sNum = rcds.getHeight(); s < sNum; s++)
{
rcds.getSequenceAt(s).addDBRef(new DBRefEntry(getXfamSource(),
WRAP_ALIGNMENT=false
#DAS_REGISTRY_URL=http\://www.dasregistry.org/das/ # retired 01/05/2015
DAS_REGISTRY_URL=http\://www.ebi.ac.uk/das-srv/registry/das/
+logs.Jalview.level=DEBUG
Assert.assertTrue(seedrf.getHeight() > 1,
"Seed Alignment for " + rfs.getTestQuery()
+ " didn't contain more than one sequence.");
+ Assert.assertTrue(seedrf.getProperties().size() > 0,
+ "Seed Alignment for " + rfs.getTestQuery()
+ + " didn't have any properties.");
+
}
@Test(groups = { "External" })
AlignmentI seedpf = pfseed.getSequenceRecords(pff.getTestQuery());
Assert.assertNotNull(seedpf, "Seed Alignment for " + pff.getTestQuery()
+ " didn't retrieve.");
+ Assert.assertTrue(seedpf.getProperties().size() > 0,
+ "Seed Alignment for " + pfseed.getTestQuery()
+ + " didn't have any properties.");
Assert.assertTrue(seedpf.getHeight() < fullpf.getHeight(),
"Expected Full alignment to have more sequences than seed for "
+ pff.getTestQuery());
+ Assert.assertTrue(fullpf.getProperties().size() > 0,
+ "Full Alignment for " + pff.getTestQuery()
+ + " didn't have any properties.");
+
}
}