From 1c66dc9053356a709e9cd9d3282517b5221f1ba2 Mon Sep 17 00:00:00 2001 From: jprocter Date: Wed, 23 May 2012 19:54:44 +0100 Subject: [PATCH] JAL-1067, JAL-1105 - refactor parser to fit Jalview's data parsing architecture --- src/jalview/appletgui/AlignFrame.java | 30 ++++-- src/jalview/bin/JalviewLite.java | 13 +-- src/jalview/gui/AlignFrame.java | 12 +-- src/jalview/io/TCoffeeScoreFile.java | 146 ++++++++++++++++++----------- test/jalview/io/TCoffeeScoreFileTest.java | 47 ++++++---- 5 files changed, 150 insertions(+), 98 deletions(-) diff --git a/src/jalview/appletgui/AlignFrame.java b/src/jalview/appletgui/AlignFrame.java index efb0f4c..e03f2f4 100644 --- a/src/jalview/appletgui/AlignFrame.java +++ b/src/jalview/appletgui/AlignFrame.java @@ -3718,16 +3718,18 @@ public class AlignFrame extends EmbmenuFrame implements ActionListener, ItemLis /** * Load the (T-Coffee) score file from the specified url * - * @param url The absolute path from where download and read the score file + * @param source File/URL/T-COFFEE score file contents * @throws IOException + * @return true if alignment was annotated with data from source */ - public void loadScoreFile( URL url ) throws IOException { - // TODO: refactor to string/standard jalview data importer - TCoffeeScoreFile file = TCoffeeScoreFile.load( new InputStreamReader( url.openStream() ) ); - if( file == null ) { - // TODO: raise a dialog box here rather than bomb out. - - throw new RuntimeException("The file provided does not match the T-Coffee scores file format"); + public boolean loadScoreFile( String source ) throws IOException { + + TCoffeeScoreFile file = new TCoffeeScoreFile(source, AppletFormatAdapter.checkProtocol(source)); + if( !file.isValid()) { + // TODO: raise dialog for gui + System.err.println("Problems parsing T-Coffee scores: "+file.getWarningMessage()); + System.err.println("Origin was:\n"+source); + return false; } /* @@ -3736,17 +3738,25 @@ public class AlignFrame extends EmbmenuFrame implements ActionListener, ItemLis AlignmentI aln; if( (aln=viewport.getAlignment()) != null && (aln.getHeight() != file.getHeight() || aln.getWidth() != file.getWidth()) ) { // TODO: raise a dialog box here rather than bomb out. - throw new RuntimeException("The scores matrix does not match the alignment dimensions"); + System.err.println("The scores matrix does not match the alignment dimensions"); } // TODO add parameter to indicate if matching should be done if (file.annotateAlignment(alignPanel.getAlignment(), false)) { - tcoffeeColour.setEnabled(true); + alignPanel.fontChanged(); + tcoffeeColour.setEnabled(true); // switch to this color changeColour(new TCoffeeColourScheme(alignPanel.getAlignment())); + return true; + } else { + System.err.println("Problems resolving T-Coffee scores:"); + if (file.getWarningMessage()!=null) { + System.err.println(file.getWarningMessage()); + } } + return false; } diff --git a/src/jalview/bin/JalviewLite.java b/src/jalview/bin/JalviewLite.java index 342ff4c..3ae1be6 100644 --- a/src/jalview/bin/JalviewLite.java +++ b/src/jalview/bin/JalviewLite.java @@ -1871,12 +1871,13 @@ public class JalviewLite extends Applet implements String sScoreFile = applet.getParameter("scoreFile"); if( sScoreFile != null && !"".equals(sScoreFile) ) { try { - URL urlScore = new URL(sScoreFile); - newAlignFrame.loadScoreFile(urlScore); - - } - catch( Exception e ) { - // TODO error message log (shows a warning dialogbox?) + if (debug) { + System.err.println("Attempting to load T-COFFEE score file from the scoreFile parameter"); + } + if (!newAlignFrame.loadScoreFile(sScoreFile)) { + System.err.println("Failed to parse T-COFFEE parameter as a valid score file ('"+sScoreFile+"')"); + } + }catch( Exception e ) { System.err.printf("Cannot read score file: '%s'. Cause: %s \n", sScoreFile, e.getMessage()); } } diff --git a/src/jalview/gui/AlignFrame.java b/src/jalview/gui/AlignFrame.java index 6896fb7..198c3bd 100755 --- a/src/jalview/gui/AlignFrame.java +++ b/src/jalview/gui/AlignFrame.java @@ -3945,12 +3945,12 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, try { - TCoffeeScoreFile result = TCoffeeScoreFile.load(new File(sFilePath)); - if( result == null ) { - // TODO: raise a dialog box here rather than bomb out. - - throw new RuntimeException("The file provided does not match the T-Coffee scores file format"); - } + TCoffeeScoreFile result = new TCoffeeScoreFile(sFilePath, FormatAdapter.FILE); + if (!result.isValid()) { + JOptionPane.showMessageDialog(Desktop.desktop, result.getWarningMessage(), + "Problem reading T-COFFEE score file", JOptionPane.WARNING_MESSAGE); + return; + } /* * check that the score matrix matches the alignment dimensions diff --git a/src/jalview/io/TCoffeeScoreFile.java b/src/jalview/io/TCoffeeScoreFile.java index 6cab3ae..ed0f1d9 100644 --- a/src/jalview/io/TCoffeeScoreFile.java +++ b/src/jalview/io/TCoffeeScoreFile.java @@ -72,8 +72,19 @@ import java.util.Map; * @author Paolo Di Tommaso * */ -public class TCoffeeScoreFile { +public class TCoffeeScoreFile extends AlignFile { + public TCoffeeScoreFile(String inFile, String type) throws IOException + { + super(inFile, type); + + } + + public TCoffeeScoreFile(FileParse source) throws IOException + { + super(source); + } + /** The {@link Header} structure holder */ Header header; @@ -81,29 +92,14 @@ public class TCoffeeScoreFile { * Holds the consensues values for each sequences. It uses a LinkedHashMap to maintaint the * insertion order. */ - LinkedHashMap scores = new LinkedHashMap(); + LinkedHashMap scores; Integer fWidth; - - /** - * Parse the specified file. - * - * @param file The file to be parsed - */ - public static TCoffeeScoreFile load(File file) { - try { - return load(new FileReader(file)); - } - catch (FileNotFoundException e) { - throw new RuntimeException(e); - } - } /** * Parse the provided reader for the T-Coffee scores file format * * @param reader - */ public static TCoffeeScoreFile load(Reader reader) { try { @@ -116,6 +112,7 @@ public class TCoffeeScoreFile { throw new RuntimeException(e); } } + */ /** * @return The 'height' of the score matrix i.e. the numbers of score rows that should matches @@ -136,11 +133,6 @@ public class TCoffeeScoreFile { return fWidth != null ? fWidth : 0; } - /** - * The default constructor is marked as {@code protected} since this class is meant to created - * through the {@link #load(File)} or {@link #load(Reader)} factory methods - */ - protected TCoffeeScoreFile() { } /** * Get the string of score values for the specified seqeunce ID. @@ -149,13 +141,17 @@ public class TCoffeeScoreFile { * It return an empty string when the specified ID is missing. */ public String getScoresFor( String id ) { - return scores.containsKey(id) ? scores.get(id).toString() : ""; + return scores!=null && scores.containsKey(id) ? scores.get(id).toString() : ""; } /** * @return The list of score string as a {@link List} object, in the same ordeer of the insertion i.e. in the MSA */ public List getScoresList() { + if (scores==null) + { + return null; + } List result = new ArrayList( scores.size() ); for( Map.Entry it : scores.entrySet() ) { result.add(it.getValue().toString()); @@ -168,6 +164,10 @@ public class TCoffeeScoreFile { * @return The parsed score values a matrix of bytes */ public byte[][] getScoresArray() { + if (scores==null) + { + return null; + } byte[][] result = new byte[ scores.size() ][]; int rowCount = 0; @@ -188,15 +188,15 @@ public class TCoffeeScoreFile { } - private void doParsing(BufferedReader in) throws IOException { - + public void parse() throws IOException + { /* * read the header */ - header = readHeader(in); + header = readHeader(this); - if( header == null ) { return; } - + if( header == null ) { error=true; return;} + scores = new LinkedHashMap(); /* * initilize the structure @@ -209,7 +209,7 @@ public class TCoffeeScoreFile { * go with the reading */ Block block; - while( (block = readBlock(in, header.scores.size())) != null ) { + while( (block = readBlock(this,header.scores.size())) != null ) { /* * append sequences read in the block @@ -217,7 +217,9 @@ public class TCoffeeScoreFile { for( Map.Entry entry : block.items.entrySet() ) { StringBuilder scoreStringBuilder = scores.get(entry.getKey()); if( scoreStringBuilder == null ) { - throw new RuntimeException(String.format("Invalid T-Coffee score file: Sequence ID '%s' is not declared in header section", entry.getKey())); + error=true; + errormessage=String.format("Invalid T-Coffee score file: Sequence ID '%s' is not declared in header section", entry.getKey()); + return ; } scoreStringBuilder.append( entry.getValue() ); @@ -232,12 +234,14 @@ public class TCoffeeScoreFile { fWidth = str.length(); } else if( fWidth != str.length() ) { - throw new RuntimeException("Invalid T-Coffee score file: All the score sequences must have the same length"); + error=true; + errormessage="Invalid T-Coffee score file: All the score sequences must have the same length"; + return ; } } - + return; } @@ -258,30 +262,30 @@ public class TCoffeeScoreFile { * @return The parser {@link Header} instance * @throws RuntimeException when the header is not in the expected format */ - static Header readHeader(BufferedReader reader) { + static Header readHeader(FileParse reader) throws IOException { Header result = null; try { result = new Header(); - result.head = reader.readLine(); + result.head = reader.nextLine(); String line; - while( (line = reader.readLine()) != null ) { + while( (line = reader.nextLine()) != null ) { if( line.startsWith("SCORE=")) { result.score = parseInt( line.substring(6).trim() ); break; } } - if( (line=reader.readLine())==null || !"*".equals(line.trim())) return null; - if( (line=reader.readLine())==null || !"BAD AVG GOOD".equals(line.trim())) return null; - if( (line=reader.readLine())==null || !"*".equals(line.trim())) return null; + if( (line=reader.nextLine())==null || !"*".equals(line.trim())) { error(reader,"Invalid T-COFFEE score format (NO BAD/AVG/GOOD header)"); return null;} + if( (line=reader.nextLine())==null || !"BAD AVG GOOD".equals(line.trim())) { error(reader,"Invalid T-COFFEE score format (NO BAD/AVG/GOOD header)"); return null;} + if( (line=reader.nextLine())==null || !"*".equals(line.trim())) {error(reader,"Invalid T-COFFEE score format (NO BAD/AVG/GOOD header)"); return null;} /* * now are expected a list if sequences ID up to the first blank line */ - while( (line=reader.readLine()) != null ) { + while( (line=reader.nextLine()) != null ) { if( "".equals(line) ) { break; } @@ -302,14 +306,27 @@ public class TCoffeeScoreFile { result.scores.put(id,val); } + if (result==null) { + error(reader, "T-COFFEE score file had no per-sequence scores"); + } + } catch( IOException e ) { - throw new RuntimeException("Cannot parse T-Coffee score ascii file", e); + error(reader,"Unexpected problem parsing T-Coffee score ascii file"); + throw e; } return result; } - + private static void error(FileParse reader, String errm) + { + reader.error=true; + if (reader.errormessage==null) + { reader.errormessage=errm; + } else { + reader.errormessage+="\n"+errm; + } + } /** * Read a scores block ihe provided stream. * @@ -318,18 +335,18 @@ public class TCoffeeScoreFile { * @return The {@link Block} instance read or {link null} null if the end of file has reached. * @throws IOException Something went wrong on the 'wire' */ - static Block readBlock( BufferedReader reader, int size ) throws IOException { + static Block readBlock( FileParse reader, int size ) throws IOException { Block result = new Block(size); String line; /* * read blank lines (eventually) */ - while( (line=reader.readLine()) != null && "".equals(line.trim())) { + while( (line=reader.nextLine()) != null && "".equals(line.trim())) { // consume blank lines } - if( line == null ) return null; + if( line == null ) { return null; } /* * read the scores block @@ -342,11 +359,12 @@ public class TCoffeeScoreFile { // split the line on the first blank // the first part have to contain the sequence id - // theramining part are the scores values + // the remaining part are the scores values int p = line.indexOf(" "); if( p == -1 ) { - //TODO This is an unexpected condition, log a warning or throw an exception ? - continue; + if (reader.warningMessage==null) { reader.warningMessage=""; } + reader.warningMessage+="Possible parsing error - expected to find a space in line: '"+line+"'\n"; + continue; } String id = line.substring(0,p).trim(); @@ -354,7 +372,7 @@ public class TCoffeeScoreFile { result.items.put(id, val); - } while( (line = reader.readLine()) != null ); + } while( (line = reader.nextLine()) != null ); return result; @@ -422,6 +440,11 @@ public class TCoffeeScoreFile { */ public boolean annotateAlignment(AlignmentI al, boolean matchids) { + if (al.getHeight()!=getHeight() || al.getWidth()!=getWidth()) + { + warningMessage="Alignment shape does not match T-Coffee score file shape."; + return false; + } boolean added=false; int i=0; SequenceIdMatcher sidmatcher = new SequenceIdMatcher(al.getSequencesArray()); @@ -449,26 +472,35 @@ public class TCoffeeScoreFile { byte val = srow[j]; annotations[j]=new Annotation(s==null ? ""+val:null,s==null ? ""+val:null,(char) val,val*1f,val >= 0 && val < colors.length ? colors[val] : Color.white); } - AlignmentAnnotation aa=null; + // this will overwrite any existing t-coffee scores for the alignment + AlignmentAnnotation aa=al.findOrCreateAnnotation(TCOFFEE_SCORE,false,s,null); if (s!=null) { - // TODO - set per sequence score - aa=new AlignmentAnnotation(TCOFFEE_SCORE, "Score for "+id.getKey(), annotations); - - aa.setSequenceRef(s); + aa.label="T-COFFEE"; + aa.description="Score for "+id.getKey(); + aa.annotations=annotations; aa.visible=false; aa.belowAlignment=false; } else { - aa=new AlignmentAnnotation("T-COFFEE", "TCoffee column reliability score", annotations); + aa.graph=AlignmentAnnotation.NO_GRAPH; + aa.label="T-COFFEE"; + aa.description="TCoffee column reliability score"; + aa.annotations=annotations; aa.belowAlignment=true; aa.visible=true; - } - al.addAnnotation(aa); + aa.showAllColLabels=true; + aa.setSequenceRef(s); + aa.validateRangeAndDisplay(); added=true; } return added; } - + @Override + public String print() + { + // TODO Auto-generated method stub + return "Not valid."; + } } diff --git a/test/jalview/io/TCoffeeScoreFileTest.java b/test/jalview/io/TCoffeeScoreFileTest.java index 7257e14..85c0bff 100644 --- a/test/jalview/io/TCoffeeScoreFileTest.java +++ b/test/jalview/io/TCoffeeScoreFileTest.java @@ -16,12 +16,15 @@ import org.junit.Test; public class TCoffeeScoreFileTest { - final static File SCORE_FILE = new File("./test/jalview/io/tcoffee.score_ascii"); + final static File SCORE_FILE = new File("test/jalview/io/tcoffee.score_ascii"); + final static File ALIGN_FILE = new File("test/jalview/io/tcoffee.fasta_aln"); @Test - public void testReadHeader() throws FileNotFoundException { + public void testReadHeader() throws IOException, FileNotFoundException { - Header header = TCoffeeScoreFile.readHeader( new BufferedReader(new FileReader(SCORE_FILE)) ); + TCoffeeScoreFile scoreFile = new TCoffeeScoreFile(SCORE_FILE.getPath(),AppletFormatAdapter.FILE); + assertTrue(scoreFile.getWarningMessage(),scoreFile.isValid()); + Header header = scoreFile.header; assertNotNull(header); assertEquals( "T-COFFEE, Version_9.02.r1228 (2012-02-16 18:15:12 - Revision 1228 - Build 336)", header.head ); assertEquals( 90, header.score ); @@ -39,14 +42,19 @@ public class TCoffeeScoreFileTest { @Test public void testWrongFile() { - TCoffeeScoreFile result = TCoffeeScoreFile.load(new File("./test/jalview/io/tcoffee.fasta_aln")); - assertNull(result); + try { + TCoffeeScoreFile result = new TCoffeeScoreFile(ALIGN_FILE.getPath(), FormatAdapter.FILE); + assertFalse(result.isValid()); + } catch (IOException x) + { + assertTrue("File not found exception thrown",x instanceof FileNotFoundException); + } } @Test - public void testHeightAndWidth() { - TCoffeeScoreFile result = TCoffeeScoreFile.load(new File("./test/jalview/io/tcoffee.score_ascii")); - assertNotNull(result); + public void testHeightAndWidth() throws IOException { + TCoffeeScoreFile result = new TCoffeeScoreFile(SCORE_FILE.getPath(), FormatAdapter.FILE); + assertTrue(result.isValid()); assertEquals( 8, result.getHeight() ); assertEquals( 83, result.getWidth() ); } @@ -68,9 +76,10 @@ public class TCoffeeScoreFileTest { "cons 999999999999999999999999999851000110321100001134\n" + "\n" + "\n"; - - Block block = TCoffeeScoreFile.readBlock(new BufferedReader(new StringReader(BLOCK)), 0); - assertNotNull(block); + FileParse source=new FileParse(BLOCK, FormatAdapter.PASTE); + Block block = TCoffeeScoreFile.readBlock(source, 0); + + assertNotNull(block); assertEquals( "999999999999999999999999998762112222543211112134", block.getScoresFor("1PHT") ); assertEquals( "99999999999999999999999999987-------4322----2234", block.getScoresFor("1BB9") ); assertEquals( "99999999999999999999999999987-------5321----2246", block.getScoresFor("1UHC") ); @@ -83,9 +92,9 @@ public class TCoffeeScoreFileTest { } @Test - public void testParse() throws FileNotFoundException { + public void testParse() throws IOException { - TCoffeeScoreFile parser = TCoffeeScoreFile.load(new BufferedReader(new FileReader(SCORE_FILE)) ); + TCoffeeScoreFile parser = new TCoffeeScoreFile(SCORE_FILE.getPath(), FormatAdapter.FILE); assertEquals( "999999999999999999999999998762112222543211112134----------5666642367889999999999889", parser.getScoresFor("1PHT") ); assertEquals( "99999999999999999999999999987-------4322----22341111111111676653-355679999999999889", parser.getScoresFor("1BB9") ); @@ -100,10 +109,10 @@ public class TCoffeeScoreFileTest { @Test - public void testGetAsList() throws FileNotFoundException { + public void testGetAsList() throws IOException { - TCoffeeScoreFile parser = TCoffeeScoreFile.load(new BufferedReader(new FileReader(SCORE_FILE)) ); - + TCoffeeScoreFile parser = new TCoffeeScoreFile(SCORE_FILE.getPath(),FormatAdapter.FILE); + assertTrue(parser.getWarningMessage(),parser.isValid()); List scores = parser.getScoresList(); assertEquals( "999999999999999999999999998762112222543211112134----------5666642367889999999999889", scores.get(0) ); assertEquals( "99999999999999999999999999987-------4322----22341111111111676653-355679999999999889", scores.get(1) ); @@ -119,10 +128,10 @@ public class TCoffeeScoreFileTest { @Test - public void testGetAsArray() throws FileNotFoundException { + public void testGetAsArray() throws IOException { - TCoffeeScoreFile parser = TCoffeeScoreFile.load(new BufferedReader(new FileReader(SCORE_FILE)) ); - + TCoffeeScoreFile parser = new TCoffeeScoreFile(SCORE_FILE.getPath(),FormatAdapter.FILE); + assertTrue(parser.getWarningMessage(),parser.isValid()); byte[][] scores = parser.getScoresArray(); assertEquals( 9, scores[0][0] ); -- 1.7.10.2