X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fio%2Fparsers%2Futil%2FParserUtils.java;h=a1f842ee0df8353be6a37a3606bda2296cfd42d9;hb=44fddb76faa8975295b8b0ad38609256b5011ced;hp=b8dd42e7dd97cfe539bf8aa34416e80198d20226;hpb=038c34792757a86f24296de5683e722fab3f9307;p=jalview.git diff --git a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java index b8dd42e..a1f842e 100644 --- a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java +++ b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java @@ -42,48 +42,51 @@ import org.forester.io.parsers.nexus.NexusPhylogeniesParser; import org.forester.io.parsers.nhx.NHXParser; import org.forester.io.parsers.phyloxml.PhyloXmlParser; import org.forester.io.parsers.tol.TolParser; +import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.util.ForesterConstants; import org.forester.util.ForesterUtil; public final class ParserUtils { - final public static PhylogenyParser createParserDependingOnUrlContents( final URL url, - final boolean phyloxml_validate_against_xsd ) + final public static PhylogenyParser createParserDependingFileContents( final File file, + final boolean phyloxml_validate_against_xsd ) throws FileNotFoundException, IOException { - final String lc_filename = url.getFile().toString().toLowerCase(); - PhylogenyParser parser = createParserDependingOnSuffix( lc_filename, phyloxml_validate_against_xsd ); - if ( ( parser != null ) && lc_filename.endsWith( ".zip" ) ) { - if ( parser instanceof PhyloXmlParser ) { - ( ( PhyloXmlParser ) parser ).setZippedInputstream( true ); - } - else if ( parser instanceof TolParser ) { - ( ( TolParser ) parser ).setZippedInputstream( true ); - } - } - if ( parser == null ) { - final String first_line = ForesterUtil.getFirstLine( url ).trim().toLowerCase(); - if ( first_line.startsWith( "<" ) ) { - parser = new PhyloXmlParser(); - if ( phyloxml_validate_against_xsd ) { - final ClassLoader cl = PhyloXmlParser.class.getClassLoader(); - final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE ); - if ( xsd_url != null ) { - ( ( PhyloXmlParser ) parser ).setValidateAgainstSchema( xsd_url.toString() ); - } - else { + PhylogenyParser parser = null; + final String first_line = ForesterUtil.getFirstLine( file ).trim().toLowerCase(); + if ( first_line.startsWith( "<" ) ) { + parser = new PhyloXmlParser(); + if ( phyloxml_validate_against_xsd ) { + final ClassLoader cl = PhyloXmlParser.class.getClassLoader(); + final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE ); + if ( xsd_url != null ) { + ( ( PhyloXmlParser ) parser ).setValidateAgainstSchema( xsd_url.toString() ); + } + else { + if ( ForesterConstants.RELEASE ) { throw new RuntimeException( "failed to get URL for phyloXML XSD from jar file from [" + ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE + "]" ); } } } - else if ( ( first_line.startsWith( "nexus" ) ) || ( first_line.startsWith( "#nexus" ) ) - || ( first_line.startsWith( "# nexus" ) ) || ( first_line.startsWith( "begin" ) ) ) { - parser = new NexusPhylogeniesParser(); - } - else { - parser = new NHXParser(); - } + } + else if ( ( first_line.startsWith( "nexus" ) ) || ( first_line.startsWith( "#nexus" ) ) + || ( first_line.startsWith( "# nexus" ) ) || ( first_line.startsWith( "begin" ) ) ) { + parser = new NexusPhylogeniesParser(); + } + else { + parser = new NHXParser(); + } + return parser; + } + + final public static PhylogenyParser createParserDependingOnFileType( final File file, + final boolean phyloxml_validate_against_xsd ) + throws FileNotFoundException, IOException { + PhylogenyParser parser = null; + parser = ParserUtils.createParserDependingOnSuffix( file.getName(), phyloxml_validate_against_xsd ); + if ( parser == null ) { + parser = createParserDependingFileContents( file, phyloxml_validate_against_xsd ); } return parser; } @@ -127,46 +130,78 @@ public final class ParserUtils { return parser; } - final public static PhylogenyParser createParserDependingOnFileType( final File file, - final boolean phyloxml_validate_against_xsd ) + final public static PhylogenyParser createParserDependingOnUrlContents( final URL url, + final boolean phyloxml_validate_against_xsd ) throws FileNotFoundException, IOException { - PhylogenyParser parser = null; - parser = ParserUtils.createParserDependingOnSuffix( file.getName(), phyloxml_validate_against_xsd ); - if ( parser == null ) { - parser = createParserDependingFileContents( file, phyloxml_validate_against_xsd ); + final String lc_filename = url.getFile().toString().toLowerCase(); + PhylogenyParser parser = createParserDependingOnSuffix( lc_filename, phyloxml_validate_against_xsd ); + if ( ( parser != null ) && lc_filename.endsWith( ".zip" ) ) { + if ( parser instanceof PhyloXmlParser ) { + ( ( PhyloXmlParser ) parser ).setZippedInputstream( true ); + } + else if ( parser instanceof TolParser ) { + ( ( TolParser ) parser ).setZippedInputstream( true ); + } } - return parser; - } - - final public static PhylogenyParser createParserDependingFileContents( final File file, - final boolean phyloxml_validate_against_xsd ) - throws FileNotFoundException, IOException { - PhylogenyParser parser = null; - final String first_line = ForesterUtil.getFirstLine( file ).trim().toLowerCase(); - if ( first_line.startsWith( "<" ) ) { - parser = new PhyloXmlParser(); - if ( phyloxml_validate_against_xsd ) { - final ClassLoader cl = PhyloXmlParser.class.getClassLoader(); - final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE ); - if ( xsd_url != null ) { - ( ( PhyloXmlParser ) parser ).setValidateAgainstSchema( xsd_url.toString() ); - } - else { - if ( ForesterConstants.RELEASE ) { + if ( parser == null ) { + final String first_line = ForesterUtil.getFirstLine( url ).trim().toLowerCase(); + if ( first_line.startsWith( "<" ) ) { + parser = new PhyloXmlParser(); + if ( phyloxml_validate_against_xsd ) { + final ClassLoader cl = PhyloXmlParser.class.getClassLoader(); + final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE ); + if ( xsd_url != null ) { + ( ( PhyloXmlParser ) parser ).setValidateAgainstSchema( xsd_url.toString() ); + } + else { throw new RuntimeException( "failed to get URL for phyloXML XSD from jar file from [" + ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE + "]" ); } } } + else if ( ( first_line.startsWith( "nexus" ) ) || ( first_line.startsWith( "#nexus" ) ) + || ( first_line.startsWith( "# nexus" ) ) || ( first_line.startsWith( "begin" ) ) ) { + parser = new NexusPhylogeniesParser(); + } + else { + parser = new NHXParser(); + } } - else if ( ( first_line.startsWith( "nexus" ) ) || ( first_line.startsWith( "#nexus" ) ) - || ( first_line.startsWith( "# nexus" ) ) || ( first_line.startsWith( "begin" ) ) ) { - parser = new NexusPhylogeniesParser(); + return parser; + } + + public static BufferedReader createReader( final Object source ) throws IOException, FileNotFoundException { + BufferedReader reader = null; + if ( ( source instanceof File ) || ( source instanceof String ) ) { + File f = null; + if ( source instanceof File ) { + f = ( File ) source; + } + else { + f = new File( ( String ) source ); + } + if ( !f.exists() ) { + throw new IOException( "[" + f.getAbsolutePath() + "] does not exist" ); + } + else if ( !f.isFile() ) { + throw new IOException( "[" + f.getAbsolutePath() + "] is not a file" ); + } + else if ( !f.canRead() ) { + throw new IOException( "[" + f.getAbsolutePath() + "] is not a readable" ); + } + reader = new BufferedReader( new FileReader( f ) ); + } + else if ( source instanceof InputStream ) { + reader = new BufferedReader( new InputStreamReader( ( InputStream ) source ) ); + } + else if ( ( source instanceof StringBuffer ) || ( source instanceof StringBuilder ) ) { + reader = new BufferedReader( new StringReader( source.toString() ) ); } else { - parser = new NHXParser(); + throw new IllegalArgumentException( "attempt to parse object of type [" + source.getClass() + + "] (can only parse objects of type File/String, InputStream, StringBuffer, or StringBuilder)" ); } - return parser; + return reader; } /** @@ -189,8 +224,8 @@ public final class ParserUtils { final boolean limit_to_five, final PhylogenyMethods.TAXONOMY_EXTRACTION taxonomy_extraction ) { if ( ( name.indexOf( "_" ) > 0 ) - && ( name.length() < 25 ) - && ( name.lastIndexOf( "_" ) == name.indexOf( "_" ) ) + && ( name.length() < 31 ) + // && ( name.lastIndexOf( "_" ) == name.indexOf( "_" ) ) && ( name.indexOf( "|" ) < 0 ) && ( name.indexOf( "." ) < 0 ) && ( ( taxonomy_extraction != PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ) || ( name @@ -199,59 +234,30 @@ public final class ParserUtils { final String[] s = name.split( "[_/]" ); if ( s.length > 1 ) { String str = s[ 1 ]; - if ( limit_to_five ) { - if ( str.length() > 5 ) { - str = str.substring( 0, 5 ); - } - else if ( ( str.length() < 5 ) && ( str.startsWith( "RAT" ) || str.startsWith( "PIG" ) ) ) { + if ( ( str.length() < 6 ) || ( !limit_to_five && ( str.length() < 7 ) ) ) { + if ( ( str.length() < 5 ) && ( str.startsWith( "RAT" ) || str.startsWith( "PIG" ) ) ) { str = str.substring( 0, 3 ); } + final Matcher uc_letters_and_numbers = NHXParser.UC_LETTERS_NUMBERS_PATTERN.matcher( str ); + if ( !uc_letters_and_numbers.matches() ) { + return null; + } + final Matcher numbers_only = NHXParser.NUMBERS_ONLY_PATTERN.matcher( str ); + if ( numbers_only.matches() ) { + return null; + } + return str; } - final Matcher letters_and_numbers = NHXParser.UC_LETTERS_NUMBERS_PATTERN.matcher( str ); - if ( !letters_and_numbers.matches() ) { - return null; - } - final Matcher numbers_only = NHXParser.NUMBERS_ONLY_PATTERN.matcher( str ); - if ( numbers_only.matches() ) { - return null; - } - return str; } } return null; } - public static BufferedReader createReader( final Object source ) throws IOException, FileNotFoundException { - BufferedReader reader = null; - if ( ( source instanceof File ) || ( source instanceof String ) ) { - File f = null; - if ( source instanceof File ) { - f = ( File ) source; - } - else { - f = new File( ( String ) source ); - } - if ( !f.exists() ) { - throw new IOException( "[" + f.getAbsolutePath() + "] does not exist" ); - } - else if ( !f.isFile() ) { - throw new IOException( "[" + f.getAbsolutePath() + "] is not a file" ); - } - else if ( !f.canRead() ) { - throw new IOException( "[" + f.getAbsolutePath() + "] is not a readable" ); - } - reader = new BufferedReader( new FileReader( f ) ); - } - else if ( source instanceof InputStream ) { - reader = new BufferedReader( new InputStreamReader( ( InputStream ) source ) ); - } - else if ( ( source instanceof StringBuffer ) || ( source instanceof StringBuilder ) ) { - reader = new BufferedReader( new StringReader( source.toString() ) ); - } - else { - throw new IllegalArgumentException( "attempt to parse object of type [" + source.getClass() - + "] (can only parse objects of type File/String, InputStream, StringBuffer, or StringBuilder)" ); - } - return reader; + public final static Phylogeny[] readPhylogenies( final File file ) throws FileNotFoundException, IOException { + return PhylogenyMethods.readPhylogenies( ParserUtils.createParserDependingOnFileType( file, true ), file ); + } + + public final static Phylogeny[] readPhylogenies( final String file_name ) throws FileNotFoundException, IOException { + return readPhylogenies( new File( file_name ) ); } }