X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fio%2Fparsers%2FFastaParser.java;h=348bec419d0f4006655a58fb42b291b8dd953c91;hb=391381d661a230af98751dd6eba77bb2353c4d3e;hp=4c6845cfab7ce885cf0194d7e0fc752c7646ffe1;hpb=48f7a89be9d34f1930a1f863e608235cc27184c5;p=jalview.git diff --git a/forester/java/src/org/forester/io/parsers/FastaParser.java b/forester/java/src/org/forester/io/parsers/FastaParser.java index 4c6845c..348bec4 100644 --- a/forester/java/src/org/forester/io/parsers/FastaParser.java +++ b/forester/java/src/org/forester/io/parsers/FastaParser.java @@ -6,7 +6,7 @@ // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved -// +// // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either @@ -16,18 +16,20 @@ // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. -// +// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.parsers; import java.io.BufferedReader; import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; @@ -39,13 +41,8 @@ import java.util.regex.Pattern; import org.forester.msa.BasicMsa; import org.forester.msa.Msa; import org.forester.msa.MsaFormatException; -import org.forester.phylogeny.Phylogeny; -import org.forester.phylogeny.PhylogenyNode; -import org.forester.phylogeny.data.Accession; -import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.sequence.BasicSequence; import org.forester.sequence.Sequence; -import org.forester.util.ForesterUtil; public class FastaParser { @@ -53,7 +50,7 @@ public class FastaParser { private static final Pattern SEQ_REGEX = Pattern.compile( "^\\s*(.+)" ); private static final Pattern ANYTHING_REGEX = Pattern.compile( "[\\d\\s]+" ); //>gi|71834668|ref|NP_001025424.1| Bcl2 [Danio rerio] - private static final Pattern FASTA_DESC_LINE = Pattern + public static final Pattern FASTA_DESC_LINE = Pattern .compile( ">?\\s*([^|]+)\\|([^|]+)\\S*\\s+(.+)\\s+\\[(.+)\\]" ); public static void main( final String[] args ) { @@ -71,6 +68,10 @@ public class FastaParser { } } + static public boolean isLikelyFasta( final File f ) throws IOException { + return isLikelyFasta( new FileInputStream( f ) ); + } + static public boolean isLikelyFasta( final InputStream is ) throws IOException { final BufferedReader reader = new BufferedReader( new InputStreamReader( is, "UTF-8" ) ); String line = null; @@ -92,6 +93,10 @@ public class FastaParser { return false; } + static public Msa parseMsa( final File f ) throws IOException { + return parseMsa( new FileInputStream( f ) ); + } + static public Msa parseMsa( final InputStream is ) throws IOException { return BasicMsa.createInstance( parse( is ) ); } @@ -104,6 +109,10 @@ public class FastaParser { return parseMsa( new ByteArrayInputStream( bytes ) ); } + static public List parse( final File f ) throws IOException { + return parse( new FileInputStream( f ) ); + } + static public List parse( final InputStream is ) throws IOException { final BufferedReader reader = new BufferedReader( new InputStreamReader( is, "UTF-8" ) ); String line = null; @@ -144,8 +153,8 @@ public class FastaParser { reader.close(); final List seqs = new ArrayList(); for( int i = 0; i < temp_msa.size(); ++i ) { - seqs.add( BasicSequence.createAaSequence( temp_msa.get( i )[ 0 ].toString(), temp_msa.get( i )[ 1 ] - .toString() ) ); + seqs.add( BasicSequence.createAaSequence( temp_msa.get( i )[ 0 ].toString(), + temp_msa.get( i )[ 1 ].toString() ) ); } return seqs; } @@ -175,36 +184,4 @@ public class FastaParser { } return line; } - - public static void extractFastaInformation( final Phylogeny phy ) { - for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { - final PhylogenyNode node = iter.next(); - if ( !ForesterUtil.isEmpty( node.getName() ) ) { - final Matcher name_m = FASTA_DESC_LINE.matcher( node.getName() ); - if ( name_m.lookingAt() ) { - System.out.println(); - // System.out.println( name_m.group( 1 ) ); - // System.out.println( name_m.group( 2 ) ); - // System.out.println( name_m.group( 3 ) ); - // System.out.println( name_m.group( 4 ) ); - final String acc_source = name_m.group( 1 ); - final String acc = name_m.group( 2 ); - final String seq_name = name_m.group( 3 ); - final String tax_sn = name_m.group( 4 ); - if ( !ForesterUtil.isEmpty( acc_source ) && !ForesterUtil.isEmpty( acc ) ) { - ForesterUtil.ensurePresenceOfSequence( node ); - node.getNodeData().getSequence( 0 ).setAccession( new Accession( acc, acc_source ) ); - } - if ( !ForesterUtil.isEmpty( seq_name ) ) { - ForesterUtil.ensurePresenceOfSequence( node ); - node.getNodeData().getSequence( 0 ).setName( seq_name ); - } - if ( !ForesterUtil.isEmpty( tax_sn ) ) { - ForesterUtil.ensurePresenceOfTaxonomy( node ); - node.getNodeData().getTaxonomy( 0 ).setScientificName( tax_sn ); - } - } - } - } - } }