X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fio%2Fparsers%2FFastaParser.java;h=5f42df83344f1d6ccbf0c7ffde3da4e1e8afe13b;hb=674f7858341235991a8d0eda5f55a20243944832;hp=4c6845cfab7ce885cf0194d7e0fc752c7646ffe1;hpb=48f7a89be9d34f1930a1f863e608235cc27184c5;p=jalview.git diff --git a/forester/java/src/org/forester/io/parsers/FastaParser.java b/forester/java/src/org/forester/io/parsers/FastaParser.java index 4c6845c..5f42df8 100644 --- a/forester/java/src/org/forester/io/parsers/FastaParser.java +++ b/forester/java/src/org/forester/io/parsers/FastaParser.java @@ -6,7 +6,7 @@ // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved -// +// // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either @@ -16,7 +16,7 @@ // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. -// +// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA @@ -28,6 +28,8 @@ package org.forester.io.parsers; import java.io.BufferedReader; import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; @@ -39,13 +41,8 @@ import java.util.regex.Pattern; import org.forester.msa.BasicMsa; import org.forester.msa.Msa; import org.forester.msa.MsaFormatException; -import org.forester.phylogeny.Phylogeny; -import org.forester.phylogeny.PhylogenyNode; -import org.forester.phylogeny.data.Accession; -import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.sequence.BasicSequence; import org.forester.sequence.Sequence; -import org.forester.util.ForesterUtil; public class FastaParser { @@ -53,7 +50,7 @@ public class FastaParser { private static final Pattern SEQ_REGEX = Pattern.compile( "^\\s*(.+)" ); private static final Pattern ANYTHING_REGEX = Pattern.compile( "[\\d\\s]+" ); //>gi|71834668|ref|NP_001025424.1| Bcl2 [Danio rerio] - private static final Pattern FASTA_DESC_LINE = Pattern + public static final Pattern FASTA_DESC_LINE = Pattern .compile( ">?\\s*([^|]+)\\|([^|]+)\\S*\\s+(.+)\\s+\\[(.+)\\]" ); public static void main( final String[] args ) { @@ -92,6 +89,10 @@ public class FastaParser { return false; } + static public Msa parseMsa( final File f ) throws IOException { + return parseMsa( new FileInputStream( f ) ); + } + static public Msa parseMsa( final InputStream is ) throws IOException { return BasicMsa.createInstance( parse( is ) ); } @@ -104,6 +105,10 @@ public class FastaParser { return parseMsa( new ByteArrayInputStream( bytes ) ); } + static public List parse( final File f ) throws IOException { + return parse( new FileInputStream( f ) ); + } + static public List parse( final InputStream is ) throws IOException { final BufferedReader reader = new BufferedReader( new InputStreamReader( is, "UTF-8" ) ); String line = null; @@ -144,8 +149,8 @@ public class FastaParser { reader.close(); final List seqs = new ArrayList(); for( int i = 0; i < temp_msa.size(); ++i ) { - seqs.add( BasicSequence.createAaSequence( temp_msa.get( i )[ 0 ].toString(), temp_msa.get( i )[ 1 ] - .toString() ) ); + seqs.add( BasicSequence.createAaSequence( temp_msa.get( i )[ 0 ].toString(), + temp_msa.get( i )[ 1 ].toString() ) ); } return seqs; } @@ -175,36 +180,4 @@ public class FastaParser { } return line; } - - public static void extractFastaInformation( final Phylogeny phy ) { - for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { - final PhylogenyNode node = iter.next(); - if ( !ForesterUtil.isEmpty( node.getName() ) ) { - final Matcher name_m = FASTA_DESC_LINE.matcher( node.getName() ); - if ( name_m.lookingAt() ) { - System.out.println(); - // System.out.println( name_m.group( 1 ) ); - // System.out.println( name_m.group( 2 ) ); - // System.out.println( name_m.group( 3 ) ); - // System.out.println( name_m.group( 4 ) ); - final String acc_source = name_m.group( 1 ); - final String acc = name_m.group( 2 ); - final String seq_name = name_m.group( 3 ); - final String tax_sn = name_m.group( 4 ); - if ( !ForesterUtil.isEmpty( acc_source ) && !ForesterUtil.isEmpty( acc ) ) { - ForesterUtil.ensurePresenceOfSequence( node ); - node.getNodeData().getSequence( 0 ).setAccession( new Accession( acc, acc_source ) ); - } - if ( !ForesterUtil.isEmpty( seq_name ) ) { - ForesterUtil.ensurePresenceOfSequence( node ); - node.getNodeData().getSequence( 0 ).setName( seq_name ); - } - if ( !ForesterUtil.isEmpty( tax_sn ) ) { - ForesterUtil.ensurePresenceOfTaxonomy( node ); - node.getNodeData().getTaxonomy( 0 ).setScientificName( tax_sn ); - } - } - } - } - } }