X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fio%2Fparsers%2FFastaParser.java;h=5f42df83344f1d6ccbf0c7ffde3da4e1e8afe13b;hb=7019346e7e8cecf22a06ffb59ea86daf0d28189d;hp=b04e25426b9042f8aec3ec852722313c3daa5dbb;hpb=eee996a6476a1e3d84c07f8f690dcde3ff4b2ef5;p=jalview.git diff --git a/forester/java/src/org/forester/io/parsers/FastaParser.java b/forester/java/src/org/forester/io/parsers/FastaParser.java index b04e254..5f42df8 100644 --- a/forester/java/src/org/forester/io/parsers/FastaParser.java +++ b/forester/java/src/org/forester/io/parsers/FastaParser.java @@ -28,6 +28,8 @@ package org.forester.io.parsers; import java.io.BufferedReader; import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; @@ -39,13 +41,8 @@ import java.util.regex.Pattern; import org.forester.msa.BasicMsa; import org.forester.msa.Msa; import org.forester.msa.MsaFormatException; -import org.forester.phylogeny.Phylogeny; -import org.forester.phylogeny.PhylogenyNode; -import org.forester.phylogeny.data.Accession; -import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.sequence.BasicSequence; import org.forester.sequence.Sequence; -import org.forester.util.ForesterUtil; public class FastaParser { @@ -53,7 +50,7 @@ public class FastaParser { private static final Pattern SEQ_REGEX = Pattern.compile( "^\\s*(.+)" ); private static final Pattern ANYTHING_REGEX = Pattern.compile( "[\\d\\s]+" ); //>gi|71834668|ref|NP_001025424.1| Bcl2 [Danio rerio] - private static final Pattern FASTA_DESC_LINE = Pattern + public static final Pattern FASTA_DESC_LINE = Pattern .compile( ">?\\s*([^|]+)\\|([^|]+)\\S*\\s+(.+)\\s+\\[(.+)\\]" ); public static void main( final String[] args ) { @@ -92,6 +89,10 @@ public class FastaParser { return false; } + static public Msa parseMsa( final File f ) throws IOException { + return parseMsa( new FileInputStream( f ) ); + } + static public Msa parseMsa( final InputStream is ) throws IOException { return BasicMsa.createInstance( parse( is ) ); } @@ -104,6 +105,10 @@ public class FastaParser { return parseMsa( new ByteArrayInputStream( bytes ) ); } + static public List parse( final File f ) throws IOException { + return parse( new FileInputStream( f ) ); + } + static public List parse( final InputStream is ) throws IOException { final BufferedReader reader = new BufferedReader( new InputStreamReader( is, "UTF-8" ) ); String line = null; @@ -175,36 +180,4 @@ public class FastaParser { } return line; } - - public static void extractFastaInformation( final Phylogeny phy ) { - for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { - final PhylogenyNode node = iter.next(); - if ( !ForesterUtil.isEmpty( node.getName() ) ) { - final Matcher name_m = FASTA_DESC_LINE.matcher( node.getName() ); - if ( name_m.lookingAt() ) { - System.out.println(); - // System.out.println( name_m.group( 1 ) ); - // System.out.println( name_m.group( 2 ) ); - // System.out.println( name_m.group( 3 ) ); - // System.out.println( name_m.group( 4 ) ); - final String acc_source = name_m.group( 1 ); - final String acc = name_m.group( 2 ); - final String seq_name = name_m.group( 3 ); - final String tax_sn = name_m.group( 4 ); - if ( !ForesterUtil.isEmpty( acc_source ) && !ForesterUtil.isEmpty( acc ) ) { - ForesterUtil.ensurePresenceOfSequence( node ); - node.getNodeData().getSequence( 0 ).setAccession( new Accession( acc, acc_source ) ); - } - if ( !ForesterUtil.isEmpty( seq_name ) ) { - ForesterUtil.ensurePresenceOfSequence( node ); - node.getNodeData().getSequence( 0 ).setName( seq_name ); - } - if ( !ForesterUtil.isEmpty( tax_sn ) ) { - ForesterUtil.ensurePresenceOfTaxonomy( node ); - node.getNodeData().getTaxonomy( 0 ).setScientificName( tax_sn ); - } - } - } - } - } }