X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fio%2Fparsers%2FFastaParser.java;h=348bec419d0f4006655a58fb42b291b8dd953c91;hb=ea76a6a6fe858b96dfbfa392de2bae77c964b4f2;hp=b99a4d4de4553ea4511e53d534af2ad5b40b6295;hpb=038c34792757a86f24296de5683e722fab3f9307;p=jalview.git diff --git a/forester/java/src/org/forester/io/parsers/FastaParser.java b/forester/java/src/org/forester/io/parsers/FastaParser.java index b99a4d4..348bec4 100644 --- a/forester/java/src/org/forester/io/parsers/FastaParser.java +++ b/forester/java/src/org/forester/io/parsers/FastaParser.java @@ -22,7 +22,7 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.io.parsers; @@ -38,17 +38,11 @@ import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.forester.archaeopteryx.Util; import org.forester.msa.BasicMsa; import org.forester.msa.Msa; import org.forester.msa.MsaFormatException; -import org.forester.phylogeny.Phylogeny; -import org.forester.phylogeny.PhylogenyNode; -import org.forester.phylogeny.data.Accession; -import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.sequence.BasicSequence; import org.forester.sequence.Sequence; -import org.forester.util.ForesterUtil; public class FastaParser { @@ -56,7 +50,7 @@ public class FastaParser { private static final Pattern SEQ_REGEX = Pattern.compile( "^\\s*(.+)" ); private static final Pattern ANYTHING_REGEX = Pattern.compile( "[\\d\\s]+" ); //>gi|71834668|ref|NP_001025424.1| Bcl2 [Danio rerio] - private static final Pattern FASTA_DESC_LINE = Pattern + public static final Pattern FASTA_DESC_LINE = Pattern .compile( ">?\\s*([^|]+)\\|([^|]+)\\S*\\s+(.+)\\s+\\[(.+)\\]" ); public static void main( final String[] args ) { @@ -74,6 +68,10 @@ public class FastaParser { } } + static public boolean isLikelyFasta( final File f ) throws IOException { + return isLikelyFasta( new FileInputStream( f ) ); + } + static public boolean isLikelyFasta( final InputStream is ) throws IOException { final BufferedReader reader = new BufferedReader( new InputStreamReader( is, "UTF-8" ) ); String line = null; @@ -186,36 +184,4 @@ public class FastaParser { } return line; } - - public static void extractFastaInformation( final Phylogeny phy ) { - for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { - final PhylogenyNode node = iter.next(); - if ( !ForesterUtil.isEmpty( node.getName() ) ) { - final Matcher name_m = FASTA_DESC_LINE.matcher( node.getName() ); - if ( name_m.lookingAt() ) { - System.out.println(); - // System.out.println( name_m.group( 1 ) ); - // System.out.println( name_m.group( 2 ) ); - // System.out.println( name_m.group( 3 ) ); - // System.out.println( name_m.group( 4 ) ); - final String acc_source = name_m.group( 1 ); - final String acc = name_m.group( 2 ); - final String seq_name = name_m.group( 3 ); - final String tax_sn = name_m.group( 4 ); - if ( !ForesterUtil.isEmpty( acc_source ) && !ForesterUtil.isEmpty( acc ) ) { - Util.ensurePresenceOfSequence( node ); - node.getNodeData().getSequence( 0 ).setAccession( new Accession( acc, acc_source ) ); - } - if ( !ForesterUtil.isEmpty( seq_name ) ) { - Util.ensurePresenceOfSequence( node ); - node.getNodeData().getSequence( 0 ).setName( seq_name ); - } - if ( !ForesterUtil.isEmpty( tax_sn ) ) { - Util.ensurePresenceOfTaxonomy( node ); - node.getNodeData().getTaxonomy( 0 ).setScientificName( tax_sn ); - } - } - } - } - } }