X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=website%2Fdm_javadoc%2Fcompbio%2Fdata%2Fsequence%2FSequenceUtil.html;fp=website%2Fdm_javadoc%2Fcompbio%2Fdata%2Fsequence%2FSequenceUtil.html;h=0000000000000000000000000000000000000000;hb=5be600c3985aa7bcb8d8b51d77d773c76e6802bb;hp=94308d4e9665f78ae50ce70be732697f69ac2358;hpb=bc3346bd7c1c518fad867d4c60a53779e7516588;p=jabaws.git diff --git a/website/dm_javadoc/compbio/data/sequence/SequenceUtil.html b/website/dm_javadoc/compbio/data/sequence/SequenceUtil.html deleted file mode 100644 index 94308d4..0000000 --- a/website/dm_javadoc/compbio/data/sequence/SequenceUtil.html +++ /dev/null @@ -1,946 +0,0 @@ - - - - - - -SequenceUtil - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - -
- -
- - - -
- -

- -compbio.data.sequence -
-Class SequenceUtil

-
-java.lang.Object
-  extended by compbio.data.sequence.SequenceUtil
-
-
-
-
public final class SequenceUtil
extends Object
- - -

-Utility class for operations on sequences -

- -

-

-
Since:
-
1.0
-
Version:
-
2.0 June 2011
-
Author:
-
Peter Troshin
-
-
- -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-Field Summary
-static PatternAA - -
-          Valid Amino acids
-static PatternAMBIGUOUS_AA - -
-          Same as AA pattern but with two additional letters - XU
-static PatternAMBIGUOUS_NUCLEOTIDE - -
-          Ambiguous nucleotide
-static PatternDIGIT - -
-          A digit
-static PatternNON_AA - -
-          inversion of AA pattern
-static PatternNON_NUCLEOTIDE - -
-          Non nucleotide
-static PatternNONWORD - -
-          Non word
-static PatternNUCLEOTIDE - -
-          Nucleotides a, t, g, c, u
-static PatternWHITE_SPACE - -
-          A whitespace character: [\t\n\x0B\f\r]
-  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-Method Summary
-static StringcleanProteinSequence(String sequence) - -
-          Remove all non AA chars from the sequence
-static StringcleanSequence(String sequence) - -
-          Removes all whitespace chars in the sequence string
-static voidcloseSilently(Logger log, - Closeable stream) - -
-          Closes the Closable and logs the exception if any
-static StringdeepCleanSequence(String sequence) - -
-          Removes all special characters and digits as well as whitespace chars - from the sequence
-static booleanisAmbiguosProtein(String sequence) - -
-          Check whether the sequence confirms to amboguous protein sequence
-static booleanisNonAmbNucleotideSequence(String sequence) - -
-          Ambiguous DNA chars : AGTCRYMKSWHBVDN // differs from protein in only one - (!) - B char
-static booleanisNucleotideSequence(FastaSequence s) - -
-           
-static booleanisProteinSequence(String sequence) - -
-           
-static List<FastaSequence>openInputStream(String inFilePath) - -
-          Reads and parses Fasta or Clustal formatted file into a list of - FastaSequence objects
-static HashSet<Score>readAAConResults(InputStream results) - -
-          Read AACon result with no alignment files.
-static HashMap<String,Set<Score>>readDisembl(InputStream input) - -
-          > Foobar_dundeefriends - - # COILS 34-41, 50-58, 83-91, 118-127, 160-169, 191-220, 243-252, 287-343 - - # REM465 355-368 - - # HOTLOOPS 190-204 - - # RESIDUE COILS REM465 HOTLOOPS - - M 0.86010 0.88512 0.37094 - - T 0.79983 0.85864 0.44331 - - >Next Sequence name
-static List<FastaSequence>readFasta(InputStream inStream) - -
-          Reads fasta sequences from inStream into the list of FastaSequence - objects
-static HashMap<String,Set<Score>>readGlobPlot(InputStream input) - -
-          > Foobar_dundeefriends - - # COILS 34-41, 50-58, 83-91, 118-127, 160-169, 191-220, 243-252, 287-343 - - # REM465 355-368 - - # HOTLOOPS 190-204 - - # RESIDUE COILS REM465 HOTLOOPS - - M 0.86010 0.88512 0.37094 - - T 0.79983 0.85864 0.44331 - - >Next Sequence name
-static Map<String,Score>readIUPred(File result) - -
-          Read IUPred output
-static Map<String,Score>readJRonn(File result) - -
-           
-static Map<String,Score>readJRonn(InputStream inStream) - -
-          Reader for JRonn horizontal file format
-static voidwriteFasta(OutputStream os, - List<FastaSequence> sequences) - -
-          Writes FastaSequence in the file, each sequence will take one line only
-static voidwriteFasta(OutputStream outstream, - List<FastaSequence> sequences, - int width) - -
-          Writes list of FastaSequeces into the outstream formatting the sequence - so that it contains width chars on each line
-static voidwriteFastaKeepTheStream(OutputStream outstream, - List<FastaSequence> sequences, - int width) - -
-           
- - - - - - - -
Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
-  -

- - - - - - - - -
-Field Detail
- -

-WHITE_SPACE

-
-public static final Pattern WHITE_SPACE
-
-
A whitespace character: [\t\n\x0B\f\r] -

-

-
-
-
- -

-DIGIT

-
-public static final Pattern DIGIT
-
-
A digit -

-

-
-
-
- -

-NONWORD

-
-public static final Pattern NONWORD
-
-
Non word -

-

-
-
-
- -

-AA

-
-public static final Pattern AA
-
-
Valid Amino acids -

-

-
-
-
- -

-NON_AA

-
-public static final Pattern NON_AA
-
-
inversion of AA pattern -

-

-
-
-
- -

-AMBIGUOUS_AA

-
-public static final Pattern AMBIGUOUS_AA
-
-
Same as AA pattern but with two additional letters - XU -

-

-
-
-
- -

-NUCLEOTIDE

-
-public static final Pattern NUCLEOTIDE
-
-
Nucleotides a, t, g, c, u -

-

-
-
-
- -

-AMBIGUOUS_NUCLEOTIDE

-
-public static final Pattern AMBIGUOUS_NUCLEOTIDE
-
-
Ambiguous nucleotide -

-

-
-
-
- -

-NON_NUCLEOTIDE

-
-public static final Pattern NON_NUCLEOTIDE
-
-
Non nucleotide -

-

-
-
- - - - - - - - -
-Method Detail
- -

-isNucleotideSequence

-
-public static boolean isNucleotideSequence(FastaSequence s)
-
-
- -
Returns:
true if the sequence contains only the letters a, c, t, g, u
-
-
-
- -

-isNonAmbNucleotideSequence

-
-public static boolean isNonAmbNucleotideSequence(String sequence)
-
-
Ambiguous DNA chars : AGTCRYMKSWHBVDN // differs from protein in only one - (!) - B char -

-

-
-
-
-
- -

-cleanSequence

-
-public static String cleanSequence(String sequence)
-
-
Removes all whitespace chars in the sequence string -

-

-
Parameters:
sequence - -
Returns:
cleaned up sequence
-
-
-
- -

-deepCleanSequence

-
-public static String deepCleanSequence(String sequence)
-
-
Removes all special characters and digits as well as whitespace chars - from the sequence -

-

-
Parameters:
sequence - -
Returns:
cleaned up sequence
-
-
-
- -

-cleanProteinSequence

-
-public static String cleanProteinSequence(String sequence)
-
-
Remove all non AA chars from the sequence -

-

-
Parameters:
sequence - the sequence to clean -
Returns:
cleaned sequence
-
-
-
- -

-isProteinSequence

-
-public static boolean isProteinSequence(String sequence)
-
-
-
Parameters:
sequence - -
Returns:
true if the sequence is a protein sequence, false otherwise
-
-
-
- -

-isAmbiguosProtein

-
-public static boolean isAmbiguosProtein(String sequence)
-
-
Check whether the sequence conforms to an ambiguous protein sequence -

-

-
Parameters:
sequence - -
Returns:
true only if the sequence is an ambiguous protein sequence. - Returns false otherwise, e.g. if the sequence is a non-ambiguous - protein or DNA
-
-
-
- -

-writeFasta

-
-public static void writeFasta(OutputStream outstream,
-                              List<FastaSequence> sequences,
-                              int width)
-                       throws IOException
-
-
Writes a list of FastaSequences to the outstream, formatting each sequence - so that it contains at most width chars on each line -

-

-
Parameters:
outstream -
sequences -
width - - the maximum number of characters to write in one line -
Throws: -
IOException
-
-
-
- -

-writeFastaKeepTheStream

-
-public static void writeFastaKeepTheStream(OutputStream outstream,
-                                           List<FastaSequence> sequences,
-                                           int width)
-                                    throws IOException
-
-
- -
Throws: -
IOException
-
-
-
- -

-readFasta

-
-public static List<FastaSequence> readFasta(InputStream inStream)
-                                     throws IOException
-
-
Reads fasta sequences from inStream into the list of FastaSequence - objects -

-

-
Parameters:
inStream - from -
Returns:
list of FastaSequence objects -
Throws: -
IOException
-
-
-
- -

-writeFasta

-
-public static void writeFasta(OutputStream os,
-                              List<FastaSequence> sequences)
-                       throws IOException
-
-
Writes the FastaSequence objects to the output stream; each sequence will take one line only -

-

-
Parameters:
os -
sequences - -
Throws: -
IOException
-
-
-
- -

-readIUPred

-
-public static Map<String,Score> readIUPred(File result)
-                                    throws IOException,
-                                           UnknownFileFormatException
-
-
Read IUPred output -

-

-
Parameters:
result - -
Returns:
Map key->sequence name, value->Score -
Throws: -
IOException -
UnknownFileFormatException
-
-
-
- -

-readJRonn

-
-public static Map<String,Score> readJRonn(File result)
-                                   throws IOException,
-                                          UnknownFileFormatException
-
-
- -
Throws: -
IOException -
UnknownFileFormatException
-
-
-
- -

-readJRonn

-
-public static Map<String,Score> readJRonn(InputStream inStream)
-                                   throws IOException,
-                                          UnknownFileFormatException
-
-
Reader for JRonn horizontal file format - -
- >Foobar M G D T T A G 0.48 0.42
- 0.42 0.48 0.52 0.53 0.54
- 
- 
- Where all values are tab delimited
-

-

-
Parameters:
inStream - the InputStream connected to the JRonn output file -
Returns:
Map key=sequence name value=Score -
Throws: -
IOException - is thrown if the inStream has problems accessing the data -
UnknownFileFormatException - is thrown if the inStream represents an unknown source of - data, i.e. not a JRonn output
-
-
-
- -

-closeSilently

-
-public static final void closeSilently(Logger log,
-                                       Closeable stream)
-
-
Closes the Closeable and logs the exception, if any -

-

-
Parameters:
log -
stream -
-
-
-
- -

-readDisembl

-
-public static HashMap<String,Set<Score>> readDisembl(InputStream input)
-                                              throws IOException,
-                                                     UnknownFileFormatException
-
-
> Foobar_dundeefriends - - # COILS 34-41, 50-58, 83-91, 118-127, 160-169, 191-220, 243-252, 287-343 - - # REM465 355-368 - - # HOTLOOPS 190-204 - - # RESIDUE COILS REM465 HOTLOOPS - - M 0.86010 0.88512 0.37094 - - T 0.79983 0.85864 0.44331 - - >Next Sequence name -

-

-
Parameters:
input - the InputStream -
Returns:
Map key=sequence name, value=set of score -
Throws: -
IOException -
UnknownFileFormatException
-
-
-
- -

-readGlobPlot

-
-public static HashMap<String,Set<Score>> readGlobPlot(InputStream input)
-                                               throws IOException,
-                                                      UnknownFileFormatException
-
-
> Foobar_dundeefriends - - # COILS 34-41, 50-58, 83-91, 118-127, 160-169, 191-220, 243-252, 287-343 - - # REM465 355-368 - - # HOTLOOPS 190-204 - - # RESIDUE COILS REM465 HOTLOOPS - - M 0.86010 0.88512 0.37094 - - T 0.79983 0.85864 0.44331 - - >Next Sequence name -

-

-
Parameters:
input - -
Returns:
Map key=sequence name, value=set of score -
Throws: -
IOException -
UnknownFileFormatException
-
-
-
- -

-readAAConResults

-
-public static HashSet<Score> readAAConResults(InputStream results)
-
-
Reads AACon results with no alignment files. This method leaves the incoming - InputStream open! -

-

-
Parameters:
results - output file of AAConservation -
Returns:
Map with keys ConservationMethod -> float[]
-
-
-
- -

-openInputStream

-
-public static List<FastaSequence> openInputStream(String inFilePath)
-                                           throws IOException,
-                                                  UnknownFileFormatException
-
-
Reads and parses Fasta or Clustal formatted file into a list of - FastaSequence objects -

-

-
Parameters:
inFilePath - the path to the input file -
Returns:
the List of FastaSequence objects -
Throws: -
IOException - if the file denoted by inFilePath cannot be read -
UnknownFileFormatException - if the inFilePath points to the file which format cannot be - recognised
-
-
- -
- - - - - - - - - - - - - - - - - - - -
- -
- - - -
- - -