X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=website%2Ffull_javadoc%2Fcompbio%2Fdata%2Fsequence%2FSequenceUtil.html;fp=website%2Ffull_javadoc%2Fcompbio%2Fdata%2Fsequence%2FSequenceUtil.html;h=77c2f5a4c76f6af13a6f3ce3cd919ba81be51668;hb=da8c820a7fb2edecb190589f3dc9c362e57a2f24;hp=0000000000000000000000000000000000000000;hpb=0bbebf27d045b1345bc042bdf24ef2e6808df251;p=jabaws.git diff --git a/website/full_javadoc/compbio/data/sequence/SequenceUtil.html b/website/full_javadoc/compbio/data/sequence/SequenceUtil.html new file mode 100644 index 0000000..77c2f5a --- /dev/null +++ b/website/full_javadoc/compbio/data/sequence/SequenceUtil.html @@ -0,0 +1,946 @@ + + + + + + +SequenceUtil + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +compbio.data.sequence +
+Class SequenceUtil

+
+java.lang.Object
+  extended by compbio.data.sequence.SequenceUtil
+
+
+
+
public final class SequenceUtil
extends Object
+ + +

+Utility class for operations on sequences +

+ +

+

+
Since:
+
1.0
+
Version:
+
2.0 June 2011
+
Author:
+
Peter Troshin
+
+
+ +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Field Summary
+static PatternAA + +
+          Valid Amino acids
+static PatternAMBIGUOUS_AA + +
+          Same as AA pattern but with two additional letters - XU
+static PatternAMBIGUOUS_NUCLEOTIDE + +
+          Ambiguous nucleotide
+static PatternDIGIT + +
+          A digit
+static PatternNON_AA + +
+          inversion of AA pattern
+static PatternNON_NUCLEOTIDE + +
+          Non nucleotide
+static PatternNONWORD + +
+          Non word
+static PatternNUCLEOTIDE + +
+          Nucleotides a, t, g, c, u
+static PatternWHITE_SPACE + +
+          A whitespace character: [\t\n\x0B\f\r]
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+static StringcleanProteinSequence(String sequence) + +
+          Remove all non AA chars from the sequence
+static StringcleanSequence(String sequence) + +
+          Removes all whitespace chars in the sequence string
+static voidcloseSilently(Logger log, + Closeable stream) + +
+          Closes the Closable and logs the exception if any
+static StringdeepCleanSequence(String sequence) + +
+          Removes all special characters and digits as well as whitespace chars + from the sequence
+static booleanisAmbiguosProtein(String sequence) + +
+          Check whether the sequence confirms to amboguous protein sequence
+static booleanisNonAmbNucleotideSequence(String sequence) + +
+          Ambiguous DNA chars : AGTCRYMKSWHBVDN // differs from protein in only one + (!) - B char
+static booleanisNucleotideSequence(FastaSequence s) + +
+           
+static booleanisProteinSequence(String sequence) + +
+           
+static List<FastaSequence>openInputStream(String inFilePath) + +
+          Reads and parses Fasta or Clustal formatted file into a list of + FastaSequence objects
+static HashSet<Score>readAAConResults(InputStream results) + +
+          Read AACon result with no alignment files.
+static HashMap<String,Set<Score>>readDisembl(InputStream input) + +
+          > Foobar_dundeefriends + + # COILS 34-41, 50-58, 83-91, 118-127, 160-169, 191-220, 243-252, 287-343 + + # REM465 355-368 + + # HOTLOOPS 190-204 + + # RESIDUE COILS REM465 HOTLOOPS + + M 0.86010 0.88512 0.37094 + + T 0.79983 0.85864 0.44331 + + >Next Sequence name
+static List<FastaSequence>readFasta(InputStream inStream) + +
+          Reads fasta sequences from inStream into the list of FastaSequence + objects
+static HashMap<String,Set<Score>>readGlobPlot(InputStream input) + +
+          > Foobar_dundeefriends + + # COILS 34-41, 50-58, 83-91, 118-127, 160-169, 191-220, 243-252, 287-343 + + # REM465 355-368 + + # HOTLOOPS 190-204 + + # RESIDUE COILS REM465 HOTLOOPS + + M 0.86010 0.88512 0.37094 + + T 0.79983 0.85864 0.44331 + + >Next Sequence name
+static Map<String,Score>readIUPred(File result) + +
+          Read IUPred output
+static Map<String,Score>readJRonn(File result) + +
+           
+static Map<String,Score>readJRonn(InputStream inStream) + +
+          Reader for JRonn horizontal file format
+static voidwriteFasta(OutputStream os, + List<FastaSequence> sequences) + +
+          Writes FastaSequence in the file, each sequence will take one line only
+static voidwriteFasta(OutputStream outstream, + List<FastaSequence> sequences, + int width) + +
+          Writes list of FastaSequeces into the outstream formatting the sequence + so that it contains width chars on each line
+static voidwriteFastaKeepTheStream(OutputStream outstream, + List<FastaSequence> sequences, + int width) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+WHITE_SPACE

+
+public static final Pattern WHITE_SPACE
+
+
A whitespace character: [\t\n\x0B\f\r] +

+

+
+
+
+ +

+DIGIT

+
+public static final Pattern DIGIT
+
+
A digit +

+

+
+
+
+ +

+NONWORD

+
+public static final Pattern NONWORD
+
+
Non word +

+

+
+
+
+ +

+AA

+
+public static final Pattern AA
+
+
Valid Amino acids +

+

+
+
+
+ +

+NON_AA

+
+public static final Pattern NON_AA
+
+
inversion of AA pattern +

+

+
+
+
+ +

+AMBIGUOUS_AA

+
+public static final Pattern AMBIGUOUS_AA
+
+
Same as AA pattern but with two additional letters - XU +

+

+
+
+
+ +

+NUCLEOTIDE

+
+public static final Pattern NUCLEOTIDE
+
+
Nucleotides a, t, g, c, u +

+

+
+
+
+ +

+AMBIGUOUS_NUCLEOTIDE

+
+public static final Pattern AMBIGUOUS_NUCLEOTIDE
+
+
Ambiguous nucleotide +

+

+
+
+
+ +

+NON_NUCLEOTIDE

+
+public static final Pattern NON_NUCLEOTIDE
+
+
Non nucleotide +

+

+
+
+ + + + + + + + +
+Method Detail
+ +

+isNucleotideSequence

+
+public static boolean isNucleotideSequence(FastaSequence s)
+
+
+ +
Returns:
true if the sequence contains only the letters a, c, t, g, u
+
+
+
+ +

+isNonAmbNucleotideSequence

+
+public static boolean isNonAmbNucleotideSequence(String sequence)
+
+
Ambiguous DNA chars : AGTCRYMKSWHBVDN // differs from protein in only one + (!) - B char +

+

+
+
+
+
+ +

+cleanSequence

+
+public static String cleanSequence(String sequence)
+
+
Removes all whitespace chars in the sequence string +

+

+
Parameters:
sequence - +
Returns:
cleaned up sequence
+
+
+
+ +

+deepCleanSequence

+
+public static String deepCleanSequence(String sequence)
+
+
Removes all special characters and digits as well as whitespace chars + from the sequence +

+

+
Parameters:
sequence - +
Returns:
cleaned up sequence
+
+
+
+ +

+cleanProteinSequence

+
+public static String cleanProteinSequence(String sequence)
+
+
Remove all non AA chars from the sequence +

+

+
Parameters:
sequence - the sequence to clean +
Returns:
cleaned sequence
+
+
+
+ +

+isProteinSequence

+
+public static boolean isProteinSequence(String sequence)
+
+
+
Parameters:
sequence - +
Returns:
true if the sequence is a protein sequence, false otherwise
+
+
+
+ +

+isAmbiguosProtein

+
+public static boolean isAmbiguosProtein(String sequence)
+
+
Check whether the sequence conforms to an ambiguous protein sequence +

+

+
Parameters:
sequence - +
Returns:
return true only if the sequence is an ambiguous protein sequence. + Return false otherwise, e.g. if the sequence is a non-ambiguous + protein or DNA
+
+
+
+ +

+writeFasta

+
+public static void writeFasta(OutputStream outstream,
+                              List<FastaSequence> sequences,
+                              int width)
+                       throws IOException
+
+
Writes a list of FastaSequences into the outstream, formatting each sequence + so that it contains at most width chars on each line +

+

+
Parameters:
outstream -
sequences -
width - the maximum number of characters to write in one line +
Throws: +
IOException
+
+
+
+ +

+writeFastaKeepTheStream

+
+public static void writeFastaKeepTheStream(OutputStream outstream,
+                                           List<FastaSequence> sequences,
+                                           int width)
+                                    throws IOException
+
+
+ +
Throws: +
IOException
+
+
+
+ +

+readFasta

+
+public static List<FastaSequence> readFasta(InputStream inStream)
+                                     throws IOException
+
+
Reads fasta sequences from inStream into the list of FastaSequence + objects +

+

+
Parameters:
inStream - the stream to read the fasta sequences from +
Returns:
list of FastaSequence objects +
Throws: +
IOException
+
+
+
+ +

+writeFasta

+
+public static void writeFasta(OutputStream os,
+                              List<FastaSequence> sequences)
+                       throws IOException
+
+
Writes FastaSequence in the file, each sequence will take one line only +

+

+
Parameters:
os -
sequences - +
Throws: +
IOException
+
+
+
+ +

+readIUPred

+
+public static Map<String,Score> readIUPred(File result)
+                                    throws IOException,
+                                           UnknownFileFormatException
+
+
Read IUPred output +

+

+
Parameters:
result - +
Returns:
Map key->sequence name, value->Score +
Throws: +
IOException +
UnknownFileFormatException
+
+
+
+ +

+readJRonn

+
+public static Map<String,Score> readJRonn(File result)
+                                   throws IOException,
+                                          UnknownFileFormatException
+
+
+ +
Throws: +
IOException +
UnknownFileFormatException
+
+
+
+ +

+readJRonn

+
+public static Map<String,Score> readJRonn(InputStream inStream)
+                                   throws IOException,
+                                          UnknownFileFormatException
+
+
Reader for JRonn horizontal file format + +
+ >Foobar M G D T T A G 0.48 0.42
+ 0.42 0.48 0.52 0.53 0.54
+ 
+ 
+ Where all values are tab delimited
+

+

+
Parameters:
inStream - the InputStream connected to the JRonn output file +
Returns:
Map key=sequence name value=Score +
Throws: +
IOException - is thrown if the inStream has problems accessing the data +
UnknownFileFormatException - is thrown if the inStream represents an unknown source of + data, i.e. not a JRonn output
+
+
+
+ +

+closeSilently

+
+public static final void closeSilently(Logger log,
+                                       Closeable stream)
+
+
Closes the Closeable and logs the exception, if any +

+

+
Parameters:
log -
stream -
+
+
+
+ +

+readDisembl

+
+public static HashMap<String,Set<Score>> readDisembl(InputStream input)
+                                              throws IOException,
+                                                     UnknownFileFormatException
+
+
> Foobar_dundeefriends + + # COILS 34-41, 50-58, 83-91, 118-127, 160-169, 191-220, 243-252, 287-343 + + # REM465 355-368 + + # HOTLOOPS 190-204 + + # RESIDUE COILS REM465 HOTLOOPS + + M 0.86010 0.88512 0.37094 + + T 0.79983 0.85864 0.44331 + + >Next Sequence name +

+

+
Parameters:
input - the InputStream +
Returns:
Map key=sequence name, value=set of scores +
Throws: +
IOException +
UnknownFileFormatException
+
+
+
+ +

+readGlobPlot

+
+public static HashMap<String,Set<Score>> readGlobPlot(InputStream input)
+                                               throws IOException,
+                                                      UnknownFileFormatException
+
+
> Foobar_dundeefriends + + # COILS 34-41, 50-58, 83-91, 118-127, 160-169, 191-220, 243-252, 287-343 + + # REM465 355-368 + + # HOTLOOPS 190-204 + + # RESIDUE COILS REM465 HOTLOOPS + + M 0.86010 0.88512 0.37094 + + T 0.79983 0.85864 0.44331 + + >Next Sequence name +

+

+
Parameters:
input - +
Returns:
Map key=sequence name, value=set of scores +
Throws: +
IOException +
UnknownFileFormatException
+
+
+
+ +

+readAAConResults

+
+public static HashSet<Score> readAAConResults(InputStream results)
+
+
Read AACon result with no alignment files. This method leaves incoming + InputStream open! +

+

+
Parameters:
results - output file of AAConservation +
Returns:
Map with keys ConservationMethod -> float[]
+
+
+
+ +

+openInputStream

+
+public static List<FastaSequence> openInputStream(String inFilePath)
+                                           throws IOException,
+                                                  UnknownFileFormatException
+
+
Reads and parses Fasta or Clustal formatted file into a list of + FastaSequence objects +

+

+
Parameters:
inFilePath - the path to the input file +
Returns:
the List of FastaSequence objects +
Throws: +
IOException - if the file denoted by inFilePath cannot be read +
UnknownFileFormatException - if the inFilePath points to a file whose format cannot be + recognised
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + +