X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=website%2Ffull_javadoc%2Fcompbio%2Fdata%2Fsequence%2FSequenceUtil.html;fp=website%2Ffull_javadoc%2Fcompbio%2Fdata%2Fsequence%2FSequenceUtil.html;h=77c2f5a4c76f6af13a6f3ce3cd919ba81be51668;hb=da8c820a7fb2edecb190589f3dc9c362e57a2f24;hp=0000000000000000000000000000000000000000;hpb=0bbebf27d045b1345bc042bdf24ef2e6808df251;p=jabaws.git diff --git a/website/full_javadoc/compbio/data/sequence/SequenceUtil.html b/website/full_javadoc/compbio/data/sequence/SequenceUtil.html new file mode 100644 index 0000000..77c2f5a --- /dev/null +++ b/website/full_javadoc/compbio/data/sequence/SequenceUtil.html @@ -0,0 +1,946 @@ + + + +
+ +
+
+
|
++ + | +|||||||||
+ PREV CLASS + NEXT CLASS | ++ FRAMES + NO FRAMES + + + + + | +|||||||||
+ SUMMARY: NESTED | FIELD | CONSTR | METHOD | ++DETAIL: FIELD | CONSTR | METHOD | +
+java.lang.Object + compbio.data.sequence.SequenceUtil ++
public final class SequenceUtil
+Utility class for operations on sequences +
+ +
+
+Field Summary | +|
---|---|
+static Pattern |
+AA
+
++ Valid Amino acids |
+
+static Pattern |
+AMBIGUOUS_AA
+
++ Same as AA pattern but with two additional letters - XU |
+
+static Pattern |
+AMBIGUOUS_NUCLEOTIDE
+
++ Ambiguous nucleotide |
+
+static Pattern |
+DIGIT
+
++ A digit |
+
+static Pattern |
+NON_AA
+
++ inversion of AA pattern |
+
+static Pattern |
+NON_NUCLEOTIDE
+
++ Non nucleotide |
+
+static Pattern |
+NONWORD
+
++ Non word |
+
+static Pattern |
+NUCLEOTIDE
+
++ Nucleotides a, t, g, c, u |
+
+static Pattern |
+WHITE_SPACE
+
++ A whitespace character: [\t\n\x0B\f\r] |
+
+Method Summary | +|
---|---|
+static String |
+cleanProteinSequence(String sequence)
+
++ Remove all non AA chars from the sequence |
+
+static String |
+cleanSequence(String sequence)
+
++ Removes all whitespace chars in the sequence string |
+
+static void |
+closeSilently(Logger log,
+ Closeable stream)
+
++ Closes the Closeable and logs the exception if any |
+
+static String |
+deepCleanSequence(String sequence)
+
++ Removes all special characters and digits as well as whitespace chars + from the sequence |
+
+static boolean |
+isAmbiguosProtein(String sequence)
+
++ Check whether the sequence conforms to ambiguous protein sequence |
+
+static boolean |
+isNonAmbNucleotideSequence(String sequence)
+
++ Ambiguous DNA chars : AGTCRYMKSWHBVDN // differs from protein in only one + (!) - B char |
+
+static boolean |
+isNucleotideSequence(FastaSequence s)
+
++ |
+
+static boolean |
+isProteinSequence(String sequence)
+
++ |
+
+static List<FastaSequence> |
+openInputStream(String inFilePath)
+
++ Reads and parses Fasta or Clustal formatted file into a list of + FastaSequence objects |
+
+static HashSet<Score> |
+readAAConResults(InputStream results)
+
++ Read AACon result with no alignment files. |
+
+static HashMap<String,Set<Score>> |
+readDisembl(InputStream input)
+
++ > Foobar_dundeefriends + + # COILS 34-41, 50-58, 83-91, 118-127, 160-169, 191-220, 243-252, 287-343 + + # REM465 355-368 + + # HOTLOOPS 190-204 + + # RESIDUE COILS REM465 HOTLOOPS + + M 0.86010 0.88512 0.37094 + + T 0.79983 0.85864 0.44331 + + >Next Sequence name |
+
+static List<FastaSequence> |
+readFasta(InputStream inStream)
+
++ Reads fasta sequences from inStream into the list of FastaSequence + objects |
+
+static HashMap<String,Set<Score>> |
+readGlobPlot(InputStream input)
+
++ > Foobar_dundeefriends + + # COILS 34-41, 50-58, 83-91, 118-127, 160-169, 191-220, 243-252, 287-343 + + # REM465 355-368 + + # HOTLOOPS 190-204 + + # RESIDUE COILS REM465 HOTLOOPS + + M 0.86010 0.88512 0.37094 + + T 0.79983 0.85864 0.44331 + + >Next Sequence name |
+
+static Map<String,Score> |
+readIUPred(File result)
+
++ Read IUPred output |
+
+static Map<String,Score> |
+readJRonn(File result)
+
++ |
+
+static Map<String,Score> |
+readJRonn(InputStream inStream)
+
++ Reader for JRonn horizontal file format |
+
+static void |
+writeFasta(OutputStream os,
+ List<FastaSequence> sequences)
+
++ Writes FastaSequence in the file, each sequence will take one line only |
+
+static void |
+writeFasta(OutputStream outstream,
+ List<FastaSequence> sequences,
+ int width)
+
++ Writes list of FastaSequences into the outstream formatting the sequence + so that it contains width chars on each line |
+
+static void |
+writeFastaKeepTheStream(OutputStream outstream,
+ List<FastaSequence> sequences,
+ int width)
+
++ |
+
Methods inherited from class java.lang.Object | +
---|
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
+
+Field Detail | +
---|
+public static final Pattern WHITE_SPACE+
+
+public static final Pattern DIGIT+
+
+public static final Pattern NONWORD+
+
+public static final Pattern AA+
+
+public static final Pattern NON_AA+
+
+public static final Pattern AMBIGUOUS_AA+
+
+public static final Pattern NUCLEOTIDE+
+
+public static final Pattern AMBIGUOUS_NUCLEOTIDE+
+
+public static final Pattern NON_NUCLEOTIDE+
+
+Method Detail | +
---|
+public static boolean isNucleotideSequence(FastaSequence s)+
+public static boolean isNonAmbNucleotideSequence(String sequence)+
+
+public static String cleanSequence(String sequence)+
+
sequence
-
++public static String deepCleanSequence(String sequence)+
+
sequence
-
++public static String cleanProteinSequence(String sequence)+
+
sequence
- the sequence to clean
++public static boolean isProteinSequence(String sequence)+
sequence
-
++public static boolean isAmbiguosProtein(String sequence)+
+
sequence
-
++public static void writeFasta(OutputStream outstream, + List<FastaSequence> sequences, + int width) + throws IOException+
+
outstream
- sequences
- width
- - the maximum number of characters to write in one line
+IOException
+public static void writeFastaKeepTheStream(OutputStream outstream, + List<FastaSequence> sequences, + int width) + throws IOException+
IOException
+public static List<FastaSequence> readFasta(InputStream inStream) + throws IOException+
+
inStream
- from
+IOException
+public static void writeFasta(OutputStream os, + List<FastaSequence> sequences) + throws IOException+
+
os
- sequences
-
+IOException
+public static Map<String,Score> readIUPred(File result) + throws IOException, + UnknownFileFormatException+
+
result
-
+IOException
+UnknownFileFormatException
+public static Map<String,Score> readJRonn(File result) + throws IOException, + UnknownFileFormatException+
IOException
+UnknownFileFormatException
+public static Map<String,Score> readJRonn(InputStream inStream) + throws IOException, + UnknownFileFormatException+
+ >Foobar M G D T T A G 0.48 0.42 + 0.42 0.48 0.52 0.53 0.54 + ++ Where all values are tab delimited ++
inStream
- the InputStream connected to the JRonn output file
+IOException
- is thrown if the inStream has problems accessing the data
+UnknownFileFormatException
- is thrown if the inStream represents an unknown source of
+ data, i.e. not a JRonn output+public static final void closeSilently(Logger log, + Closeable stream)+
+
log
- stream
- +public static HashMap<String,Set<Score>> readDisembl(InputStream input) + throws IOException, + UnknownFileFormatException+
+
input
- the InputStream
+IOException
+UnknownFileFormatException
+public static HashMap<String,Set<Score>> readGlobPlot(InputStream input) + throws IOException, + UnknownFileFormatException+
+
input
-
+IOException
+UnknownFileFormatException
+public static HashSet<Score> readAAConResults(InputStream results)+
+
results
- output file of AAConservation
+ConservationMethod
-> float[]+public static List<FastaSequence> openInputStream(String inFilePath) + throws IOException, + UnknownFileFormatException+
+
inFilePath
- the path to the input file
+IOException
- if the file denoted by inFilePath cannot be read
+UnknownFileFormatException
- if the inFilePath points to a file whose format cannot be
+ recognised
+
+
|
++ + | +|||||||||
+ PREV CLASS + NEXT CLASS | ++ FRAMES + NO FRAMES + + + + + | +|||||||||
+ SUMMARY: NESTED | FIELD | CONSTR | METHOD | ++DETAIL: FIELD | CONSTR | METHOD | +