# Provenance: jabaws.git (source.jalview.org gitweb), blob ae1a9d6 of
# binaries/src/disembl/biopython-1.50/Bio/SeqUtils/CheckSum.py
#
# Copyright 2002 by Yves Bastide and Brad Chapman.
# All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""Functions to calculate assorted sequence checksums."""

# crc32, crc64, gcg, and seguid
# crc64 is adapted from BioPerl
#
# The code below runs unchanged on Python 2.6+ and Python 3.

from binascii import crc32 as _crc32


def _as_string(seq):
    """Return the plain string behind seq.

    Objects exposing a ``tostring()`` method (Seq objects) are unwrapped
    through it; anything else is returned untouched.
    """
    tostring = getattr(seq, "tostring", None)
    if tostring is None:
        return seq
    return tostring()


def _as_bytes(text):
    """Return text as a byte string, encoding it when it is not one already."""
    if isinstance(text, bytes):
        return text
    return text.encode("utf-8")


def _char_codes(text):
    """Return an iterator over the integer code of each character of text."""
    if isinstance(text, bytes):
        # Iterating bytes gives 1-char strings on Python 2 but ints on
        # Python 3; a bytearray gives ints on both.
        return iter(bytearray(text))
    return (ord(char) for char in text)


def crc32(seq):
    """Returns the crc32 checksum for a sequence (string or Seq object)

    The value is whatever binascii.crc32 yields on the running interpreter
    (it may be negative on Python 2, it is always unsigned on Python 3).
    """
    # Only the unwrapping is duck-typed; errors raised by the checksum
    # itself are no longer hidden behind an AttributeError handler.
    return _crc32(_as_bytes(_as_string(seq)))


def _init_table_h():
    """Build the 256-entry lookup table holding the high 32 bits used by crc64."""
    table = []
    for i in range(256):
        low = i
        part_h = 0
        for _ in range(8):
            rflag = low & 1
            low >>= 1
            # Carry from the high word into bit 31 of the low word.  With
            # only 8 rounds that bit never reaches bit 0, so it cannot
            # change part_h; it is kept to mirror the two-word shift.
            if part_h & 1:
                low |= 1 << 31
            part_h >>= 1
            if rflag:
                part_h ^= 0xd8000000
        table.append(part_h)
    return table


# Initialisation
_table_h = _init_table_h()


def crc64(s):
    """Returns the crc64 checksum for a sequence (string or Seq object)

    The result is a string of the form "CRC-" followed by 16 upper-case
    hexadecimal digits (high 32 bits first, then low 32 bits).
    """
    crcl = 0
    crch = 0
    for code in _char_codes(s):
        # Shift the 64-bit state (crch:crcl) right by one byte, moving the
        # low byte of the high word into the top byte of the low word.
        carried = (crch & 0xFF) << 24
        idx = (crcl ^ code) & 0xFF
        crcl = (crcl >> 8) | carried
        crch = (crch >> 8) ^ _table_h[idx]

    return "CRC-%08X%08X" % (crch, crcl)


def gcg(seq):
    """Returns the GCG checksum (int) for a sequence (string or Seq object)

    Given a nucleotide or amino-acid sequence (or any string),
    returns the GCG checksum (int). Checksum used by GCG program.
    seq type = str.
    Based on BioPerl GCG_checksum. Adapted by Sebastian Bassi
    with the help of John Lenton, Pablo Ziliani, and Gabriel Genellina.
    All sequences are converted to uppercase """
    codes = _char_codes(_as_string(seq).upper())
    # Each character is weighted by its position, counted 1..57 and then
    # starting again from 1.
    checksum = sum((position % 57 + 1) * code
                   for position, code in enumerate(codes))
    return checksum % 10000


def seguid(seq):
    """Returns the SEGUID (string) for a sequence (string or Seq object)

    Given a nucleotide or amino-acid sequence (or any string),
    returns the SEGUID string (A SEquence Globally Unique IDentifier).
    seq type = str.
    For more information about SEGUID, see:
    http://bioinformatics.anl.gov/seguid/
    DOI: 10.1002/pmic.200600032 """
    # Imported locally, as in the original, so that the module namespace
    # (and "from CheckSum import *") is unchanged.  hashlib (Python 2.5+)
    # and base64.b64encode (Python 2.4+) are always available on the
    # interpreters this module supports, so the former bare-except
    # fallbacks to the sha module and base64.encodestring are gone.
    import base64
    import hashlib

    text = _as_string(seq).upper()
    digest = hashlib.sha1(_as_bytes(text)).digest()
    encoded = base64.b64encode(digest)
    if not isinstance(encoded, str):
        # Python 3 hands back bytes; callers expect a native string.
        encoded = encoded.decode("ascii")
    return encoded.rstrip("=")


if __name__ == "__main__":
    print("Quick self test")

    str_light_chain_one = ("QSALTQPASVSGSPGQSITISCTGTSSDVGSYNLVSWYQQHPGK"
                           "APKLMIYEGSKRPSGVSNRFSGSKSGNTASLTISGLQAEDEADY"
                           "YCSSYAGSSTLVFGGGTKLTVL")

    str_light_chain_two = ("QSALTQPASVSGSPGQSITISCTGTSSDVGSYNLVSWYQQHPGK"
                           "APKLMIYEGSKRPSGVSNRFSGSKSGNTASLTISGLQAEDEADY"
                           "YCCSYAGSSTWVFGGGTKLTVL")

    # The two chains differ, yet they share the same CRC64 value, while
    # their SEGUIDs are distinct.
    assert crc64(str_light_chain_one) == crc64(str_light_chain_two)
    assert 'CRC-44CAAD88706CC153' == crc64(str_light_chain_one)

    assert 'BpBeDdcNUYNsdk46JoJdw7Pd3BI' == seguid(str_light_chain_one)
    assert 'X5XEaayob1nZLOc7eVT9qyczarY' == seguid(str_light_chain_two)

    print("Done")