1 # Copyright 2002 by Yves Bastide and Brad Chapman.
3 # This code is part of the Biopython distribution and governed by its
4 # license. Please see the LICENSE file that should have been included
5 # as part of this package.
7 """Functions to calculate assorted sequence checksums."""
9 # crc32, crc64, gcg, and seguid
10 # crc64 is adapted from BioPerl
12 from binascii import crc32 as _crc32
15 """Returns the crc32 checksum for a sequence (string or Seq object)"""
17 #Assume it's a Seq object
18 return _crc32(seq.tostring())
19 except AttributeError :
31 if part_h & 1: l |= (1L << 31)
33 if rflag: part_h ^= 0xd8000000L
34 _table_h.append(part_h)
38 _table_h = _init_table_h()
41 """Returns the crc64 checksum for a sequence (string or Seq object)"""
45 shr = (crch & 0xFF) << 24
47 temp1l = (crcl >> 8) | shr
48 idx = (crcl ^ ord(c)) & 0xFF
49 crch = temp1h ^ _table_h[idx]
52 return "CRC-%08X%08X" % (crch, crcl)
56 """Returns the GCG checksum (int) for a sequence (string or Seq object)
58 Given a nucleotide or amino-acid sequence (or any string),
59 returns the GCG checksum (int). Checksum used by GCG program.
61 Based on BioPerl GCG_checksum. Adapted by Sebastian Bassi
62 with the help of John Lenton, Pablo Ziliani, and Gabriel Genellina.
63 All sequences are converted to uppercase """
65 if type(seq)!=type("aa"):
69 checksum += index * ord(char.upper())
70 if index == 57: index = 0
71 return checksum % 10000
74 """Returns the SEGUID (string) for a sequence (string or Seq object)
76 Given a nucleotide or amino-acid sequence (or any string),
77 returns the SEGUID string (A SEquence Globally Unique IDentifier).
79 For more information about SEGUID, see:
80 http://bioinformatics.anl.gov/seguid/
81 DOI: 10.1002/pmic.200600032 """
83 #Python 2.5 sha1 is in hashlib
91 if type(seq)!=type("aa"):
92 seq=seq.tostring().upper()
98 return base64.b64encode(m.digest()).rstrip("=")
102 #Note: Using os.linesep doesn't work on Windows,
103 #where os.linesep= "\r\n" but the encoded string
104 #contains "\n" but not "\r\n"
105 return base64.encodestring(m.digest()).replace("\n","").rstrip("=")
107 if __name__ == "__main__" :
108 print "Quick self test"
110 str_light_chain_one = "QSALTQPASVSGSPGQSITISCTGTSSDVGSYNLVSWYQQHPGK" \
111 + "APKLMIYEGSKRPSGVSNRFSGSKSGNTASLTISGLQAEDEADY" \
112 + "YCSSYAGSSTLVFGGGTKLTVL"
114 str_light_chain_two = "QSALTQPASVSGSPGQSITISCTGTSSDVGSYNLVSWYQQHPGK" \
115 + "APKLMIYEGSKRPSGVSNRFSGSKSGNTASLTISGLQAEDEADY" \
116 + "YCCSYAGSSTWVFGGGTKLTVL"
118 assert crc64(str_light_chain_one) == crc64(str_light_chain_two)
119 assert 'CRC-44CAAD88706CC153' == crc64(str_light_chain_one)
121 assert 'BpBeDdcNUYNsdk46JoJdw7Pd3BI' == seguid(str_light_chain_one)
122 assert 'X5XEaayob1nZLOc7eVT9qyczarY' == seguid(str_light_chain_two)