1 /* -*- mode: c; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
3 /*********************************************************************
4 * Clustal Omega - Multiple sequence alignment
6 * Copyright (C) 2010 University College Dublin
8 * Clustal-Omega is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
13 * This file is part of Clustal-Omega.
15 ********************************************************************/
18 * RCS $Id: hash-C.h 143 2010-10-14 13:11:14Z andreas $
22 // Class for Hash data structure
23 // * works in the same way as a hash in Perl
24 // * keys are strings of type char*
25 // * data elements are of type Typ
26 // * objects have to be declared with maximal size, e.g. Hash hash1(10000) (num_slots should not be a power of 2)
27 // * works also if maximal size is exceeded, but gets slower by a factor ~num_keys/num_slots
30 * RCS $Id: hash-C.h 143 2010-10-14 13:11:14Z andreas $
37 #include <iostream> // cin, cout, cerr
38 #include <cstdio> // printf
39 #include <stdlib.h> // exit
40 #include <string> // strcmp, strstr
41 #include <math.h> // sqrt, pow
42 #include <limits.h> // INT_MIN
43 #include <float.h> // FLT_MIN
44 #include <ctype.h> // islower, isdigit etc
45 #include <time.h> // clock
46 #include <errno.h> // perror()
57 #include "list.h" // List<Typ>
58 #include "list-C.h" ////////////////////////////////// DEBUG
65 //////////////////////////////////////////////////////////////////////////////
66 ////////////////////// Methods of class Hash /////////////////////////////////
67 //////////////////////////////////////////////////////////////////////////////
69 //////////////////////////////////////////////////////////////////////////////
73 //////////////////////////////////////////////////////////////////////////////
74 // Constructor and Destructor of Hash
75 //////////////////////////////////////////////////////////////////////////////
76 // Constructor of class Hash
77 //////////////////////////////////////////////////////////////////////////////
81 num_keys=0; max_len=0; prev=curr=num_slots = 0; slot=NULL;
85 Hash<Typ>::Hash(int nslots)
87 num_keys=0; max_len=0; prev=curr=num_slots = nslots;
88 slot = new Slot<Typ>*[num_slots]; //Create array of num_slots slots
89 for (int i=0; i<num_slots; i++) slot[i]=NULL; //set pointers to NULL
90 fail = static_cast<Typ>(0);
94 Hash<Typ>::Hash(int nslots, Typ f)
96 num_keys=0; max_len=0; prev=curr=num_slots = nslots;
97 slot = new Slot<Typ>*[num_slots]; //Create array of num_slots slots
98 for (int i=0; i<num_slots; i++) slot[i]=NULL; //set pointers to NULL
103 ////////////////////////////////////////////////////////////////////////////////////////////
104 // Destructor of class Hash
105 ////////////////////////////////////////////////////////////////////////////////////////////
110 delete[] slot; slot = NULL;
114 ////////////////////////////////////////////////////////////////////////////////////////////
116 ////////////////////////////////////////////////////////////////////////////////////////////
118 inline unsigned int Hash<Typ>::HashValue(char* key) //returns the hash value for key
120 // Calculate a hash value by the division method:
121 // Transform key into a natural number k = sum ( key[i]*128^(L-i) ) and calculate i= k % num_slots.
122 // Since calculating k would lead to an overflow, i is calculated iteratively
123 // and at each iteration the part divisible by num_slots is subtracted, i.e. (% num_slots is taken).
124 if (key==NULL) {printf("Warning from hash.C: key=NULL\n"); return 0;}
125 unsigned int i=0; // Start of iteration: k is zero
127 while(*c) i = ((i<<7) + *(c++)) % num_slots;
129 // cerr<<" Hash value for \'"<<key<<"\' is "<<i<<"\n";
133 ////////////////////////////////////////////////////////////////////////////////////////////
134 // Create new hash (and delete any data present)
135 ////////////////////////////////////////////////////////////////////////////////////////////
137 void Hash<Typ>::New(int nslots, Typ f)
141 delete[] slot; slot = NULL;
142 num_keys=0; max_len=0; prev=curr=num_slots = nslots;
143 slot = new Slot<Typ>*[num_slots]; //Create array of num_slots slots
144 for (int i=0; i<num_slots; i++) slot[i]=NULL; //set pointers to NULL
149 ////////////////////////////////////////////////////////////////////////////////////////////
150 // Methods that work with a key supplied as an argument
152 ////////////////////////////////////////////////////////////////////////////////////////////
153 // Return data element for key. Returns 'fail' if key does not exist
154 ////////////////////////////////////////////////////////////////////////////////////////////
156 Typ Hash<Typ>::Show(char* key)
159 int i = HashValue(key);
162 if (!pslot) return fail;
165 if(!strcmp(pslot->ReadNext().key,key)) return pslot->ReadCurrent().data;
166 } while(!pslot->End());
170 ////////////////////////////////////////////////////////////////////////////////////////////
171 // Add/replace key/data pair to hash and return address of data element
172 ////////////////////////////////////////////////////////////////////////////////////////////
174 Typ* Hash<Typ>::Add(char* key, Typ data)
178 int i = HashValue(key);
181 if (!pslot) { num_keys++; KeyLen(); slot[i]=new(Slot<Typ>); return slot[i]->Push(key_len,key,data);}
185 pairp = pslot->ReadNextAddress();
186 if(!strcmp(pairp->key,key))
189 pslot->Overwrite(*pairp);
190 return &(pairp->data);
192 } while(!pslot->End());
195 return pslot->Push(key_len,key,data);
200 ////////////////////////////////////////////////////////////////////////////////////////////
201 // Add key to hash and return address of data element.
202 // If key exists leave data element unchanged, else set it to 'fail'.
203 ////////////////////////////////////////////////////////////////////////////////////////////
205 Typ* Hash<Typ>::Add(char* key)
208 int i = HashValue(key);
211 if (!pslot) { num_keys++; KeyLen(); slot[i]=new(Slot<Typ>); return slot[i]->Push(key_len,key,fail);}
215 if(!strcmp(pslot->ReadNext().key,key))
217 return &((pslot->ReadCurrentAddress())->data);
219 } while(!pslot->End());
222 return pslot->Push(key_len,key,fail);
226 /////////////////////////////////////////////////////////////////////////////////////////////
227 // Remove key from hash and return data element for key ('fail' if key does not exist)
228 /////////////////////////////////////////////////////////////////////////////////////////////
230 Typ Hash<Typ>::Remove(char* key)
233 int i = HashValue(key);
236 if (!pslot) return fail;
240 if(!strcmp(pslot->ReadNext().key,key))
244 // if key was the only element in pslot then delete whole list
245 if (pslot->Size()==0) {delete pslot; pslot = NULL;slot[i]=0;}
246 return pslot->ReadCurrent().data;
248 } while(!pslot->End());
253 //////////////////////////////////////////////////////////////////////////////
254 // Remove all keys from hash;
255 //////////////////////////////////////////////////////////////////////////////
257 void Hash<Typ>::RemoveAll()
259 for(int i=0; i<num_slots; i++)
260 if(slot[i]) {delete slot[i]; slot[i]=NULL;}
270 //////////////////////////////////////////////////////////////////////////////
271 // Methods that work with an internal "current key":
274 //////////////////////////////////////////////////////////////////////////////
275 // Return data of next key. Return 'fail' data and empty key if at end
276 //////////////////////////////////////////////////////////////////////////////
278 Typ Hash<Typ>::ReadNext()
283 if (curr>=num_slots) {return fail;}
284 pslot = slot[curr]; // current list is never empty, except when current=num_slots
285 pairp = pslot->ReadNextAddress();
288 do // move on to next non-empty list
290 if (++curr>=num_slots) return pairp->data;
298 //////////////////////////////////////////////////////////////////////////////
299 // Write next key into variable key and return data. Return 'fail' data and empty key if at end
300 // Attention: 'key' must have memory of at least char[MaxLen()+1] allocated!
301 //////////////////////////////////////////////////////////////////////////////
303 Typ Hash<Typ>::ReadNext(char* key)
308 if (curr>=num_slots) {*key='\0'; return fail;}
309 pslot = slot[curr]; // current list is never empty, except when current=num_slots
310 pairp = pslot->ReadNextAddress();
311 strcpy(key,pairp->key);
314 do // move on to next non-empty list
316 if (++curr>=num_slots) return pairp->data;
326 //////////////////////////////////////////////////////////////////////////////
327 // Return data of current key
328 //////////////////////////////////////////////////////////////////////////////
330 Typ Hash<Typ>::ReadCurrent()
336 if (curr>=num_slots) {return fail;}
337 pslot = slot[curr]; // current list is never empty, except when current=num_slots
338 pairp = &(pslot->ReadCurrent());
340 do // move on to next non-empty list
342 if (++curr>=num_slots) return pairp->data;
350 //////////////////////////////////////////////////////////////////////////////
351 // Write key last read into variable key and return data
352 // Attention: 'key' must have memory of at least char[MaxLen()+1] allocated!
353 //////////////////////////////////////////////////////////////////////////////
355 Typ Hash<Typ>::ReadCurrent(char* key)
361 if (curr>=num_slots) {*key='\0'; return fail;}
362 pslot = slot[curr]; // current list is never empty, except when current=num_slots
363 pairp = &(pslot->ReadCurrent());
364 strcpy(key,pairp->key);
366 do // move on to next non-empty list
368 if (++curr>=num_slots) return pairp->data;
376 //////////////////////////////////////////////////////////////////////////////
377 // Remove current key, return data, and advance to next key (after Reset() remove first element)
378 //////////////////////////////////////////////////////////////////////////////
380 Typ Hash<Typ>::RemoveCurrent()
386 if (curr>=num_slots) {return fail;}
387 pslot = slot[curr]; // current list is never empty, except when current=num_slots
388 pairp = &(pslot->Delete());
390 // if key was the only element in pslot then delete whole list
391 if (pslot->Size()==0) {delete pslot; pslot = NULL; slot[curr]=0;}
392 if (!pslot || pslot->End()) {
393 do // move on to next non-empty list
395 if (++curr>=num_slots) {prev=curr; return pairp->data;}
404 //////////////////////////////////////////////////////////////////////////////
405 // Remove current key, return data, copy current key into key, and advance to next key
406 // (After Reset() remove first element)
407 // Attention: 'key' must have memory of at least char[MaxLen()+1] allocated!
408 //////////////////////////////////////////////////////////////////////////////
410 Typ Hash<Typ>::RemoveCurrent(char* key)
416 if (curr>=num_slots) {*key='\0'; return fail;}
417 pslot = slot[curr]; // current list is never empty, except when current=num_slots
418 pairp = &(pslot->Delete());
419 strcpy(key,pairp->key);
421 // if key was the only element in pslot then delete whole list
422 if (pslot->Size()==0) {delete pslot; pslot = NULL; slot[curr]=0;}
423 if (!pslot || pslot->End()) {
424 do // move on to next non-empty list
426 if (++curr>=num_slots) {prev=curr; return pairp->data;}
437 //////////////////////////////////////////////////////////////////////////////
438 // Reset current position to beginning of hash
439 //////////////////////////////////////////////////////////////////////////////
441 void Hash<Typ>::Reset()
448 if (curr>=num_slots) {prev=curr; return;}
457 /////////////////////////////////////////////////////////////////////////////////////////////
458 // Methods that return usefull information about the data stored in Hash:
461 ////////////////////////////////////////////////////////////////////////////////////////////
462 // Returns 1 if the hash contains key, 0 otherwise
463 ////////////////////////////////////////////////////////////////////////////////////////////
465 int Hash<Typ>::Contains(char* key)
468 int i = HashValue(key);
471 if (!pslot) return 0;
474 if(!strcmp(pslot->ReadNext().key,key)) return 1;
475 } while(!pslot->End());
479 /////////////////////////////////////////////////////////////////////////////////////////////
480 //print out list of keys and data
481 /////////////////////////////////////////////////////////////////////////////////////////////
483 void Hash<Typ>::Print()
485 char key[MaxLen()+1];
487 cout<<"\nPrint hash:\n";
490 cout<<key<<"->"<<ReadNext(key)<<"\n";
493 /////////////////////////////////////////////////////////////////////////////////////////////
494 //Print out hash with internal representation as array
495 /////////////////////////////////////////////////////////////////////////////////////////////
497 void Hash<Typ>::DebugPrint()
503 cout<<"Debug-print hash:";
504 for(int i=0; i<num_slots; i++)
509 cout<<"\nhash value "<<i;
513 pairp = pslot->ReadNextAddress();
514 cout<<" "<<pairp->key<<"->"<<pairp->data;
528 ////////////////////////////////////////////////////////////////////////////////
529 // Main program: test class Hash
530 ////////////////////////////////////////////////////////////////////////////////
533 // Hash<int> ihash(5);
534 // char* key=new char[128];
538 // ihash.Add("So many monsters",36);
539 // ihash.Add("So many chickens",25);
541 // // cerr<<"Address of ihash(\"So many monsters\") is "<<ihash("So many monsters")<<"\n";
542 // // cerr<<"Address of ihash(\"So many monsters\") is "<<ihash("So many monsters")<<"\n";
543 // // *ihash("So many monsters")=2;
544 // // (*ihash("So many monsters"))++;
545 // // (*ihash("So many monsters"))++;
546 // cout<<"Size of hash="<<ihash.Size()<<"\n";
547 // ihash.DebugPrint();
550 // strcpy(key,"Some more monsters");
552 // strcpy(key,"Even more monsters");
554 // cout<<"Size of hash="<<ihash.Size()<<"\n";
555 // cout<<"Maximum key length = "<<ihash.MaxLen()<<"\n";
558 // cout<<"ihash.Remove(\"Even more monsters\") returns "<<ihash.Remove("Even more monsters")<<"\n";
562 // cout<<"ihash.Remove(\"More monsters\") returns "<<ihash.Remove("More monsters")<<"\n";
563 // ihash.Add("So many chickens",999);
564 // ihash.Add("So many monster",1);
567 // cout<<"So many chickens:"<<ihash.Show("So many chickens")<<"\n";
568 // cout<<"Size of hash="<<ihash.Size()<<"\n";
573 // while (!ihash.End())
575 // data = ihash.ReadNext(key);
576 // cout<<" "<<key<<"->"<<data<<endl;
577 // data = ihash.ReadCurrent(key);
578 // cout<<" "<<key<<"->"<<data<<endl;
580 // cout<<"Removing all keys..."<<endl;
581 // cout<<"Size of hash="<<ihash.Size()<<"\n";
583 // while (ihash.Size())
585 // data = ihash.RemoveCurrent(key);
586 // cout<<" "<<key<<"->"<<data<<"\n";
587 // cout<<"Size of hash="<<ihash.Size()<<"\n";
590 // ihash.ReadCurrent(key);