diff options
author | Arthur de Jong <arthur@arthurdejong.org> | 2008-04-19 12:22:22 +0200 |
---|---|---|
committer | Arthur de Jong <arthur@arthurdejong.org> | 2008-04-19 12:22:22 +0200 |
commit | 953a00fd4b8463f9b6ba8616a507746fe8bc56af (patch) | |
tree | 74ddeeafa8103cde238ef053fd89132f9b414a5a /common | |
parent | 67e674892d981effd8b55e098c84ad49bf3916c7 (diff) |
implement new dict module that uses a hashtable which is around 40 times faster for large (around 2000) entries but with around 40% more memory used
git-svn-id: http://arthurdejong.org/svn/nss-pam-ldapd/nss-ldapd@680 ef36b2f9-881f-0410-afb5-c4e39611909c
Diffstat (limited to 'common')
-rw-r--r-- | common/dict.c | 344 |
1 files changed, 252 insertions, 92 deletions
diff --git a/common/dict.c b/common/dict.c index 70c746a..92e5360 100644 --- a/common/dict.c +++ b/common/dict.c @@ -22,147 +22,307 @@ #include "config.h" -#include <stdio.h> #include <stdlib.h> #include <string.h> #include <strings.h> +#include <ctype.h> +#include <stdint.h> #include "dict.h" +/* + This module uses a hashtable to store it's key to value mappings. + The structure is basically as follows: + + [struct dictionary] + \- holds an array of pointers to a linked list of [struct dict_entry] + \- each entry has a key/value mapping + + The hashmap can be resized when the total number of elements + in the hashmap exceeds a certain load factor. + + All the keys are copied in a separate linked list of buffers + where each new buffer that is allocated is larger than the previous one. + The first buffer in the linked list is always the current one. + + Note that the initial sizes of hashtable and the loadfactor + still need to be tuned to the use in this application. +*/ + +/* an entry stores one key/value pair */ struct dict_entry { - const char *key; - void *value; + uint32_t hash; /* used for quick matching and rehashing */ + const char *key; /* a reference to a copy of the key */ + void *value; /* the stored value */ struct dict_entry *next; }; -struct dictionary { - struct dict_entry *head; - struct dict_entry *ptr; /* for searching */ +/* initial size allocated for the key strings */ +#define KEYSTORAGE_INITSIZE 100 + +/* the initial size of the hashtable */ +#define DICT_INITSIZE 7 + +/* load factor at which point to grow hashtable */ +#define DICT_LOADPERCENTAGE 400 + +/* storage for key strings */ +struct dict_keystorage { + size_t size; /* the number of bytes allocated */ + size_t off; /* where in the buffer we can begin storing */ + char *buf; /* storage for strings */ + /* newly allocated keystorages should be put in front of the list */ + struct dict_keystorage *next; }; -static struct dict_entry *dict_entry_new(const char *key) -{ - struct dict_entry *entry; - entry=(struct dict_entry *)malloc(sizeof(struct dict_entry)); - if (entry==NULL) - return NULL; - entry->key=strdup(key); - if (entry->key==NULL) - { - free(entry); - return NULL; - } - entry->value=NULL; - entry->next=NULL; - return entry; -} +/* the dictionary is a hashtable */ +struct dictionary { + int size; /* size of the hashtable */ + int num; /* total number of keys stored */ + struct dict_entry **table; /* the hashtable */ + struct dict_keystorage *keys; /* for storing key strings */ + int loop_idx; /* for looping */ + struct dict_entry *loop_entry; /* for looping */ +}; -static void dict_entry_free(struct dict_entry *entry) +/* Simple hash function that computes the hash value of a lower-cased + string. */ +static uint32_t stringhash(const char *str) { - /* free key */ - free((void *)entry->key); - /* free entry */ - free(entry); + uint32_t hash=0; + while (*str!='\0') + hash=3*hash+tolower(*str++); + return hash; } -static struct dict_entry *dict_entry_find( - DICT *dict,const char *key) +/* Grow the hashtable. */ +static void growhashtable(DICT *dict) { - struct dict_entry *ptr; - for (ptr=dict->head;ptr!=NULL;ptr=ptr->next) + int i; + int newsize; + struct dict_entry **newtable; + struct dict_entry *entry,*tmp; + newsize=dict->size*3+1; + /* allocate room for new hashtable */ + newtable=(struct dict_entry **)malloc(newsize*sizeof(struct dict_entry *)); + if (newtable==NULL) + return; /* allocating memory failed continue to fill the existing table */ + /* clear new table */ + for (i=0;i<newsize;i++) + newtable[i]=NULL; + /* copy old hashtable into new table */ + for (i=0;i<dict->size;i++) { - if (strcasecmp(ptr->key,key)==0) - return ptr; + /* go over elements in linked list */ + entry=dict->table[i]; + while (entry!=NULL) + { + tmp=entry; + entry=entry->next; + /* put in new position */ + tmp->next=newtable[tmp->hash%newsize]; + newtable[tmp->hash%newsize]=tmp; + } } - return NULL; + /* free the old hashtable */ + free(dict->table); + /* put new hashtable in place */ + dict->size=newsize; + dict->table=newtable; } DICT *dict_new(void) { + char *buf; struct dictionary *dict; - dict=(struct dictionary *)malloc(sizeof(struct dictionary)); - if (dict==NULL) + int i; + /* allocate room for dictionary information */ + buf=(char *)malloc(sizeof(struct dictionary)+sizeof(struct dict_keystorage)+KEYSTORAGE_INITSIZE); + if (buf==NULL) + return NULL; + /* set up toplevel structs and buffers */ + dict=(struct dictionary *)buf; + dict->size=DICT_INITSIZE; + dict->num=0; + dict->keys=(struct dict_keystorage *)(buf+sizeof(struct dictionary)); + dict->keys->size=KEYSTORAGE_INITSIZE; + dict->keys->off=0; + dict->keys->buf=buf+sizeof(struct dictionary)+sizeof(struct dict_keystorage); + dict->keys->next=NULL; + /* allocate initial hashtable */ + dict->table=(struct dict_entry **)malloc(DICT_INITSIZE*sizeof(struct dict_entry *)); + if (dict->table==NULL) + { + free(buf); return NULL; - dict->head=NULL; - dict->ptr=NULL; + } + /* clear the hashtable */ + for (i=0;i<DICT_INITSIZE;i++) + dict->table[i]=NULL; + /* we're done */ return dict; } -int dict_put(DICT *dict,const char *key,void *value) +/* Copy the key to the storage. Returns the copy of the key + in the storage. */ +static const char *storekey(DICT *dict,const char *key) { - struct dict_entry *entry; - if (key==NULL) - return -1; - entry=dict_entry_find(dict,key); - if ((entry==NULL)&&(value==NULL)) + size_t l; + size_t newsize; + char *buf; + struct dict_keystorage *newkeys; + l=strlen(key)+1; + /* ensure that we have enough space */ + if (l>=(dict->keys->size-dict->keys->off)) { - /* the key didn't exist, no point in creating one */ - return 0; + newsize=((dict->keys->size+l)*3)/2; + buf=(char *)malloc(sizeof(struct dict_keystorage)+newsize); + if (buf==NULL) + return NULL; + newkeys=(struct dict_keystorage *)buf; + newkeys->size=newsize; + newkeys->off=0; + newkeys->buf=(char *)(buf+sizeof(struct dict_keystorage)); + newkeys->next=dict->keys; + /* put new keystorage in front of linked list */ + dict->keys=newkeys; } - else if (entry==NULL) + /* add the value to the buffer */ + buf=dict->keys->buf+dict->keys->off; + strcpy(buf,key); + dict->keys->off+=l; + return buf; +} + +void dict_free(DICT *dict) +{ + struct dict_entry *entry,*etmp; + struct dict_keystorage *keys,*ktmp; + int i; + /* free hashtable entries */ + for (i=0;i<dict->size;i++) { - /* create new entry and insert it in the list */ - entry=dict_entry_new(key); - if (entry==NULL) - return -1; - /* insert entry in list */ - entry->next=dict->head; - dict->head=entry; + entry=dict->table[i]; + while (entry!=NULL) + { + etmp=entry; + entry=entry->next; + free(etmp); + } } - /* set value */ - entry->value=value; - return 0; + /* free the hashtable */ + free(dict->table); + /* free key storage + (except last which was allocated with the dict) */ + keys=dict->keys; + while (keys->next!=NULL) + { + ktmp=keys; + keys=keys->next; + free(ktmp); + } + /* free dictionary struct itself */ + free(dict); } void *dict_get(DICT *dict,const char *key) { + uint32_t hash; struct dict_entry *entry; - entry=dict_entry_find(dict,key); - if (entry==NULL) - return NULL; - return entry->value; + /* calculate the hash */ + hash=stringhash(key); + /* loop over the linked list in the hashtable */ + for (entry=dict->table[hash%dict->size];entry!=NULL;entry=entry->next) + { + if ( (entry->hash==hash) && + (strcasecmp(entry->key,key)==0) ) + return entry->value; + } + /* no matches found */ + return NULL; } -void dict_free(DICT *dict) +int dict_put(DICT *dict,const char *key,void *value) { - struct dict_entry *ptr,*nxt; - /* free all entries */ - ptr=dict->head; - while (ptr!=NULL) + uint32_t hash; + int idx; + struct dict_entry *entry,*prev; + /* check if we should grow the hashtable */ + if ( dict->num >= ((dict->size*DICT_LOADPERCENTAGE)/100) ) + growhashtable(dict); + /* calculate the hash and position in the hashtable */ + hash=stringhash(key); + idx=hash%dict->size; + /* check if the entry is already present */ + for (entry=dict->table[idx],prev=NULL; + entry!=NULL; + prev=entry,entry=entry->next) { - nxt=ptr->next; - dict_entry_free(ptr); - ptr=nxt; + if ( (entry->hash==hash) && + (strcasecmp(entry->key,key)==0) ) + { + /* check if we should unset the entry */ + if (value==NULL) + { + /* remove from linked list */ + if (prev==NULL) + dict->table[idx]=entry->next; + else + prev->next=entry->next; + /* free entry memory and register removal */ + free(entry); + dict->num--; + return 0; + } + /* just set the new value */ + entry->value=value; + return 0; + } } - /* clear some references */ - dict->head=NULL; - dict->ptr=NULL; - /* free struct itself */ - free(dict); + /* if entry should be unset we're done */ + if (value==NULL) + return 0; + /* entry is not present, make new entry */ + entry=(struct dict_entry *)malloc(sizeof(struct dict_entry)); + if (entry==NULL) + return -1; + entry->hash=hash; + entry->key=storekey(dict,key); + if (entry->key==NULL) + { + free(entry); + return -1; /* problem duplicating key */ + } + entry->value=value; + /* insert into hashtable/linked list */ + entry->next=dict->table[idx]; + dict->table[idx]=entry; + /* increment number of stored items */ + dict->num++; + return 0; } void dict_loop_first(DICT *dict) { - dict->ptr=dict->head; + dict->loop_idx=0; + dict->loop_entry=dict->table[dict->loop_idx]; } const char *dict_loop_next(DICT *dict,const char **key,void **value) { - struct dict_entry *ptr; - ptr=dict->ptr; - /* search until we get a non-NULL value */ - while ((ptr!=NULL)&&(ptr->value==NULL)) - ptr=ptr->next; - /* we went through the whole list */ - if (ptr==NULL) - { - dict->ptr=NULL; - if (key!=NULL) *key=NULL; - if (value!=NULL) *value=NULL; - return NULL; - } - dict->ptr=ptr->next; - if (key!=NULL) *key=ptr->key; - if (value!=NULL) *value=ptr->value; - return ptr->key; + struct dict_entry *entry; + /* find non-empty entry */ + while ( (dict->loop_idx<dict->size) && (dict->loop_entry==NULL) ) + dict->loop_entry=dict->table[dict->loop_idx++]; + if (dict->loop_entry==NULL) + return NULL; /* no more entries to check */ + /* save current result and go to next entry */ + entry=dict->loop_entry; + dict->loop_entry=entry->next; + /* return results */ + if (key!=NULL) + *key=entry->key; + if (value!=NULL) + *value=entry->value; + return entry->key; } |