Arthur de Jong

Open Source / Free Software developer

summaryrefslogtreecommitdiffstats
path: root/common
diff options
context:
space:
mode:
authorArthur de Jong <arthur@arthurdejong.org>2008-04-19 12:22:22 +0200
committerArthur de Jong <arthur@arthurdejong.org>2008-04-19 12:22:22 +0200
commit953a00fd4b8463f9b6ba8616a507746fe8bc56af (patch)
tree74ddeeafa8103cde238ef053fd89132f9b414a5a /common
parent67e674892d981effd8b55e098c84ad49bf3916c7 (diff)
implement new dict module that uses a hashtable which is around 40 times faster for large (around 2000) entries but with around 40% more memory used
git-svn-id: http://arthurdejong.org/svn/nss-pam-ldapd/nss-ldapd@680 ef36b2f9-881f-0410-afb5-c4e39611909c
Diffstat (limited to 'common')
-rw-r--r--common/dict.c344
1 files changed, 252 insertions, 92 deletions
diff --git a/common/dict.c b/common/dict.c
index 70c746a..92e5360 100644
--- a/common/dict.c
+++ b/common/dict.c
@@ -22,147 +22,307 @@
#include "config.h"
-#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
+#include <ctype.h>
+#include <stdint.h>
#include "dict.h"
+/*
+ This module uses a hashtable to store it's key to value mappings.
+ The structure is basically as follows:
+
+ [struct dictionary]
+ \- holds an array of pointers to a linked list of [struct dict_entry]
+ \- each entry has a key/value mapping
+
+ The hashmap can be resized when the total number of elements
+ in the hashmap exceeds a certain load factor.
+
+ All the keys are copied in a separate linked list of buffers
+ where each new buffer that is allocated is larger than the previous one.
+ The first buffer in the linked list is always the current one.
+
+ Note that the initial sizes of hashtable and the loadfactor
+ still need to be tuned to the use in this application.
+*/
+
+/* an entry stores one key/value pair */
struct dict_entry {
- const char *key;
- void *value;
+ uint32_t hash; /* used for quick matching and rehashing */
+ const char *key; /* a reference to a copy of the key */
+ void *value; /* the stored value */
struct dict_entry *next;
};
-struct dictionary {
- struct dict_entry *head;
- struct dict_entry *ptr; /* for searching */
+/* initial size allocated for the key strings */
+#define KEYSTORAGE_INITSIZE 100
+
+/* the initial size of the hashtable */
+#define DICT_INITSIZE 7
+
+/* load factor at which point to grow hashtable */
+#define DICT_LOADPERCENTAGE 400
+
+/* storage for key strings */
+struct dict_keystorage {
+ size_t size; /* the number of bytes allocated */
+ size_t off; /* where in the buffer we can begin storing */
+ char *buf; /* storage for strings */
+ /* newly allocated keystorages should be put in front of the list */
+ struct dict_keystorage *next;
};
-static struct dict_entry *dict_entry_new(const char *key)
-{
- struct dict_entry *entry;
- entry=(struct dict_entry *)malloc(sizeof(struct dict_entry));
- if (entry==NULL)
- return NULL;
- entry->key=strdup(key);
- if (entry->key==NULL)
- {
- free(entry);
- return NULL;
- }
- entry->value=NULL;
- entry->next=NULL;
- return entry;
-}
+/* the dictionary is a hashtable */
+struct dictionary {
+ int size; /* size of the hashtable */
+ int num; /* total number of keys stored */
+ struct dict_entry **table; /* the hashtable */
+ struct dict_keystorage *keys; /* for storing key strings */
+ int loop_idx; /* for looping */
+ struct dict_entry *loop_entry; /* for looping */
+};
-static void dict_entry_free(struct dict_entry *entry)
+/* Simple hash function that computes the hash value of a lower-cased
+ string. */
+static uint32_t stringhash(const char *str)
{
- /* free key */
- free((void *)entry->key);
- /* free entry */
- free(entry);
+ uint32_t hash=0;
+ while (*str!='\0')
+ hash=3*hash+tolower(*str++);
+ return hash;
}
-static struct dict_entry *dict_entry_find(
- DICT *dict,const char *key)
+/* Grow the hashtable. */
+static void growhashtable(DICT *dict)
{
- struct dict_entry *ptr;
- for (ptr=dict->head;ptr!=NULL;ptr=ptr->next)
+ int i;
+ int newsize;
+ struct dict_entry **newtable;
+ struct dict_entry *entry,*tmp;
+ newsize=dict->size*3+1;
+ /* allocate room for new hashtable */
+ newtable=(struct dict_entry **)malloc(newsize*sizeof(struct dict_entry *));
+ if (newtable==NULL)
+ return; /* allocating memory failed continue to fill the existing table */
+ /* clear new table */
+ for (i=0;i<newsize;i++)
+ newtable[i]=NULL;
+ /* copy old hashtable into new table */
+ for (i=0;i<dict->size;i++)
{
- if (strcasecmp(ptr->key,key)==0)
- return ptr;
+ /* go over elements in linked list */
+ entry=dict->table[i];
+ while (entry!=NULL)
+ {
+ tmp=entry;
+ entry=entry->next;
+ /* put in new position */
+ tmp->next=newtable[tmp->hash%newsize];
+ newtable[tmp->hash%newsize]=tmp;
+ }
}
- return NULL;
+ /* free the old hashtable */
+ free(dict->table);
+ /* put new hashtable in place */
+ dict->size=newsize;
+ dict->table=newtable;
}
DICT *dict_new(void)
{
+ char *buf;
struct dictionary *dict;
- dict=(struct dictionary *)malloc(sizeof(struct dictionary));
- if (dict==NULL)
+ int i;
+ /* allocate room for dictionary information */
+ buf=(char *)malloc(sizeof(struct dictionary)+sizeof(struct dict_keystorage)+KEYSTORAGE_INITSIZE);
+ if (buf==NULL)
+ return NULL;
+ /* set up toplevel structs and buffers */
+ dict=(struct dictionary *)buf;
+ dict->size=DICT_INITSIZE;
+ dict->num=0;
+ dict->keys=(struct dict_keystorage *)(buf+sizeof(struct dictionary));
+ dict->keys->size=KEYSTORAGE_INITSIZE;
+ dict->keys->off=0;
+ dict->keys->buf=buf+sizeof(struct dictionary)+sizeof(struct dict_keystorage);
+ dict->keys->next=NULL;
+ /* allocate initial hashtable */
+ dict->table=(struct dict_entry **)malloc(DICT_INITSIZE*sizeof(struct dict_entry *));
+ if (dict->table==NULL)
+ {
+ free(buf);
return NULL;
- dict->head=NULL;
- dict->ptr=NULL;
+ }
+ /* clear the hashtable */
+ for (i=0;i<DICT_INITSIZE;i++)
+ dict->table[i]=NULL;
+ /* we're done */
return dict;
}
-int dict_put(DICT *dict,const char *key,void *value)
+/* Copy the key to the storage. Returns the copy of the key
+ in the storage. */
+static const char *storekey(DICT *dict,const char *key)
{
- struct dict_entry *entry;
- if (key==NULL)
- return -1;
- entry=dict_entry_find(dict,key);
- if ((entry==NULL)&&(value==NULL))
+ size_t l;
+ size_t newsize;
+ char *buf;
+ struct dict_keystorage *newkeys;
+ l=strlen(key)+1;
+ /* ensure that we have enough space */
+ if (l>=(dict->keys->size-dict->keys->off))
{
- /* the key didn't exist, no point in creating one */
- return 0;
+ newsize=((dict->keys->size+l)*3)/2;
+ buf=(char *)malloc(sizeof(struct dict_keystorage)+newsize);
+ if (buf==NULL)
+ return NULL;
+ newkeys=(struct dict_keystorage *)buf;
+ newkeys->size=newsize;
+ newkeys->off=0;
+ newkeys->buf=(char *)(buf+sizeof(struct dict_keystorage));
+ newkeys->next=dict->keys;
+ /* put new keystorage in front of linked list */
+ dict->keys=newkeys;
}
- else if (entry==NULL)
+ /* add the value to the buffer */
+ buf=dict->keys->buf+dict->keys->off;
+ strcpy(buf,key);
+ dict->keys->off+=l;
+ return buf;
+}
+
+void dict_free(DICT *dict)
+{
+ struct dict_entry *entry,*etmp;
+ struct dict_keystorage *keys,*ktmp;
+ int i;
+ /* free hashtable entries */
+ for (i=0;i<dict->size;i++)
{
- /* create new entry and insert it in the list */
- entry=dict_entry_new(key);
- if (entry==NULL)
- return -1;
- /* insert entry in list */
- entry->next=dict->head;
- dict->head=entry;
+ entry=dict->table[i];
+ while (entry!=NULL)
+ {
+ etmp=entry;
+ entry=entry->next;
+ free(etmp);
+ }
}
- /* set value */
- entry->value=value;
- return 0;
+ /* free the hashtable */
+ free(dict->table);
+ /* free key storage
+ (except last which was allocated with the dict) */
+ keys=dict->keys;
+ while (keys->next!=NULL)
+ {
+ ktmp=keys;
+ keys=keys->next;
+ free(ktmp);
+ }
+ /* free dictionary struct itself */
+ free(dict);
}
void *dict_get(DICT *dict,const char *key)
{
+ uint32_t hash;
struct dict_entry *entry;
- entry=dict_entry_find(dict,key);
- if (entry==NULL)
- return NULL;
- return entry->value;
+ /* calculate the hash */
+ hash=stringhash(key);
+ /* loop over the linked list in the hashtable */
+ for (entry=dict->table[hash%dict->size];entry!=NULL;entry=entry->next)
+ {
+ if ( (entry->hash==hash) &&
+ (strcasecmp(entry->key,key)==0) )
+ return entry->value;
+ }
+ /* no matches found */
+ return NULL;
}
-void dict_free(DICT *dict)
+int dict_put(DICT *dict,const char *key,void *value)
{
- struct dict_entry *ptr,*nxt;
- /* free all entries */
- ptr=dict->head;
- while (ptr!=NULL)
+ uint32_t hash;
+ int idx;
+ struct dict_entry *entry,*prev;
+ /* check if we should grow the hashtable */
+ if ( dict->num >= ((dict->size*DICT_LOADPERCENTAGE)/100) )
+ growhashtable(dict);
+ /* calculate the hash and position in the hashtable */
+ hash=stringhash(key);
+ idx=hash%dict->size;
+ /* check if the entry is already present */
+ for (entry=dict->table[idx],prev=NULL;
+ entry!=NULL;
+ prev=entry,entry=entry->next)
{
- nxt=ptr->next;
- dict_entry_free(ptr);
- ptr=nxt;
+ if ( (entry->hash==hash) &&
+ (strcasecmp(entry->key,key)==0) )
+ {
+ /* check if we should unset the entry */
+ if (value==NULL)
+ {
+ /* remove from linked list */
+ if (prev==NULL)
+ dict->table[idx]=entry->next;
+ else
+ prev->next=entry->next;
+ /* free entry memory and register removal */
+ free(entry);
+ dict->num--;
+ return 0;
+ }
+ /* just set the new value */
+ entry->value=value;
+ return 0;
+ }
}
- /* clear some references */
- dict->head=NULL;
- dict->ptr=NULL;
- /* free struct itself */
- free(dict);
+ /* if entry should be unset we're done */
+ if (value==NULL)
+ return 0;
+ /* entry is not present, make new entry */
+ entry=(struct dict_entry *)malloc(sizeof(struct dict_entry));
+ if (entry==NULL)
+ return -1;
+ entry->hash=hash;
+ entry->key=storekey(dict,key);
+ if (entry->key==NULL)
+ {
+ free(entry);
+ return -1; /* problem duplicating key */
+ }
+ entry->value=value;
+ /* insert into hashtable/linked list */
+ entry->next=dict->table[idx];
+ dict->table[idx]=entry;
+ /* increment number of stored items */
+ dict->num++;
+ return 0;
}
void dict_loop_first(DICT *dict)
{
- dict->ptr=dict->head;
+ dict->loop_idx=0;
+ dict->loop_entry=dict->table[dict->loop_idx];
}
const char *dict_loop_next(DICT *dict,const char **key,void **value)
{
- struct dict_entry *ptr;
- ptr=dict->ptr;
- /* search until we get a non-NULL value */
- while ((ptr!=NULL)&&(ptr->value==NULL))
- ptr=ptr->next;
- /* we went through the whole list */
- if (ptr==NULL)
- {
- dict->ptr=NULL;
- if (key!=NULL) *key=NULL;
- if (value!=NULL) *value=NULL;
- return NULL;
- }
- dict->ptr=ptr->next;
- if (key!=NULL) *key=ptr->key;
- if (value!=NULL) *value=ptr->value;
- return ptr->key;
+ struct dict_entry *entry;
+ /* find non-empty entry */
+ while ( (dict->loop_idx<dict->size) && (dict->loop_entry==NULL) )
+ dict->loop_entry=dict->table[dict->loop_idx++];
+ if (dict->loop_entry==NULL)
+ return NULL; /* no more entries to check */
+ /* save current result and go to next entry */
+ entry=dict->loop_entry;
+ dict->loop_entry=entry->next;
+ /* return results */
+ if (key!=NULL)
+ *key=entry->key;
+ if (value!=NULL)
+ *value=entry->value;
+ return entry->key;
}