哈希表的C语言实现

时间：2020-05-13 15:05:24 阅读：69 评论：0 收藏：0 [点我收藏+]

构造一种存储结构，通过某种函数(hashFunc)使元素的存储位置与它的关键码之间能够建立一一映射的关系，那么在查找时通过该函数可以很快找到该元素。

说到哈希表，首先就得说到哈希函数，哈希函数是用来得到给定key值的在哈希表中的存储位置的。
哈希函数也并不是固定的，可以自己根据情况来定，一般常用常见的有直接定制法，除留余数法，平方取中法，折叠法，随机数法，数学分析法。

当向该结构插入元素时，存入根据关键码以此函数计算出的位置，当搜索时，也是先要将给定的关键码用函数转换成存储位置进行查找，将得到位置处的元素进行比较，若关键码相同，则搜索成功。
但是通过一个哈希函数得到的位置，一定是会有冲突的，
例如用除留余数法，哈希函数为key/100。在此情况下数字1与数字101得到的存储位置就是相同的，这样就是哈希冲突， 哈希冲突一般有两种解决方式，一种是闭散列，另一种是开散列。
闭散列（开放地址法）：当发生哈希冲突时，如果哈希表未被装满，说明在哈希表中还有空位，那就可以把key值存放到了列表的下一个空位。
开散列（链地址法）：首先对关键码集合用哈希函数计算哈希表中的偏移位置，具有相同地址的关键码归于同一子集合，每一个子集合称为一个桶，各个桶中的元素通过一个单链表链接起来，各链表的头结点存储在哈希表中。

哈希表原理

这里不讲高深理论，只说直观感受。哈希表的目的就是为了根据数据的部分内容（关键字），直接计算出存放完整数据的内存地址。

试想一下，如果从链表中根据关键字查找一个元素，那么就需要遍历才能得到这个元素的内存地址，如果链表长度很大，查找就需要更多的时间.

链表查找：

void* list_find_by_key(list,key)
{
    for(p=list;p!=NULL; p=p->next){
        if(p->key == key){
            return p;
        }
        return p;
    }
}

为了解决根据关键字快速找到元素的存放地址，哈希表应运而生。它通过某种算法（哈希函数）直接根据关键字计算出元素的存放地址，由于无需遍历，所以效率很高。

void* hash_table_find_by_key(table, key)
{
    void* p = hash(key);
    return p;
}

当然，上面的伪代码忽略了一个重要的事实：那就是不同的关键字可能产生出同样的hash值。

hash("张三") = 23;
hash("李四") = 30;
hash("王五") = 23;

这种情况称为“冲突”，为了解决这个问题，有两种方法：一是链式扩展；二是开放寻址。这里只讲第一种：链式扩展。

也就是把具有相同hash值的元素放到一起，形成一个链表。这样在插入和寻找数据的时候就需要进一步判断。

void* hash_table_find_by_key(table, key)
{
    void* list = hash(key);
    return list_find_by_key(list, key);
}

需要注意的是，只要hash函数合适，这里的链表通常都长度不大，所以查找效率依然很高。

hash.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BUCKETCOUNT 16

struct hashEntry
{
    const char* key;
    char* value;
    struct hashEntry* next;
};

typedef struct hashEntry entry;

struct hashTable
{
    entry bucket[BUCKETCOUNT];  //先默认定义16个桶
};

typedef struct hashTable table;

//初始化哈希表
void initHashTable(table* t)
{
    int i;
    if (t == NULL)return;

    for (i = 0; i < BUCKETCOUNT; ++i) {
        t->bucket[i].key = NULL;
        t->bucket[i].value = NULL;
        t->bucket[i].next = NULL;
    }
}

//释放哈希表
void freeHashTable(table* t)
{
    int i;
    entry* e,*ep;
    if (t == NULL)return;
    for (i = 0; i<BUCKETCOUNT; ++i) {
        e = &(t->bucket[i]);
        while (e->next != NULL) {
            ep = e->next;
            e->next = ep->next;
            free(ep->key);
            free(ep->value);
            free(ep);
        }
    }
}

//哈希散列方法函数
int keyToIndex(const char* key)
{
    int index , len , i;
    if (key == NULL)return -1;

    len = strlen(key);
    index = (int)key[0];
    for (i = 1; i<len; ++i) {
        index *= 1103515245 + (int)key[i];
    }
    index >>= 27;
    index &= (BUCKETCOUNT - 1);
    return index;
}

//在堆上分配足以保存str的内存
//并拷贝str内容到新分配位置
char* strDup(const char* str)
{
    int len;
    char* ret;
    if (str == NULL)return NULL;

    len = strlen(str);
    ret = (char*)malloc(len + 1);
    if (ret != NULL) {
        memcpy(ret , str , len);
        ret[len] = ‘\0‘;
    }
    return ret;
}

//向哈希表中插入数据
//insertEntry(&t , "电脑型号" , "华硕 X550JK 笔记本电脑");
int insertEntry(table* t , const char* key , const char* value)
{
    int index , vlen1 , vlen2;
    entry* e , *ep;

    if (t == NULL || key == NULL || value == NULL) {
        return -1;
    }

    index = keyToIndex(key);
    printf("index = %d\n",index);
    if (t->bucket[index].key == NULL) {
        t->bucket[index].key = strDup(key);
        t->bucket[index].value = strDup(value);
    }
    else {
        e = ep = &(t->bucket[index]);
        while (e != NULL) { //先从已有的找
            if (strcmp(e->key , key) == 0) {
                //找到key所在，替换值
                vlen1 = strlen(value);
                vlen2 = strlen(e->value);
                if (vlen1 > vlen2) {
                    free(e->value);
                    e->value = (char*)malloc(vlen1 + 1);
                }
                memcpy(e->value , value , vlen1 + 1);
                return index;   //插入完成了
            }
            ep = e;
            e = e->next;
        } // end while(e...

        //没有在当前桶中找到
        //创建条目加入
        e = (entry*)malloc(sizeof (entry));
        e->key = strDup(key);
        e->value = strDup(value);
        e->next = NULL;
        ep->next = e;
    }
    return index;
}

//在哈希表中查找key对应的value
//找到了返回value的地址，没找到返回NULL
const char* findValueByKey(const table* t , const char* key)
{
    int index;
    const entry* e;
    if (t == NULL || key == NULL) {
        return NULL;
    }
    index = keyToIndex(key);
    e = &(t->bucket[index]);
    if (e->key == NULL) return NULL;//这个桶还没有元素
    while (e != NULL) {
        if (0 == strcmp(key , e->key)) {
            return e->value;    //找到了，返回值
        }
        e = e->next;
    }
    return NULL;
}


//在哈希表中查找key对应的entry
//找到了返回entry，并将其从哈希表中移除
//没找到返回NULL
entry* removeEntry(table* t , char* key)
{
    int index;
    entry* e,*ep;   //查找的时候，把ep作为返回值
    if (t == NULL || key == NULL) {
        return NULL;
    }
    index = keyToIndex(key);
    e = &(t->bucket[index]);
    while (e != NULL) {
        if (0 == strcmp(key , e->key)) {
            //如果是桶的第一个
            if (e == &(t->bucket[index])) {
                //如果这个桶有两个或以上元素
                //交换第一个和第二个，然后移除第二个
                ep = e->next;
                if (ep != NULL) {
                    entry tmp = *e; //做浅拷贝交换
                    *e = *ep;//相当于链表的头节点已经移除
                    *ep = tmp;  //这就是移除下来的链表头节点
                    ep->next = NULL;
                }
                else {//这个桶只有第一个元素
                    ep = (entry*)malloc(sizeof(entry));
                    *ep = *e;
                    e->key = e->value = NULL;
                    e->next = NULL;
                }
            }
            else {
                //如果不是桶的第一个元素
                //找到它的前一个(这是前面设计不佳导致的多余操作)
                ep = &(t->bucket[index]);
                while (ep->next != e)ep = ep->next;
                //将e从中拿出来
                ep->next = e->next;
                e->next = NULL;
                ep = e;
            }
            return ep;
        }// end if(strcmp...
        e = e->next;
    }
    return NULL;
}


void printTable(table* t)
{
    int i;
    entry* e;
    if (t == NULL)return;
    for (i = 0; i<BUCKETCOUNT; ++i) {
        printf("\nbucket[%d]:\n" , i);
        e = &(t->bucket[i]);
        while (e->key != NULL) {
            printf("\t%s\t=\t%s\n" , e->key , e->value);
            if (e->next == NULL)break;
            e = e->next;
        }
    }
}


int main()
{
    table t;
    initHashTable(&t);

    insertEntry(&t , "电脑型号" , "华硕 X550JK 笔记本电脑");
    insertEntry(&t , "操作系统" , "Windows 8.1 64位 (DirectX 11)");
    insertEntry(&t , "处理器" , "英特尔 Core i7 - 4710HQ @ 2.50GHz 四核");
    insertEntry(&t , "主板" , "华硕 X550JK(英特尔 Haswell)");
    insertEntry(&t , "内存" , "4 GB(Hynix / Hyundai)");
    insertEntry(&t , "主硬盘" , "日立 HGST HTS541010A9E680(1 TB / 5400 转 / 分)");
    insertEntry(&t , "显卡" , "NVIDIA GeForce GTX 850M       (2 GB / 华硕)");
    insertEntry(&t , "显示器" , "奇美 CMN15C4(15.3 英寸)");
    insertEntry(&t , "光驱" , "松下 DVD - RAM UJ8E2 S DVD刻录机");
    insertEntry(&t , "声卡" , "Conexant SmartAudio HD @ 英特尔 Lynx Point 高保真音频");
    insertEntry(&t , "网卡" , "瑞昱 RTL8168 / 8111 / 8112 Gigabit Ethernet Controller / 华硕");
    insertEntry(&t , "主板型号" , "华硕 X550JK");
    insertEntry(&t , "芯片组" , "英特尔 Haswell");
    insertEntry(&t , "BIOS" , "X550JK.301");
    insertEntry(&t , "制造日期" , "06 / 26 / 2014");
    insertEntry(&t , "主人" , "就是我");
    insertEntry(&t , "价格" , "六十张红色毛主席");
    insertEntry(&t , "主硬盘" , "换了个120G的固态");

    entry* e = removeEntry(&t , "主板型号");
    if (e != NULL) {
        puts("找到后要释放");
        free(e->key);
        free(e->value);
        free(e);
        e = NULL;
    }

    printTable(&t);

    const char* keys[] = { "显示器" , "主人","没有" , "处理器" };
    for (int i = 0; i < 4; ++i) {
        const char* value = findValueByKey(&t , keys[i]);
        if (value != NULL) {
            printf("find %s\t=\t%s\n" ,keys[i], value);
        }
        else {
            printf("not found %s\n",keys[i]);
        }
    }


    freeHashTable(&t);
    getchar();
    return 0;
}

哈希表的C语言实现

原文：https://www.cnblogs.com/vx-cg248805770/p/12881957.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年09月23日 (328)
2021年09月24日 (313)
2021年09月17日 (191)
2021年09月15日 (369)
2021年09月16日 (411)
2021年09月13日 (439)
2021年09月11日 (398)
2021年09月12日 (393)
2021年09月10日 (160)
2021年09月08日 (222)