I am relatively new to C, although I have been developing with other languages for an extended period of time. In an attempt to "Learn by doing", whilst working through a coursera course on C & C I decided to challenge myself with both by writing a wrapper for an existing project, which would translate to what I would eventually be using them for in my real world application of them.
I am building a C interface for a C implementation of Google Sparse Hash Map, using a guide on C interfaces / wrapping (here), but seem to be having issues with either data not inserting, or not being able to read the data once inserted?
This is what I have.
The C Object
sparsehashmap.cpp
#include "sparsehashmap.h"
#include<string>
SparseHashMap::SparseHashMap(){_shash.set_deleted_key("");}
void SparseHashMap::insert(const char* arg1, const char* arg2)
{
_shash[arg1] = arg2;
}
const char* SparseHashMap::read(const char* key)
{
return _shash[key];
}
int SparseHashMap::exists(const char* key)
{
if (_shash.find(key) == _shash.end())
{
return false;
}
else
{
return true;
}
}
void SparseHashMap::remove(const char* key)
{
_shash.erase(key);
}
int SparseHashMap::length()
{
return (int) _shash.size();
}
void SparseHashMap::flush()
{
_shash.clear();
}
sparsehashmap.h
#ifndef __CPPSPARSEHASH__
#define __CPPSPARSEHASH__
#include <iostream>
#include <sparsehash/sparse_hash_map> //https://github.com/sparsehash/sparsehash
typedef google::sparse_hash_map<const char*, const char*> _SPARSEHASH;
class SparseHashMap
{
private:
_SPARSEHASH _shash;
public:
SparseHashMap();
void insert(const char* arg1, const char* arg2);
const char* read(const char* key);
int exists(const char* key);
void remove(const char* key);
int length();
void flush();
};
#endif // __CPPSPARSEHASH__
The C Wrapper
sparse.h
#ifndef __FOSPARSE_H__
#define __FOSPARSE_H__
#ifdef __cplusplus
extern "C" {
#endif
struct CSparseHashMap;
typedef struct CSparseHashMap _SparseHashMap;
_SparseHashMap *sparsehashmap_create();
void sparsehashmap_destroy(_SparseHashMap *shm);
void sparsehashmap_insert (_SparseHashMap *shm, const char* arg1, const char* arg2);
const char* sparsehashmap_read (_SparseHashMap *shm, const char* key);
int sparsehashmap_exists (_SparseHashMap *shm, const char* key);
void sparsehashmap_remove (_SparseHashMap *shm, const char* key);
int sparsehashmap_length (_SparseHashMap *shm);
void sparsehashmap_flush (_SparseHashMap *shm);
#ifdef __cplusplus
}
#endif
#endif
sparse.cpp
#include <stdlib.h>
#include "sparse.h"
#include "sparsehashmap.h"
struct CSparseHashMap {
void *obj;
};
_SparseHashMap *sparsehashmap_create()
{
_SparseHashMap *shm;
SparseHashMap *obj;
shm = (__typeof__(shm))malloc(sizeof(*shm));
obj = new SparseHashMap();
shm->obj = obj;
return shm;
}
void sparsehashmap_destroy(_SparseHashMap *shm)
{
if (shm == NULL)
return;
delete static_cast<SparseHashMap *>(shm->obj);
free(shm);
}
void sparsehashmap_insert(_SparseHashMap *shm, const char* arg1, const char* arg2)
{
SparseHashMap *obj;
if (shm == NULL)
return;
obj = static_cast<SparseHashMap *>(shm->obj);
obj->insert(arg1, arg2);
}
const char* sparsehashmap_read(_SparseHashMap *shm, const char* key)
{
SparseHashMap *obj;
if (shm == NULL)
return 0;
obj = static_cast<SparseHashMap *>(shm->obj);
return obj->read(key);
}
int sparsehashmap_exists(_SparseHashMap *shm, const char* key)
{
SparseHashMap *obj;
if (shm == NULL)
return 0;
obj = static_cast<SparseHashMap *>(shm->obj);
return obj->exists(key);
}
void sparsehashmap_remove(_SparseHashMap *shm, const char* key)
{
SparseHashMap *obj;
if (shm == NULL)
return;
obj = static_cast<SparseHashMap *>(shm->obj);
obj->remove(key);
}
int sparsehashmap_length(_SparseHashMap *shm)
{
SparseHashMap *obj;
if (shm == NULL)
return 0;
obj = static_cast<SparseHashMap *>(shm->obj);
return obj->length();
}
void sparsehashmap_flush(_SparseHashMap *shm)
{
SparseHashMap *obj;
if (shm == NULL)
return;
obj = static_cast<SparseHashMap *>(shm->obj);
obj->flush();
}
The Test
main.c
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include "sparse.h"
#define MAXCHAR 256
int main(int argc, char* argv[])
{
// Create Pointer to Sparsehashmap object
_SparseHashMap *fshm = sparsehashmap_create();
/*
Open File specified in command line argument <argv[1]>
and initialise array pointer to hold the line data
*/
FILE *fp;
char row[MAXCHAR];
int ct = 0;
fp = fopen(argv[1], "r");
/*
Loop through the file, split the row using a single space
as a delimeter, and insert as <key> => <value> into the
sparsehashmap
*/
while (feof(fp) != true)
{
char key[MAXCHAR], value[MAXCHAR];
fgets(row, MAXCHAR, fp);
sscanf(row, "%s %s", key, value);
// if( (ct % 100) == 0 )
// printf("Key: %s\tValue: %s\n", key, value);
sparsehashmap_insert(fshm, key, value);
ct ;
}
// close the file as it is no longer needed.
fclose(fp);
// Print the total number of times the while loop ran
printf("%d Total entries.\n", ct);
// Print the length of the Sparsehashmap
printf("C SparseHashMap: Length of Hashmap = %d\n", sparsehashmap_length(fshm));
/*
Read, Exists and Remove Examples
*/
printf("C SparseHashMap: _SparseHashMap[\"63-5039337\"] = %s\n", sparsehashmap_read(fshm, "63-5039337"));
printf("C SparseHashMap: _SparseHashMap[\"26-8663826\"] = %s\n", sparsehashmap_read(fshm, "26-8663826"));
printf("C SparseHashMap: _SparseHashMap[\"87-9801138\"] = %s\n", sparsehashmap_read(fshm, "87-9801138"));
printf("C SparseHashMap: Key Exists [\"15-8895492\"] = %d\n", sparsehashmap_exists(fshm, "15-8895492"));
printf("C SparseHashMap: Key Exists [\"22-4942175\"] = %d\n", sparsehashmap_exists(fshm, "22-4942175"));
printf("C SparseHashMap: Remove Key From Hashmap...\n");
printf("C SparseHashMap: Remove Key From Hashmap [\"63-5039337\"]\n");
sparsehashmap_remove(fshm, "63-5039337");
printf("C SparseHashMap: Remove Key From Hashmap [\"20-6892893\"]\n");
sparsehashmap_remove(fshm, "20-6892893");
printf("C SparseHashMap: Remove Key From Hashmap [\"58-8150180\"]\n");
sparsehashmap_remove(fshm, "58-8150180");
printf("C SparseHashMap: Length of Hashmap = %d\n", sparsehashmap_length(fshm));
/*
Destroy the Sparsehashmap to free its
memory allocation.
*/
printf("C SparseHashMap: Destroy Hashmap\n");
sparsehashmap_destroy(fshm);
printf("C SparseHashMap: Freed");
return 0;
}
I have created a gist of the data that I am inserting and querying, which can be found here.
Once built, I run it against the file of mock data, but of the file of 2000 test entries, these are the results I get
2001 Total entries. /* times file is looped with fgets() */
C SparseHashMap: Length of Hashmap = 1 /* Size of the hashmap */
C SparseHashMap: _SparseHashMap["63-5039337"] = (null) /* Read Function Response */
C SparseHashMap: _SparseHashMap["26-8663826"] = (null) /* Read Function Response */
C SparseHashMap: _SparseHashMap["87-9801138"] = (null) /* Read Function Response */
C SparseHashMap: Key Exists ["15-8895492"] = 0
C SparseHashMap: Key Exists ["22-4942175"] = 0
C SparseHashMap: Remove Key From Hashmap ["63-5039337"]
C SparseHashMap: Remove Key From Hashmap ["20-6892893"]
C SparseHashMap: Remove Key From Hashmap ["58-8150180"]
C SparseHashMap: Length of Hashmap = 3 /* Size of the hashmap */
C SparseHashMap: Destroy Hashmap
C SparseHashMap: Freed%
The size of the Hash Map starts at one (after apparently inserting 2000 entries), it is unable to read any keys from the input data, and then after it performs a number of removals, the length jumps to three?
I understand that the guide I am working from is a bit more basic than working with a hash map, but was wondering if anyone could point me to where I am going wrong with this? and how I should be looking at modifying it to have the operations working as expected?
Any guidance would be greatly appreciated.
======== EDIT ========
The issue seemed to be mapping pointers to pointers. After amending the *.cpp
files to take / return std::string
, and performing the relevant conversions before passing from the C instructions the interface began working as intended
./run MOCK_DATA.txt
2001 Total entries.
C SparseHashMap: Length of Hashmap = 2000
C SparseHashMap: _SparseHashMap["63-5039337"] = {"id":1,"first_name":"Ilario","last_name":"Thickins","email":"[email protected]","gender":"Male","ip_address":"54.113.8.103"}
C SparseHashMap: _SparseHashMap["26-8663826"] = {"id":35,"first_name":"Mignonne","last_name":"Oakden","email":"[email protected]","gender":"Female","ip_address":"41.14.68.148"}
C SparseHashMap: _SparseHashMap["87-9801138"] = {"id":1989,"first_name":"Mavis","last_name":"Collingwood","email":"[email protected]","gender":"Female","ip_address":"56.70.97.65"}
C SparseHashMap: Key Exists ["15-8895492"] = 1
C SparseHashMap: Key Exists ["22-4942175"] = 1
C SparseHashMap: Remove Key From Hashmap ["63-5039337"]
C SparseHashMap: Remove Key From Hashmap ["20-6892893"]
C SparseHashMap: Remove Key From Hashmap ["58-8150180"]
C SparseHashMap: Length of Hashmap = 1997
C SparseHashMap: Destroy Hashmap
C SparseHashMap: Freed
CodePudding user response:
You are mapping pointers to pointers, but you likely intended to map strings to strings.
typedef google::sparse_hash_map<const char*, const char*> _SPARSEHASH;
With a std::map
that might be:
std::map<const char*, const char*>
That just means that you are mapping pointers to pointers.
All of your pointers come from these two objects.
char key[MAXCHAR], value[MAXCHAR];
Every time you add a key to your hash map, you are using a key which is a pointer to your local key
array (which is always the same on every iteration of the loop). And the value is a pointer to your local value
array (again, which is always the same on every iteration of the loop). That's why you have only 1 entry in your map. You have successfully mapped &key[0]
to &value[0]
.
What you probably want is to use std::string
.
std::map<std::string, std::string>
Or
typedef google::sparse_hash_map<std::string, std::string> _SPARSEHASH;
Then you have bigger fish to fry
Now you have to worry about the fact that your implementation of read
just returns a pointer, rather than returning a string. You just inherited all the problems of managing strings in C. You could treat it like std::string::c_str
where you return the hash entry's c_str
, but this will be invalidated if you add another entry to the map or remove one, so your semantics will have to be clearly documented, or else you'll have to allocate a buffer for a copy of your returned string and permit freeing that buffer again as is the way in C. (I'm glad C is a thing.)