Home > database >  SHA1 hash of git object calculated by my program not matching the one from git
SHA1 hash of git object calculated by my program not matching the one from git

Time:12-03

I am writing a C program that should reproduce the SHA-1 hashes that git computes, but the hash my program produces keeps changing and never matches the one from git. I don't think anything is wrong with my implementation of SHA-1: I tried substituting another implementation and still got the same results. This is my code:

sha1.h:

/*
 * sha1.h - public interface of the SHA-1 implementation in sha1.c.
 */
#ifndef SHA1_H
#define SHA1_H

#include <stdint.h>

/*
 * Running state of one SHA-1 computation.
 *
 *   state  - the five 32-bit chaining words; seeded by SHA1Init() and
 *            updated by SHA1Transform().
 *   count  - number of message bits fed so far, as a 64-bit value split in
 *            two: count[0] is the low word, count[1] the high word.
 *   buffer - bytes of the current 64-byte block that is not yet complete.
 */
typedef struct {
    uint32_t state[5];
    uint32_t count[2];
    unsigned char buffer[64];
} SHA1_CTX;

/* Mixes one 64-byte block into state. The input buffer is not modified. */
void SHA1Transform(uint32_t state[5], const unsigned char buffer[64]);

/* Loads the SHA-1 initial constants into context and zeroes the bit count. */
void SHA1Init(SHA1_CTX *context);

/*
 * Feeds len bytes starting at data into the hash. May be called repeatedly;
 * bytes that do not fill a whole 64-byte block are kept in context->buffer.
 */
void SHA1Update(SHA1_CTX *context, const unsigned char *data, uint32_t len);

/*
 * Pads the message, writes the 20-byte binary digest to digest and then
 * zeroes *context, so the context must be re-initialised before reuse.
 */
void SHA1Final(unsigned char digest[20], SHA1_CTX *context);

/*
 * Convenience wrapper: hashes len bytes at str and writes the digest to
 * hash_out as hexadecimal text. object_hash() passes a 41-byte buffer
 * (40 hex characters plus a terminating NUL).
 */
void SHA1(char *hash_out, const char *str, int len);

#endif /* SHA1_H */

sha1.c:

#if defined(_MSC_VER) && !defined(_CRT_SECURE_NO_WARNINGS)
#define _CRT_SECURE_NO_WARNINGS
#endif

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sha1.h"

/* Rotate a 32-bit value left by bits positions (0 < bits < 32). */
#define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits))))

/*
 * blk0() and blk() perform the initial expand. Both operate on the local
 * variable `block` of SHA1Transform() and are only usable inside it.
 *
 * blk0(i) assembles message word i (0..15) from its four bytes, most
 * significant byte first, stores it in block->l[i] and yields it. Building
 * the word from bytes makes the result independent of host byte order; the
 * previous `#if BYTE_ORDER == LITTLE_ENDIAN` test silently evaluated to
 * `0 == 0` on toolchains that define neither macro.
 *
 * blk(i) computes schedule word i (16..79) in place in the 16-word ring.
 */
#define blk0(i)                                                   \
    (block->l[(i)] = ((uint32_t)block->c[(i) * 4] << 24) |        \
                     ((uint32_t)block->c[(i) * 4 + 1] << 16) |    \
                     ((uint32_t)block->c[(i) * 4 + 2] << 8) |     \
                     ((uint32_t)block->c[(i) * 4 + 3]))
#define blk(i)                                                              \
    (block->l[(i) & 15] = rol(block->l[((i) + 13) & 15] ^                   \
                              block->l[((i) + 8) & 15] ^                    \
                              block->l[((i) + 2) & 15] ^ block->l[(i) & 15], \
                              1))

/*
 * (R0 R1), R2, R3, R4 are the different operations used in SHA1.
 * Each adds the round function, the schedule word, the round constant and
 * rol(v, 5) into z, then rotates w by 30. Wrapped in do/while(0) so that
 * each use behaves as a single statement.
 */
#define R0(v, w, x, y, z, i)                                         \
    do {                                                             \
        z += ((w & (x ^ y)) ^ y) + blk0(i) + 0x5A827999 + rol(v, 5); \
        w = rol(w, 30);                                              \
    } while (0)
#define R1(v, w, x, y, z, i)                                        \
    do {                                                            \
        z += ((w & (x ^ y)) ^ y) + blk(i) + 0x5A827999 + rol(v, 5); \
        w = rol(w, 30);                                             \
    } while (0)
#define R2(v, w, x, y, z, i)                                \
    do {                                                    \
        z += (w ^ x ^ y) + blk(i) + 0x6ED9EBA1 + rol(v, 5); \
        w = rol(w, 30);                                     \
    } while (0)
#define R3(v, w, x, y, z, i)                                              \
    do {                                                                  \
        z += (((w | x) & y) | (w & x)) + blk(i) + 0x8F1BBCDC + rol(v, 5); \
        w = rol(w, 30);                                                   \
    } while (0)
#define R4(v, w, x, y, z, i)                                \
    do {                                                    \
        z += (w ^ x ^ y) + blk(i) + 0xCA62C1D6 + rol(v, 5); \
        w = rol(w, 30);                                     \
    } while (0)

/*
 * Hash a single 512-bit block. This is the core of the algorithm.
 *
 * state  - the five chaining words; updated in place.
 * buffer - the 64-byte block to mix in; it is copied, never modified.
 */
void SHA1Transform(uint32_t state[5], const unsigned char buffer[64])
{
    uint32_t a, b, c, d, e;

    typedef union {
        unsigned char c[64];
        uint32_t l[16];
    } CHAR64LONG16;

    CHAR64LONG16 block[1]; /* use array to appear as a pointer */

    /* Work on a private copy: the expand step overwrites the block. */
    memcpy(block, buffer, 64);
    /* Copy context->state[] to working vars */
    a = state[0];
    b = state[1];
    c = state[2];
    d = state[3];
    e = state[4];
    /* 4 rounds of 20 operations each. Loop unrolled. */
    R0(a, b, c, d, e, 0);
    R0(e, a, b, c, d, 1);
    R0(d, e, a, b, c, 2);
    R0(c, d, e, a, b, 3);
    R0(b, c, d, e, a, 4);
    R0(a, b, c, d, e, 5);
    R0(e, a, b, c, d, 6);
    R0(d, e, a, b, c, 7);
    R0(c, d, e, a, b, 8);
    R0(b, c, d, e, a, 9);
    R0(a, b, c, d, e, 10);
    R0(e, a, b, c, d, 11);
    R0(d, e, a, b, c, 12);
    R0(c, d, e, a, b, 13);
    R0(b, c, d, e, a, 14);
    R0(a, b, c, d, e, 15);
    R1(e, a, b, c, d, 16);
    R1(d, e, a, b, c, 17);
    R1(c, d, e, a, b, 18);
    R1(b, c, d, e, a, 19);
    R2(a, b, c, d, e, 20);
    R2(e, a, b, c, d, 21);
    R2(d, e, a, b, c, 22);
    R2(c, d, e, a, b, 23);
    R2(b, c, d, e, a, 24);
    R2(a, b, c, d, e, 25);
    R2(e, a, b, c, d, 26);
    R2(d, e, a, b, c, 27);
    R2(c, d, e, a, b, 28);
    R2(b, c, d, e, a, 29);
    R2(a, b, c, d, e, 30);
    R2(e, a, b, c, d, 31);
    R2(d, e, a, b, c, 32);
    R2(c, d, e, a, b, 33);
    R2(b, c, d, e, a, 34);
    R2(a, b, c, d, e, 35);
    R2(e, a, b, c, d, 36);
    R2(d, e, a, b, c, 37);
    R2(c, d, e, a, b, 38);
    R2(b, c, d, e, a, 39);
    R3(a, b, c, d, e, 40);
    R3(e, a, b, c, d, 41);
    R3(d, e, a, b, c, 42);
    R3(c, d, e, a, b, 43);
    R3(b, c, d, e, a, 44);
    R3(a, b, c, d, e, 45);
    R3(e, a, b, c, d, 46);
    R3(d, e, a, b, c, 47);
    R3(c, d, e, a, b, 48);
    R3(b, c, d, e, a, 49);
    R3(a, b, c, d, e, 50);
    R3(e, a, b, c, d, 51);
    R3(d, e, a, b, c, 52);
    R3(c, d, e, a, b, 53);
    R3(b, c, d, e, a, 54);
    R3(a, b, c, d, e, 55);
    R3(e, a, b, c, d, 56);
    R3(d, e, a, b, c, 57);
    R3(c, d, e, a, b, 58);
    R3(b, c, d, e, a, 59);
    R4(a, b, c, d, e, 60);
    R4(e, a, b, c, d, 61);
    R4(d, e, a, b, c, 62);
    R4(c, d, e, a, b, 63);
    R4(b, c, d, e, a, 64);
    R4(a, b, c, d, e, 65);
    R4(e, a, b, c, d, 66);
    R4(d, e, a, b, c, 67);
    R4(c, d, e, a, b, 68);
    R4(b, c, d, e, a, 69);
    R4(a, b, c, d, e, 70);
    R4(e, a, b, c, d, 71);
    R4(d, e, a, b, c, 72);
    R4(c, d, e, a, b, 73);
    R4(b, c, d, e, a, 74);
    R4(a, b, c, d, e, 75);
    R4(e, a, b, c, d, 76);
    R4(d, e, a, b, c, 77);
    R4(c, d, e, a, b, 78);
    R4(b, c, d, e, a, 79);
    /*
     * Add the working vars back into context.state[]. This must accumulate
     * (+=): plain assignment would discard the chaining value carried over
     * from the previous blocks.
     */
    state[0] += a;
    state[1] += b;
    state[2] += c;
    state[3] += d;
    state[4] += e;
    /* Wipe variables */
    a = b = c = d = e = 0;
    memset(block, '\0', sizeof(block));
}

/*
 * Prepare a context for a new message: load the five SHA-1 starting
 * values into the chaining state and reset the bit counter. The block
 * buffer is left as it is.
 */
void SHA1Init(SHA1_CTX *context)
{
    /* SHA1 initialization constants */
    static const uint32_t seed[5] = {
        0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0
    };
    unsigned k;

    for (k = 0; k < 5; k++) {
        context->state[k] = seed[k];
    }
    context->count[1] = 0;
    context->count[0] = 0;
}

/*
 * Feed len bytes from data into the running hash.
 *
 * Whole 64-byte blocks are passed to SHA1Transform() as soon as they are
 * available; any remainder is kept in context->buffer for the next call.
 * A call with len == 0 does nothing.
 */
void SHA1Update(SHA1_CTX *context, const unsigned char *data, uint32_t len)
{
    uint32_t i;
    uint32_t j;

    if (len == 0) {
        return; /* nothing to add; also avoids memcpy on a possibly NULL data */
    }

    /*
     * Add len * 8 to the 64-bit bit counter. count[0] is the low word; a
     * wrap-around there carries into count[1], and the three bits lost by
     * `len << 3` are added to the high word directly.
     */
    j = context->count[0];
    context->count[0] += len << 3;
    if (context->count[0] < j) {
        context->count[1]++;
    }
    context->count[1] += (len >> 29);

    /* Number of bytes already waiting in the block buffer (0..63). */
    j = (j >> 3) & 63;

    /* `len >= 64 - j` is `j + len > 63` written so it cannot overflow. */
    if (len >= 64 - j) {
        /* Complete the buffered block, then hash full blocks from data. */
        i = 64 - j;
        memcpy(&context->buffer[j], data, i);
        SHA1Transform(context->state, context->buffer);
        /* `len - i >= 64` is `i + 63 < len` without the overflow risk. */
        for (; len - i >= 64; i += 64) {
            SHA1Transform(context->state, &data[i]);
        }
        j = 0;
    } else {
        i = 0;
    }
    /* Keep the leftover bytes for the next call. */
    memcpy(&context->buffer[j], &data[i], len - i);
}

/*
 * Finish the hash: append the padding and the message length, write the
 * 20-byte binary digest to digest, then zero *context.
 */
void SHA1Final(unsigned char digest[20], SHA1_CTX *context)
{
    unsigned i;
    unsigned char finalcount[8];
    unsigned char c;

    /*
     * Capture the message length in bits as 8 big-endian bytes (high word
     * count[1] first) before the padding below changes the counter.
     */
    for (i = 0; i < 8; i++) {
        finalcount[i] = (unsigned char)((context->count[(i >= 4 ? 0 : 1)] >> ((3 - (i & 3)) * 8)) & 255); /* Endian independent */
    }

    /* Padding starts with a single 1 bit (byte 0x80)... */
    c = 0x80;
    SHA1Update(context, &c, 1);
    /*
     * ...followed by zero bytes until 56 bytes of the current block are
     * used: (count[0] & 504) is the byte offset in the block times 8, and
     * 448 == 56 * 8. That leaves exactly 8 bytes for the length.
     */
    while ((context->count[0] & 504) != 448) {
        c = 0x00;
        SHA1Update(context, &c, 1);
    }
    SHA1Update(context, finalcount, 8); /* Should cause a SHA1Transform() */

    /* Emit the five state words most significant byte first. */
    for (i = 0; i < 20; i++) {
        digest[i] = (unsigned char)((context->state[i >> 2] >> ((3 - (i & 3)) * 8)) & 255);
    }

    /* Wipe variables */
    memset(context, '\0', sizeof(*context));
    memset(&finalcount, '\0', sizeof(finalcount));
}

/*
 * Hash len bytes starting at str and write the digest to hash_out as
 * 40 lowercase hexadecimal characters followed by a terminating NUL, so
 * hash_out must have room for at least 41 bytes. A len of zero or less
 * hashes the empty message.
 */
void SHA1(char *hash_out, const char *str, int len)
{
    SHA1_CTX ctx;
    /* Digest lives on the stack: nothing to allocate, check or free. */
    unsigned char hash[20];

    SHA1Init(&ctx);
    /*
     * The input is fed one byte at a time, as in the posted code. The
     * result is the same as a single SHA1Update() over the whole range;
     * it is only slower.
     */
    for (int i = 0; i < len; i += 1) {
        SHA1Update(&ctx, (const unsigned char *)str + i, 1);
    }
    SHA1Final(hash, &ctx);

    /* Convert to hex: two digits per byte; sprintf adds the final NUL. */
    for (int i = 0; i < 20; i++) {
        sprintf(hash_out + i * 2, "%02x", hash[i]);
    }
}

object hash function:

/*
 * Intended to return the git object id of `object`: the SHA-1 of
 * "<type> <size>", a NUL byte, and then the object's data, as a
 * heap-allocated hex string that the caller must free().
 *
 * This is the code the question is about. It is shown as posted, so the
 * bugs below are still present:
 *
 *  - KNOWN BUG: the "\0" inside the format string ends the format right
 *    there, so sprintf() writes only "<type> <size>" and never copies the
 *    data. SHA1() then reads `size` bytes, most of them uninitialised
 *    malloc() memory, which is why the result changes between runs.
 *  - Even without that, "%s" would stop at the first NUL byte inside the
 *    data, so binary objects would be cut short.
 *  - NOTE(review): the malloc()/calloc() results are used unchecked.
 */
char *object_hash(struct object *object)
{
    /* Get size of object */
    int size = object->size;
    /* Size to string */
    char *sizeStr = malloc(20);
    sprintf(sizeStr, "%d", size);
    /* Total length: type name + space + size digits + NUL + data */
    size += strlen(objectTypeStrings[object->type]);
    size += strlen(sizeStr);
    size += 2;
    char *format = malloc(size + 1);
    /* BUG: everything after the embedded \0 in this format is ignored. */
    sprintf(format, "%s %s\0%s", objectTypeStrings[object->type], sizeStr, object->data);
    /* Get hash of object */
    char *hash = calloc(41, sizeof(char));
    SHA1(hash, format, size);
    free(format);
    free(sizeStr);
    return hash;
}

other relevant things:

/* Kinds of object; the numeric value is the index into objectTypeStrings. */
enum objectType {
    commit = 0,
    tree = 1,
    blob = 2,
    tag = 3
};

/* One object: its kind, the length of its payload, and the payload. */
struct object {
    enum objectType type;
    int size;
    char *data;
};

/* Type name for each enum objectType value, as used in the hashed header. */
char *objectTypeStrings[] = {
    [commit] = "commit",
    [tree] = "tree",
    [blob] = "blob",
    [tag] = "tag"
};

all code here: https://github.com/arnavbhate/avcs

CodePudding user response:

Your format string has a literal NUL ('\0') in it, so your last argument (the data) isn't being added (since C strings end at the first NUL).

If you want sprintf to append the NUL, you'll have to use %c and pass '\0' as an argument (though as sprintf is already putting a null at the end of the string, you don't actually need to).

You should also note that arbitrary files may include NUL characters in the data, which will be truncated by sprintf when passed as %s. I'd suggest that you memcpy your data in, instead of using sprintf for this purpose.

(Also, you can avoid calling strlen on your size string, since sprintf returns the number of characters printed, excluding the terminating NUL)

I'll also add that as shown by @Nickolay Olshevsky, you don't actually need to copy your data into the same buffer, you can just feed the hash with your header, followed by the data.

CodePudding user response:

A more robust solution would be to call SHA1Init(), then SHA1Update() once for each piece of the data, and finish with SHA1Final(). The current approach is also slow, because the SHA1() function hashes the data byte by byte. Something like the following:


/*
 * Returns the git object id of `object` as a newly allocated string of
 * 40 lowercase hex characters plus a terminating NUL; the caller must
 * free() it. Returns NULL if object->size is negative or the allocation
 * fails.
 *
 * The hash covers "<type> <size>", one NUL byte, and then exactly
 * object->size bytes of data. Each piece is fed straight into the hash, so
 * nothing is copied and NUL bytes inside the data are hashed like any
 * other byte.
 */
char *object_hash(struct object *object)
{
    const char *typeStr = objectTypeStrings[object->type];

    if (object->size < 0) {
        return NULL; /* would otherwise be cast to a huge unsigned length */
    }

    /* Size to string; snprintf returns the number of digits written. */
    char sizeStr[20] = {0};
    int sizeLen = snprintf(sizeStr, sizeof(sizeStr), "%d", object->size);
    if (sizeLen < 0 || (size_t)sizeLen >= sizeof(sizeStr)) {
        return NULL;
    }

    SHA1_CTX ctx;
    SHA1Init(&ctx);
    SHA1Update(&ctx, (const uint8_t *) typeStr, (uint32_t) strlen(typeStr));
    SHA1Update(&ctx, (const uint8_t *) " ", 1);
    SHA1Update(&ctx, (const uint8_t *) sizeStr, (uint32_t) sizeLen);
    /* "" is a one-byte array holding just the NUL that ends the header. */
    SHA1Update(&ctx, (const uint8_t *) "", 1);
    if (object->size > 0) {
        SHA1Update(&ctx, (const uint8_t *) object->data, (uint32_t) object->size);
    }

    /* Get hash of object */
    unsigned char hash[20] = {0};
    SHA1Final(hash, &ctx);

    /* Convert to hex */
    char *hash_out = calloc(41, sizeof(char));
    if (hash_out == NULL) {
        return NULL;
    }
    for (size_t i = 0; i < 20; i++) {
        sprintf(hash_out + i * 2, "%02x", hash[i]);
    }
    return hash_out;
}


  • Related