Error in my MD5 code

Associate
Joined
22 Sep 2009
Posts
2,085
Location
Leicester
I'm in the process of writing an app using MD5, and rather than using a library I decided to write my own function to really understand what's going on.

Anyway, my main reference is Wikipedia, I have taken certain parts from other source code, specifically values for k and r.

Code:
// generated by using floor(fabs(sin(loop + 1))) * 2^32 where 0 <= loop < 64
unsigned int k[] =
{
	3614090360u, 3905402710u,  606105819u, 3250441966u, 4118548399u, 1200080426u, 2821735955u, 4249261313u,
	1770035416u, 2336552879u, 4294925233u, 2304563134u, 1804603682u, 4254626195u, 2792965006u, 1236535329u,
	4129170786u, 3225465664u,  643717713u, 3921069994u, 3593408605u,   38016083u, 3634488961u, 3889429448u,
	 568446438u, 3275163606u, 4107603335u, 1163531501u, 2850285829u, 4243563512u, 1735328473u, 2368359562u,
	4294588738u, 2272392833u, 1839030562u, 4259657740u, 2763975236u, 1272893353u, 4139469664u, 3200236656u,
	 681279174u, 3936430074u, 3572445317u,   76029189u, 3654602809u, 3873151461u,  530742520u, 3299628645u,
	4096336452u, 1126891415u, 2878612391u, 4237533241u, 1700485571u, 2399980690u, 4293915773u, 2240044497u,
	1873313359u, 4264355552u, 2734768916u, 1309151649u, 4149444226u, 3174756917u,  718787259u, 3951481745u
};


unsigned int r[] =
{
	7, 12, 17, 22,  7, 12, 17, 22,  7, 12, 17, 22,  7, 12, 17, 22,
	5,  9, 14, 20,  5,  9, 14, 20,  5,  9, 14, 20,  5,  9, 14, 20,
	4, 11, 16, 23,  4, 11, 16, 23,  4, 11, 16, 23,  4, 11, 16, 23,
	6, 10, 15, 21,  6, 10, 15, 21,  6, 10, 15, 21,  6, 10, 15, 21
};
	 
inline unsigned int left_rotate(unsigned int x, unsigned int c)
{
	return (x << c) | (x >> (32 - c));
}

void c_md5::md5(const char *input, unsigned int length)
{
	unsigned int h0 = 0x67452301;
	unsigned int h1 = 0xEFCDAB89;
	unsigned int h2 = 0x98BADCFE;
	unsigned int h3 = 0x10325476;
	char *buffer;
	int bufLen;
	
	// get the original input length
	bufLen = length;
	
	if(((bufLen % 64) > 0) || (bufLen == 0))
	{
		if(64 - bufLen < 8)
		{
			bufLen += 64;
		}		
		int tempVal;
		tempVal = bufLen % 64;
		bufLen += 64 - tempVal;
	}
	if(debug)
	{
		printf("Checking if buffer length is divisible by 512... ");
		if(bufLen % 64 == 0)
		{
			printf("done!\n");
		}
		else
		{
			printf("failed!\n");
			return;
		}
	}
	
	// create our new buffer
	buffer = new char [bufLen];
	if(debug)
	{
		printf("Checking if buffer is created... ");
		if(buffer)
		{
			printf("done!\n");
		}
		else
		{
			printf("failed\n");
			return;
		}
	}
	
	// copy content over
	for(int loop = 0; loop < length; loop++)
	{
		buffer[loop] = input[loop];
	}

	// blank the padding
	for(int loop = length; loop < bufLen; loop++)
	{
		buffer[loop] = 0x00u;
	}
	// set the first bit of the padding
	buffer[length] = 0x80u;
	
	// fill in the length of the original stream into the padding
	{
		int tempLength = length * 8;
		int tempVal;
		tempVal = tempLength;
		
		// use some bitwise shifting to access the length correctly
		buffer[bufLen - 1] = (unsigned char)tempVal;
		tempVal = tempLength >> 8;
		buffer[bufLen - 2] = (unsigned char)tempVal;
		//tempVal = tempLength >> 8;
		tempVal = tempLength >> 16;
		buffer[bufLen - 3] = (unsigned char)tempVal;
		//tempVal = tempLength >> 8;
		tempVal = tempLength >> 24;
		buffer[bufLen - 4] = (unsigned char)tempVal;		
	}
	if(debug)
	{
		printf("Checking reported length = stream length... ");
		if((unsigned char)buffer[bufLen - 4] * 256 * 256 * 256 + (unsigned char)buffer[bufLen - 3] * 256 * 256 + (unsigned char)buffer[bufLen - 2] * 256 + (unsigned char)buffer[bufLen - 1] == length * 8)
		{
			printf("done!\n");
		}
		else
		{
			printf("failed!\n");
			printf("length (in bits) : %u\n", length * 8);
			printf("padding(in bits) : %u\n", (unsigned char)buffer[bufLen - 4] * 256 * 256 * 256 + (unsigned char)buffer[bufLen - 3] * 256 * 256 + (unsigned char)buffer[bufLen - 2] * 256 + (unsigned char)buffer[bufLen - 1]);
			return;
		}
	}
	
	
	///
	/// !Error likely introduced somewhere below!
	///
	
	// below based on psuedo code from Wikipedia
	for(int MD5_loop = 0; MD5_loop < bufLen / 64; MD5_loop++)
	{
		unsigned int a, b, c, d;
		a = h0;
		b = h1;
		c = h2;
		d = h3;
		
		unsigned int word[16];		
		for(int loop = 0; loop < 16; loop++)
		{
			int tempVal;
			tempVal = 0;
			
			tempVal |= (unsigned char)buffer[MD5_loop * 64 + loop * 4];
			tempVal <<= 8;
			tempVal |= (unsigned char)buffer[MD5_loop * 64 + loop * 4 + 1];
			tempVal <<= 8;
			tempVal |= (unsigned char)buffer[MD5_loop * 64 + loop * 4 + 2];
			tempVal <<= 8;
			tempVal |= (unsigned char)buffer[MD5_loop * 64 + loop * 4 + 3];
			if(debug)
			{
				printf("%08x\n", tempVal);
			}
			
			word[loop] = tempVal;
		}
		
		unsigned int f, g, tempVal;
		for(int loop = 0; loop < 64; loop++)
		{
			if(loop < 16)
			{
				// (b and c) or (not b and d)
				f = ((b & c) | ((!b) & d));
				g = loop;
			}
			else if(loop < 32)
			{
				// (d and b) or (not d and c)
				f = ((d & b) | ((!d) & c));
				g = (5 * loop + 1) % 16;
			}
			else if(loop < 48)
			{
				// b xor c xor d
				f = (b ^ c ^ d);
				g = (3 * loop + 5) % 16;
			}
			else if(loop < 64)
			{
				// c xor (b or not d)
				f = (c ^ (b | (!d)));
				g = (7 * loop) % 16;
			}
			
			tempVal = d;
			d = c;
			c = b;
			b = (unsigned int)b + (unsigned int)left_rotate(a + f + k[loop] + word[g], r[loop]);
			a = tempVal;
		}
		
		h0 += (unsigned int)a;
		h1 += (unsigned int)b;
		h2 += (unsigned int)c;
		h3 += (unsigned int)d;
	}
	
	if(debug)
	{
		printf("Stream content:\n");
		for(int loop = 0; loop < bufLen; loop++)
		{
			printf("0x%02X ", (unsigned char)buffer[loop]);
		}
	}
	
	printf("%08x%08x%08x%08x\n", h0, h1, h2, h3);
}

Using the input "" the expected hash is d41d8cd98f00b204e9800998ecf8427e however I'm getting 5de5ae832d1f479f89adb8bdfa2a0655, obviously due to the nature of the hash it's extremely difficult to trace the issue back so I'm hoping someone might be able to notice my mistake and let me know so I can get on and fix it!
 
The pseudocode on wikipedia is basically ****.

Your code, constants and padding are all correct as far as I can tell up to the main loop for(int MD5_loop = 0; MD5_loop < bufLen / 64; MD5_loop++)

And inside that loop, the chunk splitting into 16 32-bit words is also correct.

I'm not convinced the round state operations with left_rotate are correct though.

Rather than wiki, follow the actual RFC here: http://www.faqs.org/rfcs/rfc1321.html
And there is an example of correct pseudocode here: http://www.herongyang.com/Cryptography/MD5-Message-Digest-Algorithm-Overview.html

You should define the functions F,G,H and I, and then define the four round functions. Something like this I think (double check them). I'm not sure how dissimilar your code is from these, but it's much clearer to define the functions following the RFC standard rather than wiki-standard.

Code:
#define F(x, y, z) (((x) & (y)) | (~(x) & (z)))
#define G(x, y, z) (((x) & (z)) | ((y) & ~(z)))
#define H(x, y, z) ((x) ^ (y) ^ (z))
#define I(x, y, z) ((y) ^ ((x) | ~(z)))

// These can be further optimized should you wish, just quickly mocked up at 4am :(
void round1(unsigned int &a, unsigned int b, unsigned int c, unsigned int d, unsigned int X, int s, int i)
{
	a = b + ((a + F(b,c,d) + X + k[i]) << s);
}

void round2(unsigned int &a, unsigned int b, unsigned int c, unsigned int d, unsigned int X, int s, int i)
{
	a = b + ((a + G(b,c,d) + X + k[i]) << s);
}

void round3(unsigned int &a, unsigned int b, unsigned int c, unsigned int d, unsigned int X, int s, int i)
{
	a = b + ((a + H(b,c,d) + X + k[i]) << s);
}

void round4(unsigned int &a, unsigned int b, unsigned int c, unsigned int d, unsigned int X, int s, int i)
{
	a = b + ((a + I(b,c,d) + X + k[i]) << s);
}

Then in the main loop you apply the specific integer round shift (6th parameter - value is from RFC & r[]):

Code:
     round1(h0,h1,h2,h3, word[0], 7, 0); // 7 is from r[]

Double check the above with the RFC but it should fit into your code quite nicely, replacing what's in the main loop for each of the 64 rounds and using all 4 round functions. round1() = 0->16 round2()->16->... etc.

Hope that makes some sense, it's very hard to narrow down your bug without analysing the raw bit strings from each stage (which is why with these kind of things you use an existing implementation). But start by following well defined RFC style code and compare it to a working example like the one below and hopefully you will spot the problem.

There is a very good example I have used before here of MD5 in C, you should be able to adapt it to fit into your foundations: http://cr.yp.to/2004-494/gaim/0.81-src/md5.c and http://cr.yp.to/2004-494/gaim/0.81-src/md5.h


And finally, use Whirlpool or SHA2 instead of MD5!
 
Last edited:
I actually fixed this last night before hitting the sack (otherwise I would have edited my OP). I managed to get my hands on some other MD5 code and saw how much crap that Wiki code was; I also realised I was feeding everything as big endian instead of little endian :rolleyes:

As for the other hashes, those will be coming at a later date (well SHA-2 anyway).
 
Back
Top Bottom