Error in my MD5 code

Associate
Joined
22 Sep 2009
Posts
2,085
Location
Leicester
I'm in the process of writing an app using MD5, and rather than using a library I decided to write my own function to really understand what's going on.

Anyway, my main reference is Wikipedia, I have taken certain parts from other source code, specifically values for k and r.

Code:
// generated by using floor(fabs(sin(loop + 1))) * 2^32 where 0 <= loop < 64
unsigned int k[] =
{
	3614090360u, 3905402710u,  606105819u, 3250441966u, 4118548399u, 1200080426u, 2821735955u, 4249261313u,
	1770035416u, 2336552879u, 4294925233u, 2304563134u, 1804603682u, 4254626195u, 2792965006u, 1236535329u,
	4129170786u, 3225465664u,  643717713u, 3921069994u, 3593408605u,   38016083u, 3634488961u, 3889429448u,
	 568446438u, 3275163606u, 4107603335u, 1163531501u, 2850285829u, 4243563512u, 1735328473u, 2368359562u,
	4294588738u, 2272392833u, 1839030562u, 4259657740u, 2763975236u, 1272893353u, 4139469664u, 3200236656u,
	 681279174u, 3936430074u, 3572445317u,   76029189u, 3654602809u, 3873151461u,  530742520u, 3299628645u,
	4096336452u, 1126891415u, 2878612391u, 4237533241u, 1700485571u, 2399980690u, 4293915773u, 2240044497u,
	1873313359u, 4264355552u, 2734768916u, 1309151649u, 4149444226u, 3174756917u,  718787259u, 3951481745u
};


unsigned int r[] =
{
	7, 12, 17, 22,  7, 12, 17, 22,  7, 12, 17, 22,  7, 12, 17, 22,
	5,  9, 14, 20,  5,  9, 14, 20,  5,  9, 14, 20,  5,  9, 14, 20,
	4, 11, 16, 23,  4, 11, 16, 23,  4, 11, 16, 23,  4, 11, 16, 23,
	6, 10, 15, 21,  6, 10, 15, 21,  6, 10, 15, 21,  6, 10, 15, 21
};
	 
inline unsigned int left_rotate(unsigned int x, unsigned int c)
{
	return (x << c) | (x >> (32 - c));
}

void c_md5::md5(const char *input, unsigned int length)
{
	unsigned int h0 = 0x67452301;
	unsigned int h1 = 0xEFCDAB89;
	unsigned int h2 = 0x98BADCFE;
	unsigned int h3 = 0x10325476;
	char *buffer;
	int bufLen;
	
	// get the original input length
	bufLen = length;
	
	if(((bufLen % 64) > 0) || (bufLen == 0))
	{
		if(64 - bufLen < 8)
		{
			bufLen += 64;
		}		
		int tempVal;
		tempVal = bufLen % 64;
		bufLen += 64 - tempVal;
	}
	if(debug)
	{
		printf("Checking if buffer length is divisible by 512... ");
		if(bufLen % 64 == 0)
		{
			printf("done!\n");
		}
		else
		{
			printf("failed!\n");
			return;
		}
	}
	
	// create our new buffer
	buffer = new char [bufLen];
	if(debug)
	{
		printf("Checking if buffer is created... ");
		if(buffer)
		{
			printf("done!\n");
		}
		else
		{
			printf("failed\n");
			return;
		}
	}
	
	// copy content over
	for(int loop = 0; loop < length; loop++)
	{
		buffer[loop] = input[loop];
	}

	// blank the padding
	for(int loop = length; loop < bufLen; loop++)
	{
		buffer[loop] = 0x00u;
	}
	// set the first bit of the padding
	buffer[length] = 0x80u;
	
	// fill in the length of the original stream into the padding
	{
		int tempLength = length * 8;
		int tempVal;
		tempVal = tempLength;
		
		// use some bitwise shifting to access the length correctly
		buffer[bufLen - 1] = (unsigned char)tempVal;
		tempVal = tempLength >> 8;
		buffer[bufLen - 2] = (unsigned char)tempVal;
		//tempVal = tempLength >> 8;
		tempVal = tempLength >> 16;
		buffer[bufLen - 3] = (unsigned char)tempVal;
		//tempVal = tempLength >> 8;
		tempVal = tempLength >> 24;
		buffer[bufLen - 4] = (unsigned char)tempVal;		
	}
	if(debug)
	{
		printf("Checking reported length = stream length... ");
		if((unsigned char)buffer[bufLen - 4] * 256 * 256 * 256 + (unsigned char)buffer[bufLen - 3] * 256 * 256 + (unsigned char)buffer[bufLen - 2] * 256 + (unsigned char)buffer[bufLen - 1] == length * 8)
		{
			printf("done!\n");
		}
		else
		{
			printf("failed!\n");
			printf("length (in bits) : %u\n", length * 8);
			printf("padding(in bits) : %u\n", (unsigned char)buffer[bufLen - 4] * 256 * 256 * 256 + (unsigned char)buffer[bufLen - 3] * 256 * 256 + (unsigned char)buffer[bufLen - 2] * 256 + (unsigned char)buffer[bufLen - 1]);
			return;
		}
	}
	
	
	///
	/// !Error likely introduced somewhere below!
	///
	
	// below based on psuedo code from Wikipedia
	for(int MD5_loop = 0; MD5_loop < bufLen / 64; MD5_loop++)
	{
		unsigned int a, b, c, d;
		a = h0;
		b = h1;
		c = h2;
		d = h3;
		
		unsigned int word[16];		
		for(int loop = 0; loop < 16; loop++)
		{
			int tempVal;
			tempVal = 0;
			
			tempVal |= (unsigned char)buffer[MD5_loop * 64 + loop * 4];
			tempVal <<= 8;
			tempVal |= (unsigned char)buffer[MD5_loop * 64 + loop * 4 + 1];
			tempVal <<= 8;
			tempVal |= (unsigned char)buffer[MD5_loop * 64 + loop * 4 + 2];
			tempVal <<= 8;
			tempVal |= (unsigned char)buffer[MD5_loop * 64 + loop * 4 + 3];
			if(debug)
			{
				printf("%08x\n", tempVal);
			}
			
			word[loop] = tempVal;
		}
		
		unsigned int f, g, tempVal;
		for(int loop = 0; loop < 64; loop++)
		{
			if(loop < 16)
			{
				// (b and c) or (not b and d)
				f = ((b & c) | ((!b) & d));
				g = loop;
			}
			else if(loop < 32)
			{
				// (d and b) or (not d and c)
				f = ((d & b) | ((!d) & c));
				g = (5 * loop + 1) % 16;
			}
			else if(loop < 48)
			{
				// b xor c xor d
				f = (b ^ c ^ d);
				g = (3 * loop + 5) % 16;
			}
			else if(loop < 64)
			{
				// c xor (b or not d)
				f = (c ^ (b | (!d)));
				g = (7 * loop) % 16;
			}
			
			tempVal = d;
			d = c;
			c = b;
			b = (unsigned int)b + (unsigned int)left_rotate(a + f + k[loop] + word[g], r[loop]);
			a = tempVal;
		}
		
		h0 += (unsigned int)a;
		h1 += (unsigned int)b;
		h2 += (unsigned int)c;
		h3 += (unsigned int)d;
	}
	
	if(debug)
	{
		printf("Stream content:\n");
		for(int loop = 0; loop < bufLen; loop++)
		{
			printf("0x%02X ", (unsigned char)buffer[loop]);
		}
	}
	
	printf("%08x%08x%08x%08x\n", h0, h1, h2, h3);
}

Using the input "" the expected hash is d41d8cd98f00b204e9800998ecf8427e however I'm getting 5de5ae832d1f479f89adb8bdfa2a0655, obviously due to the nature of the hash it's extremely difficult to trace the issue back so I'm hoping someone might be able to notice my mistake and let me know so I can get on and fix it!
 
I actually fixed this last night before hitting the sack (otherwise I would have edited my OP). I managed to get my hands on some other MD5 code and saw how much crap that Wiki code was; I also realised I was feeding everything as big endian instead of little endian :rolleyes:

As for the other hashes, those will be coming at a later date (well SHA-2 anyway).
 
Back
Top Bottom