python looping through a file

Soldato
Joined
22 Aug 2005
Posts
8,841
Location
Clydebank
Hi all

I'm trying to extract JPEG data from an old-style JPEG-encoded multipage TIFF.

Suffice to say this involves extracting each section of data in the file between and including the bytes FF D8 FF up to the next FF D9.


I can loop through the file and detect the bytes byte by byte but how do I detect the string of bytes FF D8 FF ? i.e. I need to compare 3 iterations of my loop at the one time...

Here is the code I have with my attempt, which is very ugly and almost certainly breaks some holy pythonesque rule:

The problem here is it prints 140 times instead of just 40 (my test file has 40 pages)

Code:
import media
from PIL import Image
import sys
import os.path
from sys import argv



def bytes_from_file(filename, chunksize=8192):
    """Yield the contents of *filename* one byte at a time.

    The file is opened in binary mode and read in blocks of at most
    ``chunksize`` bytes, so only one block is held in memory at once.
    Each yielded item is whatever iterating the block produces (a
    one-character string on Python 2).
    """
    with open(filename, "rb") as source:
        block = source.read(chunksize)
        # An empty read means end of file.
        while block:
            for item in block:
                yield item
            block = source.read(chunksize)

# example:
# Scan the file byte by byte and report JPEG start markers, using two flags
# to remember what was seen earlier:
#   ffflag == 1 : an FF has been seen and a D8 is awaited
#   d8flag == 1 : a D8 has arrived after an FF and the closing FF is awaited
# NOTE(review): neither flag is cleared when some other byte turns up, so the
# pattern that triggers the "found" message is "FF ... D8 ... FF" with any
# number of bytes in the gaps, not three consecutive bytes. That is the likely
# reason more markers are reported than the file has pages.
ffflag = 0
d8flag = 0
for b in bytes_from_file('testjpeg.jpg'):
    # b is a single byte of the file; ord(b) gives its numeric value
    if ( ffflag == 1 ):
        # 216 == 0xD8. If this byte is anything else, ffflag is left set.
        if ( ord(b) == 216 ):
            print "b was D8"
            d8flag = 1
            ffflag = 0
    if ( ord(b) == 255 ):
        # 255 == 0xFF. An FF that arrives while ffflag is already set is ignored.
        if ( ffflag != 1 ):
            print "b was FF"
            ffflag = 1
            # d8flag may have been set many bytes ago; it is only cleared here.
            if ( d8flag == 1 ):
                print "3 bytes start marker found"
                d8flag = 0
                ffflag = 0


Any helps?
 
Soldato
OP
Joined
22 Aug 2005
Posts
8,841
Location
Clydebank
hmmm I have got it doing what I want -- is there a better way to do this ?

Code:
import media
from PIL import Image
import sys
import os.path
from sys import argv

# remove_header('xaba.tif')


def bytes_from_file(filename, chunksize=8192):
    """Generate every byte of *filename*, in order.

    Reads the file in binary mode, ``chunksize`` bytes at a time, and
    yields the elements of each chunk individually until a read comes
    back empty.
    """
    with open(filename, "rb") as stream:
        # iter(callable, sentinel) keeps calling read() until it returns b"".
        for piece in iter(lambda: stream.read(chunksize), b""):
            for octet in piece:
                yield octet

# example:

# Split the multipage TIFF into one file per embedded JPEG stream.
#
# Every run of bytes from a start marker (FF D8 FF) up to and including the
# next end marker (FF D9) is copied to "<pagecount>.jpg", where pagecount is
# 1 for the first stream found, 2 for the second, and so on.
#
# Compared with a plain "copy while a flag is set" loop, this version:
#   * writes the FF D9 end marker once (the D9 byte is already copied by the
#     normal per-byte write, so it must not be appended a second time);
#   * ignores an FF D9 that appears outside a page instead of touching an
#     undefined or already-closed file;
#   * always closes the current output file, even if the input ends or a new
#     start marker appears before the end marker;
#   * opens output files with 'wb' so a re-run replaces earlier results
#     rather than appending to them;
#   * accepts bytes delivered either as 1-character strings (Python 2) or as
#     ints (Python 3).

prevb = 0          # numeric value of the previous byte
prevprevb = 0      # numeric value of the byte before that
pagecount = 0      # start markers seen so far; also names the output file
fh = None          # output file of the page being copied, None between pages

try:
    for b in bytes_from_file('00000017.TIF'):
        # Normalise to an int whichever Python version produced the byte.
        cur = b if isinstance(b, int) else ord(b)

        # While inside a page, every byte (including the final D9) is copied.
        if fh is not None:
            fh.write(bytearray((cur,)))

        if prevprevb == 255 and prevb == 216 and cur == 255:
            print("string is FF D8 FF - start")
            # A start marker inside an unfinished page begins a new page;
            # close the old file first so the handle is not leaked.
            if fh is not None:
                fh.close()
            pagecount = pagecount + 1
            fh = open('%s.jpg' % pagecount, 'wb')
            # The three marker bytes were consumed before the file existed.
            fh.write(b'\xFF\xD8\xFF')
        elif fh is not None and prevb == 255 and cur == 217:
            print("string is FF D9 - END")
            fh.close()
            fh = None

        prevprevb = prevb
        prevb = cur
finally:
    # Input ended in the middle of a page: still release the file handle.
    if fh is not None:
        fh.close()


#        prevrat = imgdata[previdx][4]
#        prevpic = Image.
 
Last edited:
Back
Top Bottom