Python log filtering help

Soldato
Joined
3 Feb 2010
Posts
3,034
So I have 7 different files:

u_ex150601.log
u_ex150602.log
u_ex150603.log
u_ex150604.log
u_ex150605.log
u_ex150606.log
u_ex150607.log

and I am trying to filter the data from the logs into a CSV file. I am having no problem doing it one file at a time, but is there any way of running it across multiple files in one go, with everything collated into one CSV file?

Any help would be greatly appreciated


Code:
import apache_log_parser

from csv import DictWriter

parser = apache_log_parser.make_parser('%h %l %u %t "%r" %>s "%{User-agent}i"')


# Convert one access log into a CSV file with anonymised client addresses.
#
# newline='' is what the csv module documentation requires for files handed
# to a writer; without it, Windows output gets a blank line after every row.
with open('cleaned_log_1.csv', 'w', newline='') as out_f:
    # extrasaction='ignore' lets us pass the parser's full dict to writerow();
    # keys that are not listed in fieldnames are silently dropped.
    writer = DictWriter(out_f,
                        fieldnames=['remote_host',
                                    'time_received_isoformat',
                                    'request_method',
                                    'request_url_path',
                                    'request_url_query',
                                    'status',
                                    'request_header_user_agent'],
                        extrasaction='ignore')
    writer.writeheader()

    # Real IP address -> sequential pseudonym ('1', '2', ...), assigned in
    # order of first appearance so repeat visitors keep the same ID.
    ip_map = {}
    with open('u_ex150601.log') as in_f:
        for raw_line in in_f:
            entry = parser(raw_line)

            # start of IP address anonymization
            ip_addr = entry['remote_host']
            # setdefault only stores the new ID when the address is unseen;
            # len(ip_map) is evaluated before the insertion happens.
            entry['remote_host'] = ip_map.setdefault(ip_addr,
                                                     str(len(ip_map) + 1))
            # end of IP address anonymization

            writer.writerow(entry)
 
Never mind - sorted it. Not the most "majestic" code, but it works.

Added a bot filter too.

Code:
import apache_log_parser
import user_agents
from csv import DictWriter

parser = apache_log_parser.make_parser('%h %l %u %t "%r" %>s "%{User-agent}i"')


# Merge a week of access logs into a single CSV file, dropping requests made
# by bots and replacing each client IP address with a sequential pseudonym.

# Log files to merge, in the order their rows should appear in the CSV.
log_files = [
    'u_ex150601.log',
    'u_ex150602.log',
    'u_ex150603.log',
    'u_ex150604.log',
    'u_ex150605.log',
    'u_ex150606.log',
    'u_ex150607.log',
]

# newline='' is what the csv module documentation requires for files handed
# to a writer; without it, Windows output gets a blank line after every row.
with open('cleaned_log.csv', 'w', newline='') as c:
    # extrasaction='ignore' lets us pass the parser's full dict to writerow();
    # keys that are not listed in fieldnames are silently dropped.
    writer = DictWriter(c,
                        fieldnames=['remote_host',
                                    'time_received_isoformat',
                                    'request_method',
                                    'request_url_path',
                                    'request_url_query',
                                    'request_header_user_agent'],
                        extrasaction='ignore')
    writer.writeheader()

    # Real IP address -> sequential pseudonym ('1', '2', ...). The map is
    # shared by every log file so a visitor keeps the same ID all week.
    ip_map = {}

    for log_file in log_files:
        with open(log_file) as log:
            for raw_line in log:
                entry = parser(raw_line)

                # Skip anything the user_agents library classifies as a bot.
                ua = user_agents.parse(entry['request_header_user_agent'])
                if ua.is_bot:
                    continue

                # start of IP address anonymization
                ip_addr = entry['remote_host']
                if ip_addr not in ip_map:
                    ip_map[ip_addr] = str(len(ip_map) + 1)
                entry['remote_host'] = ip_map[ip_addr]
                # end of IP address anonymization

                writer.writerow(entry)
 
Back
Top Bottom