Python: Upload S3 logs to MySQL

I just switched from DynamoDB to MySQL, and it is much faster and better for analyzing data. I used the SQLAlchemy ORM layer, so you need to install a few Python modules. Here is the code.

#!/usr/bin/env python3
import boto3, re, json, hashlib, sys, os, sqlalchemy, base
from sqlalchemy.ext.declarative import *
from sqlalchemy import Column, Integer, String

# Connection settings -- replace the placeholders with real values.
mysql_user = 'USER'
mysql_db = 'DB'
mysql_pass = 'PASS'
# Host name of the MySQL server. The host portion of the connection URL was
# unreadable in the original text, so this default is an assumption --
# TODO confirm against the real deployment.
mysql_host = 'localhost'

# Declarative base class that the ORM model(s) in this file derive from.
Base = declarative_base()

# NOTE(review): the credentials are interpolated verbatim; a password that
# contains URL-reserved characters such as '@' or '/' must be escaped first.
engine = sqlalchemy.create_engine(
    'mysql://%s:%s@%s/%s' % (mysql_user, mysql_pass, mysql_host, mysql_db)
)

# Session factory bound to the engine, plus the one session the script uses.
Session = sqlalchemy.orm.sessionmaker(bind=engine)
session = Session()

class s3logs(Base):
    """ORM model mapped to the ``logs`` table.

    All columns are declared as ``String``; ``id`` is the primary key.
    """

    __tablename__ = 'logs'

    # Primary key of the row.
    id = Column(String, primary_key=True)
    # Remaining string-typed columns of the record.
    repository = Column(String)
    date = Column(String)
    ip = Column(String)
    item = Column(String)
    referer = Column(String)
    agent = Column(String)

    def __init__(self, id, repository, date, ip, item, referer, agent):
        """Store each positional argument on the column of the same name."""
        column_values = (
            ('id', id),
            ('repository', repository),
            ('date', date),
            ('ip', ip),
            ('item', item),
            ('referer', referer),
            ('agent', agent),
        )
        # Assign in declaration order, one mapped attribute per argument.
        for column_name, value in column_values:
            setattr(self, column_name, value)

# https://stackoverflow.com/questions/12544510/parsing-apache-log-files
# https://regex101.com/r/cgY3Zu/1

# Pattern for one S3 access-log record, compiled once at import time.
# Written as a raw string so that the regex escapes (\w, \d, \[ ...) are not
# interpreted as (invalid) string escapes; the pattern text itself is unchanged.
_S3_LOG_PATTERN = re.compile(
    r'(\w+) ([\w_-]+) \[(.*?)\] ([(\d\.)]+) - (\w+) ([\w\.]*) (.+?) "(.*?)" '
    r'(\d+) - (\d+) (\d+) (\d+) (\d+) "(.*?)" "(.*?)" -'
)


def _record_from_line(line):
    """Return the ``s3logs`` constructor arguments parsed from one raw log line.

    Raises:
        ValueError: if *line* does not match the access-log pattern.
    """
    match = _S3_LOG_PATTERN.match(line)
    if match is None:
        raise ValueError('Not an S3 access log record: %r' % line)
    fields = match.groups()

    # The id is the SHA-224 of the raw line exactly as read (line terminator
    # included), so ids stay identical to those of rows loaded previously.
    record_id = hashlib.sha224(line.encode('utf-8')).hexdigest()

    # Positions in the match: 1 -> repository, 2 -> date, 3 -> ip,
    # 6 -> item, 13 -> referer, 14 -> agent.
    return (record_id, fields[1], fields[2], fields[3],
            fields[6], fields[13], fields[14])


def parse_file(file):
    """Insert every access-log record found in *file* into the ``logs`` table.

    Each non-blank line is parsed and committed in its own transaction, so one
    malformed or duplicate record does not prevent the remaining lines of the
    file from being stored. The outcome of each record is reported on stdout
    ("PutItem succeeded" / "PutItem failed"). A file without any non-blank
    line produces no output.

    Args:
        file: path of the log file to load.

    Returns:
        None. Errors are printed rather than raised.
    """
    try:
        with open(file) as f:
            lines = f.readlines()
    except (OSError, UnicodeDecodeError) as e:
        # Unreadable entry (missing file, directory, undecodable bytes...):
        # nothing was added to the session, so there is nothing to roll back.
        print(e)
        print("PutItem failed")
        return

    for line in lines:
        if not line.strip():
            continue
        try:
            session.add(s3logs(*_record_from_line(line)))
            session.commit()
        except Exception as e:
            # Per-record boundary: report the problem, discard the pending
            # transaction and carry on with the next line.
            print(e)
            print("PutItem failed")
            session.rollback()
        else:
            print("PutItem succeeded")


# The directory holding the log files is taken from the first CLI argument;
# exit with a usage message instead of an IndexError when it is missing.
if len(sys.argv) < 2:
    sys.exit('Usage: %s <log-directory>' % sys.argv[0])
inputdir = sys.argv[1]

# Hand every directory entry to parse_file, which reports its own failures.
for file in os.listdir(inputdir):
    parse_file(os.path.join(inputdir, file))