1
0
Fork 0
hg2git/authors.py

89 lines
4.2 KiB
Python
Raw Normal View History

import re
import csv
import os
failed_pattern = re.compile("^\*\*\*.*$")
null_author = re.compile("^<>$")
full_name_no_email = re.compile("^([A-Z][\w\-]*\s?)+$")
full_name_null_email = re.compile("^([A-Z][\w\-]*\s?)+<>$")
full_name_with_email = re.compile("^([A-Z][\w\-]*\s?)+<.+>$")
username_no_email = re.compile("^[\w\-]*$")
username_null_email = re.compile("^[\w\-]*\s?<>$")
username_with_email = re.compile("^[\w\-]*\s?<.+>$")
username_sqr_email = re.compile("^[\w\-]*\s?\[.*\]$")
username_rnd_name = re.compile("^[\w\-]*\s?\(.*\)$")
username_address = re.compile("^[\w\-]*@.*$")
first_last_name = re.compile("^[\w\-]*\.[\w\-]*$")
any_any = re.compile("^.+<.*>$")
null_any = re.compile("^<.*>$")
any_email = re.compile("^.+\s\S+@\S+$")
any_null = re.compile("^.+$")
with open(os.path.dirname(os.path.abspath(__file__)) + "/users.csv", "r") as users_file:
users_reader = csv.reader(users_file, delimiter=',')
users = [{"name": user[0], "username": user[1], "email": user[2]} for user in users_reader]
def email_from_fullname(author):
return next((user["email"] for user in users if user["name"].lower() == author.lower()), "")
def email_from_username(author):
return next((user["email"] for user in users if user["username"].lower() == author.lower()), "")
def username_from_email(author):
return next((user["username"] for user in users if user["email"].lower() == author.lower()), "")
def username_from_fullname(author):
return next((user["username"] for user in users if user["name"].lower() == author.lower()), "")
def username_from_firstname_lastname(author):
return next((user["username"] for user in users if user["name"].lower() == author.replace(".", " ").lower()), "")
def replace_author(author):
if failed_pattern.match(author):
return "nulluser <>"
if null_author.match(author):
return "nulluser " + author
if full_name_with_email.match(author):
fullname = author.split("<")[0].strip()
email = author.split("<")[1].split(">")[0].strip()
username = username_from_email(email)
return (username if username else fullname) + " <{}>".format(email)
if full_name_null_email.match(author):
fullname = author.strip()[:-2].strip()
return username_from_fullname(fullname) + " <{}>".format(email_from_fullname(fullname))
if full_name_no_email.match(author):
username = username_from_fullname(author.strip())
return (username if username else author.strip()) + " <{}>".format(email_from_fullname(author.strip()))
if username_with_email.match(author):
return author
if username_null_email.match(author):
username = author.strip()[:-2].strip()
return username + " <{}>".format(email_from_username(username))
if username_no_email.match(author):
return author.strip() + " <{}>".format(email_from_username(author.strip()))
if username_sqr_email.match(author):
return author.replace("[", "<").replace("]", ">")
if username_rnd_name.match(author):
username = author.split("(")[0].strip()
return username + " <{}>".format(email_from_username(username))
if username_address.match(author):
username = author.split("@")[0]
return username + " <{}>".format(email_from_username(username))
if first_last_name.match(author):
username = username_from_firstname_lastname(author)
return (username if username else author.strip()) + " <{}>".format(email_from_username(username))
if any_any.match(author):
return author
if null_any.match(author):
return username_from_email(author[1:-1]) + " " + author
if any_email.match(author):
bad_email = author.split(" ")[-1]
email = bad_email.strip().replace("<", "").replace(">", "")
return author.replace(bad_email, "<{}>".format(email))
if any_null.match(author):
return "nulluser <>"
with open("authors.txt", "r") as in_authors, open("reformatted-authors.txt", "w") as out_authors:
for author in in_authors:
out_authors.write("\"{0}\"=\"{1}\"\n".format(author.strip(), replace_author(author).strip()))