2017-06-09 00:05:45 +00:00
|
|
|
import re
|
2017-06-13 22:04:37 +00:00
|
|
|
import csv
|
2017-06-09 00:05:45 +00:00
|
|
|
|
|
|
|
# Patterns used by replace_author() to classify a raw author string.
# All of them are raw strings so the regex escapes (\*, \w, \s, ...) are not
# interpreted as (invalid) Python string escapes.

# A line starting with three asterisks.
failed_pattern = re.compile(r"^\*\*\*.*$")
# Exactly "<>".
null_author = re.compile(r"^<>$")
# "VISIER" followed by a literal backslash, then anything.
visier_prepended = re.compile(r"^VISIER\\.*$")

# Full names: one or more capitalised words, each separated by a single
# whitespace character, with an optional trailing whitespace character.
# Written as word(\sword)* instead of (word\s?)+ : both accept the same
# strings, but the latter backtracks exponentially on long all-caps tokens
# that ultimately fail to match.
full_name_no_email = re.compile(r"^[A-Z]\w*(?:\s[A-Z]\w*)*\s?$")
full_name_null_email = re.compile(r"^[A-Z]\w*(?:\s[A-Z]\w*)*\s?<>$")
full_name_with_email = re.compile(r"^[A-Z]\w*(?:\s[A-Z]\w*)*\s?<.+>$")

# Usernames: a single (possibly empty) run of word characters.
username_no_email = re.compile(r"^\w*$")
username_null_email = re.compile(r"^\w*\s?<>$")
username_with_email = re.compile(r"^\w*\s?<.+>$")
# Username followed by something in square brackets.
username_sqr_email = re.compile(r"^\w*\s?\[.*\]$")
# Username followed by something in round brackets.
username_rnd_name = re.compile(r"^\w*\s?\(.*\)$")
# Word characters immediately followed by "@" and anything.
username_address = re.compile(r"^\w*@.*$")

# Catch-all shapes, from more to less specific.
# Any non-empty text followed by <...>.
any_any = re.compile(r"^.+<.*>$")
# Only <...>, nothing in front of it.
null_any = re.compile(r"^<.*>$")
# Any text, whitespace, then a token containing "@".
any_email = re.compile(r"^.+\s\S+@\S+$")
# Any non-empty line.
any_null = re.compile(r"^.+$")
|
|
|
|
|
2017-06-13 22:04:37 +00:00
|
|
|
# Load the user directory once at import time. The first three columns of
# every row are read as name, username and e-mail.
# newline="" is how the csv module expects its input file to be opened, so
# that line endings inside the data are handled by the reader itself.
with open("users.csv", "r", newline="") as users_file:
    users_reader = csv.reader(users_file, delimiter=',')
    # csv.reader yields an empty list for a blank line; skip those so a stray
    # empty line in the file does not raise IndexError.
    users = [
        {"name": user[0], "username": user[1], "email": user[2]}
        for user in users_reader
        if user
    ]
|
2017-06-09 00:05:45 +00:00
|
|
|
|
|
|
|
def email_from_fullname(author):
    """Return the e-mail of the first ``users`` entry whose name is *author*.

    An empty string is returned when no entry has that name.
    """
    for entry in users:
        if entry["name"] == author:
            return entry["email"]
    return ""
|
2017-06-09 00:05:45 +00:00
|
|
|
|
|
|
|
def email_from_username(author):
    """Return the e-mail of the first ``users`` entry whose username is *author*.

    An empty string is returned when no entry has that username.
    """
    for entry in users:
        if entry["username"] == author:
            return entry["email"]
    return ""
|
2017-06-09 00:05:45 +00:00
|
|
|
|
|
|
|
def username_from_email(author):
    """Return the username of the first ``users`` entry whose e-mail is *author*.

    An empty string is returned when no entry has that e-mail.
    """
    for entry in users:
        if entry["email"] == author:
            return entry["username"]
    return ""
|
2017-06-09 00:05:45 +00:00
|
|
|
|
|
|
|
def replace_author(author):
    """Rewrite one raw author string into a ``name <email>`` shaped string.

    The input is tested against the module-level patterns in a fixed order and
    the first pattern that matches decides the result. Missing e-mail
    addresses are looked up with ``email_from_fullname`` /
    ``email_from_username``; a missing username is looked up with
    ``username_from_email``.

    Args:
        author: Raw author text. It may still carry the trailing newline of
            the line it was read from.

    Returns:
        The rewritten author string. Surrounding whitespace is not
        normalised, so callers should strip the result. ``"nulluser <>"`` is
        returned for input that matches none of the recognised shapes.
    """
    if failed_pattern.match(author):
        return "nulluser <>"

    if null_author.match(author):
        return "nulluser " + author

    if visier_prepended.match(author):
        # Drop the 7-character "VISIER\" prefix and classify the remainder.
        return replace_author(author[7:])

    if full_name_with_email.match(author):
        return author

    if full_name_null_email.match(author):
        # Cut the trailing "<>" off, then look the e-mail up by full name.
        fullname = author.strip()[:-2].strip()
        return fullname + " <{}>".format(email_from_fullname(fullname))

    if full_name_no_email.match(author):
        fullname = author.strip()
        return fullname + " <{}>".format(email_from_fullname(fullname))

    # A bare "<email>" must be handled before username_with_email: that
    # pattern allows an empty username, so it also matches "<email>" and would
    # make this branch unreachable.
    if null_any.match(author):
        # Strip first: with a trailing newline, [1:-1] would remove the
        # newline instead of the closing ">" and the lookup could not succeed.
        email = author.strip()[1:-1]
        return username_from_email(email) + " " + author

    if username_with_email.match(author):
        return author

    if username_null_email.match(author):
        # Cut the trailing "<>" off, then look the e-mail up by username.
        username = author.strip()[:-2].strip()
        return username + " <{}>".format(email_from_username(username))

    if username_no_email.match(author):
        username = author.strip()
        return username + " <{}>".format(email_from_username(username))

    if username_sqr_email.match(author):
        # "user [x]" -> "user <x>"
        return author.replace("[", "<").replace("]", ">")

    if username_rnd_name.match(author):
        # "user (Some Name)": keep the username only and look the e-mail up.
        username = author.split("(")[0].strip()
        return username + " <{}>".format(email_from_username(username))

    if username_address.match(author):
        # "user@host": the part before "@" is used as the username.
        username = author.split("@")[0]
        return username + " <{}>".format(email_from_username(username))

    if any_any.match(author):
        return author

    if any_email.match(author):
        # The last space-separated token is the address; wrap it in <>.
        bad_email = author.split(" ")[-1]
        email = bad_email.strip().replace("<", "").replace(">", "")
        return author.replace(bad_email, "<{}>".format(email))

    # Anything else (any_null, and input that matches no pattern at all) is
    # mapped to the null user rather than falling through and returning None.
    return "nulluser <>"
|
|
|
|
|
2017-06-13 22:04:37 +00:00
|
|
|
# Read authors.txt line by line and write one "<original>=<rewritten>" line
# per input line to reformatted-authors.txt.
with open("authors.txt", "r") as in_authors:
    with open("reformatted-authors.txt", "w") as out_authors:
        for author in in_authors:
            original = author.strip()
            # replace_author receives the unstripped line; only its result
            # is stripped.
            rewritten = replace_author(author).strip()
            out_authors.write("{0}={1}\n".format(original, rewritten))
|