1
0
Fork 0

Initial commit of generic migration files

This commit is contained in:
jochan 2017-06-08 17:05:45 -07:00
commit 28a7803f38
3 changed files with 142 additions and 0 deletions

74
authors.py Normal file
View File

@ -0,0 +1,74 @@
import re
# Author-string patterns used by replace_author().
# Every pattern is a raw string so that regex escapes (\*, \w, \s, \\) reach
# the regex engine unchanged instead of being processed as string escapes.

# "*** failed to import..." entries
failed_pattern = re.compile(r"^\*\*\*.*$")
# "<>": neither a name nor an email
null_author = re.compile(r"^<>$")
# "VISIER\username ...": \\ is one literal backslash. As a non-raw string
# this pattern degraded to "VISIER" followed by literal dots and never
# matched a prefixed entry.
visier_prepended = re.compile(r"^VISIER\\.*$")
# "Full Name": capitalised words only
full_name_no_email = re.compile(r"^([A-Z]\w*\s?)+$")
# "Full Name <>"
full_name_null_email = re.compile(r"^([A-Z]\w*\s?)+<>$")
# "Full Name <name@email.com>"
full_name_with_email = re.compile(r"^([A-Z]\w*\s?)+<.*>$")
# "username"
username_no_email = re.compile(r"^\w*$")
# "username <>"
username_null_email = re.compile(r"^\w*\s?<>$")
# "username <name@email.com>"
username_with_email = re.compile(r"^\w*\s?<.*>$")
# "username [name@email.com]"
username_sqr_email = re.compile(r"^\w*\s?\[.*\]$")
# "username (Full Name)"
username_rnd_name = re.compile(r"^\w*\s?\(.*\)$")
# "username@address.com"
username_address = re.compile(r"^\w*@.*$")
# "anything <anything>"
any_any = re.compile(r"^.+<.*>$")
# "<anything>"
null_any = re.compile(r"^<.*>$")
# "anything name@email.com"
any_email = re.compile(r"^.+\s\S+@\S+$")
# anything else that is non-empty
any_null = re.compile(r"^.+$")
def email_from_fullname(author):
    """Return the placeholder email text for a full name.

    The result is the fixed prefix "email for " followed by *author*.
    """
    prefix = "email for "
    return prefix + author
def email_from_username(author):
    """Return the placeholder email text for a username.

    The result is the fixed prefix "email for " followed by *author*.
    """
    prefix = "email for "
    return prefix + author
def username_from_email(author):
    """Return the placeholder username text for an email address.

    The result is the fixed prefix "dummy_username for " followed by
    *author*.
    """
    prefix = "dummy_username for "
    return prefix + author
def replace_author(author):
    """Rewrite one author entry into ``name <email>`` form.

    The module-level patterns are tried in a fixed order and the first one
    that matches decides the result. Some branches return the input
    unchanged, including any trailing newline, so callers should strip the
    result.

    Args:
        author: One raw author entry; may end with a newline.

    Returns:
        The rewritten author string, or "nulluser <>" when no name or email
        can be extracted.
    """
    if failed_pattern.match(author):
        return "nulluser <>"
    if null_author.match(author):
        return "nulluser " + author
    if null_any.match(author):
        # Must run before the username_* patterns: their leading \w* also
        # matches an empty name, so "<email>" entries were previously
        # returned unchanged and this branch was never reached.
        # Strip first so a trailing newline is not mistaken for the ">".
        email = author.strip()[1:-1]
        return username_from_email(email) + " " + author
    if visier_prepended.match(author):
        # Drop the 7-character "VISIER\" prefix and classify the remainder.
        return replace_author(author[7:])
    if full_name_no_email.match(author):
        fullname = author.strip()
        return fullname + " <{}>".format(email_from_fullname(fullname))
    if full_name_null_email.match(author):
        # [:-2] removes the empty "<>" before a real email is appended.
        fullname = author.strip()[:-2].strip()
        return fullname + " <{}>".format(email_from_fullname(fullname))
    if full_name_with_email.match(author):
        return author
    if username_no_email.match(author):
        username = author.strip()
        return username + " <{}>".format(email_from_username(username))
    if username_null_email.match(author):
        username = author.strip()[:-2].strip()
        return username + " <{}>".format(email_from_username(username))
    if username_with_email.match(author):
        return author
    if username_sqr_email.match(author):
        return author.replace("[", "<").replace("]", ">")
    if username_rnd_name.match(author):
        username = author.split("(")[0].strip()
        # Wrap the email in <> like every other branch; it used to be
        # appended bare.
        return username + " <{}>".format(email_from_username(username))
    if username_address.match(author):
        username = author.split("@")[0]
        return username + " <{}>".format(email_from_username(username))
    if any_any.match(author):
        return author
    if any_email.match(author):
        # Wrap only the final whitespace-delimited token. Splitting on any
        # whitespace mirrors the pattern's \s, and slicing (rather than
        # str.replace) leaves earlier occurrences of the same text alone.
        body = author.rstrip()
        email = body.split()[-1]
        trailing = author[len(body):]
        return body[:-len(email)] + "<{}>".format(email) + trailing
    # any_null covers every remaining non-empty single-line entry; returning
    # unconditionally also keeps the function from returning None for input
    # that no pattern matches.
    return "nulluser <>"
# Read one author per line from authors.txt and write an
# "original=rewritten" mapping line for each to reformatted-authors.txt.
# The with-blocks close both files even if replace_author() raises.
with open("authors.txt", "r") as in_authors:
    with open("reformatted-authors.txt", "w") as out_authors:
        for author in in_authors:
            original = author.strip()
            if not original:
                # A blank line would otherwise produce an entry with an
                # empty key.
                continue
            out_authors.write("{0}={1}\n".format(original, replace_author(author).strip()))

50
hg2git.cf Normal file
View File

@ -0,0 +1,50 @@
# PREP WORK
git clone --depth=1 --branch=master https://github.com/frej/fast-export.git <fast-export>
curl "https://repo1.maven.org/maven2/com/madgag/bfg/1.12.15/bfg-1.12.15.jar" -o <bfg-repo-cleaner>/bfg.jar
mkdir <target>
# TO CREATE A GIT REPO AT <target> FROM <source>
cd <source>
hg log | grep user: | sort | uniq | sed "s/user: *//" > <target>/authors.txt
cd <target>
python <authors>/authors.py
cp <fast-export>/* .
git init
git config core.ignoreCase false
sh hg-fast-export.sh -r <source> --force -A reformatted-authors.txt
# TO CLEAN UP LARGE FILES
git gc
java -jar <bfg-repo-cleaner>/bfg.jar --strip-blobs-bigger-than <size> <target>
git reflog expire --expire=now --all
git gc --prune=now --aggressive
# TO DELETE CLOSED BRANCHES
cd <source>
hg heads --closed --template "{branch}\n" | tr " " "_" | sort > <target>/all.log
hg heads --template "{branch}\n" | tr " " "_" | sort > <target>/open.log
cd <target>
comm -2 -3 all.log open.log > closed.log
for branch in `cat closed.log`; do git tag "closed/$branch" $branch; git branch -df $branch; done
# CLEAN-UP WORK
git checkout master
git clean -df
* create .gitignore (do before committing anything):
* `find . -name ".hgignore"` to find all .hgignore files
* rename all .hgignore to .gitignore
* rewrite anything under "syntax: regexp" to glob form
* remove "syntax: {glob|regexp}" lines
# OTHER TASKS
* cloning all branches:
git clone --mirror <target> <target-clone>/.git
cd <target-clone>
git config --local --bool core.bare false
git checkout master
* cloning a specific <branch>:
git clone -b <branch> --single-branch <target> <target-clone>
# NOTES
* use `git pull --rebase` instead of merging local commits to avoid excessive merges
* merges should typically only happen when merging a distinct branch with master

18
pattern-author.txt Normal file
View File

@ -0,0 +1,18 @@
format matches replace
------ ------- -------
*** failed to import... ^\*\*\*.*$ "nulluser <>"
<> ^<>$ prepend with "nulluser "
Full Name ^([A-Z]\w*\s?)+$ append with <email>
Full Name <> ^([A-Z]\w*\s?)+<>$ <> with <email>
Full Name <name@email.com> ^([A-Z]\w*\s?)+<.*>$ do nothing
VISIER\username <name@email.com> ^VISIER\\.*$ drop 7 and recurse
username ^\w*$ append with <email>
username <> ^\w*\s?<>$ <> with <email>
username <name@email.com> ^\w*\s?<.*>$ do nothing
username [name@email.com] ^\w*\s?\[.*\]$ [ with < and ] with >
username (Full Name) ^\w*\s?\(.*\)$ \s?\(.*\) with \s<email>
username@address.com ^\w*@.*$ @.* with \s<email>
anything <anything> ^.+<.*>$ do nothing
<anything> ^<.*>$ prepend with user
anything name@email.com ^.+\s\S+@\S+$ wrap email with <>
anything ^.+$ "nulluser <>"