Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor SVN commands #5

Merged
merged 1 commit into from
Feb 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bridge-repo-converter/build/compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ services:
- ../repos-to-serve/:/repos-to-serve
environment:
- BRIDGE_REPO_CONVERTER_INTERVAL_MINUTES=1
- LOG_LEVEL=debug
- LOG_LEVEL=DEBUG # DEBUG INFO WARNING ERROR CRITICAL # Default is INFO
networks:
- sourcegraph

Expand Down
225 changes: 133 additions & 92 deletions bridge-repo-converter/build/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
# Python 3.12.1

### TODO:
# Author files
# Atlassian's Java binary to tidy up branches and tags
# Configure batch size
# Test layout tags and branches as lists / arrays
# Output status update on clone jobs
# Revision x of y completed, time taken, ETA for remaining revisions
# Check for clone completion, and log it
# Configure history start, etc.
# Git check if the repo already exists

### Notes
# See this migration guide https://www.atlassian.com/git/tutorials/migrating-convert
Expand All @@ -17,7 +19,14 @@
# authors file
# java -jar /sourcegraph/svn-migration-scripts.jar authors https://svn.apache.org/repos/asf/eagle > authors.txt
# Kinda useful, surprisingly fast
# git config svn.authorsfile
# git config svn.authorsfile # https://git-scm.com/docs/git-svn#Documentation/git-svn.txt---authors-fileltfilenamegt
# git config svn.authorsProg # https://git-scm.com/docs/git-svn#Documentation/git-svn.txt---authors-progltfilenamegt

# git gc

# git default branch for a bare repo git symbolic-ref HEAD refs/heads/trunk

# git list all config git -C $repo_path config --list

# clean-git
# java -Dfile.encoding=utf-8 -jar /sourcegraph/svn-migration-scripts.jar clean-git
Expand All @@ -27,6 +36,8 @@
# Corrupted repo

# Find a python library for manipulating git repos
# https://gitpython.readthedocs.io/en/stable/tutorial.html
# Couple CVEs: https://nvd.nist.gov/vuln/search/results?query=gitpython

# An example of doing the conversion in Python, not sure why when git svn exists
# https://sourcegraph.com/github.com/gabrys/svn2github/-/blob/svn2github.py
Expand Down Expand Up @@ -64,6 +75,30 @@ def fork_and_wait(args):
return forked_process.exitcode


def subprocess_run(args, password=False):

# Copy args to redact passwords for logging
args_without_password = args.copy()

if password:
args_without_password[args_without_password.index(password)] = "REDACTED"

try:

logging.debug(f"Starting subprocess: {' '.join(args_without_password)}")
result = subprocess.run(args, check=True, capture_output=True, text=True)

if result.returncode == 0:
logging.debug(f"Subprocess succeeded: {' '.join(args_without_password)} with output: {result.stdout}")

except subprocess.CalledProcessError as error:

logging.error(f"Subprocess failed: {' '.join(args_without_password)} with error: {error}")
result = False

return result


def parse_args(args_dict):

# Parse the command args
Expand All @@ -73,8 +108,9 @@ def parse_args(args_dict):
formatter_class = argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument(
"-d",
"--debug",
action = "store_true",
default = False,
help = "Quick flag to set --log-level DEBUG",
)
parser.add_argument(
Expand All @@ -90,7 +126,6 @@ def parse_args(args_dict):
parser.add_argument(
"--log-level",
choices =["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
default = "INFO",
help = "Log level",
)
parser.add_argument(
Expand All @@ -100,7 +135,7 @@ def parse_args(args_dict):
help = "Run without logging to stdout",
)
parser.add_argument(
"--repo-path",
"--repo-share-path",
default = "/repos-to-serve",
help = "Root of path to directory to store cloned Git repos",
)
Expand All @@ -109,11 +144,23 @@ def parse_args(args_dict):
# Store the parsed args in the args dictionary
args_dict["repos_to_convert_file"] = Path(parsed.repos_to_convert)
args_dict["log_file"] = Path(parsed.log_file)
args_dict["log_level"] = parsed.log_level
args_dict["quiet"] = parsed.quiet
args_dict["repo_path"] = parsed.repo_path
args_dict["repo_share_path"] = parsed.repo_share_path

# Set the log level, in order of ascending precedence
# Set the default, so this key isn't left empty
args_dict["log_level"] = "INFO"

# Override the default if defined in the OS environment variables
if os.environ.get('LOG_LEVEL'):
args_dict["log_level"] = os.environ.get('LOG_LEVEL')

# Override the default and OS environment variables if specified in --log-level arg
if parsed.log_level:
args_dict["log_level"] = parsed.log_level

if "d" in parsed:
# Override all if --debug provided
if parsed.debug:
args_dict["log_level"] = "DEBUG"


Expand Down Expand Up @@ -178,17 +225,14 @@ def parse_repos_to_convert_file_into_repos_dict(args_dict, repos_dict):

def clone_svn_repos(args_dict, repos_dict):

# Declare an empty dict for SVN repos to extract them from the repos_dict
svn_repos_dict = {}

# Loop through the repos_dict, find the type: SVN repos, then add them to the dict of SVN repos
for repo_key in repos_dict.keys():

repo_type = repos_dict[repo_key].get('type','').lower()

if repo_type == 'svn':

svn_clone_command = ["git","svn","clone"]
# Get config parameters read from repos-to-clone.yaml
svn_repo_code_root = repos_dict[repo_key].get('svn-repo-code-root','')
username = repos_dict[repo_key].get('username','')
password = repos_dict[repo_key].get('password','')
Expand All @@ -197,108 +241,105 @@ def clone_svn_repos(args_dict, repos_dict):
git_repo_name = repos_dict[repo_key].get('git-repo-name','')
git_default_branch = repos_dict[repo_key].get('git-default-branch','main')
authors_file_path = repos_dict[repo_key].get('authors-file-path','')
authors_prog_path = repos_dict[repo_key].get('authors-prog-path','')
git_ignore_file_path = repos_dict[repo_key].get('git-ignore-file-path','')
layout = repos_dict[repo_key].get('layout','')
layout = repos_dict[repo_key].get('layout','').lower()
trunk = repos_dict[repo_key].get('trunk','')
tags = repos_dict[repo_key].get('tags','')
branches = repos_dict[repo_key].get('branches','')

# Username and password may be required fields, test them by logging in
if username and password:

logging.info(f"Logging in to SVN repo {repo_key} with username {username}")
result = subprocess.run(["svn", "info", "--non-interactive", "--username", username, "--password", password, svn_repo_code_root])

logging.debug(f"Result of svn info login command: {result}")

if result.returncode == 0:
## Parse config parameters into command args
# TODO: Interpret code_host_name, git_org_name, and git_repo_name if not given
repo_path = str(args_dict["repo_share_path"]+"/"+code_host_name+"/"+git_org_name+"/"+git_repo_name)

## Define common command args
arg_svn_non_interactive = [ "--non-interactive" ] # Do not prompt, just fail if the command doesn't work, not supported by all commands
arg_svn_username = [ "--username", username ]
arg_svn_password = [ "--password", password ] # Only used for direct `svn` command
arg_svn_echo_password = [ "echo", password, "|" ] # Used for git svn commands
arg_svn_repo_code_root = [ svn_repo_code_root ]
arg_git_cfg = [ "git", "-C", repo_path, "config" ]
arg_git_svn = [ "git", "-C", repo_path, "svn" ]

## Define commands
cmd_svn_run_login = [ "svn", "info" ] + arg_svn_repo_code_root + arg_svn_non_interactive
cmd_git_cfg_default_branch = arg_git_cfg + [ "--global", "init.defaultBranch", git_default_branch ] # Possibility of collisions if multiple of these are run overlapping, make sure it's quick between reading and using this
cmd_git_run_svn_init = arg_git_svn + [ "init" ] + arg_svn_repo_code_root
cmd_git_cfg_bare_clone = arg_git_cfg + [ "core.bare", "true" ]
cmd_git_cfg_authors_file = arg_git_cfg + [ "svn.authorsfile", authors_file_path ]
cmd_git_cfg_authors_prog = arg_git_cfg + [ "svn.authorsProg", authors_prog_path ]
cmd_git_run_svn_fetch = arg_git_svn + [ "fetch" ]

## Modify commands based on config parameters
if username:
cmd_git_run_svn_init += arg_svn_username

logging.info(f"Logged in successfully to SVN repo {repo_key} with username {username}")
if password:
cmd_git_run_svn_init = arg_svn_echo_password + cmd_git_run_svn_init
cmd_git_run_svn_fetch = arg_svn_echo_password + cmd_git_run_svn_fetch

else:
if username and password:
cmd_svn_run_login += arg_svn_username + arg_svn_password

logging.warning(f"Failed to login to SVN repo {repo_key} with username {username}, skipping this repo")
continue
if layout:
cmd_git_run_svn_init += ["--stdlayout"]

# If username was provided
if username:
svn_clone_command.append(["--username", username])

# Need to find a way to handle the password and prevent interactive login
# # If password was provided
# if password:
# svn_clone_command.append(["--password", password])

# If layout was specified as standard
if layout == "standard":
svn_clone_command.append(["--stdlayout"])

# Otherwise, specify layout
else:
if trunk:
svn_clone_command += f" --trunk={trunk} "
if tags:
svn_clone_command += f" --tags={tags} "
for branch in branches:
svn_clone_command += f" --branches={branch} "

# If authors file was provided
if authors_file_path :
if os.path.exists(authors_file_path):
svn_clone_command += f" --authors-file={authors_file_path} "
else:
logging.warning(f"Authors file not found at {authors_file_path}, skipping")
# Warn the user if they provided an invalid value for the layout, only standard is supported
if "standard" not in layout and "std" not in layout:
logging.warning(f"Layout {layout} provided for repo {repo_key}, only standard is supported, continuing assuming standard")

# Add final parameters to the clone command
svn_clone_command += f" {svn_repo_code_root} "
svn_clone_command += f" {git_repo_name} "
if trunk:
cmd_git_run_svn_init += ["--trunk", trunk]
if tags:
cmd_git_run_svn_init += ["--tags", tags]
if branches:
cmd_git_run_svn_init += ["--branches", branches]

# Create the directories if they don't already exist
repo_path = str(args_dict["repo_path"]+"/"+code_host_name+"/"+git_org_name+"/"+git_repo_name)

# Check if specified path exists
## Run commands
# Create the repo path if it doesn't exist
if not os.path.exists(repo_path):

# If not, then create it
os.makedirs(repo_path)

# If default branch was provided, set it, otherwise set main
subprocess.run(["git", "config", "--global", "init.defaultBranch", git_default_branch])

#fork_and_wait(["git", "-C", repo_path, "svn", "init", svn_repo_code_root])
try:
subprocess.run(["git", "-C", repo_path, "svn", "init", svn_repo_code_root], check=True)
except subprocess.CalledProcessError:
pass
# Log in to the SVN server to test if credentials are needed / provided / valid
subprocess_run(cmd_svn_run_login, password)

# Configure the bare clone
subprocess.run(["git", "config", "--file", f"{repo_path}/.git/config", "--bool", "core.bare", "true"], check=True)

if git_ignore_file_path:
try:
logging.info(f"Copying gitignore file from {git_ignore_file_path} to {repo_path}")
#shutil.copy2(git_ignore_file_path, repo_path)
except FileNotFoundError:
logging.warning(f"Gitignore file not found at {git_ignore_file_path}")
# Set the default branch before init
subprocess_run(cmd_git_cfg_default_branch)

logging.info(f"Cloning SVN repo {repo_key}")
# Initialize the repo
subprocess_run(cmd_git_run_svn_init, password)

# Fork the process

result = subprocess.run(["git", "-C", repo_path, "svn", "fetch"], check=True, capture_output=True, text=True)
#fork_and_forget(["git", "-C", repo_path, "svn", "fetch"])
# Create the lockfile with the forked pid number

logging.debug(f"git svn fetch: {result}")
# Configure the bare clone
subprocess_run(cmd_git_cfg_bare_clone)

if result.returncode == 0:
# Configure the authors file, if provided
if authors_file_path:
if os.path.exists(authors_file_path):
subprocess_run(cmd_git_cfg_authors_file)
else:
logging.warning(f"Authors file not found at {authors_file_path}, skipping")

logging.info(f"git svn fetch succeeded for {repo_key}")
# Configure the authors program, if provided
if authors_prog_path:
if os.path.exists(authors_prog_path):
subprocess_run(cmd_git_cfg_authors_prog)
else:
logging.warning(f"Authors prog not found at {authors_prog_path}, skipping")

else:
# Configure the .gitignore file, if provided
if git_ignore_file_path:
if os.path.exists(git_ignore_file_path):
logging.debug(f"Copying .gitignore file from {git_ignore_file_path} to {repo_path}")
shutil.copy2(git_ignore_file_path, repo_path)
else:
logging.warning(f".gitignore file not found at {git_ignore_file_path}, skipping")

logging.warning(f"git svn fetch failed for {repo_key}")
# Run the svn_fetch_command
logging.info(f"Fetch SVN repo {repo_key}")
subprocess_run(cmd_git_run_svn_fetch, password)

# Create the lockfile with the forked pid number

def clone_tfs_repos(args_dict, repos_dict):

Expand Down
3 changes: 3 additions & 0 deletions bridge-repo-converter/build/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ then
BRIDGE_REPO_CONVERTER_INTERVAL_MINUTES=60
fi

# Write environment variables to where cron will use them
env >> /etc/environment

# Ensure the script is executable
chmod 744 /sourcegraph/run.py

Expand Down
27 changes: 4 additions & 23 deletions bridge-repo-converter/config/repos-to-convert.yaml
Original file line number Diff line number Diff line change
@@ -1,27 +1,8 @@
Apache Ant:
Apache Ambari:
type: SVN
svn-repo-code-root: https://svn.apache.org/repos/asf/ant/core
username: username
password: password
svn-repo-code-root: https://svn.apache.org/repos/asf/ambari/
code-host-name: svn.apache.org
git-org-name: apache
git-repo-name: ant
git-org-name: asf
git-repo-name: ambari
git-default-branch: main
authors-file-path: authors-file-path.txt
git-ignore-file-path: gitignore
layout: standard
trunk: trunk
tags: tags
branches:
- branches
TFVC Test Repo:
type: TFVC
tfvc-collection: https://dev.azure.com/marc-leblanc/
code-host-name: dev.azure.com
git-org-name: marc-leblanc
git-repo-name: tfvc-project-1
authors-file-path: authors-file-path.txt
git-ignore-file-path: gitignore
branches:
- $/tfvc-project-1/