From 6b28406dcaca4c52e6601612877ca1e2dbc9d99c Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Thu, 8 Aug 2024 11:35:22 +0300 Subject: Make load-with-metadata script to fetch metadata repository shallowly --- load/load-with-metadata.in | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/load/load-with-metadata.in b/load/load-with-metadata.in index a99709e..293f8db 100644 --- a/load/load-with-metadata.in +++ b/load/load-with-metadata.in @@ -93,12 +93,22 @@ fi load_commit="$repo_dir.load" touch "$load_commit" -# Pull the repository. +# Pull the repository shallowly. # if ! remote_url="$(git -C "$repo_dir" config --get remote.origin.url)"; then error "'$repo_dir' is not a git repository" fi +# Save the repository name and branch of where the latest commit has been +# fetched from, separated by space. For example 'origin master'. +# +refs=$(git -C "$repo_dir" log -1 --format=%D FETCH_HEAD) +repo_branch="$(sed -n -E 's%^.+ ([^/ ]+)/([^/ ]+)$%\1 \2%p' <<<"$refs")" + +if [[ -z "$repo_branch" ]]; then + error "unable to extract repository and branch from '$refs'" +fi + # Git doesn't support the connection timeout option. The options we use are # just an approximation of the former, that, in particular, don't cover the # connection establishing. To work around this problem, before running a git @@ -109,8 +119,12 @@ check_git_connectivity "$remote_url" "$timeout" # Fail if no network activity happens during the time specified. # +# Note: keep $repo_branch expansion unquoted. +# git -c http.lowSpeedLimit=1 -c "http.lowSpeedTime=$timeout" \ - -C "$repo_dir" pull -q + -C "$repo_dir" fetch -q --depth=1 $repo_branch + +git -C "$repo_dir" reset -q --hard FETCH_HEAD # Match the HEAD commit id to the one stored in the file. If it matches, then # nothing changed in the repository since it has been processed by brep-load -- cgit v1.1