stats.sh: speed up stats extraction

This commit is contained in:
XhmikosR
2025-11-22 08:26:52 +02:00
parent 21ce4c0266
commit 9257ce1e43

View File

@@ -1,13 +1,69 @@
#!/usr/bin/env bash
# shellcheck enable=require-variable-braces
#
# Rebuild stats.out in the current directory.
#
# For every tag (oldest first, by creator date) the file readmeData.json is
# read at that tag and its .base.entries value is extracted. Each value is
# written as one "<tag date>,<value>" line; when .base.entries is an array,
# one line is written per element. Tags without a readable readmeData.json,
# or without a usable .base.entries, are skipped.
#
# All blobs are fetched through a single `git cat-file --batch` process
# instead of one `git show` per tag.

set -euo pipefail

# Ensure required tools are installed
for tool in git jq; do
  if ! command -v "${tool}" > /dev/null 2>&1; then
    echo "Error: ${tool} not found in PATH" >&2
    exit 1
  fi
done

# Fail early (with git's own message) when not inside a repository: a git
# failure inside the process substitutions below would otherwise go unnoticed.
git rev-parse --git-dir > /dev/null

# Clear output file
: > stats.out

# Collect "<tag>:readmeData.json <date>" pairs for all tags, oldest first.
# Ref names cannot contain spaces, so the first space separates the fields.
mapfile -t taglist < <(
  git for-each-ref --sort=creatordate \
    --format='%(refname:short):readmeData.json %(creatordate:short)' refs/tags
)

# Nothing to do without tags. This also avoids expanding an empty array,
# which trips `set -u` on bash older than 4.4.
if ((${#taglist[@]} == 0)); then
  exit 0
fi

# Feed all object names (the part before the first space) into one cat-file
# process and read its replies from fd 3. Replies come back in request order,
# so they are consumed in lock-step with taglist below.
exec 3< <(printf '%s\n' "${taglist[@]%% *}" | git cat-file --batch)

for line in "${taglist[@]}"; do
  object=${line%% *}
  tag_date=${line#* }

  # Reply header is "<oid> <type> <size>", or "<name> missing" when the tag
  # has no readmeData.json.
  if ! read -r -a header_fields <&3; then
    break
  fi

  # Skip if header is incomplete (e.g. a "missing" reply, which has no body)
  if ((${#header_fields[@]} < 3)); then
    continue
  fi

  size=${header_fields[2]}

  # Validate that size is numeric
  if [[ ! "${size}" =~ ^[0-9]+$ ]]; then
    continue
  fi

  # Read exactly ${size} BYTES of blob content. cat-file reports the size in
  # bytes while `read -N` counts characters in the current locale, so force
  # the C locale for this read; otherwise multi-byte UTF-8 content would make
  # the read overrun into the next header.
  blob=''
  if ((size > 0)); then
    if ! LC_ALL=C IFS= read -r -N "${size}" blob <&3; then
      echo "Error: truncated cat-file output for ${object}" >&2
      exit 1
    fi
  fi

  # Consume the newline that follows the blob
  read -r _ <&3 || true

  if [[ -z "${blob}" ]]; then
    continue
  fi

  # The blob is passed on stdin rather than as a command-line argument so
  # that large files cannot exceed the OS limit on argument size.
  # Extraction stays best-effort (older tags may hold unusable JSON): jq's
  # own diagnostics are dropped, but the skipped object is reported.
  if ! jq -r --arg date "${tag_date}" '
    (.base.entries // empty)
    | if type == "array" then .[] else . end
    | "\($date),\(.)"
  ' <<< "${blob}" 2> /dev/null; then
    echo "Warning: could not extract entries from ${object}" >&2
  fi
done >> stats.out

# Close the cat-file reader
exec 3<&-