1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768 |
#!/bin/bash
# Finds all internal, cross-page Markdown links and checks that each target
# page exists.
# Must be run at the root of the site.

set -eu -o pipefail

# Web URLs that are never checked, one per line. Each link URL is compared
# against these lines as a whole-line, fixed-string match.
readonly ignored_web_urls="
/
"
#######################################
# Prints a report for one offending link.
# Arguments:
#   $1 - source file that contains the link
#   $2 - web URL the link points to
#   $3 - file path the web URL was mapped to
#   $4 - error message
# Outputs:
#   An empty line followed by four labelled lines, written to stdout.
#######################################
print_error() {
  # Locals keep the caller's variables of the same names untouched.
  local src_file="$1"
  local web_url="$2"
  local target_file="$3"
  local msg="$4"

  printf '\nError: %s\nSource file: %s\nWeb URL: %s\nTarget file: %s\n' \
    "$msg" "$src_file" "$web_url" "$target_file"
}
#######################################
# Checks that the page targeted by one Markdown link exists.
#
# Silently skipped: external (http/https) URLs, URLs that do not end in "/",
# and URLs listed in ignored_web_urls. For every other URL the trailing "/"
# is replaced by ".md" and the resulting path must be a regular file;
# otherwise the problem is reported through print_error.
#
# Globals:
#   ignored_web_urls (read)
# Arguments:
#   $1 - source file that contains the link
#   $2 - the link, in the form [text](url)
# Returns:
#   0; problems are only reported via print_error.
#######################################
check_link() {
  local src_file="$1"
  local md_link="$2"
  local web_url
  local target_file

  # Pull out the text between the closing pair of parentheses. grep exits
  # non-zero when nothing matches; that just leaves web_url empty.
  web_url=$(grep -Po '(?<=\()[^\(\)]*(?=\)$)' <<<"$md_link" || true)

  # Ignore external (fully-qualified) URLs.
  if [[ "$web_url" =~ ^https?:// ]]; then
    return 0
  fi

  # Ignore if not ending with "/" (this also covers an empty URL).
  if [[ "$web_url" != */ ]]; then
    return 0
  fi

  # Ignore special targets. "--" keeps a URL starting with "-" from being
  # parsed as a grep option.
  if grep -Fxq -- "$web_url" <<<"$ignored_web_urls"; then
    return 0
  fi

  # Map the page URL to its source file: trailing "/" becomes ".md".
  target_file="${web_url%/}.md"

  # Show error if using relative path.
  # NOTE(review): target_file always ends in ".md" at this point, so this
  # branch cannot fire as written. The message suggests it was meant to
  # reject relative URLs; confirm the intended test before changing which
  # links are accepted.
  if [[ "$target_file" == */ ]]; then
    print_error "$src_file" "$web_url" "$target_file" "Relative paths not allowed."
    return 0
  fi

  # Show error if file does not exist.
  if [[ ! -f "$target_file" ]]; then
    print_error "$src_file" "$web_url" "$target_file" "Target file does not exist."
  fi
  return 0
}
# Find all Markdown pages and check every link in them.

# Matches inline Markdown links of the form [text](target).
md_link_regex='\[[^\[\]]*\]\([^\(\)]*\)'

# Pages are read one path per line, with the leading "./" stripped and in
# byte-wise sorted order. Reading with "IFS= read -r" instead of expanding
# $(find ...) keeps paths containing spaces or glob characters intact.
while IFS= read -r file; do
  # Extract MD-links and check them. grep exits non-zero when a page has no
  # links, which is not an error here. "--" protects against patterns or file
  # names that start with "-".
  { grep -Po -- "$md_link_regex" "$file" || true; } | while IFS= read -r md_link; do
    check_link "$file" "$md_link"
  done
done < <(find . -type f -name '*.md' | sed 's|^\./||' | LC_ALL=C sort -t. -k1,1)
|