#!/bin/bash

# Finds all internal, cross-page markdown links and checks that each linked page exists.
# Must be run at the root of the site.

set -euo pipefail

# Web URLs that are exempt from checking. One URL per line; a link is skipped
# when its URL is exactly equal to one of these lines.
ignored_web_urls='
/
'

# Prints a report for one broken link to stdout: a blank line followed by
# four labelled lines (error, source file, web URL, target file).
# Arguments:
#   $1 - markdown file that contains the link
#   $2 - URL exactly as written in the link
#   $3 - file path the URL was resolved to
#   $4 - short description of the problem
function print_error {
    # Keep the working variables local so that a call never overwrites
    # same-named variables belonging to the caller.
    local src_file="$1"
    local web_url="$2"
    local target_file="$3"
    local msg="$4"

    printf '\nError: %s\nSource file: %s\nWeb URL: %s\nTarget file: %s\n' \
        "$msg" "$src_file" "$web_url" "$target_file"
}

# Checks that the page targeted by one markdown link exists as a file, and
# reports the link through print_error if it does not.
#
# Only site-internal page links are checked. Links are silently skipped when
# the URL is external (http:// or https://), does not end with "/", or is
# listed in ignored_web_urls. A remaining URL must be site-absolute (start
# with "/"); it is mapped to a file relative to the current directory by
# dropping the leading "/" and replacing the trailing "/" with ".md",
# e.g. "/a/b/" -> "a/b.md".
#
# Arguments:
#   $1 - markdown file the link was found in (used for reporting only)
#   $2 - the complete link text, in the form [name](url)
# Globals:
#   ignored_web_urls (read) - newline-separated list of URLs to skip
# Returns:
#   Always 0; problems are reported on stdout via print_error.
function check_link {
    local src_file="$1"
    local md_link="$2"
    local web_url=""
    local target_file

    # The URL is the text between the last pair of parentheses. The regex is
    # kept in a variable so the parentheses need no shell-level quoting.
    local url_regex='\(([^()]*)\)$'
    if [[ $md_link =~ $url_regex ]]; then
        web_url="${BASH_REMATCH[1]}"
    fi

    # Ignore external (fully-qualified) URLs.
    if [[ $web_url =~ ^https?:// ]]; then
        return 0
    fi

    # Ignore if not ending with "/".
    if [[ $web_url != */ ]]; then
        return 0
    fi

    # Ignore special targets. "--" keeps a URL that starts with "-" from
    # being parsed as a grep option.
    if grep -Fxq -- "$web_url" <<<"${ignored_web_urls-}"; then
        return 0
    fi

    # Show error if using relative path. No file is resolved for such a
    # link, so the URL itself is reported as the target.
    if [[ $web_url != /* ]]; then
        print_error "$src_file" "$web_url" "$web_url" "Relative paths not allowed."
        return 0
    fi

    # Resolve the site-absolute URL against the current directory, not the
    # filesystem root: "/a/b/" -> "a/b.md".
    target_file="${web_url#/}"
    target_file="${target_file%/}.md"

    # Show error if file does not exist.
    if [[ ! -f $target_file ]]; then
        print_error "$src_file" "$web_url" "$target_file" "Target file does not exist."
    fi
    return 0
}

# Find all markdown pages below the current directory and check every
# markdown link in each of them.
#
# Pages are visited in bytewise order of the part of their path before the
# first ".". Paths are read one per line, so names containing spaces are
# handled; names containing newlines are not supported.
function main {
    # Matches one complete [name](url) link.
    local md_link_regex='\[[^\[\]]*\]\([^\(\)]*\)'
    local file md_link

    while IFS= read -r file; do
        # Extract MD-links and check them. "--" protects against page names
        # that start with "-"; "|| true" because a page without links makes
        # grep exit non-zero, which is not an error here.
        { grep -Po -- "$md_link_regex" "$file" || true; } |
            while IFS= read -r md_link; do
                check_link "$file" "$md_link"
            done
    done < <(find . -type f -name '*.md' | sed 's|^\./||' | LC_ALL=C sort -t. -k1,1)
}

main