diff options
Diffstat (limited to 'scripts')
| -rwxr-xr-x | scripts/fetch-tlds.sh | 816 |
1 files changed, 816 insertions, 0 deletions
#!/usr/bin/env bash
# fetch-tlds.sh — Scrape purchasable TLD lists from registrar APIs
# Outputs clean sorted lists for use in Lists.toml
#
# Usage:
#   ./scripts/fetch-tlds.sh              # fetch all, show summary
#   ./scripts/fetch-tlds.sh porkbun      # porkbun only
#   ./scripts/fetch-tlds.sh inwx         # inwx only
#   ./scripts/fetch-tlds.sh --raw        # output raw TLD lists (one per line)
#   ./scripts/fetch-tlds.sh --toml       # output TOML-ready arrays
#   ./scripts/fetch-tlds.sh --diff       # compare against current Lists.toml
#   ./scripts/fetch-tlds.sh --template   # generate full Lists.toml with whois overrides if necessary
#
# Note: this script was AI-generated rather than hand-written (unlike most of
# the project's Rust code) and is only known to "somewhat work".

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
LISTS_TOML="$PROJECT_DIR/Lists.toml"
# Created lazily by _cached_fetch, so e.g. --help has no filesystem side effect.
CACHE_DIR="$PROJECT_DIR/.tld-cache"

readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly CYAN='\033[0;36m'
readonly BOLD='\033[1m'
readonly NC='\033[0m'

# ─── Cache helpers ──────────────────────────────────────────────────────────

# Print the modification time of file $1 in epoch seconds (0 if unknown).
# Tries the GNU stat syntax first, then the BSD/macOS one.
_file_mtime() {
  stat -c %Y "$1" 2>/dev/null || stat -f %m "$1" 2>/dev/null || echo 0
}

# Succeed when file $1 exists, is non-empty and is younger than $2 seconds.
_cache_is_fresh() {
  local file="$1" max_age="$2"
  local now mtime
  [[ -s "$file" ]] || return 1
  now=$(date +%s)
  mtime=$(_file_mtime "$file")
  ((now - mtime < max_age))
}

# _cached_fetch CACHE_FILE MAX_AGE FETCH_MSG FAIL_MSG CURL_ARGS...
#
# Prints the path of CACHE_FILE on stdout when a fresh copy exists or could be
# downloaded with `curl -sf CURL_ARGS...`; returns 1 otherwise.
# FETCH_MSG / FAIL_MSG are written to stderr (with `echo -e`, so colour
# escapes work). The download goes to a temporary file first and is only moved
# into place when it succeeded and is non-empty, so a failed or truncated
# transfer can never be mistaken for a valid cache entry.
_cached_fetch() {
  local cache="$1" max_age="$2" fetch_msg="$3" fail_msg="$4"
  shift 4
  local tmp=""

  if _cache_is_fresh "$cache" "$max_age"; then
    echo "$cache"
    return 0
  fi

  echo -e "$fetch_msg" >&2
  if mkdir -p -- "${cache%/*}" \
    && tmp=$(mktemp "${cache}.XXXXXX") \
    && curl -sf "$@" -o "$tmp" 2>/dev/null \
    && [[ -s "$tmp" ]] \
    && mv -f -- "$tmp" "$cache"; then
    echo "$cache"
    return 0
  fi

  if [[ -n "$tmp" ]]; then
    rm -f -- "$tmp"
  fi
  echo -e "$fail_msg" >&2
  return 1
}

# ─── Porkbun ────────────────────────────────────────────────────────────────
# Prints the path of the cached pricing JSON (24h cache).
fetch_porkbun() {
  # The pricing endpoint is public: no API key is needed.
  _cached_fetch "$CACHE_DIR/porkbun.json" 86400 \
    "${CYAN}Fetching Porkbun pricing API...${NC}" \
    "${RED}Failed to fetch Porkbun data${NC}" \
    -X POST "https://api.porkbun.com/api/json/v3/pricing/get" \
    -H "Content-Type: application/json" \
    -d '{}'
}

# Print the TLD keys of a Porkbun pricing response, sorted and unique.
# Response format: {"status":"SUCCESS","pricing":{"com":{...},"net":{...},...}}
# Always returns 0; unparsable input yields no output.
parse_porkbun() {
  local json_file="$1"
  if command -v jq &>/dev/null; then
    { jq -r '.pricing // {} | keys[]' "$json_file" 2>/dev/null || true; } \
      | sort -u
  else
    # Fallback without jq (less reliable): every `"key":{` is taken as a TLD,
    # except the enclosing "pricing" object itself.
    { grep -o '"[a-z][a-z0-9.-]*":{' "$json_file" || true; } \
      | sed 's/"//g; s/:{//' \
      | { grep -vx 'pricing' || true; } \
      | sort -u
  fi
}

# ─── INWX ───────────────────────────────────────────────────────────────────
# Prints the path of the cached domain-check page (24h cache).
fetch_inwx() {
  # INWX domain check page has TLD list embedded as JSON
  _cached_fetch "$CACHE_DIR/inwx.html" 86400 \
    "${CYAN}Fetching INWX domain list...${NC}" \
    "${YELLOW}Could not fetch INWX${NC}" \
    -L "https://www.inwx.de/en/domain/check" \
    -H "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)"
}

# Print the TLDs embedded in the page as `"tld":"xxx"`, sorted and unique.
# Always returns 0, even when the page contains no such entries.
parse_inwx() {
  local html_file="$1"
  { grep -oE '"tld":"[a-z]{2,20}"' "$html_file" || true; } \
    | sed 's/"tld":"//;s/"//' \
    | sort -u
}

# ─── OVH ────────────────────────────────────────────────────────────────────
# Prints the path of the cached extensions JSON (24h cache).
fetch_ovh() {
  _cached_fetch "$CACHE_DIR/ovh.json" 86400 \
    "${CYAN}Fetching OVH domain extensions...${NC}" \
    "${YELLOW}Could not fetch OVH extensions${NC}" \
    "https://www.ovh.com/engine/apiv6/domain/extensions" \
    -H "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)"
}

# Print the top-level TLDs of the OVH extension list, sorted and unique.
# Entries containing a dot (sub-TLDs like com.au) are dropped. Always returns 0.
parse_ovh() {
  local json_file="$1"
  if command -v jq &>/dev/null; then
    { jq -r '.[]' "$json_file" 2>/dev/null || true; } \
      | { grep -vE '\.' || true; } \
      | sort -u
  else
    { grep -oE '"[a-z]{2,20}"' "$json_file" || true; } \
      | tr -d '"' \
      | { grep -vE '\.' || true; } \
      | sort -u
  fi
}

# ─── tld-list.com (comprehensive registry, free basic list) ────────────────
# Prints the path of the cached basic list (24h cache).
fetch_tldlist() {
  _cached_fetch "$CACHE_DIR/tldlist-basic.txt" 86400 \
    "${CYAN}Fetching tld-list.com basic list...${NC}" \
    "${YELLOW}Could not fetch tld-list.com${NC}" \
    "https://tld-list.com/df/tld-list-basic.csv" \
    -H "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)"
}

# Print the ASCII TLDs of the basic list, sorted and unique.
# One TLD per line, CR/LF endings, includes IDN entries — filter to ASCII only.
parse_tldlist() {
  local file="$1"
  tr -d '\r' <"$file" \
    | { grep -E '^[a-z][a-z0-9]*$' || true; } \
    | sort -u
}

# ─── IANA root zone (fallback) ──────────────────────────────────────────────
# Prints the path of the cached root-zone TLD list (7 day cache).
fetch_iana() {
  _cached_fetch "$CACHE_DIR/iana-tlds.txt" 604800 \
    "${CYAN}Fetching IANA TLD list...${NC}" \
    "${RED}Failed to fetch IANA list${NC}" \
    "https://data.iana.org/TLD/tlds-alpha-by-domain.txt"
}

# Print every TLD of the IANA list in lowercase, sorted and unique.
# The first line of the file is skipped as a header; no length filter is
# applied here (see parse_iana_cctlds for the 2-letter subset).
parse_iana() {
  local file="$1"
  tail -n +2 "$file" | tr '[:upper:]' '[:lower:]' | sort -u
}

# Print only the 2-letter entries of the IANA list, lowercase, sorted, unique.
parse_iana_cctlds() {
  local file="$1"
  tail -n +2 "$file" \
    | tr '[:upper:]' '[:lower:]' \
    | { grep -E '^[a-z]{2}$' || true; } \
    | sort -u
}

# ─── RDAP bootstrap (what actually has lookup servers) ──────────────────────
# Prints the path of the cached RDAP bootstrap JSON (24h cache).
fetch_rdap() {
  _cached_fetch "$CACHE_DIR/rdap-dns.json" 86400 \
    "${CYAN}Fetching RDAP bootstrap...${NC}" \
    "${RED}Failed to fetch RDAP bootstrap${NC}" \
    "https://data.iana.org/rdap/dns.json"
}

# Print the TLDs listed in the RDAP bootstrap file, lowercase, sorted, unique.
parse_rdap_tlds() {
  local json_file="$1"
  if command -v jq &>/dev/null; then
    # Each service is [[tlds...], [urls...]]; flatten and drop the URLs.
    { jq -r '.services[][] | .[]' "$json_file" 2>/dev/null || true; } \
      | { grep -v '^http' || true; } \
      | tr '[:upper:]' '[:lower:]' \
      | sort -u
  else
    # Fallback without jq: take quoted lowercase words, but drop those that
    # are followed by a colon, i.e. JSON object keys rather than values.
    { grep -oE '"[a-z]{2,20}"[[:space:]]*:?' "$json_file" || true; } \
      | { grep -v ':$' || true; } \
      | tr -d '"[:space:]' \
      | sed 's/$//' \
      | sort -u
  fi
}

# ─── WHOIS server list (rfc1036/whois project) ─────────────────────────────
# Prints the path of the cached tld_serv_list (7 day cache).
fetch_whois_servers() {
  _cached_fetch "$CACHE_DIR/tld_serv_list.txt" 604800 \
    "${CYAN}Fetching WHOIS server list...${NC}" \
    "${YELLOW}Could not fetch WHOIS server list${NC}" \
    "https://raw.githubusercontent.com/rfc1036/whois/next/tld_serv_list"
}

# Get the WHOIS server for a given TLD from the cached server list.
#   $1 - TLD without the leading dot
#   $2 - path to the server list file
# Prints the server hostname, or an empty line when the TLD is not listed or
# its entry has no usable host (NONE, a bare tag, a web URL, ...).
#
# Line format: .tld [optional-tag] server [# comment]
# Only the first line for the TLD is considered. The trailing "# comment" is
# removed before looking at the last field, because comments frequently
# contain a website hostname that must not be taken for a WHOIS server.
get_whois_server() {
  local tld="$1"
  local serv_file="$2"
  local server=""
  server=$(awk -v key=".${tld}" '
    $1 == key {
      sub(/[[:space:]]*#.*/, "")
      if (NF >= 2 && $NF ~ /^[a-z0-9][a-z0-9.-]*\.[a-z][a-z0-9-]*$/) {
        print $NF
      }
      exit
    }
  ' "$serv_file" 2>/dev/null) || server=""
  echo "$server"
}

# Get WHOIS server from IANA directly (slower, single TLD at a time).
# Prints the value following "WHOIS Server:" on the TLD's root-db page, if any.
get_iana_whois_server() {
  local tld="$1"
  curl -s "https://www.iana.org/domains/root/db/${tld}.html" 2>/dev/null \
    | sed -n 's/.*WHOIS Server:<\/b> *\([^ <]*\).*/\1/p' \
    | head -1
}

# ─── Extract current Lists.toml entries ─────────────────────────────────────
# Print the TLDs of one named list in $LISTS_TOML, sorted and unique.
#   $1 - list name (default: all)
# Quotes, commas and ":whois.server" override suffixes are stripped; `#`
# comments are ignored. Entries on the same line as `name = [` are included
# and the list ends at the first `]`. Returns 1 when $LISTS_TOML is missing.
parse_current_lists() {
  local list_name="${1:-all}"
  if [[ ! -f "$LISTS_TOML" ]]; then
    echo -e "${RED}No Lists.toml found at $LISTS_TOML${NC}" >&2
    return 1
  fi
  awk -v list="$list_name" '
    # Print every entry found in text, without its whois override suffix.
    function emit(text,    n, i, parts) {
      gsub(/["\t,]/, " ", text)
      n = split(text, parts, " ")
      for (i = 1; i <= n; i++) {
        sub(/:.*/, "", parts[i])
        if (parts[i] != "") print parts[i]
      }
    }
    # Handle one line of list content; stop at the closing bracket.
    function consume(text) {
      sub(/#.*/, "", text)
      if (text ~ /\]/) {
        sub(/\].*/, "", text)
        emit(text)
        exit
      }
      emit(text)
    }
    !found && $0 ~ "^" list " *= *\\[" {
      found = 1
      rest = $0
      sub(/^[^[]*\[/, "", rest)
      consume(rest)
      next
    }
    found && /^[[:space:]]*\[/ { exit }
    found { consume($0) }
  ' "$LISTS_TOML" | sort -u
}

# ─── Helpers ────────────────────────────────────────────────────────────────
# Reads TLDs from stdin (one per line, blank lines ignored) and prints them as
# tab-indented TOML array rows, each ending in a comma, wrapped at ~80 chars.
# Prints nothing for empty input so that callers never emit a lone comma.
to_toml_array() {
  local tld entry candidate
  local row=""
  while IFS= read -r tld || [[ -n "$tld" ]]; do
    [[ -z "$tld" ]] && continue
    entry="\"$tld\""
    if [[ -z "$row" ]]; then
      row="$entry"
      continue
    fi
    candidate="$row, $entry"
    # 78-column budget, of which 2 are reserved for the indentation.
    if ((${#candidate} > 76)); then
      printf '\t%s,\n' "$row"
      row="$entry"
    else
      row="$candidate"
    fi
  done
  if [[ -n "$row" ]]; then
    printf '\t%s,\n' "$row"
  fi
  return 0
}

# The filters below read TLDs on stdin. They return 0 even when nothing
# matches (grep status 1), so they are safe in pipelines under pipefail;
# real grep errors (status 2) are still reported.

# Keep only 2-letter entries.
filter_cctlds() {
  grep -E '^[a-z]{2}$' || [[ $? -eq 1 ]]
}

# Keep 2-6 char TLDs that are useful for domain hacking.
filter_short_tlds() {
  grep -E '^[a-z]{2,6}$' || [[ $? -eq 1 ]]
}

# ─── Known broken/unregistrable TLDs ────────────────────────────────────────
SKIP_TLDS="bl bq eh mf gb bv sj kp hm"

# Drop every TLD listed in $SKIP_TLDS.
filter_skip() {
  local skip_pattern="${SKIP_TLDS// /|}"
  grep -vE "^($skip_pattern)$" || [[ $? -eq 1 ]]
}

# ─── Template generation ────────────────────────────────────────────────────
# Generates a full Lists.toml with:
#   - "tld" for TLDs with RDAP support (direct lookup works)
#   - "tld:whois.server" for TLDs needing WHOIS fallback
#   - skip TLDs omitted entirely (no Patch.toml needed)
#
# Uses: Porkbun + OVH + INWX (purchasable), RDAP bootstrap (has server?), WHOIS server list
# With --all-sources: also cross-references
# tld-list.com
#
# Arguments:
#   $1 - newline-separated purchasable TLDs (already merged from registrars)
#   $2 - newline-separated TLDs that appear in the RDAP bootstrap
#   $3 - human-readable source summary used in the generated header
# Outputs:
#   The Lists.toml content on stdout; progress and statistics on stderr.
# Requires bash 4.3+ (associative arrays and namerefs).
generate_template() {
  local all_registrar_tlds="$1"
  local rdap_tlds="$2"
  local source_summary="$3"

  # The WHOIS server list is optional: without it, every TLD lacking RDAP is
  # simply emitted bare.
  local whois_serv_file=""
  whois_serv_file=$(fetch_whois_servers 2>/dev/null) || whois_serv_file=""

  # The input is already merged + filtered from all registrar sources.
  # `|| true`: an empty list makes grep (inside filter_skip) return 1.
  local buyable_tlds
  buyable_tlds=$(echo "$all_registrar_tlds" | filter_skip | sort -u) || true

  local buyable_count
  buyable_count=$(_count_nonempty <<<"$buyable_tlds")

  # Index the RDAP TLDs once instead of grepping the list for every TLD.
  local -A has_rdap=()
  local tld
  while IFS= read -r tld; do
    if [[ -n "$tld" ]]; then
      has_rdap["$tld"]=1
    fi
  done <<<"$rdap_tlds"

  # Build annotated TLD list: "tld" or "tld:whois_server".
  # A TLD needs a whois override if it is NOT in the RDAP bootstrap.
  local -A annotation=()
  local annotated_all=()
  local annotated_cc=()
  local rdap_hit=0 whois_hit=0 bare_hit=0
  local entry server

  while IFS= read -r tld; do
    [[ -z "$tld" ]] && continue
    server=""
    if [[ -n "${has_rdap[$tld]:-}" ]]; then
      # Has RDAP server — no override needed
      entry="$tld"
      rdap_hit=$((rdap_hit + 1))
    else
      # No RDAP — try to find WHOIS server
      if [[ -n "$whois_serv_file" ]]; then
        server=$(get_whois_server "$tld" "$whois_serv_file") || server=""
      fi
      if [[ -n "$server" ]]; then
        entry="${tld}:${server}"
        whois_hit=$((whois_hit + 1))
      else
        # No known server — include bare, hoardom will try common patterns
        entry="$tld"
        bare_hit=$((bare_hit + 1))
      fi
    fi
    annotation["$tld"]="$entry"
    annotated_all+=("$entry")
    # Also track ccTLDs (2-letter entries)
    if [[ "$tld" =~ ^[a-z]{2}$ ]]; then
      annotated_cc+=("$entry")
    fi
  done <<<"$buyable_tlds"

  echo -e "${CYAN}Building template...${NC}" >&2
  echo -e " ${GREEN}${rdap_hit}${NC} TLDs with RDAP (direct lookup)" >&2
  echo -e " ${YELLOW}${whois_hit}${NC} TLDs with WHOIS override" >&2
  echo -e " ${RED}${bare_hit}${NC} TLDs with no known server (will probe)" >&2
  echo "" >&2

  # ── Curated lists (bare TLD names, annotated automatically) ─────────

  # Standard: com, net, org + generally desirable / well-known TLDs
  local standard_tlds=(
    "com" "net" "org" "io" "co" "dev" "app" "me" "info"
    "biz" "one" "xyz" "online" "site" "tech" "pro" "tv"
    "cc" "to" "sh" "li" "fm" "am" "gg" "ws" "la"
    "ms" "nu" "cx" "mn" "st" "tel" "ai" "id" "in"
    "it" "is" "at" "be" "de" "eu" "fr" "nl" "se"
    "uk" "us" "ca" "au" "nz" "club" "blog" "art" "fun"
    "lol" "wtf" "page" "link" "space" "store" "shop"
  )

  # Decent: the best of the best — com, net, org, io + short desirable ones
  # that work great for domain hacking and are punchy
  local decent_tlds=(
    "com" "net" "org" "io" "dev" "app" "co" "me"
    "ai" "sh" "to" "fm" "tv" "gg" "cc" "li" "am"
    "la" "nu" "id" "in" "it" "is" "at" "ws"
    "one" "pro" "bio" "art" "ink" "run" "win" "new"
    "lol" "pub" "fun" "vet" "fit" "rip" "wtf" "zip"
  )

  # Swiss: standard-like but with Swiss / Central European related TLDs up front
  local swiss_tlds=(
    "com" "net" "org" "ch" "li" "swiss" "zuerich"
    "io" "co" "dev" "app" "me" "info" "one" "pro"
    "de" "at" "fr" "it" "eu"
    "tech" "online" "site" "shop" "store"
    "biz" "xyz" "tv" "cc" "to" "sh" "fm" "am" "gg"
  )

  # One annotated entry per line; the `+` expansion keeps an empty array from
  # tripping `set -u` on older bash versions.
  local all_lines cc_lines
  all_lines=$(printf '%s\n' ${annotated_all[@]+"${annotated_all[@]}"})
  cc_lines=$(printf '%s\n' ${annotated_cc[@]+"${annotated_cc[@]}"})

  # ─── Output ─────────────────────────────────────────────────────────
  local date_str
  date_str=$(date +%Y-%m-%d)

  cat <<HEADER
# Lists.toml — Built-in TLD lists for hoardom
# Auto-generated on ${date_str} from ${source_summary}
#
# Format:
# "tld" — TLD has RDAP support, lookup works directly
# "tld:whois.server" — No RDAP: use this WHOIS server for fallback
#
# ${buyable_count} purchasable TLDs (handshake/sub-TLDs excluded)
# ${rdap_hit} have RDAP, ${whois_hit} need WHOIS override, ${bare_hit} will auto-probe
#
# Lists:
# standard — common desirable TLDs (com, net, org, io, dev, ...)
# decent — very best short punchy TLDs for domain hacking
# swiss — standard-like but with Swiss/Central European TLDs prioritized
# country — all 2-letter country-code TLDs
# two — all 2-letter TLDs
# three — all TLDs with 3 or fewer letters
# four — all TLDs with exactly 4 letters
# long — all TLDs with 5+ letters
# all — everything

HEADER

  _annotate_tlds annotation "${standard_tlds[@]}" | _emit_toml_list standard
  echo ""
  _annotate_tlds annotation "${decent_tlds[@]}" | _emit_toml_list decent
  echo ""
  _annotate_tlds annotation "${swiss_tlds[@]}" | _emit_toml_list swiss
  echo ""
  _emit_toml_list country <<<"$cc_lines"
  echo ""
  _filter_by_base_length 2 2 <<<"$all_lines" | _emit_toml_list two
  echo ""
  _filter_by_base_length 2 3 <<<"$all_lines" | _emit_toml_list three
  echo ""
  _filter_by_base_length 4 4 <<<"$all_lines" | _emit_toml_list four
  echo ""
  _filter_by_base_length 5 99 <<<"$all_lines" | _emit_toml_list long
  echo ""
  _emit_toml_list all <<<"$all_lines"
}

# ─── Small helpers used by generate_template and main ───────────────────────

# Print the number of non-empty lines on stdin (always a single number).
# `grep -c` already prints 0 when nothing matches; only its status is masked.
_count_nonempty() {
  grep -c . || true
}

# Print the number of 2-letter entries on stdin.
_count_cctlds() {
  { filter_cctlds || true; } | _count_nonempty
}

# _annotate_tlds MAP_NAME TLD...
# Print each TLD as recorded in the associative array MAP_NAME, or bare when
# the map has no entry for it.
_annotate_tlds() {
  local -n _annotation_map="$1"
  shift
  local bare
  for bare in "$@"; do
    printf '%s\n' "${_annotation_map[$bare]:-$bare}"
  done
}

# _filter_by_base_length MIN MAX
# Pass through the stdin entries whose TLD part (before any ":server"
# suffix) has between MIN and MAX characters.
_filter_by_base_length() {
  local min="$1" max="$2"
  local ann base
  while IFS= read -r ann; do
    base="${ann%%:*}"
    if ((${#base} >= min && ${#base} <= max)); then
      printf '%s\n' "$ann"
    fi
  done
}

# _emit_toml_list NAME
# Print `NAME = [ ... ]` with the entries read from stdin. An empty list is
# printed as an empty array rather than being passed to to_toml_array.
_emit_toml_list() {
  local name="$1"
  local entries
  entries=$(cat)
  echo "$name = ["
  if [[ -n "$entries" ]]; then
    to_toml_array <<<"$entries"
  fi
  echo "]"
}

# Print the command-line help.
_usage() {
  echo "Usage: $0 [source] [--raw|--toml|--diff|--template] [--all-sources]"
  echo ""
  echo "Sources: porkbun, ovh, inwx, iana, rdap, tldlist"
  echo ""
  echo "Flags:"
  echo " --raw Output raw TLD list (one per line)"
  echo " --toml Output TOML-ready arrays"
  echo " --diff Compare against current Lists.toml"
  echo " --template Generate full Lists.toml with whois overrides"
  echo " --all-sources Include tld-list.com for extra coverage (used as"
  echo " a filter: only TLDs also in a registrar are kept)"
}

# ─── Main ───────────────────────────────────────────────────────────────────
# Parses the arguments, fetches the selected sources, merges the registrar
# lists and renders the requested mode (summary, raw, toml, diff, template).
main() {
  local mode="summary"
  local source="all"
  local all_sources=false
  local arg

  for arg in "$@"; do
    case "$arg" in
      --raw) mode="raw" ;;
      --toml) mode="toml" ;;
      --diff) mode="diff" ;;
      --template) mode="template" ;;
      --all-sources) all_sources=true ;;
      porkbun) source="porkbun" ;;
      inwx) source="inwx" ;;
      ovh) source="ovh" ;;
      iana) source="iana" ;;
      rdap) source="rdap" ;;
      tldlist) source="tldlist" ;;
      --help | -h)
        _usage
        exit 0
        ;;
      *)
        # A mistyped flag used to be ignored and silently ran the summary.
        echo -e "${RED}Unknown argument: $arg${NC}" >&2
        _usage >&2
        exit 2
        ;;
    esac
  done

  local porkbun_tlds="" inwx_tlds="" ovh_tlds="" iana_tlds="" rdap_tlds="" tldlist_tlds=""
  local porkbun_count=0 inwx_count=0 ovh_count=0 iana_count=0 rdap_count=0 tldlist_count=0
  local porkbun_file="" ovh_file="" inwx_file="" iana_file="" rdap_file="" tldlist_file=""

  # Template mode needs all registrar sources + rdap regardless of source filter
  if [[ "$mode" == "template" ]]; then
    source="all"
  fi

  # ── Fetch from selected sources ──
  # A parser failure is treated like "no data" instead of aborting the script.

  if [[ "$source" == "all" || "$source" == "porkbun" ]]; then
    if porkbun_file=$(fetch_porkbun); then
      porkbun_tlds=$(parse_porkbun "$porkbun_file") || true
      porkbun_count=$(_count_nonempty <<<"$porkbun_tlds")
    fi
  fi

  if [[ "$source" == "all" || "$source" == "ovh" ]]; then
    if ovh_file=$(fetch_ovh); then
      ovh_tlds=$(parse_ovh "$ovh_file") || true
      ovh_count=$(_count_nonempty <<<"$ovh_tlds")
    fi
  fi

  if [[ "$source" == "all" || "$source" == "inwx" ]]; then
    if inwx_file=$(fetch_inwx 2>/dev/null); then
      inwx_tlds=$(parse_inwx "$inwx_file") || true
      inwx_count=$(_count_nonempty <<<"$inwx_tlds")
    fi
  fi

  if [[ "$source" == "all" || "$source" == "iana" ]]; then
    if iana_file=$(fetch_iana); then
      iana_tlds=$(parse_iana "$iana_file") || true
      iana_count=$(_count_nonempty <<<"$iana_tlds")
    fi
  fi

  if [[ "$source" == "all" || "$source" == "rdap" ]]; then
    if rdap_file=$(fetch_rdap); then
      rdap_tlds=$(parse_rdap_tlds "$rdap_file") || true
      rdap_count=$(_count_nonempty <<<"$rdap_tlds")
    fi
  fi

  if [[ "$all_sources" == true || "$source" == "tldlist" ]]; then
    if tldlist_file=$(fetch_tldlist); then
      tldlist_tlds=$(parse_tldlist "$tldlist_file") || true
      tldlist_count=$(_count_nonempty <<<"$tldlist_tlds")
    fi
  fi

  # ── Filter porkbun: no handshake, no sub-TLDs ──
  local porkbun_filtered=""
  if [[ -n "$porkbun_tlds" ]]; then
    if command -v jq &>/dev/null && [[ -f "$porkbun_file" ]]; then
      porkbun_filtered=$(jq -r '
        .pricing // {} | to_entries[] |
        select(.key | contains(".") | not) |
        select(.value.specialType // "" | test("handshake") | not) |
        .key
      ' "$porkbun_file" 2>/dev/null | sort -u) || porkbun_filtered=""
    fi
    if [[ -z "$porkbun_filtered" ]]; then
      # No jq (or jq could not apply the filter): at least drop sub-TLDs.
      porkbun_filtered=$(echo "$porkbun_tlds" | grep -v '\.' | sort -u) || true
    fi
  fi

  # ── Merge all registrar-confirmed purchasable TLDs ──
  # Only TLDs that have pricing at a real registrar are included.
  # `|| true`: with no registrar source selected every grep in this pipeline
  # matches nothing, which must not abort the script under `set -e`.
  local registrar_tlds
  registrar_tlds=$(printf '%s\n' "$porkbun_filtered" "$ovh_tlds" "$inwx_tlds" \
    | grep -E '^[a-z]' | sort -u | filter_skip) || true

  # With --all-sources, tld-list.com is only used as a cross-check: entries
  # that no registrar sells are brand/reserved TLDs and stay excluded.
  if [[ "$all_sources" == true && -n "$tldlist_tlds" ]]; then
    local tldlist_extra
    tldlist_extra=$(comm -23 <(echo "$tldlist_tlds") <(echo "$registrar_tlds") 2>/dev/null || true)
    local extra_count
    extra_count=$(_count_nonempty <<<"$tldlist_extra")
    echo -e " ${YELLOW}tld-list.com:${NC} $extra_count TLDs with no registrar pricing (brand/reserved, excluded)" >&2
  fi

  local all_tlds="$registrar_tlds"
  local all_cctlds
  all_cctlds=$(echo "$all_tlds" | filter_cctlds) || true
  local all_count cc_count
  all_count=$(_count_nonempty <<<"$all_tlds")
  cc_count=$(_count_nonempty <<<"$all_cctlds")

  # Build source summary string for template header
  local sources_used=()
  if ((porkbun_count > 0)); then sources_used+=("Porkbun"); fi
  if ((ovh_count > 0)); then sources_used+=("OVH"); fi
  if ((inwx_count > 0)); then sources_used+=("INWX"); fi
  sources_used+=("RDAP bootstrap" "WHOIS server list")
  local source_summary="" label
  for label in "${sources_used[@]}"; do
    source_summary+="${source_summary:+ + }${label}"
  done

  case "$mode" in
    raw)
      if [[ -n "$all_tlds" ]]; then
        printf '%s\n' "$all_tlds"
      fi
      ;;
    toml)
      # Plain text on purpose: colour escapes would corrupt the TOML output.
      echo "# Purchasable TLDs from all registrars ($all_count total)"
      _emit_toml_list all_registrars <<<"$all_tlds"
      echo ""
      echo "# Country-code TLDs (purchasable)"
      _emit_toml_list cctlds <<<"$all_cctlds"
      ;;
    diff)
      echo -e "${BOLD}Comparing registrar data vs current Lists.toml${NC}"
      echo ""
      local current_all current_country
      # parse_current_lists reports a missing Lists.toml itself.
      current_all=$(parse_current_lists "all") || return 1
      current_country=$(parse_current_lists "country") || return 1

      if [[ -n "$all_tlds" ]]; then
        # TLDs in registrars but NOT in our 'all' list
        local missing_from_all
        missing_from_all=$(comm -23 <(echo "$all_tlds" | filter_short_tlds | sort) <(echo "$current_all" | sort) 2>/dev/null || true)
        if [[ -n "$missing_from_all" ]]; then
          local mc
          mc=$(_count_nonempty <<<"$missing_from_all")
          echo -e "${YELLOW}TLDs at registrars but NOT in our 'all' list ($mc):${NC}"
          echo "$missing_from_all" | tr '\n' ' '
          echo ""
          echo ""
        fi

        # ccTLDs at registrars but NOT in our 'country' list
        local missing_cc
        missing_cc=$(comm -23 <(echo "$all_cctlds" | sort) <(echo "$current_country" | sort) 2>/dev/null || true)
        if [[ -n "$missing_cc" ]]; then
          local mcc
          mcc=$(_count_nonempty <<<"$missing_cc")
          echo -e "${YELLOW}ccTLDs at registrars but NOT in 'country' list ($mcc):${NC}"
          echo "$missing_cc" | tr '\n' ' '
          echo ""
          echo ""
        fi

        # TLDs in our 'all' list but NOT at any registrar
        local extra
        extra=$(comm -13 <(echo "$all_tlds" | sort) <(echo "$current_all" | sort) 2>/dev/null || true)
        if [[ -n "$extra" ]]; then
          local ec
          ec=$(_count_nonempty <<<"$extra")
          echo -e "${CYAN}TLDs in our 'all' list but NOT at any registrar ($ec):${NC}"
          echo "$extra" | tr '\n' ' '
          echo ""
          echo ""
        fi
      fi

      # Check which of our TLDs have RDAP servers
      if [[ -n "$rdap_tlds" && -n "$current_all" ]]; then
        local no_rdap
        no_rdap=$(comm -23 <(echo "$current_all" | sort) <(echo "$rdap_tlds" | sort) 2>/dev/null || true)
        if [[ -n "$no_rdap" ]]; then
          local nrc
          nrc=$(_count_nonempty <<<"$no_rdap")
          echo -e "${RED}TLDs in our lists with NO RDAP server ($nrc) — need WHOIS fallback:${NC}"
          echo "$no_rdap" | tr '\n' ' '
          echo ""
        fi
      fi
      ;;
    template)
      generate_template "$registrar_tlds" "$rdap_tlds" "$source_summary"
      ;;
    summary)
      echo -e "${BOLD}═══ TLD Source Summary ═══${NC}"
      echo ""
      if ((porkbun_count > 0)); then
        echo -e " ${GREEN}Porkbun${NC} $(_count_nonempty <<<"$porkbun_filtered") TLDs ($(_count_cctlds <<<"$porkbun_filtered") ccTLDs)"
      fi
      if ((ovh_count > 0)); then
        echo -e " ${GREEN}OVH${NC} $ovh_count TLDs ($(_count_cctlds <<<"$ovh_tlds") ccTLDs)"
      fi
      if ((inwx_count > 0)); then
        echo -e " ${GREEN}INWX${NC} $inwx_count TLDs ($(_count_cctlds <<<"$inwx_tlds") ccTLDs)"
      fi
      if ((tldlist_count > 0)); then
        echo -e " ${GREEN}tld-list.com${NC} $tldlist_count TLDs (community registry, no pricing)"
      fi
      if ((iana_count > 0)); then
        echo -e " ${GREEN}IANA${NC} $iana_count TLDs"
      fi
      if ((rdap_count > 0)); then
        echo -e " ${GREEN}RDAP${NC} $rdap_count TLDs with lookup servers"
      fi
      echo ""

      # Show what each registrar uniquely contributes
      if ((porkbun_count > 0 && ovh_count > 0)); then
        local ovh_unique inwx_unique
        ovh_unique=$(comm -23 <(echo "$ovh_tlds" | sort) <(echo "$porkbun_filtered" | sort) | _count_nonempty)
        echo -e " ${CYAN}OVH adds${NC} $ovh_unique TLDs not on Porkbun"
        if ((inwx_count > 0)); then
          inwx_unique=$(comm -23 <(echo "$inwx_tlds" | sort) <(printf '%s\n' "$porkbun_filtered" "$ovh_tlds" | sort -u) | _count_nonempty)
          echo -e " ${CYAN}INWX adds${NC} $inwx_unique TLDs not on Porkbun/OVH"
        fi
        echo ""
      fi

      echo -e " ${BOLD}Merged purchasable:${NC} $all_count TLDs"
      echo -e " ${BOLD}Merged ccTLDs:${NC} $cc_count"
      echo ""
      echo -e " Cached data in: ${CYAN}$CACHE_DIR${NC}"
      echo -e " Use ${BOLD}--diff${NC} to compare against Lists.toml"
      echo -e " Use ${BOLD}--toml${NC} to output TOML-ready arrays"
      echo -e " Use ${BOLD}--template${NC} to generate template Lists.toml"
      echo -e " Use ${BOLD}--all-sources${NC} to also fetch tld-list.com"
      echo -e " Use ${BOLD}--raw${NC} for raw TLD list (one per line)"
      ;;
  esac
}

main "$@"
