aboutsummaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rwxr-xr-xscripts/fetch-tlds.sh816
1 files changed, 816 insertions, 0 deletions
diff --git a/scripts/fetch-tlds.sh b/scripts/fetch-tlds.sh
new file mode 100755
index 0000000..0892f42
--- /dev/null
+++ b/scripts/fetch-tlds.sh
@@ -0,0 +1,816 @@
+#!/usr/bin/env bash
+# fetch-tlds.sh — Scrape purchasable TLD lists from registrar APIs
+# Outputs clean sorted lists for use in Lists.toml
+#
+# Usage:
+# ./scripts/fetch-tlds.sh # fetch all, show summary
+# ./scripts/fetch-tlds.sh porkbun # porkbun only
+# ./scripts/fetch-tlds.sh inwx # inwx only
+# ./scripts/fetch-tlds.sh --raw # output raw TLD lists (one per line)
+# ./scripts/fetch-tlds.sh --toml # output TOML-ready arrays
+# ./scripts/fetch-tlds.sh --diff # compare against current Lists.toml
+# ./scripts/fetch-tlds.sh --template # generate full Lists.toml with whois overrides if necessary
+#
+# Notes : yea this is ai slop, didnt make it myself oooo scary, but most of the rust i did myself just didnt feel like doing this at 4am and it somewhat works
+
+
set -euo pipefail

# ANSI colour sequences. They are stored with a literal backslash and only
# become escape codes when printed through `echo -e`.
NC='\033[0m'
BOLD='\033[1m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'

# Paths are resolved relative to this script: the project directory is the
# parent of the directory the script lives in.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
LISTS_TOML="${PROJECT_DIR}/Lists.toml"
CACHE_DIR="${PROJECT_DIR}/.tld-cache"
mkdir -p "$CACHE_DIR"
+
+# ─── Porkbun ────────────────────────────────────────────────────────────────
# Download the Porkbun pricing table into the cache and print the cache path.
#
# Globals:  CACHE_DIR (read); CYAN, RED, NC (read, message colours)
# Outputs:  cache file path on stdout; progress and errors on stderr
# Returns:  0 when a usable cache file is available, 1 when the fetch failed
fetch_porkbun() {
  local cache="$CACHE_DIR/porkbun.json"
  local max_age=86400 # 24h cache
  local tmp="$cache.tmp.$$"
  local mtime age

  # An empty file is never a valid cache, hence -s rather than -f.
  if [[ -s "$cache" ]]; then
    # BSD stat syntax first, then GNU; 0 makes the cache count as expired.
    mtime=$(stat -f%m "$cache" 2>/dev/null || stat -c%Y "$cache" 2>/dev/null || echo 0)
    age=$(( $(date +%s) - mtime ))
    if (( age < max_age )); then
      echo "$cache"
      return 0
    fi
  fi

  echo -e "${CYAN}Fetching Porkbun pricing API...${NC}" >&2
  # The pricing endpoint is called with an empty JSON body and no credentials.
  # Download to a temp file and rename only on success, so a failed or
  # truncated transfer can never be mistaken for a fresh cache on the next run.
  if curl -sf -X POST "https://api.porkbun.com/api/json/v3/pricing/get" \
    -H "Content-Type: application/json" \
    -d '{}' \
    -o "$tmp" 2>/dev/null && [[ -s "$tmp" ]] && mv -f "$tmp" "$cache"; then
    echo "$cache"
  else
    rm -f "$tmp"
    echo -e "${RED}Failed to fetch Porkbun data${NC}" >&2
    return 1
  fi
}
+
# Print the TLD keys of a Porkbun pricing response, sorted and de-duplicated.
#
# Arguments: $1 - path to the cached pricing JSON
#            (format: {"status":"SUCCESS","pricing":{"com":{...},"net":{...},...}})
# Outputs:   one TLD per line on stdout; nothing when the data cannot be parsed
# Returns:   0 always - an unusable response means "no TLDs", not a fatal error,
#            so the caller can continue with other sources under `set -e`.
parse_porkbun() {
  local json_file="$1"
  if command -v jq &>/dev/null; then
    jq -r '.pricing // {} | keys[]' "$json_file" 2>/dev/null | sort -u || true
  else
    # Fallback without jq: every `"key":{` is taken as a TLD (less reliable;
    # nested object keys inside a TLD entry, if any, would be picked up too).
    # The first "pricing" hit is the wrapper object itself, not a TLD, so it
    # is skipped; a later one would be the real .pricing TLD and is kept.
    grep -o '"[a-z][a-z0-9.-]*":{' "$json_file" \
      | sed 's/"//g; s/:{//' \
      | awk '$0 == "pricing" && !wrapper_seen++ { next } { print }' \
      | sort -u || true
  fi
}
+
+# ─── INWX ───────────────────────────────────────────────────────────────────
# Download the INWX domain-check page into the cache and print the cache path.
#
# Globals:  CACHE_DIR (read); CYAN, YELLOW, NC (read, message colours)
# Outputs:  cache file path on stdout; progress and errors on stderr
# Returns:  0 when a usable cache file is available, 1 when the fetch failed
fetch_inwx() {
  local cache="$CACHE_DIR/inwx.html"
  local max_age=86400
  local tmp="$cache.tmp.$$"
  local mtime age

  # An empty file is never a valid cache, hence -s rather than -f.
  if [[ -s "$cache" ]]; then
    # BSD stat syntax first, then GNU; 0 makes the cache count as expired.
    mtime=$(stat -f%m "$cache" 2>/dev/null || stat -c%Y "$cache" 2>/dev/null || echo 0)
    age=$(( $(date +%s) - mtime ))
    if (( age < max_age )); then
      echo "$cache"
      return 0
    fi
  fi

  echo -e "${CYAN}Fetching INWX domain list...${NC}" >&2
  # INWX domain check page has TLD list embedded as JSON.
  # Download to a temp file and rename only on success, so a failed or
  # truncated transfer can never be mistaken for a fresh cache on the next run.
  if curl -sfL "https://www.inwx.de/en/domain/check" \
    -H "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)" \
    -o "$tmp" 2>/dev/null && [[ -s "$tmp" ]] && mv -f "$tmp" "$cache"; then
    echo "$cache"
  else
    rm -f "$tmp"
    echo -e "${YELLOW}Could not fetch INWX${NC}" >&2
    return 1
  fi
}
+
# Print the TLDs embedded in the INWX page as `"tld":"xxx"` JSON fragments.
#
# Arguments: $1 - path to the cached HTML page
# Outputs:   one TLD per line on stdout, sorted and de-duplicated
# Returns:   0 also when the page contains no such fragment (grep exit 1 is
#            "no match", which must not abort the script under pipefail);
#            real grep errors still propagate.
parse_inwx() {
  local html_file="$1"
  { grep -oE '"tld":"[a-z]{2,20}"' "$html_file" || [[ $? -eq 1 ]]; } \
    | sed 's/"tld":"//;s/"//' \
    | sort -u
}
+
+# ─── OVH ────────────────────────────────────────────────────────────────────
# Download the OVH domain-extension list into the cache and print the cache path.
#
# Globals:  CACHE_DIR (read); CYAN, YELLOW, NC (read, message colours)
# Outputs:  cache file path on stdout; progress and errors on stderr
# Returns:  0 when a usable cache file is available, 1 when the fetch failed
fetch_ovh() {
  local cache="$CACHE_DIR/ovh.json"
  local max_age=86400
  local tmp="$cache.tmp.$$"
  local mtime age

  # An empty file is never a valid cache, hence -s rather than -f.
  if [[ -s "$cache" ]]; then
    # BSD stat syntax first, then GNU; 0 makes the cache count as expired.
    mtime=$(stat -f%m "$cache" 2>/dev/null || stat -c%Y "$cache" 2>/dev/null || echo 0)
    age=$(( $(date +%s) - mtime ))
    if (( age < max_age )); then
      echo "$cache"
      return 0
    fi
  fi

  echo -e "${CYAN}Fetching OVH domain extensions...${NC}" >&2
  # Download to a temp file and rename only on success, so a failed or
  # truncated transfer can never be mistaken for a fresh cache on the next run.
  if curl -sf "https://www.ovh.com/engine/apiv6/domain/extensions" \
    -H "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)" \
    -o "$tmp" 2>/dev/null && [[ -s "$tmp" ]] && mv -f "$tmp" "$cache"; then
    echo "$cache"
  else
    rm -f "$tmp"
    echo -e "${YELLOW}Could not fetch OVH extensions${NC}" >&2
    return 1
  fi
}
+
# Print the top-level TLDs from the OVH extension list (a JSON array of strings).
#
# Arguments: $1 - path to the cached JSON file
# Outputs:   one TLD per line on stdout, sorted and de-duplicated
# Returns:   0 also when nothing is left after filtering; grep's "no lines
#            selected" status must not abort the script under pipefail.
parse_ovh() {
  local json_file="$1"
  if command -v jq &>/dev/null; then
    # Only top-level TLDs (no dots = not sub-TLDs like com.au).
    # Unparseable JSON is treated as "no TLDs" so other sources still work.
    { jq -r '.[]' "$json_file" 2>/dev/null || true; } \
      | { grep -vE '\.' || [[ $? -eq 1 ]]; } \
      | sort -u
  else
    # The pattern itself cannot match a dotted name, so no extra dot filter
    # is needed here.
    { grep -oE '"[a-z]{2,20}"' "$json_file" || [[ $? -eq 1 ]]; } \
      | tr -d '"' \
      | sort -u
  fi
}
+
+# ─── tld-list.com (comprehensive registry, free basic list) ────────────────
# Download the tld-list.com basic list into the cache and print the cache path.
#
# Globals:  CACHE_DIR (read); CYAN, YELLOW, NC (read, message colours)
# Outputs:  cache file path on stdout; progress and errors on stderr
# Returns:  0 when a usable cache file is available, 1 when the fetch failed
fetch_tldlist() {
  local cache="$CACHE_DIR/tldlist-basic.txt"
  local max_age=86400
  local tmp="$cache.tmp.$$"
  local mtime age

  # An empty file is never a valid cache, hence -s rather than -f.
  if [[ -s "$cache" ]]; then
    # BSD stat syntax first, then GNU; 0 makes the cache count as expired.
    mtime=$(stat -f%m "$cache" 2>/dev/null || stat -c%Y "$cache" 2>/dev/null || echo 0)
    age=$(( $(date +%s) - mtime ))
    if (( age < max_age )); then
      echo "$cache"
      return 0
    fi
  fi

  echo -e "${CYAN}Fetching tld-list.com basic list...${NC}" >&2
  # Download to a temp file and rename only on success, so a failed or
  # truncated transfer can never be mistaken for a fresh cache on the next run.
  if curl -sf "https://tld-list.com/df/tld-list-basic.csv" \
    -H "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)" \
    -o "$tmp" 2>/dev/null && [[ -s "$tmp" ]] && mv -f "$tmp" "$cache"; then
    echo "$cache"
  else
    rm -f "$tmp"
    echo -e "${YELLOW}Could not fetch tld-list.com${NC}" >&2
    return 1
  fi
}
+
# Print the plain lowercase alphanumeric TLDs from the tld-list.com basic list.
#
# Arguments: $1 - path to the cached list (one TLD per line, CR/LF endings)
# Outputs:   one TLD per line on stdout, sorted and de-duplicated; entries
#            containing anything but [a-z0-9] (IDN entries, hyphenated
#            names) are dropped
# Returns:   0 also when no line qualifies (grep exit 1 is tolerated so the
#            script is not aborted under pipefail)
parse_tldlist() {
  local file="$1"
  tr -d '\r' < "$file" \
    | { grep -E '^[a-z][a-z0-9]*$' || [[ $? -eq 1 ]]; } \
    | sort -u
}
+
+# ─── IANA root zone (fallback) ──────────────────────────────────────────────
# Download the IANA TLD list into the cache and print the cache path.
#
# Globals:  CACHE_DIR (read); CYAN, RED, NC (read, message colours)
# Outputs:  cache file path on stdout; progress and errors on stderr
# Returns:  0 when a usable cache file is available, 1 when the fetch failed
fetch_iana() {
  local cache="$CACHE_DIR/iana-tlds.txt"
  local max_age=604800 # 7 days
  local tmp="$cache.tmp.$$"
  local mtime age

  # An empty file is never a valid cache, hence -s rather than -f.
  if [[ -s "$cache" ]]; then
    # BSD stat syntax first, then GNU; 0 makes the cache count as expired.
    mtime=$(stat -f%m "$cache" 2>/dev/null || stat -c%Y "$cache" 2>/dev/null || echo 0)
    age=$(( $(date +%s) - mtime ))
    if (( age < max_age )); then
      echo "$cache"
      return 0
    fi
  fi

  echo -e "${CYAN}Fetching IANA TLD list...${NC}" >&2
  # Download to a temp file and rename only on success, so a failed or
  # truncated transfer can never be mistaken for a fresh cache on the next run.
  if curl -sf "https://data.iana.org/TLD/tlds-alpha-by-domain.txt" -o "$tmp" 2>/dev/null \
    && [[ -s "$tmp" ]] && mv -f "$tmp" "$cache"; then
    echo "$cache"
  else
    rm -f "$tmp"
    echo -e "${RED}Failed to fetch IANA list${NC}" >&2
    return 1
  fi
}
+
# Print every entry of the IANA TLD file in lowercase, sorted and de-duplicated.
#
# Arguments: $1 - path to the cached IANA list
# Outputs:   one TLD per line on stdout (no length filtering is applied)
parse_iana() {
  local file="$1"
  # The first line is a header, not a TLD, so it is dropped.
  sed '1d' "$file" \
    | tr '[:upper:]' '[:lower:]' \
    | sort -u
}
+
# Print only the two-letter entries of the IANA TLD file, in lowercase,
# sorted and de-duplicated.
#
# Arguments: $1 - path to the cached IANA list (first line is a header)
# Outputs:   one two-letter TLD per line on stdout
parse_iana_cctlds() {
  local file="$1"
  awk 'NR > 1 { print tolower($0) }' "$file" \
    | grep -E '^[a-z]{2}$' \
    | sort -u
}
+
+# ─── RDAP bootstrap (what actually has lookup servers) ──────────────────────
# Download the IANA RDAP DNS bootstrap file into the cache and print the cache path.
#
# Globals:  CACHE_DIR (read); CYAN, RED, NC (read, message colours)
# Outputs:  cache file path on stdout; progress and errors on stderr
# Returns:  0 when a usable cache file is available, 1 when the fetch failed
fetch_rdap() {
  local cache="$CACHE_DIR/rdap-dns.json"
  local max_age=86400
  local tmp="$cache.tmp.$$"
  local mtime age

  # An empty file is never a valid cache, hence -s rather than -f.
  if [[ -s "$cache" ]]; then
    # BSD stat syntax first, then GNU; 0 makes the cache count as expired.
    mtime=$(stat -f%m "$cache" 2>/dev/null || stat -c%Y "$cache" 2>/dev/null || echo 0)
    age=$(( $(date +%s) - mtime ))
    if (( age < max_age )); then
      echo "$cache"
      return 0
    fi
  fi

  echo -e "${CYAN}Fetching RDAP bootstrap...${NC}" >&2
  # Download to a temp file and rename only on success, so a failed or
  # truncated transfer can never be mistaken for a fresh cache on the next run.
  if curl -sf "https://data.iana.org/rdap/dns.json" -o "$tmp" 2>/dev/null \
    && [[ -s "$tmp" ]] && mv -f "$tmp" "$cache"; then
    echo "$cache"
  else
    rm -f "$tmp"
    echo -e "${RED}Failed to fetch RDAP bootstrap${NC}" >&2
    return 1
  fi
}
+
# Print the TLDs listed in the RDAP bootstrap file.
#
# Arguments: $1 - path to the cached bootstrap JSON
# Outputs:   one lowercase TLD per line on stdout, sorted and de-duplicated
# Returns:   0 also when nothing can be extracted, so an unusable file does
#            not abort the script under `set -e` / pipefail
parse_rdap_tlds() {
  local json_file="$1"
  if command -v jq &>/dev/null; then
    # Each service contributes string arrays; entries starting with "http"
    # are the server URLs and are dropped, the rest are TLDs.
    { jq -r '.services[][] | .[]' "$json_file" 2>/dev/null || true; } \
      | { grep -v '^http' || [[ $? -eq 1 ]]; } \
      | tr '[:upper:]' '[:lower:]' \
      | sort -u
  else
    # Fallback without jq: take short lowercase quoted strings, but discard
    # those followed by ':' - these are JSON object keys ("services",
    # "version", ...), not TLD values.
    { grep -oE '"[a-z]{2,20}"[[:space:]]*:?' "$json_file" || [[ $? -eq 1 ]]; } \
      | { grep -v ':$' || [[ $? -eq 1 ]]; } \
      | tr -d '" \t' \
      | sort -u
  fi
}
+
+# ─── WHOIS server list (rfc1036/whois project) ─────────────────────────────
# Download the rfc1036/whois `tld_serv_list` into the cache and print the cache path.
#
# Globals:  CACHE_DIR (read); CYAN, YELLOW, NC (read, message colours)
# Outputs:  cache file path on stdout; progress and errors on stderr
# Returns:  0 when a usable cache file is available, 1 when the fetch failed
fetch_whois_servers() {
  local cache="$CACHE_DIR/tld_serv_list.txt"
  local max_age=604800 # 7 days
  local tmp="$cache.tmp.$$"
  local mtime age

  # An empty file is never a valid cache, hence -s rather than -f.
  if [[ -s "$cache" ]]; then
    # BSD stat syntax first, then GNU; 0 makes the cache count as expired.
    mtime=$(stat -f%m "$cache" 2>/dev/null || stat -c%Y "$cache" 2>/dev/null || echo 0)
    age=$(( $(date +%s) - mtime ))
    if (( age < max_age )); then
      echo "$cache"
      return 0
    fi
  fi

  echo -e "${CYAN}Fetching WHOIS server list...${NC}" >&2
  # Download to a temp file and rename only on success, so a failed or
  # truncated transfer can never be mistaken for a fresh cache on the next run.
  if curl -sf "https://raw.githubusercontent.com/rfc1036/whois/next/tld_serv_list" -o "$tmp" 2>/dev/null \
    && [[ -s "$tmp" ]] && mv -f "$tmp" "$cache"; then
    echo "$cache"
  else
    rm -f "$tmp"
    echo -e "${YELLOW}Could not fetch WHOIS server list${NC}" >&2
    return 1
  fi
}
+
# Look up the WHOIS server for a TLD in the cached server list.
#
# Arguments: $1 - TLD without the leading dot (e.g. "de")
#            $2 - path to the server list file
# Outputs:   the server hostname on stdout, or an empty line when the TLD is
#            not listed, has no hostname-like field, or only has a web URL
# Returns:   0 always
get_whois_server() {
  local tld="$1"
  local serv_file="$2"
  local server
  # Format: .tld [optional-tag] server   # optional comment
  # Some entries have VERISIGN or a similar tag before the server, so the
  # server is taken as the last hostname-like field of the entry. Comments
  # are stripped first; otherwise a hostname mentioned in a trailing comment
  # (e.g. ".xx NONE # www.nic.xx") would be returned as the server.
  server=$(awk -v key=".${tld}" '
    { sub(/#.*/, "") }
    $1 == key {
      for (i = NF; i >= 2; i--) {
        if ($i ~ /^[a-z0-9].*\.[a-z]/) { print $i; break }
      }
      exit    # only the first entry for a TLD is considered
    }
  ' "$serv_file" 2>/dev/null) || server=""
  # Tags such as NONE never look like a hostname; web-only entries do, so
  # URLs are rejected explicitly.
  if [[ -z "$server" || "$server" == http* ]]; then
    echo ""
  else
    echo "$server"
  fi
}
+
# Ask IANA's root-zone database page for a TLD's WHOIS server.
# One HTTP request per call, so this is slow compared to the cached list.
#
# Arguments: $1 - TLD without the leading dot
# Outputs:   the first value following a "WHOIS Server:</b>" label in the
#            page, or nothing when the page has no such label
get_iana_whois_server() {
  local tld="$1"
  local page_url="https://www.iana.org/domains/root/db/${tld}.html"
  curl -s "$page_url" 2>/dev/null |
    sed -n 's/.*WHOIS Server:<\/b> *\([^ <]*\).*/\1/p' |
    head -n 1
}
+
+# ─── Extract current Lists.toml entries ─────────────────────────────────────
# Print the TLDs of one named array in Lists.toml, sorted and de-duplicated.
#
# Globals:   LISTS_TOML (read); RED, NC (read, message colours)
# Arguments: $1 - list name (default: "all")
# Outputs:   one bare TLD per line on stdout; a ":server" WHOIS override
#            suffix is stripped from each entry
# Returns:   1 when Lists.toml does not exist
parse_current_lists() {
  local list_name="${1:-all}"
  if [[ ! -f "$LISTS_TOML" ]]; then
    echo -e "${RED}No Lists.toml found at $LISTS_TOML${NC}" >&2
    return 1
  fi
  awk -v list="$list_name" '
    !found {
      if ($0 !~ ("^" list " *= *\\[")) next
      found = 1
      sub(/^[^[]*\[/, "")    # keep entries that share the opening line
    }
    # A line starting with "[" after the opening line is a table header.
    found > 1 && /^[[:space:]]*\[/ { exit }
    {
      found++
      sub(/#.*/, "")                 # TOML comments are not entries
      closed = sub(/\].*/, "")       # closing bracket may follow entries
      gsub(/["\t,]/, " ")
      n = split($0, parts, " ")
      for (i = 1; i <= n; i++) {
        sub(/:.*/, "", parts[i])     # strip whois override suffix
        if (parts[i] != "") print parts[i]
      }
      if (closed) exit
    }
  ' "$LISTS_TOML" | sort -u
}
+
+# ─── Helpers ────────────────────────────────────────────────────────────────
# Format TLDs as the body lines of a TOML array.
#
# Input:   one entry per line on stdin; blank lines are ignored
# Outputs: tab-indented lines of quoted, comma-separated entries, each ending
#          in a trailing comma, wrapped at roughly 80 columns. Nothing is
#          printed for empty input (a lone "," would be invalid TOML).
# Returns: 0 always
to_toml_array() {
  local tld entry content=""
  while IFS= read -r tld || [[ -n "$tld" ]]; do
    [[ -z "$tld" ]] && continue
    entry="\"$tld\""
    if [[ -z "$content" ]]; then
      content="$entry"
    # +4 = two characters budgeted for the indent plus the ", " separator.
    elif (( ${#content} + ${#entry} + 4 > 78 )); then
      printf '\t%s,\n' "$content"
      content="$entry"
    else
      content+=", $entry"
    fi
  done
  if [[ -n "$content" ]]; then
    # printf keeps entry text literal; no escape interpretation of the data.
    printf '\t%s,\n' "$content"
  fi
  return 0
}
+
# Keep only two-letter lowercase entries from stdin.
# Returns 0 even when nothing matches: grep's exit status 1 just means "no
# lines selected" and must not count as a failure under pipefail. Genuine
# grep errors (status 2) still propagate.
filter_cctlds() {
  grep -E '^[a-z]{2}$' || [[ $? -eq 1 ]]
}
+
# Keep only 2-6 letter lowercase entries from stdin (the short TLDs that are
# useful for domain hacking).
# Returns 0 even when nothing matches: grep's exit status 1 just means "no
# lines selected" and must not count as a failure under pipefail. Genuine
# grep errors (status 2) still propagate.
filter_short_tlds() {
  grep -E '^[a-z]{2,6}$' || [[ $? -eq 1 ]]
}
+
+# ─── Known broken/unregistrable TLDs ────────────────────────────────────────
SKIP_TLDS="bl bq eh mf gb bv sj kp hm"

# Drop every stdin line that is exactly one of the TLDs in SKIP_TLDS.
# Returns 0 even when every line was dropped or the input was empty: grep's
# exit status 1 just means "no lines selected" and must not count as a
# failure under pipefail. Genuine grep errors (status 2) still propagate.
filter_skip() {
  local skip_pattern
  skip_pattern=$(echo "$SKIP_TLDS" | tr ' ' '|')
  grep -vE "^($skip_pattern)$" || [[ $? -eq 1 ]]
}
+
# ─── Template generation ────────────────────────────────────────────────────
# Generates a full Lists.toml on stdout with:
# - "tld" for TLDs with RDAP support (direct lookup works)
# - "tld:whois.server" for TLDs needing WHOIS fallback
# - skip TLDs omitted entirely (no Patch.toml needed)
#
# Arguments: $1 - newline-separated purchasable TLDs (already merged)
#            $2 - newline-separated TLDs present in the RDAP bootstrap
#            $3 - human-readable source summary for the file header
# Globals:   CYAN, GREEN, YELLOW, RED, NC (read, message colours)
# Calls:     fetch_whois_servers, get_whois_server, filter_skip, to_toml_array
# Outputs:   the TOML document on stdout; statistics on stderr
generate_template() {
  local all_registrar_tlds="$1"
  local rdap_tlds="$2"
  local source_summary="$3"
  local tld entry server

  echo -e "${CYAN}Building template...${NC}" >&2

  # The WHOIS server list is optional: without it, TLDs lacking RDAP are
  # simply emitted bare.
  local whois_serv_file=""
  whois_serv_file=$(fetch_whois_servers 2>/dev/null) || whois_serv_file=""

  # The input is already merged + filtered from all registrar sources.
  local buyable_tlds
  buyable_tlds=$(printf '%s\n' "$all_registrar_tlds" | filter_skip | sort -u) || buyable_tlds=""

  # Index the RDAP TLDs once instead of spawning a grep per TLD.
  local -A has_rdap=()
  while IFS= read -r tld; do
    if [[ -n "$tld" ]]; then
      has_rdap["$tld"]=1
    fi
  done <<< "$rdap_tlds"

  # Build annotated TLD list: "tld" or "tld:whois_server".
  # A TLD needs a whois override if it's NOT in the RDAP bootstrap.
  local -A annotation=()
  local annotated_all=()
  local annotated_cc=()
  local rdap_hit=0 whois_hit=0 bare_hit=0

  while IFS= read -r tld; do
    [[ -z "$tld" ]] && continue
    entry="$tld"
    if [[ -n "${has_rdap[$tld]:-}" ]]; then
      # Has RDAP server - no override needed.
      rdap_hit=$((rdap_hit + 1))
    else
      server=""
      if [[ -n "$whois_serv_file" ]]; then
        server=$(get_whois_server "$tld" "$whois_serv_file") || server=""
      fi
      if [[ -n "$server" ]]; then
        entry="${tld}:${server}"
        whois_hit=$((whois_hit + 1))
      else
        # No known server - include bare, hoardom will try common patterns.
        bare_hit=$((bare_hit + 1))
      fi
    fi
    annotation["$tld"]="$entry"
    annotated_all+=("$entry")
    # Also track ccTLDs (2-letter entries).
    if [[ "$tld" =~ ^[a-z]{2}$ ]]; then
      annotated_cc+=("$entry")
    fi
  done <<< "$buyable_tlds"

  # Every non-empty buyable TLD produced exactly one entry.
  local buyable_count=${#annotated_all[@]}

  echo -e " ${GREEN}${rdap_hit}${NC} TLDs with RDAP (direct lookup)" >&2
  echo -e " ${YELLOW}${whois_hit}${NC} TLDs with WHOIS override" >&2
  echo -e " ${RED}${bare_hit}${NC} TLDs with no known server (will probe)" >&2
  echo "" >&2

  # ── Curated lists (bare TLD names, annotated automatically) ─────────

  # Standard: com, net, org + generally desirable / well-known TLDs
  local standard_tlds=(
    "com" "net" "org" "io" "co" "dev" "app" "me" "info"
    "biz" "one" "xyz" "online" "site" "tech" "pro" "tv"
    "cc" "to" "sh" "li" "fm" "am" "gg" "ws" "la"
    "ms" "nu" "cx" "mn" "st" "tel" "ai" "id" "in"
    "it" "is" "at" "be" "de" "eu" "fr" "nl" "se"
    "uk" "us" "ca" "au" "nz" "club" "blog" "art" "fun"
    "lol" "wtf" "page" "link" "space" "store" "shop"
  )

  # Decent: the best of the best — com, net, org, io + short desirable ones
  # that work great for domain hacking and are punchy
  local decent_tlds=(
    "com" "net" "org" "io" "dev" "app" "co" "me"
    "ai" "sh" "to" "fm" "tv" "gg" "cc" "li" "am"
    "la" "nu" "id" "in" "it" "is" "at" "ws"
    "one" "pro" "bio" "art" "ink" "run" "win" "new"
    "lol" "pub" "fun" "vet" "fit" "rip" "wtf" "zip"
  )

  # Swiss: standard-like but with Swiss / Central European related TLDs up front
  local swiss_tlds=(
    "com" "net" "org" "ch" "li" "swiss" "zuerich"
    "io" "co" "dev" "app" "me" "info" "one" "pro"
    "de" "at" "fr" "it" "eu"
    "tech" "online" "site" "shop" "store"
    "biz" "xyz" "tv" "cc" "to" "sh" "fm" "am" "gg"
  )

  # Print each TLD of the named array with its annotation; TLDs that are not
  # among the buyable ones are printed bare.
  annotate_list() {
    local -n input_list=$1
    local bare_tld
    for bare_tld in "${input_list[@]}"; do
      printf '%s\n' "${annotation[$bare_tld]:-$bare_tld}"
    done
  }

  # Print the annotated entries whose bare TLD length is within [$1, $2].
  filter_annotated_by_length() {
    local min="$1"
    local max="$2"
    local ann base len
    for ann in ${annotated_all[@]+"${annotated_all[@]}"}; do
      base="${ann%%:*}"
      len=${#base}
      if (( len >= min && len <= max )); then
        echo "$ann"
      fi
    done
  }

  # Read entries from stdin and print them as the TOML array named $1.
  # An empty list is printed as an empty array without a body line.
  emit_list() {
    local body
    body=$(cat)
    echo "$1 = ["
    if [[ -n "$body" ]]; then
      printf '%s\n' "$body" | to_toml_array
    fi
    echo "]"
  }

  # ─── Output ─────────────────────────────────────────────────────────
  local date_str
  date_str=$(date +%Y-%m-%d)

  cat <<HEADER
# Lists.toml — Built-in TLD lists for hoardom
# Auto-generated on ${date_str} from ${source_summary}
#
# Format:
# "tld" — TLD has RDAP support, lookup works directly
# "tld:whois.server" — No RDAP: use this WHOIS server for fallback
#
# ${buyable_count} purchasable TLDs (handshake/sub-TLDs excluded)
# ${rdap_hit} have RDAP, ${whois_hit} need WHOIS override, ${bare_hit} will auto-probe
#
# Lists:
# standard — common desirable TLDs (com, net, org, io, dev, ...)
# decent — very best short punchy TLDs for domain hacking
# swiss — standard-like but with Swiss/Central European TLDs prioritized
# country — all 2-letter country-code TLDs
# two — all 2-letter TLDs
# three — all TLDs with 3 or fewer letters
# four — all TLDs with exactly 4 letters
# long — all TLDs with 5+ letters
# all — everything

HEADER

  annotate_list standard_tlds | emit_list standard
  echo ""
  annotate_list decent_tlds | emit_list decent
  echo ""
  annotate_list swiss_tlds | emit_list swiss
  echo ""
  printf '%s\n' ${annotated_cc[@]+"${annotated_cc[@]}"} | emit_list country
  echo ""
  filter_annotated_by_length 2 2 | emit_list two
  echo ""
  filter_annotated_by_length 2 3 | emit_list three
  echo ""
  filter_annotated_by_length 4 4 | emit_list four
  echo ""
  filter_annotated_by_length 5 99 | emit_list long
  echo ""
  printf '%s\n' ${annotated_all[@]+"${annotated_all[@]}"} | emit_list all
}
+
+# ─── Main ───────────────────────────────────────────────────────────────────
# Print the number of non-empty lines in the string $1 (0 for an empty string).
_count_lines() {
  grep -c . <<< "$1" || true
}

# Print the number of two-letter TLDs in the newline-separated string $1.
_count_cctlds() {
  _count_lines "$(filter_cctlds <<< "$1" || true)"
}

# Entry point: parse arguments, fetch the selected sources, merge the
# registrar TLDs and run the requested output mode.
#
# Arguments: [source] [--raw|--toml|--diff|--template] [--all-sources]
#            Unknown arguments are reported on stderr and otherwise ignored.
# Globals:   CACHE_DIR and the colour variables (read)
# Outputs:   mode-dependent report on stdout; progress on stderr
main() {
  local mode="summary"
  local source="all"
  local all_sources=false
  local arg

  for arg in "$@"; do
    case "$arg" in
      --raw) mode="raw" ;;
      --toml) mode="toml" ;;
      --diff) mode="diff" ;;
      --template) mode="template" ;;
      --all-sources) all_sources=true ;;
      porkbun) source="porkbun" ;;
      inwx) source="inwx" ;;
      ovh) source="ovh" ;;
      iana) source="iana" ;;
      rdap) source="rdap" ;;
      tldlist) source="tldlist" ;;
      --help | -h)
        echo "Usage: $0 [source] [--raw|--toml|--diff|--template] [--all-sources]"
        echo ""
        echo "Sources: porkbun, ovh, inwx, iana, rdap, tldlist"
        echo ""
        echo "Flags:"
        echo " --raw Output raw TLD list (one per line)"
        echo " --toml Output TOML-ready arrays"
        echo " --diff Compare against current Lists.toml"
        echo " --template Generate full Lists.toml with whois overrides"
        echo " --all-sources Include tld-list.com for extra coverage (used as"
        echo " a filter: only TLDs also in a registrar are kept)"
        exit 0
        ;;
      *)
        # A typo used to be swallowed silently and behave like "no argument".
        echo -e "${YELLOW}Ignoring unknown argument: ${arg}${NC}" >&2
        ;;
    esac
  done

  local porkbun_tlds="" inwx_tlds="" ovh_tlds="" iana_tlds="" rdap_tlds="" tldlist_tlds=""
  local porkbun_count=0 inwx_count=0 ovh_count=0 iana_count=0 rdap_count=0 tldlist_count=0
  local porkbun_file="" inwx_file="" ovh_file="" iana_file="" rdap_file="" tldlist_file=""

  # Template mode needs all registrar sources + rdap regardless of source filter
  if [[ "$mode" == "template" ]]; then
    source="all"
  fi

  # ── Fetch from selected sources ──
  # A source that cannot be fetched or parsed contributes nothing; it never
  # aborts the run.

  if [[ "$source" == "all" || "$source" == "porkbun" ]]; then
    if porkbun_file=$(fetch_porkbun); then
      porkbun_tlds=$(parse_porkbun "$porkbun_file") || porkbun_tlds=""
      porkbun_count=$(_count_lines "$porkbun_tlds")
    fi
  fi

  if [[ "$source" == "all" || "$source" == "ovh" ]]; then
    if ovh_file=$(fetch_ovh); then
      ovh_tlds=$(parse_ovh "$ovh_file") || ovh_tlds=""
      ovh_count=$(_count_lines "$ovh_tlds")
    fi
  fi

  if [[ "$source" == "all" || "$source" == "inwx" ]]; then
    if inwx_file=$(fetch_inwx 2>/dev/null); then
      inwx_tlds=$(parse_inwx "$inwx_file") || inwx_tlds=""
      inwx_count=$(_count_lines "$inwx_tlds")
    fi
  fi

  if [[ "$source" == "all" || "$source" == "iana" ]]; then
    if iana_file=$(fetch_iana); then
      iana_tlds=$(parse_iana "$iana_file") || iana_tlds=""
      iana_count=$(_count_lines "$iana_tlds")
    fi
  fi

  if [[ "$source" == "all" || "$source" == "rdap" ]]; then
    if rdap_file=$(fetch_rdap); then
      rdap_tlds=$(parse_rdap_tlds "$rdap_file") || rdap_tlds=""
      rdap_count=$(_count_lines "$rdap_tlds")
    fi
  fi

  if [[ "$all_sources" == true || "$source" == "tldlist" ]]; then
    if tldlist_file=$(fetch_tldlist); then
      tldlist_tlds=$(parse_tldlist "$tldlist_file") || tldlist_tlds=""
      tldlist_count=$(_count_lines "$tldlist_tlds")
    fi
  fi

  # ── Filter porkbun: no handshake, no sub-TLDs ──
  local porkbun_filtered=""
  if [[ -n "$porkbun_tlds" ]]; then
    if command -v jq &>/dev/null && [[ -f "$porkbun_file" ]]; then
      porkbun_filtered=$(jq -r '
        .pricing // {} | to_entries[] |
        select(.key | contains(".") | not) |
        select(.value.specialType // "" | test("handshake") | not) |
        .key
      ' "$porkbun_file" 2>/dev/null | sort -u) || porkbun_filtered=""
    else
      porkbun_filtered=$(grep -v '\.' <<< "$porkbun_tlds" | sort -u) || porkbun_filtered=""
    fi
  fi

  # ── Merge all registrar-confirmed purchasable TLDs ──
  # Only TLDs that have pricing at a real registrar are included.
  # With no registrar data (e.g. source "iana" or "rdap") the greps select
  # nothing; that is an empty result, not an error.
  local registrar_tlds
  registrar_tlds=$(printf '%s\n' "$porkbun_filtered" "$ovh_tlds" "$inwx_tlds" \
    | { grep -E '^[a-z]' || [[ $? -eq 1 ]]; } \
    | sort -u \
    | filter_skip) || registrar_tlds=""

  # If --all-sources, report how many tld-list.com entries have no registrar
  # pricing (brand/reserved); they are not added to the merged list.
  if [[ "$all_sources" == true && -n "$tldlist_tlds" ]]; then
    local tldlist_extra extra_count
    tldlist_extra=$(comm -23 <(echo "$tldlist_tlds") <(echo "$registrar_tlds") 2>/dev/null || true)
    extra_count=$(_count_lines "$tldlist_extra")
    echo -e " ${YELLOW}tld-list.com:${NC} $extra_count TLDs with no registrar pricing (brand/reserved, excluded)" >&2
  fi

  local all_tlds="$registrar_tlds"
  local all_cctlds
  all_cctlds=$(filter_cctlds <<< "$all_tlds") || all_cctlds=""

  # Build source summary string for template header
  local sources_used=()
  if (( porkbun_count > 0 )); then sources_used+=("Porkbun"); fi
  if (( ovh_count > 0 )); then sources_used+=("OVH"); fi
  if (( inwx_count > 0 )); then sources_used+=("INWX"); fi
  local source_summary="" name
  for name in ${sources_used[@]+"${sources_used[@]}"}; do
    source_summary+="${name} + "
  done
  source_summary+="RDAP bootstrap + WHOIS server list"

  case "$mode" in
    raw)
      if [[ -n "$all_tlds" ]]; then
        printf '%s\n' "$all_tlds"
      fi
      ;;
    toml)
      # Plain comment: colour codes on stdout would corrupt redirected TOML.
      echo "# Purchasable TLDs from all registrars ($(_count_lines "$all_tlds") total)"
      echo "all_registrars = ["
      if [[ -n "$all_tlds" ]]; then
        echo "$all_tlds" | to_toml_array
      fi
      echo "]"
      echo ""
      echo "# Country-code TLDs (purchasable)"
      echo "cctlds = ["
      if [[ -n "$all_cctlds" ]]; then
        echo "$all_cctlds" | to_toml_array
      fi
      echo "]"
      ;;
    diff)
      echo -e "${BOLD}Comparing registrar data vs current Lists.toml${NC}"
      echo ""
      local current_all current_country
      current_all=$(parse_current_lists "all")
      current_country=$(parse_current_lists "country")

      if [[ -n "$all_tlds" ]]; then
        # TLDs in registrars but NOT in our 'all' list
        local missing_from_all
        missing_from_all=$(comm -23 <(echo "$all_tlds" | filter_short_tlds | sort) <(echo "$current_all" | sort) 2>/dev/null || true)
        if [[ -n "$missing_from_all" ]]; then
          echo -e "${YELLOW}TLDs at registrars but NOT in our 'all' list ($(_count_lines "$missing_from_all")):${NC}"
          echo "$missing_from_all" | tr '\n' ' '
          echo ""
          echo ""
        fi

        # ccTLDs at registrars but NOT in our 'country' list
        local missing_cc
        missing_cc=$(comm -23 <(echo "$all_cctlds" | sort) <(echo "$current_country" | sort) 2>/dev/null || true)
        if [[ -n "$missing_cc" ]]; then
          echo -e "${YELLOW}ccTLDs at registrars but NOT in 'country' list ($(_count_lines "$missing_cc")):${NC}"
          echo "$missing_cc" | tr '\n' ' '
          echo ""
          echo ""
        fi

        # TLDs in our 'all' list but NOT at any registrar
        local extra
        extra=$(comm -13 <(echo "$all_tlds" | sort) <(echo "$current_all" | sort) 2>/dev/null || true)
        if [[ -n "$extra" ]]; then
          echo -e "${CYAN}TLDs in our 'all' list but NOT at any registrar ($(_count_lines "$extra")):${NC}"
          echo "$extra" | tr '\n' ' '
          echo ""
          echo ""
        fi
      fi

      # Check which of our TLDs have RDAP servers
      if [[ -n "$rdap_tlds" && -n "$current_all" ]]; then
        local no_rdap
        no_rdap=$(comm -23 <(echo "$current_all" | sort) <(echo "$rdap_tlds" | sort) 2>/dev/null || true)
        if [[ -n "$no_rdap" ]]; then
          echo -e "${RED}TLDs in our lists with NO RDAP server ($(_count_lines "$no_rdap")) — need WHOIS fallback:${NC}"
          echo "$no_rdap" | tr '\n' ' '
          echo ""
        fi
      fi
      ;;
    template)
      generate_template "$registrar_tlds" "$rdap_tlds" "$source_summary"
      ;;
    summary)
      echo -e "${BOLD}═══ TLD Source Summary ═══${NC}"
      echo ""
      if (( porkbun_count > 0 )); then
        echo -e " ${GREEN}Porkbun${NC} $(_count_lines "$porkbun_filtered") TLDs ($(_count_cctlds "$porkbun_filtered") ccTLDs)"
      fi
      if (( ovh_count > 0 )); then
        echo -e " ${GREEN}OVH${NC} $ovh_count TLDs ($(_count_cctlds "$ovh_tlds") ccTLDs)"
      fi
      if (( inwx_count > 0 )); then
        echo -e " ${GREEN}INWX${NC} $inwx_count TLDs ($(_count_cctlds "$inwx_tlds") ccTLDs)"
      fi
      if (( tldlist_count > 0 )); then
        echo -e " ${GREEN}tld-list.com${NC} $tldlist_count TLDs (community registry, no pricing)"
      fi
      if (( iana_count > 0 )); then
        echo -e " ${GREEN}IANA${NC} $iana_count TLDs"
      fi
      if (( rdap_count > 0 )); then
        echo -e " ${GREEN}RDAP${NC} $rdap_count TLDs with lookup servers"
      fi
      echo ""

      # Show what each registrar uniquely contributes
      if (( porkbun_count > 0 && ovh_count > 0 )); then
        local ovh_unique inwx_unique
        ovh_unique=$(comm -23 <(echo "$ovh_tlds" | sort) <(echo "$porkbun_filtered" | sort) | wc -l | tr -d ' ')
        echo -e " ${CYAN}OVH adds${NC} $ovh_unique TLDs not on Porkbun"
        if (( inwx_count > 0 )); then
          inwx_unique=$(comm -23 <(echo "$inwx_tlds" | sort) <(echo -e "${porkbun_filtered}\n${ovh_tlds}" | sort -u) | wc -l | tr -d ' ')
          echo -e " ${CYAN}INWX adds${NC} $inwx_unique TLDs not on Porkbun/OVH"
        fi
        echo ""
      fi

      echo -e " ${BOLD}Merged purchasable:${NC} $(_count_lines "$all_tlds") TLDs"
      echo -e " ${BOLD}Merged ccTLDs:${NC} $(_count_lines "$all_cctlds")"
      echo ""
      echo -e " Cached data in: ${CYAN}$CACHE_DIR${NC}"
      echo -e " Use ${BOLD}--diff${NC} to compare against Lists.toml"
      echo -e " Use ${BOLD}--toml${NC} to output TOML-ready arrays"
      echo -e " Use ${BOLD}--template${NC} to generate template Lists.toml"
      echo -e " Use ${BOLD}--all-sources${NC} to also fetch tld-list.com"
      echo -e " Use ${BOLD}--raw${NC} for raw TLD list (one per line)"
      ;;
  esac
}
+
+main "$@"