Skip to content
ngadmini edited this page Oct 11, 2022 · 10 revisions

Use this Bash script to obtain the invalid TLDs found in: https://raw.githubusercontent.com/alsyundawy/dnstrust-apjii/main/raw/db_trustpositif.txt

bash_script

Run this script to check for changes in db_trustpositif.txt, then update grab_regex to match those changes.

#!/usr/bin/env bash
# TAGS
#   tlds_validation.sh
#   v1-beta
# AUTHOR
#   ngadimin@warnet-ersa.net
# TL;DR
#   Lists the TLDs used in db_trustpositif.txt that do not appear in the IANA
#   TLD list, and builds a sed delete-expression (regex.txt) from them.
#   Not checked: entries containing non-ASCII characters and entries that end
#   in a port or a numeric label, e.g. some.domains:900

# -e: stop at the first failing command; -u: reject unset variables;
# pipefail: a pipeline fails when any of its stages fails (e.g. a download).
set -euo pipefail
export LC_ALL=C
PATH=/usr/local/bin:/usr/bin:/bin:${PATH}
# Absolute path of the directory that contains this script.
_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# sed program for the IANA list: delete lines containing '#', lowercase the
# rest (\L is a GNU sed extension).
_reg1="/#/d;s/[A-Z]/\L&/g"
# sed program for the trust list: lowercase, then delete lines that end in
# ':' or '.' followed by two or more digits.
_reg2="s/[A-Z]/\L&/g;/\(:\|\.\)[0-9]\{2,\}$/d"
# Work directory created inside the script's directory. The deprecated -t
# flag is avoided: with it, a set $TMPDIR takes precedence over -p.
_tlds=$(mktemp -d -p "${_DIR}" tld.XXXX)
_url1="http://data.iana.org/TLD/tlds-alpha-by-domain.txt"
_url2="https://raw.githubusercontent.com/alsyundawy/dnstrust-apjii/main/raw/db_trustpositif.txt"

cd "${_tlds}"
printf "\n[INFO] starting TLDs validation, target: %s\n" "${_url2##*/}"
printf "[INFO] check availability remote files\n"
# Probe both sources and stop unless each one answers with HTTP 200.
for _X in "${_url1}" "${_url2}"; do
   # curl reports 000 as http_code when no HTTP response was received, so the
   # status must be exactly 200. '|| true' keeps 'set -e' from ending the
   # script on a curl error before the message below is printed.
   _code=$(curl -s -o /dev/null -w "%{http_code}" "${_X}" || true)
   if [[ "${_code}" != "200" ]]; then
      printf "[INFO] remote files: %s NOT available\n" "${_X##*/}"
      exit 1
   fi
done
# A failed download must stop the script instead of feeding an empty or
# partial list into the comparison below.
set -o pipefail

# IANA TLD list, filtered through _reg1. --fail makes curl exit non-zero on an
# HTTP error instead of passing the error page on as data.
curl -fsS "${_url1}" | sed "${_reg1}" >> tlds-alpha-by-domain.txt
# Trust list: filter through _reg2, keep the last dot-separated label of each
# line, de-duplicate, and drop lines containing non-ASCII bytes.
curl -fsS "${_url2}" \
   | sed "${_reg2}" \
   | awk -F. '{print $NF}' \
   | sort -u \
   | grep -Pv "[^\x00-\x7F]" >> tlds_trust.txt
# Labels present in both lists.
sort {tlds_trust,tlds-alpha-by-domain}.txt | uniq -d > valid_tlds.txt
# Labels of the trust list that are not in valid_tlds.txt.
sort {valid_tlds,tlds_trust}.txt | uniq -u > invalid_tlds.txt
printf "[INFO] there are %'d invalid TLDs\n" "$(wc -l < invalid_tlds.txt)"
printf "[INFO] build regex\nyou can replace grab_regex line 4 with this regex.txt\n"
# Join all labels with '\|' and wrap them as /\.\(a\|b\|...\)$/d.
# The two sed stages stay separate on purpose: for single-line input GNU sed
# ends the first program at 'N', so the wrapping has to happen in a second pass.
sed ':a;N;$!ba;s/\n/\\\|/g' invalid_tlds.txt | sed 's/^/\/\\.\\\(/;s/$/\\\)\$\/d/' > regex.txt
cd "${_DIR}"
printf "bye!\n"
exit 0