1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
import argparse

import pandas as pd
import requests
from lxml import etree
# Desktop Chrome User-Agent header sent with every outgoing request.
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"
}


def check(url, timeout=10):
    """Return True when ``https://<url>`` answers with HTTP status 200.

    Args:
        url: Address without a scheme; ``https://`` is prepended before
            the request is made.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout a single unresponsive host would block the caller
            indefinitely.

    Returns:
        True if the response status code is 200. False for any other
        status code, or when the request raises
        ``requests.RequestException`` (connection error, timeout, ...).
    """
    full_url = f'https://{url}'
    try:
        res = requests.get(full_url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # An unreachable host is simply "not alive"; it is not an error here.
        return False
    return res.status_code == 200
def run(domain):
    """Look up subdomains of *domain*, keep the reachable ones, save to Excel.

    The aizhan.com Baidu-rank page for *domain* is fetched and the ``title``
    attribute of every link inside the ``dl-sub`` list is read (these are
    treated as subdomain host names). Each one is probed with ``check`` and
    the ones that respond are printed and written to ``subdomains.xlsx``
    in a single column named ``子域名`` ("subdomain").

    Args:
        domain: Domain name to look up, inserted into the page URL.

    Returns:
        None. Any exception is caught and printed so the command-line tool
        reports the problem instead of exiting with a traceback.
    """
    url = f'https://baidurank.aizhan.com/baidu/{domain}/'
    try:
        # A timeout keeps the tool from hanging if the site never responds.
        page = requests.get(url, headers=headers, timeout=10).text
        tree = etree.HTML(page)
        # Guard against a parse result of None (e.g. an empty document).
        groups = [] if tree is None else tree.xpath(
            '//dl[@id="dl-sub"]//dd/ul[@class="clearfix"]'
        )

        reachable = []
        for group in groups:
            # Probe every listed entry individually; a group may hold
            # several links (or none), not just one.
            for title in group.xpath('./li/a/@title'):
                if title and check(title):
                    reachable.append(title)

        print(reachable)
        # A flat list of strings yields one row per subdomain; an empty
        # list still produces a valid sheet containing only the header.
        df = pd.DataFrame(reachable, columns=['子域名'])
        df.to_excel('subdomains.xlsx', index=False)
    except Exception as e:
        # Top-level best-effort boundary of the CLI: report and carry on.
        print(e)
def main():
    """Parse the command line and run the subdomain check.

    Expects a single positional argument, ``domain``, which is passed
    unchanged to ``run``.
    """
    parser = argparse.ArgumentParser(description='检查给定域名的子域名。')
    parser.add_argument(
        'domain',
        type=str,
        # Spelling fixed from "excle"; argparse's default formatter
        # re-wraps help text, so the spacing inside the literal is cosmetic.
        help=''' python one.py 要检查的域名,\n 输出至excel文件中 ''',
    )
    args = parser.parse_args()
    run(args.domain)
# Start the command-line interface only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|