domainshop/app.py

76 lines
2.1 KiB
Python
Raw Permalink Normal View History

2024-07-02 10:38:55 +02:00
#!/usr/bin/python3
import sys
2024-07-08 10:14:31 +02:00
# The next line is for my setup. This might not be correct for every setup!
2024-07-02 10:38:55 +02:00
sys.path.append("/opt/homebrew/lib/python3.11/site-packages")
2024-07-08 10:14:31 +02:00
import cmd
2024-07-02 10:38:55 +02:00
import requests
from bs4 import BeautifulSoup
def read_cli_args(argv):
    """Extract the optional filter settings from a command-line argument list.

    Args:
        argv: Argument list shaped like ``sys.argv`` (program name first).

    Returns:
        A ``(max_len, top_domain)`` tuple. ``max_len`` is an ``int``, or
        ``None`` when the first argument is missing, equal to ``0`` or not
        an integer. ``top_domain`` is the second argument as a string, or
        ``None`` when it is missing.
    """
    length_limit = None
    if len(argv) > 1:
        try:
            length_limit = int(argv[1])
        except ValueError:
            # A non-numeric length would otherwise abort the script with a
            # traceback; warn and carry on without a length limit instead.
            print(
                f"Ignoring invalid max length {argv[1]!r}: expected an integer.",
                file=sys.stderr,
            )
        else:
            if length_limit == 0:
                # 0 is the explicit "no length limit" value.
                length_limit = None

    tld = str(argv[2]) if len(argv) > 2 else None
    return length_limit, tld


# Module-level settings read by cleanlist() and parse().
max_len, top_domain = read_cli_args(sys.argv)
2024-07-02 10:38:55 +02:00
# Sentinel default meaning "use the module-level command-line setting".
_FROM_CLI = object()


def cleanlist(my_list, max_length=_FROM_CLI, tld=_FROM_CLI):
    """Return the domains that pass the length and top-level-domain filters.

    Both filters are optional and are applied together. When a filter is
    active and nothing matches, an empty list is returned so the caller can
    report "no results" instead of being handed the unfiltered list.

    Args:
        my_list: Iterable of domain-name strings, e.g. ``"example.no"``.
        max_length: Length limit; a domain is kept when its full length is
            at most ``max_length + 3``. ``None`` disables the filter. When
            omitted, the module-level ``max_len`` is used.
        tld: Top-level domain to keep, with or without a leading dot
            (``"no"`` or ``".no"``). ``None`` or an empty value disables the
            filter. When omitted, the module-level ``top_domain`` is used.

    Returns:
        A new list with the matching domains in their original order.
    """
    if max_length is _FROM_CLI:
        max_length = max_len
    if tld is _FROM_CLI:
        tld = top_domain

    kept = list(my_list)

    if max_length is not None:
        # NOTE(review): the "+ 3" looks like an allowance for a three
        # character suffix such as ".no" -- confirm before changing it.
        kept = [name for name in kept if len(name) <= max_length + 3]

    wanted = tld.lstrip(".") if tld else ""
    if wanted:
        # Match on the dotted suffix so TLDs of any length work and names
        # that merely end in the same letters are not picked up.
        suffix = "." + wanted
        kept = [name for name in kept if name.endswith(suffix)]

    return kept
2024-07-02 10:38:55 +02:00
def fetch():
    """Download the expired-domains page and return its raw body.

    Returns:
        The response body as bytes.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    URL = "https://domene.shop/expired"
    # requests applies no timeout by default, so an unresponsive server
    # would hang the script indefinitely.
    page = requests.get(URL, timeout=10)
    # Fail loudly on 4xx/5xx instead of handing an error page to the parser.
    page.raise_for_status()
    return page.content
def parse():
    """Fetch the expired-domains page, filter the names and print them.

    Domain names are taken from every link whose ``href`` contains
    ``/?domain=``. The names are filtered with ``cleanlist`` and printed in
    columns: ordered by length (alphabetical within equal lengths) when a
    maximum length was requested, otherwise alphabetically. A short message
    is printed when nothing is left to show.
    """
    soup = BeautifulSoup(fetch(), "html.parser")
    # href=True skips anchors that have no href attribute, which would
    # otherwise raise KeyError on the lookup.
    hrefs = [anchor["href"] for anchor in soup.find_all("a", href=True)]
    domains = [
        # Drop the query prefix, then the "xn--" punycode marker.
        href.replace("/?domain=", "").replace("xn--", "")
        for href in hrefs
        if "/?domain=" in href
    ]

    the_list = sorted(cleanlist(domains))
    if not the_list:
        print("No expired domains with the length criteria you wanted!")
        return

    out = cmd.Cmd()
    if max_len is not None:
        # sorted() is stable, so names of equal length stay alphabetical.
        out.columnize(sorted(the_list, key=len), displaywidth=80)
    else:
        out.columnize(the_list, displaywidth=140)
# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    parse()