35 lines
1.2 KiB
Python
Executable File
35 lines
1.2 KiB
Python
Executable File
#!/usr/bin/env python
|
|
import requests, bs4, urllib, sys, re, math
|
|
|
|
def getHTML(url, timeout=30):
    """Fetch ``url`` and return the response parsed as a BeautifulSoup tree.

    The request is sent with browser-like headers. The response body is
    always decoded as UTF-8, regardless of what the server declares, and
    parsed with the stdlib ``html.parser`` backend.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits forever, so one stalled connection
            would hang the whole crawl.

    Returns:
        A ``bs4.BeautifulSoup`` object for the downloaded page.

    Raises:
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx
            status (raised by ``raise_for_status``).
        requests.exceptions.RequestException: Any other transport failure,
            including the timeout expiring.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
        # NOTE(review): 'br' is advertised here; requests can only decode
        # brotli bodies when a brotli package is installed - confirm.
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Upgrade-Insecure-Requests': '1',
        # The Host header is pinned, so this helper is only suitable for
        # URLs served by www.navimro.com.
        'Host': 'www.navimro.com',
    }

    resp = requests.get(url, headers=headers, timeout=timeout)
    resp.raise_for_status()

    # Force UTF-8 instead of trusting the charset requests would infer.
    resp.encoding = 'UTF-8'
    return bs4.BeautifulSoup(resp.text, 'html.parser')
|
|
|
|
# urljoin lives in `urllib.parse` on Python 3 and in `urlparse` on Python 2;
# the shebang does not say which interpreter runs this script.
try:
    from urllib.parse import urljoin
except ImportError:
    from urlparse import urljoin

# Site root; also the base against which every scraped href is resolved.
mainUrl = "https://www.navimro.com"

# Landing page. Each <td> of table.category-area is treated as one group:
# its first <dt> supplies the group name and its <dd> entries the children.
mainBs = getHTML(mainUrl)
tb = mainBs.select('table.category-area td')

for cell in tb:
    titles = cell.select('dt')
    if not titles:
        # No <dt> heading in this cell, so there is no name to print for it.
        continue
    dt = titles[0].getText().strip()

    for entry in cell.select('dd'):
        anchors = entry.select('a')
        href = anchors[0].get('href') if anchors else None
        if not href:
            # Entry without a usable link: there is no child page to fetch.
            continue
        dd2Text = entry.getText().strip()
        # urljoin copes with absolute hrefs and hrefs lacking a leading
        # slash, both of which plain string concatenation would mangle.
        dd2link = urljoin(mainUrl, href)

        # One extra request per second-level entry to list its children.
        depth2bs = getHTML(dd2link)
        for anchor in depth2bs.select('ul.lv3-active a'):
            d3href = anchor.get('href')
            if not d3href:
                continue
            d3Text = anchor.getText().strip()
            d3link = urljoin(mainUrl, d3href)
            # Output format: "level1>level2>level3<TAB>url", one per line.
            print(">".join([dt, dd2Text, d3Text]) + "\t" + d3link)
|