63 lines
2.0 KiB
Python
63 lines
2.0 KiB
Python
import requests, bs4, urllib, sys, re, math
|
|
|
|
# Command-line scraper: looks up a search term on navimro.com and prints one
# tab-separated line per product found on the first result page:
#   term, result count, rank, promo label, series count, brand, name,
#   price, price incl. 10% tax, product link
# When the result counter is absent from the page it prints "<term>\tNot Found".

SITE_ROOT = 'https://www.navimro.com'
SEARCH_URL = SITE_ROOT + '/s/'

# Upper bound (seconds) for the HTTP request; without one, requests can block
# indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 30

REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
    # 'br' is deliberately not advertised: Brotli responses are only decoded
    # when an optional brotli package is installed, otherwise resp.text would
    # be undecoded bytes.
    'Accept-Encoding': 'gzip, deflate',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Upgrade-Insecure-Requests': '1',
    'Host': 'www.navimro.com',
}


def strip_commas(text):
    """Return *text* with every comma (thousands separator) removed."""
    return text.replace(',', '')


def price_with_tax(price):
    """Return the integer *price* increased by 10%, rounded up.

    Exact integer arithmetic is used on purpose: the float form
    ``math.ceil(price * 1.1)`` is off by one for some prices, e.g.
    ``100 * 1.1`` evaluates to ``110.00000000000001`` and rounds up to 111.
    """
    # Ceiling division of price * 11 by 10 via negated floor division.
    return -(-price * 11 // 10)


def first_text(node, selector):
    """Return the stripped text of the first match of *selector* under *node*.

    Returns an empty string when nothing matches, so one incomplete list
    item does not abort the whole run.
    """
    found = node.select_one(selector)
    if found is None:
        return ''
    return found.getText().strip()


def product_link(node, selector):
    """Return the absolute URL of the first anchor matching *selector*.

    Returns an empty string when the anchor or its ``href`` is missing.
    """
    anchor = node.select_one(selector)
    href = anchor.get('href') if anchor is not None else None
    if href is None:
        return ''
    return SITE_ROOT + href


def product_columns(item):
    """Return the per-product output columns for one result ``<li>``.

    Columns, in order: promo label, series count, brand, name, price,
    price including tax, link. All values are strings.
    """
    promo = first_text(item, 'div a span.icon i.icon-txt')
    series = first_text(item, 'div span.count')
    name = first_text(item, 'div a h1.description')
    brand = first_text(item, 'div a span.name')

    listed_price = item.select_one('div a span.price strong.fl')
    if listed_price is None:
        # No 'strong.fl' price element: print the plain price text verbatim,
        # leave the tax column empty and use the first anchor as the link.
        price_text = first_text(item, 'div a span.price strong')
        price_tax = ''
        link = product_link(item, 'div a:nth-of-type(1)')
    else:
        price_text = strip_commas(listed_price.getText().strip())
        try:
            price_tax = str(price_with_tax(int(price_text)))
        except ValueError:
            # Price text is not a plain integer; keep the row, skip the tax.
            price_tax = ''
        link = product_link(item, 'div a:nth-of-type(3)')

    return [promo, series, brand, name, price_text, price_tax, link]


def main():
    """Search for the term given as the first CLI argument and print the rows.

    Raises:
        SystemExit: when no search term was supplied.
        requests.HTTPError: when the server answers with an error status.
    """
    if len(sys.argv) < 2:
        sys.exit('usage: ' + sys.argv[0] + ' <search term>')
    term = str(sys.argv[1])

    resp = requests.get(
        SEARCH_URL,
        params={'q': term},
        headers=REQUEST_HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    resp.raise_for_status()
    resp.encoding = 'UTF-8'

    soup = bs4.BeautifulSoup(resp.text, 'html.parser')

    count_node = soup.select_one('div.sub-right-wrap h4.clearFix span.fs17 span.fc-main')
    if count_node is None:
        print(term + "\t" + 'Not Found')
        return

    total = strip_commas(count_node.getText().strip())
    items = soup.select('div.product-list.clearFix div.product-list-area ul.clearFix li')
    for rank, item in enumerate(items, start=1):
        print("\t".join([term, total, str(rank)] + product_columns(item)))


if __name__ == '__main__':
    main()
|
|
|
|
#
|