python_apps/crwlers/graingerRelated.py
2023-11-03 14:49:12 +09:00

46 lines
2.2 KiB
Python

import requests, bs4, urllib, sys, re, math
# Request headers sent with the page fetch; they present the crawler as a
# desktop Chrome session talking to www.grainger.com.
# NOTE(review): 'br' is advertised in Accept-Encoding -- presumably a Brotli
# decoder is installed alongside requests; confirm, otherwise drop 'br'.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Upgrade-Insecure-Requests': '1',
    'save-data': 'on',
    'Host': 'www.grainger.com'
}

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30


def first_text(node, selector):
    """Return the stripped text of the first element matching *selector*.

    Args:
        node: A BeautifulSoup document or tag to search under.
        selector: CSS selector passed to ``node.select``.

    Returns:
        The stripped text of the first match, or ``""`` when nothing matches,
        so one incomplete product tile does not abort the whole run.
    """
    matches = node.select(selector)
    return matches[0].getText().strip() if matches else ""


def first_href(node, selector):
    """Return the ``href`` of the first element matching *selector*.

    Args:
        node: A BeautifulSoup document or tag to search under.
        selector: CSS selector passed to ``node.select``.

    Returns:
        The attribute value, or ``""`` when there is no match or the matched
        element carries no ``href`` attribute.
    """
    matches = node.select(selector)
    if not matches:
        return ""
    return matches[0].get('href') or ""


def build_row(page_url, page_name, section, item_no, name, brand, price, product_url):
    """Join one output record into a single tab-separated line.

    Column order: crawled page URL, crawled product name, section label,
    item number, listed product name, brand, web price, listed product link.
    """
    return "\t".join(
        (page_url, page_name, section, item_no, name, brand, price, product_url)
    )


def carousel_rows(soup, carousel_id, section, page_url, page_name):
    """Yield one tab-separated row per product tile in a carousel.

    Args:
        soup: Parsed page (BeautifulSoup document).
        carousel_id: ``id`` of the carousel ``div`` to read.
        section: Label written into the third column of every row.
        page_url: URL of the page being crawled (first column).
        page_name: Product name of the page being crawled (second column).

    Yields:
        Rows formatted by :func:`build_row`. Nothing is yielded when the
        carousel is absent or empty.
    """
    tiles = soup.select(
        'div#' + carousel_id + ' ul.carouselProductLists li.product'
    )
    for tile in tiles:
        yield build_row(
            page_url,
            page_name,
            section,
            first_text(tile, 'div.productSKU a'),
            first_text(tile, 'div.productName a'),
            first_text(tile, 'div.productBrand'),
            first_text(tile, 'div.productPrice span.gcprice-value'),
            # Kept in its own value: reusing the page-URL variable here would
            # overwrite the first column with the tile's link.
            first_href(tile, 'div.productImage a'),
        )


def main(argv=None):
    """Fetch a product page and print its alternate and related products.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``. The first argument is the product page URL.

    Raises:
        SystemExit: When no URL is given or the page has no product name
            heading.
        requests.HTTPError: When the server answers with an error status.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.exit("usage: graingerRelated.py <product-page-url>")
    page_url = str(args[0])

    resp = requests.get(page_url, headers=headers, timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()
    resp.encoding = 'UTF-8'
    soup = bs4.BeautifulSoup(resp.text, 'html.parser')

    # A page without the product heading is not a product page (or the
    # request was blocked); stop with a non-zero exit status.
    headings = soup.select('div#productPage div h1.productName')
    if not headings:
        sys.exit("error: no product name found on " + page_url)
    page_name = headings[0].getText().strip()

    # Only data rows go to stdout so the output stays valid TSV.
    sections = (
        ('alternateProductsCarousel', "Alternate Products"),
        ('relatedProductsCarousel', "Related Products"),
    )
    for carousel_id, section in sections:
        for row in carousel_rows(soup, carousel_id, section, page_url, page_name):
            print(row)


if __name__ == "__main__":
    main()