61 lines
1.9 KiB
Python
61 lines
1.9 KiB
Python
import requests, bs4, urllib, sys, re, math
|
|
|
|
# Scrape one Grainger product page and print a single tab-separated row:
#   url, product name, detail text, document links, compliance links,
#   zoom image URL, then one column per thumbnail (each followed by a tab).
#
# Usage: python <this script> PRODUCT_URL

# Browser-like headers sent with the page request.
# NOTE(review): 'br' is advertised in Accept-Encoding; requests can only decode
# Brotli responses when a brotli package is installed - confirm in deployment.
# NOTE(review): 'Host' is fixed to www.grainger.com, so URLs on any other host
# would be sent with a mismatching Host header.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Upgrade-Insecure-Requests': '1',
    'save-data': 'on',
    'Host': 'www.grainger.com'
}

# Seconds to wait for the server; without a timeout requests may block forever.
REQUEST_TIMEOUT = 30


def zoom_url(src):
    """Return the absolute zoom-size image URL for a protocol-relative *src*.

    Every literal ``$mdmain$`` preset marker is swapped for ``$zmmain$`` and
    the ``https:`` scheme is prepended.
    """
    # Plain substring replacement: no regex (and no escaping of '$') needed.
    return 'https:' + src.replace('$mdmain$', '$zmmain$')


def join_links(hrefs, prefix=''):
    """Join *hrefs* with commas, prepending *prefix* to each one.

    Entries that are ``None`` or empty (anchors without an ``href``) are
    skipped instead of raising ``TypeError`` during concatenation.
    """
    return ','.join(prefix + href for href in hrefs if href)


def thumbnail_source(item):
    """Return the media URL for one thumbnail ``<li>`` element.

    ``prodImage`` items yield the ``data-image`` of their button image with
    ``https:`` prepended, ``video`` items yield ``data-video-url`` unchanged,
    and any other (or missing) ``data-type`` yields an empty string.
    """
    kind = item.get('data-type')
    if kind == 'prodImage':
        return 'https:' + item.select('button img')[0]['data-image']
    if kind == 'video':
        return item['data-video-url']
    return ''


def format_row(url, name, detail, docs, compliance, zoom, thumbnails):
    """Build the tab-separated output row.

    Each thumbnail value is followed by its own tab, so the row ends with a
    trailing tab (this matches the script's established output format).
    NOTE(review): tabs or newlines inside the scraped text are not escaped.
    """
    fixed_columns = [url, name, detail, docs, compliance, zoom]
    thumb_columns = ''.join(str(src) + '\t' for src in thumbnails)
    return '\t'.join(fixed_columns) + '\t' + thumb_columns


def first_match(soup, selector):
    """Return the first element matching *selector*.

    Raises:
        IndexError: if nothing matches; the message names the selector.
    """
    found = soup.select(selector)
    if not found:
        raise IndexError('no element matches selector: ' + selector)
    return found[0]


def scrape(url):
    """Download the product page at *url* and return its output row.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        IndexError: if the product name or main image element is missing.
    """
    resp = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()
    resp.encoding = 'UTF-8'

    soup = bs4.BeautifulSoup(resp.text, 'html.parser')

    name = first_match(soup, 'div#productPage div h1.productName').getText().strip()
    zoom = zoom_url(first_match(soup, 'img#mainImageZoom')['data-blzsrc'])

    # The detail section is optional; fall back to an empty column.
    detail_nodes = soup.select('div#additionalInfoSection div#copyTextSection')
    detail = detail_nodes[0].getText().strip() if detail_nodes else ''

    doc_anchors = soup.select('div#documentationSection div.documentionInfo ul li a')
    docs = join_links((a.get('href') for a in doc_anchors), 'https:')

    compliance_anchors = soup.select('div#complianceSection a')
    compliance = join_links(a.get('href') for a in compliance_anchors)

    thumbnails = [thumbnail_source(li)
                  for li in soup.select('div#productThumbnails ul li')]

    return format_row(url, name, detail, docs, compliance, zoom, thumbnails)


def main(argv=None):
    """Command-line entry point; return the process exit status.

    Prints the row for the URL given as the first argument, or a usage
    message on stderr (status 2) when the argument is missing.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.stderr.write('usage: {} PRODUCT_URL\n'.format(argv[0] if argv else 'script'))
        return 2
    print(scrape(str(argv[1])))
    return 0


if __name__ == '__main__':
    sys.exit(main())