"""Scrape one Grainger product page and print its details as a tab-separated row.

Usage: python SCRIPT PRODUCT_URL

Output columns, in order: page URL, product name, description text,
documentation links (comma-separated), compliance links (comma-separated),
zoom image URL, then one entry per thumbnail (image or video URL), each
thumbnail entry followed by a tab.

NOTE(review): text fields are emitted verbatim; a description that contains
tabs or newlines will break the row layout -- confirm whether downstream
consumers need those characters escaped.
"""
import math
import re
import sys
import urllib

import bs4
import requests

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30

# Browser-like request headers.
# - 'br' is deliberately not advertised: requests only decodes Brotli when an
#   optional brotli package is installed, otherwise the body comes back
#   undecoded and parsing silently finds nothing.
# - No explicit 'Host' header: requests derives it from the URL, which stays
#   correct for other hosts and across redirects.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
    'Accept-Encoding': 'gzip, deflate',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Upgrade-Insecure-Requests': '1',
    'save-data': 'on',
}


def _first(nodes):
    """Return the first element of *nodes*, or None when it is empty."""
    return nodes[0] if nodes else None


def _joined_hrefs(anchors, prefix=''):
    """Return the anchors' href values, each with *prefix*, joined by commas.

    Anchors that carry no href attribute are skipped.
    """
    hrefs = (anchor.get('href') for anchor in anchors)
    return ','.join(prefix + href for href in hrefs if href is not None)


def _thumbnail_url(item):
    """Return the media URL for one thumbnail <li>, or '' when there is none."""
    kind = item.get('data-type')
    if kind == 'prodImage':
        img = _first(item.select('button img'))
        src = img.get('data-image') if img is not None else None
        # The page stores scheme-relative URLs, so the scheme is prepended.
        return 'https:' + src if src is not None else ''
    if kind == 'video':
        return str(item.get('data-video-url', ''))
    return ''


def main(argv=None):
    """Fetch the product page named on the command line and print one TSV row.

    argv: argument list without the program name; defaults to sys.argv[1:].
    Returns a process exit status: 0 on success, 1 when the page has no
    product name, 2 when no URL was supplied.
    Raises requests.HTTPError (via raise_for_status) on an HTTP error status.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.stderr.write('usage: %s PRODUCT_URL\n' % sys.argv[0])
        return 2
    url = str(args[0])

    resp = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()
    resp.encoding = 'UTF-8'
    soup = bs4.BeautifulSoup(resp.text, 'html.parser')

    name_node = _first(soup.select('div#productPage div h1.productName'))
    if name_node is None:
        sys.stderr.write('error: no product name found at %s\n' % url)
        return 1
    product_name = name_node.getText().strip()

    zoom_node = _first(soup.select('img#mainImageZoom'))
    zoom_src = zoom_node.get('data-blzsrc') if zoom_node is not None else None
    if zoom_src is None:
        zoom_url = ''
    else:
        # Swap the medium-size preset for the zoom preset; a plain literal
        # replacement, so no regular expression is needed.
        zoom_url = 'https:' + zoom_src.replace('$mdmain$', '$zmmain$')

    detail_node = _first(
        soup.select('div#additionalInfoSection div#copyTextSection'))
    detail_text = detail_node.getText().strip() if detail_node is not None else ''

    doc_links = _joined_hrefs(
        soup.select('div#documentationSection div.documentionInfo ul li a'),
        prefix='https:')
    compliance_links = _joined_hrefs(soup.select('div#complianceSection a'))

    # Every thumbnail entry is followed by its own tab (including the last).
    thumbnails = ''.join(
        _thumbnail_url(item) + '\t'
        for item in soup.select('div#productThumbnails ul li'))

    print('\t'.join([
        url,
        product_name,
        detail_text,
        doc_links,
        compliance_links,
        zoom_url,
        thumbnails,
    ]))
    return 0


if __name__ == '__main__':
    sys.exit(main())