#!/usr/bin/env python
"""Fetch a product page and pull selected fields out of its HTML.

The request headers built in ``getHTML`` target ``www.navimro.com``.

Usage: pass the page URL as the first command-line argument.
"""
import logging
import math
import re
import sys
import urllib

import bs4
import requests


def getHTML(url, timeout=30):
    """Download *url* and return it parsed as a BeautifulSoup document.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        A ``bs4.BeautifulSoup`` object built with the ``html.parser`` backend.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``).
        requests.RequestException: On connection problems or timeout.
    """
    # NOTE(review): 'br' is advertised in Accept-Encoding; presumably this
    # needs a Brotli decoder installed for the body to be readable - confirm.
    # NOTE(review): the Host header is pinned, so it is sent unchanged even
    # when *url* points at a different host - confirm that is intended.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Upgrade-Insecure-Requests': '1',
        'Host': 'www.navimro.com',
    }
    resp = requests.get(url, headers=headers, timeout=timeout)
    resp.raise_for_status()
    # The body is always decoded as UTF-8, regardless of the declared charset.
    resp.encoding = 'UTF-8'
    return bs4.BeautifulSoup(resp.text, 'html.parser')


def _first_match(soup, selector):
    """Return the first element matching *selector* in *soup*.

    Raises IndexError (the same type a bare ``[0]`` would raise) with the
    selector in the message, so a changed page layout is easy to diagnose.
    """
    matches = soup.select(selector)
    if not matches:
        raise IndexError('no element matches selector %r' % selector)
    return matches[0]


# Fail with a usage hint instead of a bare IndexError when no URL is given.
if len(sys.argv) < 2:
    sys.exit('usage: %s URL' % sys.argv[0])

url = sys.argv[1]
bs = getHTML(url)

# Single-valued fields: the first matching element is used.
title = _first_match(bs, 'div.spec div.product-detail-area').getText().strip()
promo = bs.select('div.spec table td i.icon-txt')
brand = _first_match(bs, 'div.spec table td div.brand-product-new span').getText().strip()
desc = str(_first_match(bs, 'div.spec table tr.description td.desc_info'))

# Option table: the table elements, their header cells and their item rows.
series = bs.select('div.option-table table.itemTable')
seriesTitles = bs.select('div.option-table table.itemTable th')
seriesContent = bs.select('div.option-table table.itemTable tbody tr.itemList')

# Map column index -> header text; the last three header cells are left out.
seriesTitleArray = {
    index: cell.getText().strip()
    for index, cell in enumerate(seriesTitles[:-3])
}

# Comma-separated text of every promo icon (empty string when there is none).
promoText = ",".join(icon.getText().strip() for icon in promo)

# NOTE(review): the statement below is cut off in the reviewed excerpt. It is
# kept verbatim and must be rejoined with its continuation before it can run:
# desc = re.sub("\