python_apps/crwlers/conteenew/bid_gangnam.py
2023-11-03 14:49:12 +09:00

61 lines
2.4 KiB
Python

import requests, sys, bs4, re
# Scraper for the Gangnam-gu Office contract list.
#
# Usage: python bid_gangnam.py <search term>
#
# Fetches the first result page for the given term and prints a Python list
# of dicts (one per table row) to stdout.

BASE_URL = "https://www.gangnam.go.kr"
LIST_URL = BASE_URL + "/contract/list.do"

# Seconds to wait for the server before giving up; without a timeout
# requests can block indefinitely on an unresponsive host.
REQUEST_TIMEOUT = 30

HEADERS = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    # "br" is deliberately not advertised: requests/urllib3 can only decode
    # brotli when an optional package is installed, and an undecoded body
    # would reach the HTML parser as garbage.
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "ko-KR,ko;q=0.8,en-US;q=0.5,en;q=0.3",
    "Cache-Control": "max-age=0",
    "Connection": "keep-alive",
    # NOTE(review): hard-coded session cookie, presumably captured from a
    # browser session and likely stale - confirm whether the server needs it.
    "Cookie": "JSESSIONID=baa_dI1wLJEFZeosbLHVxg-w1SGjCSDe5m77JFC1Oz5l8ZsbtJq0c4hVTuY_; WMONID=REfZ-vn3O8b; _ga=GA1.3.110539796.1632479167; siteContractModal=1; JSESSIONID=baa_dI1wLJEFZeosbLHVxg-w1SGjCSDe5m77JFC1Oz5l8ZsbtJq0c4hVTuY_; _gid=GA1.3.817966770.1633006424",
    "Host": "www.gangnam.go.kr",
    "Referer": "https://www.gangnam.go.kr/contract/list.do?pgno=4&mid=ID05_041501&saup_field1_gb=3&saup_dept_cd=&keyfield=SAUP_NM&keyword=&srch_start_amt=&srch_end_amt=",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "same-origin",
    "Sec-Fetch-User": "?1",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:92.0) Gecko/20100101 Firefox/92.0",
}

# Characters removed from the "name" column (commas and single quotes).
_NAME_DROP = str.maketrans("", "", ",'")


def query_params(term):
    """Return the query parameters for a search on *term*.

    The parameters are passed to requests via ``params=`` so that the term is
    percent-encoded instead of being spliced raw into the URL.
    """
    return {
        "pgno": "1",
        "mid": "ID05_041501",
        "saup_field1_gb": "3",
        "saup_dept_cd": "",
        "keyfield": "SAUP_NM",
        "keyword": term,
        "srch_start_amt": "",
        "srch_end_amt": "",
    }


def _split_period(text):
    """Split an "A ~ B" period string into a ``(start, end)`` pair.

    Both parts are stripped. When there is no "~" separator the whole text is
    returned as the start and the end is an empty string.
    """
    parts = [part.strip() for part in text.split("~")]
    start = parts[0]
    end = parts[1] if len(parts) > 1 else ""
    return start, end


def build_record(term, texts, href=None):
    """Build one result dict from the raw cell texts of a table row.

    Args:
        term: The search term the row was found with.
        texts: Raw text of each ``td`` in the row, in column order. Columns
            beyond the sixth are ignored; a short row yields a partial dict.
        href: ``href`` of the first link in the third column, or ``None``
            when that cell has no link.

    Returns:
        A dict whose keys are inserted in the order src, term, type,
        commitDate, name, detailurl, budget, dept, applyDateFrom,
        applyDateTo - limited to the columns actually present.
    """
    cells = [text.strip() for text in texts]
    count = len(cells)
    record = {}
    if count > 0:
        record["src"] = "강남구청"
        record["term"] = term
        record["type"] = cells[0]
    if count > 1:
        record["commitDate"] = cells[1]
    if count > 2:
        record["name"] = cells[2].translate(_NAME_DROP)
        # A title cell without a link yields an empty URL instead of a crash.
        record["detailurl"] = BASE_URL + href if href else ""
    if count > 3:
        record["budget"] = cells[3].replace(",", "")
    if count > 4:
        record["dept"] = cells[4]
    if count > 5:
        record["applyDateFrom"], record["applyDateTo"] = _split_period(cells[5])
    return record


def fetch_records(term):
    """Fetch the first result page for *term* and return its rows as dicts.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    resp = requests.get(
        LIST_URL,
        params=query_params(term),
        headers=HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    resp.raise_for_status()
    soup = bs4.BeautifulSoup(resp.text, "html.parser")
    rows = soup.select("table.table tr")

    records = []
    # The first row is skipped, as in the original index loop.
    for row in rows[1:]:
        cells = row.select("td")
        anchors = cells[2].select("a") if len(cells) > 2 else []
        href = anchors[0].get("href") if anchors else None
        records.append(build_record(term, [cell.getText() for cell in cells], href))
    return records


def main(argv):
    """Run the scraper for the term in ``argv[1]`` and print the result list."""
    if len(argv) < 2:
        # Exits with status 1 and a readable message rather than an IndexError.
        sys.exit("usage: bid_gangnam.py <search term>")
    print(fetch_records(argv[1]))


if __name__ == "__main__":
    main(sys.argv)