61 lines
2.4 KiB
Python
61 lines
2.4 KiB
Python
import requests, sys, bs4, re
|
|
|
|
# Scrape the Gangnam-gu office contract list for a search term given on the
# command line and print the matching rows as a list of dicts.

SOURCE_NAME = "강남구청"
BASE_URL = "https://www.gangnam.go.kr"
LIST_URL = BASE_URL + "/contract/list.do"
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests.get can block forever

HEADERS = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    # "br" is deliberately not advertised: requests only decodes brotli when
    # an optional brotli package is installed, otherwise resp.text is garbage.
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "ko-KR,ko;q=0.8,en-US;q=0.5,en;q=0.3",
    "Cache-Control": "max-age=0",
    "Connection": "keep-alive",
    # NOTE(review): this is a browser session captured verbatim and is
    # presumably expired; confirm whether the site needs any cookie at all.
    "Cookie": "JSESSIONID=baa_dI1wLJEFZeosbLHVxg-w1SGjCSDe5m77JFC1Oz5l8ZsbtJq0c4hVTuY_; WMONID=REfZ-vn3O8b; _ga=GA1.3.110539796.1632479167; siteContractModal=1; JSESSIONID=baa_dI1wLJEFZeosbLHVxg-w1SGjCSDe5m77JFC1Oz5l8ZsbtJq0c4hVTuY_; _gid=GA1.3.817966770.1633006424",
    "Host": "www.gangnam.go.kr",
    "Referer": "https://www.gangnam.go.kr/contract/list.do?pgno=4&mid=ID05_041501&saup_field1_gb=3&saup_dept_cd=&keyfield=SAUP_NM&keyword=&srch_start_amt=&srch_end_amt=",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "same-origin",
    "Sec-Fetch-User": "?1",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:92.0) Gecko/20100101 Firefox/92.0",
}


def parse_row(cell_texts, detail_href, term):
    """Build one result record from the stripped text of a row's first six cells.

    Args:
        cell_texts: sequence of at least six strings, in column order:
            type, commit date, name, budget, department, application period.
        detail_href: value of the ``href`` on the link in the name cell,
            or ``None``/empty when the row has no link.
        term: the search term that produced this row.

    Returns:
        A dict with the keys ``src``, ``term``, ``type``, ``commitDate``,
        ``name``, ``detailurl``, ``budget``, ``dept``, ``applyDateFrom`` and
        ``applyDateTo`` (in that order).
    """
    kind, commit_date, name, budget, dept, period = cell_texts[:6]
    # partition() never raises: a period without "~" yields an empty end date.
    date_from, _, date_to = period.partition("~")
    return {
        "src": SOURCE_NAME,
        "term": term,
        "type": kind,
        "commitDate": commit_date,
        # Commas and apostrophes are removed from the name.
        "name": name.replace(",", "").replace("'", ""),
        "detailurl": BASE_URL + detail_href if detail_href else "",
        # Drop the currency suffix and thousands separators.
        "budget": budget.replace("원", "").replace(",", ""),
        "dept": dept,
        "applyDateFrom": date_from.strip(),
        "applyDateTo": date_to.strip(),
    }


def fetch_contracts(term):
    """Query the contract list page for *term* and return the parsed rows.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection problems or timeout.
    """
    # Passing the query as params lets requests URL-encode the search term,
    # so characters such as "&", "#" or spaces cannot corrupt the query.
    query = {
        "pgno": "1",
        "mid": "ID05_041501",
        "saup_field1_gb": "3",
        "saup_dept_cd": "",
        "keyfield": "SAUP_NM",
        "keyword": term,
        "srch_start_amt": "",
        "srch_end_amt": "",
    }
    resp = requests.get(
        LIST_URL, params=query, headers=HEADERS, timeout=REQUEST_TIMEOUT
    )
    resp.raise_for_status()

    soup = bs4.BeautifulSoup(resp.text, "html.parser")
    results = []
    # The first <tr> is skipped (presumably the header row).
    for row in soup.select("table.table tr")[1:]:
        cells = row.select("td")
        if len(cells) < 6:
            # Not a data row (e.g. a header or placeholder row); skip it
            # instead of emitting a partial record.
            continue
        link = cells[2].select_one("a")
        href = link.get("href") if link is not None else None
        texts = [cell.get_text().strip() for cell in cells[:6]]
        results.append(parse_row(texts, href, term))
    return results


def main(argv=None):
    """Run the scraper for the term in ``argv[1]`` and print the result list.

    Returns the process exit status: 0 on success, 2 when the search term
    argument is missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        prog = argv[0] if argv else "script"
        print("usage: {} <search-term>".format(prog), file=sys.stderr)
        return 2
    print(fetch_contracts(argv[1]))
    return 0


if __name__ == "__main__":
    sys.exit(main())