python_apps/crwlers/termExtend.py
2023-11-03 14:49:12 +09:00

48 lines
1.3 KiB
Python

#!/usr/bin/env python
import requests, bs4, urllib, sys, logging, re, subprocess
from urllib import parse
from datetime import datetime
# Timestamp captured when the script starts; nothing in the visible part of
# this file reads it afterwards.
startTime = datetime.now()
def scriptCmd(srcTerm, adTerm):
    """Build the shell command line that runs keywordCheck.py for one term pair.

    Args:
        srcTerm: first positional argument handed to keywordCheck.py.
        adTerm: second positional argument handed to keywordCheck.py.

    Returns:
        A shell command string of the form
        ``<python> <script> <srcTerm> <adTerm> ext >> ext.res``.
        Both terms are shell-quoted, and the command's stdout is appended
        to ``ext.res`` (a relative path, resolved by the shell).
    """
    import shlex  # local import: only this function needs it

    PYTHON_PREFIX = '/var/www/pyenv/bin/python'
    PYTHON_SCRIPT = '/home/maddiekorea/py/keywordCheck.py'
    # The result is executed with shell=True, so each term must be quoted:
    # wrapping it in plain double quotes would still let `"`, `$`, backticks
    # and backslashes inside a term be interpreted by the shell.
    quotedTerms = " ".join(shlex.quote(term) for term in (srcTerm, adTerm))
    res = PYTHON_PREFIX + " " + PYTHON_SCRIPT + " " + quotedTerms + " ext >> ext.res"
    return res
def excute(shellcmd):
    """Run *shellcmd* through the shell, allowing it at most 50 seconds.

    Args:
        shellcmd: command string passed to the shell (``shell=True``).

    Returns:
        None. Whatever the command writes to its stdout pipe is read and
        discarded.
    """
    executeCrawl = subprocess.Popen(shellcmd, stdout=subprocess.PIPE, shell=True)
    try:
        executeCrawl.communicate(timeout=50)
    except subprocess.TimeoutExpired:
        # Ask the shell to stop, then reap it so no zombie is left behind.
        # wait() is used instead of communicate() so that a surviving
        # grandchild still holding the pipe cannot block this call.
        executeCrawl.terminate()
        try:
            executeCrawl.wait(timeout=5)
        except subprocess.TimeoutExpired:
            executeCrawl.kill()
            executeCrawl.wait()
    finally:
        # communicate() closes the pipe on success; after a timeout it is
        # still open and has to be closed here.
        if executeCrawl.stdout is not None:
            executeCrawl.stdout.close()
# Script body: run keywordCheck.py (via scriptCmd/excute) for the main term
# and for every prefix/suffix combination built from the command line.
#   argv[1]  main term
#   argv[2]  comma-separated prefixes (empty entries are skipped)
#   argv[3]  comma-separated suffixes (empty entries are skipped)
if len(sys.argv) < 4:
    sys.exit("usage: " + sys.argv[0] + " <mainTerm> <prefix1,prefix2,...> <suffix1,suffix2,...>")

mainTerm = str(sys.argv[1])
prefixs = str(sys.argv[2])
suffixs = str(sys.argv[3])
prefixArr = prefixs.split(",")
suffixArr = suffixs.split(",")

# The bare main term is always checked first.
excute(scriptCmd(mainTerm, mainTerm))

prefixed = []
suffixed = []

# mainTerm + suffix, remembered in `suffixed` for the prefix pass below.
for suffix in suffixArr:
    if suffix == "":
        continue
    term = mainTerm + suffix
    suffixed.append(term)
    excute(scriptCmd(mainTerm, term))

# str.split never returns an empty list, so no length check is needed here.
for prefix in prefixArr:
    if prefix == "":
        continue
    # prefix + mainTerm
    term = prefix + mainTerm
    prefixed.append(term)
    excute(scriptCmd(mainTerm, term))
    # prefix + mainTerm + suffix, for every suffixed term built above.
    # NOTE(review): nesting reconstructed from a listing whose indentation
    # was flattened - confirm this loop belongs inside the non-empty-prefix
    # branch.
    for suffixedTerm in suffixed:
        term = prefix + suffixedTerm
        excute(scriptCmd(mainTerm, term))