python_apps/brandTermExtract/main.py
2023-11-03 14:49:12 +09:00

84 lines
2.9 KiB
Python

import pandas, re
# Press ⌃R to run this script, or replace it with your own code.
# Press ⇧ twice to search everywhere for classes, files, tool windows, actions, and settings.
def load_excel(filename, feather_path="./data.feather"):
    """Load an Excel workbook, keep a fixed column subset and cache it.

    The workbook is read with the openpyxl engine, reduced to the six
    columns listed in ``columns``, written to ``feather_path`` in Feather
    format and returned.

    Args:
        filename: Path of the Excel workbook to read.
        feather_path: Destination of the Feather copy. Defaults to
            ``"./data.feather"``, the path that used to be hard-coded here.

    Returns:
        The DataFrame restricted to the selected columns.

    Raises:
        KeyError: If one of the selected columns is missing from the workbook.
    """
    columns = [
        'po_idx_통합', 'po_idx_공구', 'po_idx_할인', 'po_idx_심쿵',
        'po_title_통합', 'po_keyword_통합',
    ]
    df = pandas.read_excel(filename, engine="openpyxl")
    df = df[columns]
    df.to_feather(feather_path)
    return df
def getblacketedDics(terms, list):
    """Collect every square-bracketed term of ``terms`` into ``list``.

    All whitespace is removed and the text is lower-cased before the
    ``[...]`` groups are extracted, so ``"[Ni Ke]"`` yields ``"nike"``.

    Args:
        terms: Text to scan. A value that is not a ``str`` (for example
            ``None`` or a float NaN coming from an empty cell) is treated as
            containing no bracketed terms instead of raising ``TypeError``.
        list: Accumulator the extracted terms are appended to in place. The
            name shadows the builtin; it is kept so that callers passing it
            by keyword keep working.

    Returns:
        The terms found by this call, in order of appearance.
    """
    if not isinstance(terms, str):
        return []
    normalized = re.sub(r"\s", "", terms).lower()
    found = re.findall(r"\[(.*?)\]", normalized)
    list.extend(found)
    return found
def getblacketedTerm(terms, diclist):
    """Return the bracketed terms of ``terms`` that are listed in ``diclist``.

    Whitespace is stripped and the text lower-cased before the ``[...]``
    groups are extracted, so ``diclist`` entries are expected in that same
    normalized form.

    Args:
        terms: Text to scan. A value that is not a ``str`` (for example
            ``None`` or a float NaN coming from an empty cell) yields an
            empty result instead of raising ``TypeError``.
        diclist: Container of accepted terms; only membership is tested.

    Returns:
        The matching terms joined with ``","`` in order of appearance, or
        ``''`` when nothing matches.
    """
    if not isinstance(terms, str):
        return ''
    normalized = re.sub(r"\s", "", terms).lower()
    bracketed = re.findall(r"\[(.*?)\]", normalized)
    # join() yields the same text as appending "term," repeatedly and then
    # trimming one trailing comma, without the extra regex pass.
    return ",".join(term for term in bracketed if term in diclist)
def getblandTerm(terms, diclist):
    """Return the bracketed terms of ``terms`` that are NOT in ``diclist``.

    Whitespace is stripped and the text lower-cased before the ``[...]``
    groups are extracted, so ``diclist`` entries are expected in that same
    normalized form.

    Args:
        terms: Text to scan. A value that is not a ``str`` (for example
            ``None`` or a float NaN coming from an empty cell) yields an
            empty result instead of raising ``TypeError``.
        diclist: Container of terms to exclude; only membership is tested.

    Returns:
        The remaining terms joined with ``","`` in order of appearance, or
        ``''`` when every term is excluded or none is present.
    """
    if not isinstance(terms, str):
        return ''
    normalized = re.sub(r"\s", "", terms).lower()
    bracketed = re.findall(r"\[(.*?)\]", normalized)
    # join() yields the same text as appending "term," repeatedly and then
    # trimming one trailing comma, without the extra regex pass.
    return ",".join(term for term in bracketed if term not in diclist)
def load_branddic():
    """Read the brand dictionary workbook and return its column ``0`` as a list.

    The workbook path is fixed to ``./branddic_20230420.xlsx`` and the file
    is parsed with the openpyxl engine.
    """
    workbook = pandas.read_excel("./branddic_20230420.xlsx", engine="openpyxl")
    # The entries live in the column whose label is the integer 0.
    entries = workbook[0]
    return entries.values.tolist()
def load_brandblackdic():
    """Read the brand blacklist workbook and return its column ``0`` as a list.

    The workbook path is fixed to ``./branddic_blacklist_20230420.xlsx`` and
    the file is parsed with the openpyxl engine.
    """
    source = "./branddic_blacklist_20230420.xlsx"
    # The entries live in the column whose label is the integer 0.
    blacklist_column = pandas.read_excel(source, engine="openpyxl")[0]
    return blacklist_column.values.tolist()
if __name__ == '__main__':
    # Start from the Feather file that load_excel() writes.
    datadf = pandas.read_feather("./data.feather")

    # Alternative run: keep only the bracketed terms found in the brand
    # dictionary instead of dropping the blacklisted ones.
    #   diclist = load_branddic()
    #   datadf['branddic'] = datadf['po_title_통합'].map(
    #       lambda title: getblacketedTerm(title, diclist)
    #   )
    diclist = load_brandblackdic()

    # Only the title column is needed, so map over that column directly
    # rather than building a row object for every record with apply(axis=1).
    datadf['branddic'] = datadf['po_title_통합'].map(
        lambda title: getblandTerm(title, diclist)
    )
    datadf.to_excel("./_tmpTest.xlsx")
#print(datadf.info())
#print(dic)
# See the PyCharm help at https://www.jetbrains.com/help/pycharm/
#df = load_excel("./20230420_통합상품_키워드.xlsx")
# Review the cases where the product titles differ
#sdf = df[(df['po_title_통합'] != df['po_title_공구']) & (df['po_title_공구'].notnull())]
#sdf = sdf[['po_idx_통합','po_title_통합','po_title_공구']]
#print(sdf)
#sdf.to_excel("./tmpTest.xlsx")
# Using only the unified (통합) title is sufficient...
#
#print(df.info())
# Build the source data for the base dictionary
#df = pandas.read_feather("./data.feather")
#diclist = []
#df['blacked'] = df.apply(lambda x: getblacketedDics(x['po_title_통합'], diclist), axis=1)
#df.to_excel("./tmpTest.xlsx")
#diclist = list(set(diclist))
#dicdf = pandas.DataFrame(diclist)
#dicdf.to_excel("./dic.xlsx")
#print(dicdf)
# Export to Excel, build the dictionary from it, and save it under the name branddic.