import re

import pandas

# Matches a single whitespace character; titles are compared with all
# whitespace removed.
_WHITESPACE_RE = re.compile(r"\s")
# Captures the (non-greedy) text found between "[" and "]".
_BRACKETED_RE = re.compile(r"\[(.*?)\]")


def _bracketed_terms(terms):
    """Return the bracketed terms found in *terms*, normalised.

    All whitespace is removed and the text is lower-cased before the
    ``[...]`` groups are extracted, so ``"[ A b ]"`` yields ``"ab"``.

    Args:
        terms: The raw title text. Anything that is not a ``str`` (for
            example ``None`` or the float NaN pandas uses for empty cells)
            is treated as containing no bracketed terms.

    Returns:
        A list of the normalised strings found inside square brackets, in
        order of appearance.
    """
    if not isinstance(terms, str):
        return []
    normalized = _WHITESPACE_RE.sub("", terms).lower()
    return _BRACKETED_RE.findall(normalized)


def load_excel(filename, feather_path="./data.feather"):
    """Load the product sheet, keep the needed columns and cache them.

    Args:
        filename: Path of the Excel workbook to read (read with openpyxl).
        feather_path: Where the reduced frame is written in Feather format.
            Defaults to ``"./data.feather"``, the file ``main`` reads.

    Returns:
        The DataFrame restricted to the index, title and keyword columns.
    """
    df = pandas.read_excel(filename, engine="openpyxl")
    df = df[['po_idx_통합', 'po_idx_공구', 'po_idx_할인', 'po_idx_심쿵',
             'po_title_통합', 'po_keyword_통합']]
    df.to_feather(feather_path)
    return df


def getblacketedDics(terms, list):
    """Collect the bracketed terms of *terms* into *list*.

    Args:
        terms: Raw title text; non-string values contribute nothing.
        list: Accumulator that the found terms are appended to in place.
            (The name shadows the builtin; it is kept so existing callers
            keep working.)

    Returns:
        The list of normalised bracketed terms found in this title.
    """
    found = _bracketed_terms(terms)
    list.extend(found)
    return found


def getblacketedTerm(terms, diclist):
    """Return the bracketed terms of *terms* that ARE in *diclist*.

    Args:
        terms: Raw title text; non-string values yield an empty string.
        diclist: Container of dictionary entries tested with ``in``.

    Returns:
        The matching terms joined with ``","`` (empty string if none).
    """
    return ",".join(term for term in _bracketed_terms(terms) if term in diclist)


def getblandTerm(terms, diclist):
    """Return the bracketed terms of *terms* that are NOT in *diclist*.

    Args:
        terms: Raw title text; non-string values yield an empty string.
        diclist: Container of entries to exclude, tested with ``in``.

    Returns:
        The remaining terms joined with ``","`` (empty string if none).
    """
    return ",".join(term for term in _bracketed_terms(terms) if term not in diclist)


def _load_dictionary(dicfilename):
    """Read an Excel dictionary file and return column ``0`` as a list.

    NOTE(review): the column label ``0`` is presumably the header written
    when the dictionary was exported from a single-column DataFrame (see
    the workflow notes at the end of this file) -- confirm against the
    actual workbook.
    """
    df = pandas.read_excel(dicfilename, engine="openpyxl")
    return df[0].values.tolist()


def load_branddic(dicfilename="./branddic_20230420.xlsx"):
    """Return the entries of the brand dictionary workbook as a list."""
    return _load_dictionary(dicfilename)


def load_brandblackdic(dicfilename="./branddic_blacklist_20230420.xlsx"):
    """Return the entries of the brand blacklist workbook as a list."""
    return _load_dictionary(dicfilename)


def main():
    """Tag each cached product title and write the result to Excel.

    Reads ``./data.feather``, stores in a new ``branddic`` column the
    bracketed title terms that are not in the blacklist dictionary, and
    writes the frame to ``./_tmpTest.xlsx``.
    """
    datadf = pandas.read_feather("./data.feather")
    # diclist = load_branddic()
    # A frozenset makes every membership test O(1) instead of a list scan.
    diclist = frozenset(load_brandblackdic())
    # datadf['branddic'] = datadf['po_title_통합'].apply(lambda title: getblacketedTerm(title, diclist))
    datadf['branddic'] = datadf['po_title_통합'].apply(
        lambda title: getblandTerm(title, diclist)
    )
    datadf.to_excel("./_tmpTest.xlsx")
    # print(datadf.info())
    # print(dic)


if __name__ == '__main__':
    main()


# ---------------------------------------------------------------------------
# Workflow notes (earlier one-off steps, kept for reference)
# ---------------------------------------------------------------------------
# df = load_excel("./20230420_통합상품_키워드.xlsx")
#
# Review of the cases where the product names differ:
# sdf = df[(df['po_title_통합'] != df['po_title_공구']) & (df['po_title_공구'].notnull())]
# sdf = sdf[['po_idx_통합','po_title_통합','po_title_공구']]
# print(sdf)
# sdf.to_excel("./tmpTest.xlsx")
# Conclusion: using only the unified (통합) name is sufficient.
#
# print(df.info())
#
# Build the underlying data for the base dictionary:
# df = pandas.read_feather("./data.feather")
# diclist = []
# df['blacked'] = df.apply(lambda x: getblacketedDics(x['po_title_통합'], diclist), axis=1)
# df.to_excel("./tmpTest.xlsx")
# diclist = list(set(diclist))
# dicdf = pandas.DataFrame(diclist)
# dicdf.to_excel("./dic.xlsx")
# print(dicdf)
# Then take the export in Excel, build the dictionary by hand, and save it
# under the name "branddic".