Files
Esmart/groupchat/main.py
2025-09-23 02:39:47 +08:00

50 lines
1.7 KiB
Python

# 企业微信客户群聊分析
from os import stat_result
import pandas as pd
import re
def parser_chat_text(content: str):
    """Parse a raw group-chat export into message records and per-user stats.

    A line counts as a message header when the pattern
    ``<user> <x>/<x> <xx:xx:xx>`` is found in it (user, date, time). Every
    other line is body text belonging to the most recent header; consecutive
    body lines are concatenated without a separator.

    Args:
        content: The whole chat export as one newline-separated string.

    Returns:
        A ``(records, stats)`` tuple where

        * ``records`` is a list of ``[user, date, time, text]`` lists in the
          order they appear in ``content``;
        * ``stats`` is a list of ``[user, message_count, char_count]`` lists,
          one per user in first-seen order. ``char_count`` is the summed
          ``len()`` of that user's message texts.

    Notes:
        * A header that is followed by no body text produces no record.
        * Body text that appears before the first header is emitted with
          empty user/date/time fields.
    """
    header_re = re.compile(
        "(.*?@{0,1}.*?@{0,1}.*?) (.{1,2}/.{1,2}) (.{2}:.{2}:.{2})"
    )

    records = []
    sender = ("", "", "")  # (user, date, time) of the message being collected
    pending = []           # body lines collected for the current message

    for line in content.split("\n"):
        match = header_re.search(line)
        if match is None:
            pending.append(line)
            continue
        # A new header closes the message collected so far.
        text = "".join(pending)
        if text:
            records.append([*sender, text])
        pending = []
        sender = match.groups()

    # Flush the final message: no further header follows it, so without this
    # step the last message of every chat would be lost.
    text = "".join(pending)
    if text:
        records.append([*sender, text])

    # Aggregate message count and total text length per user.
    totals = {}
    for user, _date, _time, text in records:
        count, chars = totals.get(user, (0, 0))
        totals[user] = (count + 1, chars + len(text))

    stats = [[user, count, chars] for user, (count, chars) in totals.items()]
    return records, stats
if __name__ == '__main__':
    # Each row of the input workbook is unpacked as (group name, raw chat text),
    # so the sheet must have exactly two columns.
    df = pd.read_excel("聊天记录.xlsx")
    for group_name, group_messages in df.values:
        # Skip rows without usable chat text: empty cells are read as NaN
        # (a float, not a str), and whitespace-only cells hold no messages.
        if not isinstance(group_messages, str) or not group_messages.strip():
            continue

        chat_messages, chat_analyzer = parser_chat_text(group_messages)

        df1 = pd.DataFrame(chat_messages, columns=["user", "date", "time", "content"])
        # NOTE(review): "date" holds strings such as "9/22", so this ordering is
        # lexicographic rather than chronological — confirm that is intended.
        df1 = df1.sort_values("date", ascending=False)

        df2 = pd.DataFrame(chat_analyzer, columns=["user", "frequence", "word_count"])
        df2 = df2.sort_values("frequence", ascending=False)

        # The context manager saves and closes the workbook even if writing a
        # sheet raises, so no half-open file handle is left behind.
        with pd.ExcelWriter(f"{group_name}-聊天记录分析.xlsx", engine="xlsxwriter") as writer:
            df2.to_excel(writer, sheet_name="记录分析", index=False)
            df1.to_excel(writer, sheet_name="记录详情", index=False)