48 lines
1.7 KiB
Python
48 lines
1.7 KiB
Python
# 企业微信客户群聊分析
|
|
from os import stat_result
|
|
|
|
import pandas as pd
|
|
import re
|
|
|
|
|
|
def _append_message(rows, header, text_parts):
    """Append ``[user, date, time, text]`` to *rows* unless the joined text is empty."""
    text = "".join(text_parts)
    if text:
        rows.append([*header, text])


def parser_chat_text(content: str):
    """Split a chat transcript into message rows and per-user totals.

    A line in which the header pattern is found (``<user> <x/x> <xx:xx:xx>``,
    i.e. a sender followed by a date-like and a time-like token) starts a new
    message. Every following line without such a header is appended, with no
    separator, to the text of the current message.

    Messages whose text is empty are skipped. Text that appears before the
    first header is kept as a row with empty user/date/time fields.

    Args:
        content: The whole transcript as one newline-separated string.

    Returns:
        A pair ``(messages, stats)`` where ``messages`` is a list of
        ``[user, date, time, text]`` lists in transcript order and ``stats``
        is a list of ``[user, message_count, total_text_length]`` lists in
        order of each user's first message.
    """
    # Compiled once up front instead of being re-resolved for every line.
    header_re = re.compile(
        "(.*?@{0,1}.*?@{0,1}.*?) (.{1,2}/.{1,2}) (.{2}:.{2}:.{2})"
    )

    messages = []
    header = ("", "", "")  # user, date, time of the message being collected
    pending = []           # text lines collected since the last header

    for line in content.split("\n"):
        match = header_re.search(line)
        if match is None:
            pending.append(line)
            continue
        # A new header closes the message that was being collected.
        _append_message(messages, header, pending)
        pending = []
        header = match.groups()

    # The final message has no following header to close it, so flush it
    # explicitly; otherwise the last message of every transcript is lost.
    _append_message(messages, header, pending)

    # Per-user (message_count, total_text_length); dicts keep insertion order.
    totals = {}
    for user, _date, _time, text in messages:
        count, length = totals.get(user, (0, 0))
        totals[user] = (count + 1, length + len(text))

    stats = [[user, count, length] for user, (count, length) in totals.items()]
    return messages, stats
|
|
|
|
|
|
if __name__ == '__main__':
    # Each row of the input workbook is unpacked into two values: a group
    # name and that group's chat transcript text.
    df = pd.read_excel("聊天记录.xlsx")
    for group_name, group_messages in df.values:
        # Parse and build both frames before opening the output file, so a
        # parsing failure does not leave a half-written workbook behind.
        chat_messages, chat_analyzer = parser_chat_text(group_messages)

        # Per-message detail rows. NOTE(review): "date" holds the strings
        # captured by the parser, so this sort is lexicographic, not
        # chronological.
        df1 = pd.DataFrame(chat_messages, columns=["user", "date", "time", "content"])
        df1 = df1.sort_values("date", ascending=False)

        # Per-user totals, most active user first.
        df2 = pd.DataFrame(chat_analyzer, columns=["user", "frequence", "word_count"])
        df2 = df2.sort_values("frequence", ascending=False)

        # The context manager closes (and thereby saves) the workbook even if
        # writing one of the sheets raises.
        with pd.ExcelWriter(f"{group_name}-聊天记录分析.xlsx", engine="xlsxwriter") as writer:
            df2.to_excel(writer, sheet_name="记录分析", index=False)
            df1.to_excel(writer, sheet_name="记录详情", index=False)
|