其他
导出多年微信记录,我找到了自己的聊天习惯
arm CPU 架构获取密钥方法(简易方法)
cd ~/Downloads/ptrsx-aarch64-apple-darwin
./ptrsx-dumper test --pid $(pgrep WeChat |head -1) --path "WeChat+0x4B0F700->0->8->8->16->32->8->8->64->8->0->0" -n 32
0x600001f00f60
353e9afab4b6422981d110d65e6d555557f1aca0afaa4431a5dab66fef384596
Intel CPU 架构获取密钥方法(通杀方法)
csrutil disable
# 重启电脑进入正常系统
reboot
Breakpoint 1: 2 locations.
(lldb) c
Process 58802 resuming
0x60000240c760: 0x35 0x3e 0x9a 0xfa 0xb4 0xb6 0x42 0x29
0x60000240c768: 0x81 0xd1 0x10 0xd6 0x5e 0x6d 0x55 0x55
0x60000240c770: 0x57 0xf1 0xac 0xa0 0xaf 0xaa 0x44 0x31
0x60000240c778: 0xa5 0xda 0xb6 0x6f 0xef 0x38 0x45 0x96
(lldb) exit
Quitting lldb will detach from one or more processes. Do you really want to proceed: [Y/n] y
# lldb memory dump of the 32-byte SQLCipher raw key (4 lines x 8 bytes).
source = '''
0x60000240c760: 0x35 0x3e 0x9a 0xfa 0xb4 0xb6 0x42 0x29
0x60000240c768: 0x81 0xd1 0x10 0xd6 0x5e 0x6d 0x55 0x55
0x60000240c770: 0x57 0xf1 0xac 0xa0 0xaf 0xaa 0x44 0x31
0x60000240c778: 0xa5 0xda 0xb6 0x6f 0xef 0x38 0x45 0x96
'''


def parse_lldb_key(dump):
    """Convert an lldb ``memory read`` dump into a '0x...'-prefixed hex key.

    Each dump line looks like ``ADDRESS: 0xAA 0xBB ...``; the address before
    the first ':' is discarded and the byte values are concatenated.  Unlike
    the original hard-coded ``split('\\n')[1:5]`` slice, this handles a dump
    of any number of lines and skips blank/malformed ones.
    """
    digits = []
    for line in dump.splitlines():
        _, sep, tail = line.partition(':')
        if not sep:
            continue  # blank line or no byte data on it
        digits.append(tail.replace('0x', '').replace(' ', ''))
    return '0x' + ''.join(digits)


key = parse_lldb_key(source)
print(key)
mkdir ~/Desktop/WeChatDB
# 切换到数据库文件所在的目录,不要直接复制此处的路径,直接从 Finder 拉到终端里
cd /Users/james/Library/Containers/com.tencent.xinWeChat/Data/Library/Application\ Support/com.tencent.xinWeChat/2.0b4.0.9/5a22781f14219edfffa333cb38aa92cf/Message
# 把所有的 msg 数据库文件拷贝到 WeChatDB 目录
cp msg*.db ~/Desktop/WeChatDB
# 在 WeChatDB 目录里创建空文件夹,从 db0 到 db9
mkdir ~/Desktop/WeChatDB/db{0..9}
密码类型:选择「原始密钥」。密码:第一种用 ptrsx-dumper 获取到的密码前面没有 0x 字符,需要自己手动加上;第二种用 Python 解析出来的密码可以直接使用。例如此处作者填写的密码就是:0x353e9afab4b6422981d110d65e6d555557f1aca0afaa4431a5dab66fef384596。加密设置:选择「SQLCipher 3 默认」
(base) conda activate WeChatDataAnalysis
(WeChatDataAnalysis) pip3 install jieba mplfonts
(base) conda remove -n WeChatDataAnalysis --all
☁️
制作词云
"CompressContent": null,
"ConBlob": "chN3eGlkX3k2OHh0MDk4NHFjczIxeglsaXV6b25nZGGAAQCYAQCgAQC4AQDIAQDQAQDwAQD4AQCIAgI=",
"IntRes1": 0,
"IntRes2": 0,
"StrRes1": null,
"StrRes2": null,
"mesDes": 0,
"mesLocalID": 8,
"mesSvrID": 9004976427286855000,
"messageType": 1,
"msgContent": "投稿一日一技",
"msgCreateTime": 1611106801,
"msgImgStatus": 1,
"msgSeq": 0,
"msgSource": "",
"msgStatus": 2,
"msgVoiceText": null
}
msgContent:消息内容 msgCreateTime:消息创建时间,使用 Unix Time 表示 messageType:消息类型
文本:1 图片:3 语音:34 视频:43 表情包:47 位置:48 分享消息:49 系统消息:10000
msgStatus:消息状态 收到消息:4 发出消息:2、3
msgVoiceText:微信的语音转文字识别结果
import os
import json


def extract_plain_text(messages):
    """Collect the text of every plain-text message (messageType == 1).

    For a multi-line message the first line (the sender prefix in group
    chats) is dropped and the remaining lines are joined with spaces;
    single-line messages are kept as-is.  Returns one newline-terminated
    string (empty string when there are no text messages), matching the
    original ``+=`` accumulation but built in O(n) with a join.
    """
    parts = []
    for item in messages:
        if item['messageType'] == 1:
            lines = item['msgContent'].split('\n')
            if len(lines) > 1:
                parts.append(" ".join(lines[1:]))
            else:
                parts.append(lines[0])
    return "\n".join(parts) + "\n" if parts else ""


# `with` closes the file deterministically (the original leaked the handle).
with open('你的 JSON 文件路径', encoding='utf-8') as fp:
    data = json.load(fp)
content = extract_plain_text(data)
print(content)
# Segment the collected chat text with jieba and render a basic word cloud.
import jieba
# `content` is the newline-joined message text built in an earlier fragment.
ls = jieba.lcut(content)
# drop single-character tokens left over from segmentation
ls = [i for i in ls if len(i) > 1]
text = ' '.join(ls)
print(text)
# NOTE(review): `wordcloud` is imported in a later snippet of this article;
# this fragment assumes it is already available.
wc = wordcloud.WordCloud(
font_path="SIMSUN.ttf",
width=1000,
height=1000,
background_color="skyblue",
max_words=200)
wc.generate(text)
wc.to_file("result.png")
# Same word cloud, but with WeChat sticker names filtered out as stop words.
import wordcloud
# import the stop-word support
from wordcloud import WordCloud, STOPWORDS
stopwords = STOPWORDS
# add the names of WeChat's built-in stickers as extra stop words
stopwords.update(['微笑','撇嘴','色','发呆','得意','流泪','害羞','闭嘴','睡','大哭','尴尬','发怒','调皮','呲牙','惊讶','难过','囧','抓狂','吐','偷笑','愉快','白眼','傲慢','困','惊恐','憨笑','悠闲','咒骂','疑问','嘘','晕','衰','骷髅','敲打','再见','擦汗','抠鼻','鼓掌','坏笑','右哼哼','鄙视','委屈','快哭了','阴险','亲亲','可怜','笑脸','生病','脸红','破涕为笑','恐惧','失望','无语','嘿哈','捂脸','奸笑','机智','皱眉','耶','吃瓜','加油','汗','天啊','Emm','社会社会','旺柴','好的','打脸','哇','翻白眼','666','让我看看','叹气','苦涩','裂开','嘴唇','爱心','心碎','拥抱','强','弱','握手','胜利','抱拳','勾引','拳头','OK','合十','啤酒','咖啡','蛋糕','玫瑰','凋谢','菜刀','炸弹','便便','月亮','太阳','庆祝','礼物','红包','發','福','烟花','爆竹','猪头','跳跳','发抖','转圈'])
wc = wordcloud.WordCloud(
font_path="SIMSUN.ttf",
width=1000,
height=1000,
background_color="skyblue",
max_words=200,
# pass the stop-word set
stopwords=stopwords)
wc.generate(text)
wc.to_file("resultStop.png")
# Full pipeline: segment the chat text, filter stop words, and render the
# word cloud into a heart-shaped mask image.
import jieba
ls = jieba.lcut(content)
# drop single-character tokens left over from segmentation
ls = [i for i in ls if len(i) > 1]
text = ' '.join(ls)
# import the word-cloud renderer
import wordcloud
# import the stop-word support
from wordcloud import WordCloud, STOPWORDS
stopwords = STOPWORDS
# add the names of WeChat's built-in stickers as extra stop words
stopwords.update(['微笑','撇嘴','色','发呆','得意','流泪','害羞','闭嘴','睡','大哭','尴尬','发怒','调皮','呲牙','惊讶','难过','囧','抓狂','吐','偷笑','愉快','白眼','傲慢','困','惊恐','憨笑','悠闲','咒骂','疑问','嘘','晕','衰','骷髅','敲打','再见','擦汗','抠鼻','鼓掌','坏笑','右哼哼','鄙视','委屈','快哭了','阴险','亲亲','可怜','笑脸','生病','脸红','破涕为笑','恐惧','失望','无语','嘿哈','捂脸','奸笑','机智','皱眉','耶','吃瓜','加油','汗','天啊','Emm','社会社会','旺柴','好的','打脸','哇','翻白眼','666','让我看看','叹气','苦涩','裂开','嘴唇','爱心','心碎','拥抱','强','弱','握手','胜利','抱拳','勾引','拳头','OK','合十','啤酒','咖啡','蛋糕','玫瑰','凋谢','菜刀','炸弹','便便','月亮','太阳','庆祝','礼物','红包','發','福','烟花','爆竹','猪头','跳跳','发抖','转圈'])
# imageio reads the heart-shaped mask image
from imageio.v2 import imread
# load the mask file used to shape the cloud
background = imread('heart.png')
wc = wordcloud.WordCloud(
font_path="SIMSUN.ttf",
width=1000,
height=1000,
# background colour of the rendered cloud
background_color="skyblue",
max_words=200,
# pass the stop-word set
stopwords=stopwords,
# use the heart image as the shape mask
mask = background)
wc.generate(text)
wc.to_file("resultHeart.png")
background_color="skyblue"
'[微笑]','[撇嘴]','[色]','[发呆]','[得意]','[流泪]','[害羞]','[闭嘴]','[睡]','[大哭]','[尴尬]','[发怒]','[调皮]','[呲牙]','[惊讶]','[难过]','[囧]','[抓狂]','[吐]','[偷笑]','[愉快]','[白眼]','[傲慢]','[困]','[惊恐]','[憨笑]','[悠闲]','[咒骂]','[疑问]','[嘘]','[晕]','[衰]','[骷髅]','[敲打]','[再见]','[擦汗]','[抠鼻]','[鼓掌]','[坏笑]','[右哼哼]','[鄙视]','[委屈]','[快哭了]','[阴险]','[亲亲]','[可怜]','[笑脸]','[生病]','[脸红]','[破涕为笑]','[恐惧]','[失望]','[无语]','[嘿哈]','[捂脸]','[奸笑]','[机智]','[皱眉]','[耶]','[吃瓜]','[加油]','[汗]','[天啊]','[Emm]','[社会社会]','[旺柴]','[好的]','[打脸]','[哇]','[翻白眼]','[666]','[让我看看]','[叹气]','[苦涩]','[裂开]','[嘴唇]','[爱心]','[心碎]','[拥抱]','[强]','[弱]','[握手]','[胜利]','[抱拳]','[勾引]','[拳头]','[OK]','[合十]','[啤酒]','[咖啡]','[蛋糕]','[玫瑰]','[凋谢]','[菜刀]','[炸弹]','[便便]','[月亮]','[太阳]','[庆祝]','[礼物]','[红包]','[發]','[福]','[烟花]','[爆竹]','[猪头]','[跳跳]','[发抖]','[转圈]'
# Names of WeChat's built-in stickers as they appear inline in message text.
stickers = ['[微笑]','[撇嘴]','[色]','[发呆]','[得意]','[流泪]','[害羞]','[闭嘴]','[睡]','[大哭]','[尴尬]','[发怒]','[调皮]','[呲牙]','[惊讶]','[难过]','[囧]','[抓狂]','[吐]','[偷笑]','[愉快]','[白眼]','[傲慢]','[困]','[惊恐]','[憨笑]','[悠闲]','[咒骂]','[疑问]','[嘘]','[晕]','[衰]','[骷髅]','[敲打]','[再见]','[擦汗]','[抠鼻]','[鼓掌]','[坏笑]','[右哼哼]','[鄙视]','[委屈]','[快哭了]','[阴险]','[亲亲]','[可怜]','[笑脸]','[生病]','[脸红]','[破涕为笑]','[恐惧]','[失望]','[无语]','[嘿哈]','[捂脸]','[奸笑]','[机智]','[皱眉]','[耶]','[吃瓜]','[加油]','[汗]','[天啊]','[Emm]','[社会社会]','[旺柴]','[好的]','[打脸]','[哇]','[翻白眼]','[666]','[让我看看]','[叹气]','[苦涩]','[裂开]','[嘴唇]','[爱心]','[心碎]','[拥抱]','[强]','[弱]','[握手]','[胜利]','[抱拳]','[勾引]','[拳头]','[OK]','[合十]','[啤酒]','[咖啡]','[蛋糕]','[玫瑰]','[凋谢]','[菜刀]','[炸弹]','[便便]','[月亮]','[太阳]','[庆祝]','[礼物]','[红包]','[發]','[福]','[烟花]','[爆竹]','[猪头]','[跳跳]','[发抖]','[转圈]']
# Besides the built-in stickers, also count some common Emoji, plus any
# extra words worth tracking.
stickers.extend(['😂','🤣','🥰','😅','🥹','😘','🤩','🥺','😓','🙄','🤡', '哈哈'])
# One zero-initialised counter per sticker.  dict.fromkeys replaces the
# original `{stickers: 0 for stickers in stickers}`, whose loop variable
# shadowed the very list it iterated.
stickers_dict = dict.fromkeys(stickers, 0)
print(json.dumps(stickers_dict, ensure_ascii=False, indent=4))
遍历 JSON 统计表情出现次数
# `data` here is the file object opened in an earlier step — TODO confirm.
data = data.read()
data = json.loads(data)
# Tally how many times each sticker/Emoji name occurs across all messages.
for item in data:
    # hoist the message text: it is scanned once per sticker name below
    msg = item['msgContent']
    for word in stickers_dict:  # iterating the dict yields its keys
        stickers_dict[word] += msg.count(word)
print(json.dumps(stickers_dict, ensure_ascii=False, indent=4))
# Keep only the stickers that actually appeared at least once.
data = {key: value for key, value in stickers_dict.items() if value}
print(json.dumps(data, ensure_ascii=False, indent=4))
# Convert the {sticker: count} dict into a two-column DataFrame ordered by
# count (descending) and export it as an Excel sheet.
df = pd.DataFrame.from_dict(data, orient='index', columns=['数量'])
# the dict keys (sticker names) are the index; expose them as a column
df['表情'] = df.index
df = (
    df.sort_values(by='数量', ascending=False)
    [['表情', '数量']]
    .reset_index(drop=True)
)
# write the table without the numeric index column
df.to_excel('~/Desktop/emoji_data.xlsx', index=False)
print("数据已保存到 ~/Desktop/emoji_data.xlsx 文件中。")
时间分段聊天频率分析
import os
import json
import datetime

# Load one exported chat-log JSON file; `with` closes the handle
# deterministically (the original left the file object dangling).
with open('/Users/james/Desktop/WeChatDB/db2/Chat_3c0825dcf3b568028bcf00ee45656d60.json', encoding='utf-8') as fp:
    data = json.loads(fp.read())
# Per-time-bucket message counters.  Renamed from `dict`, which shadowed
# the builtin of the same name.
time_buckets = {}
for item in data:
    # message send time, a Unix timestamp
    unixtime = item['msgCreateTime']
    print(unixtime)
mplfonts init
mplfonts install --update SIMSUN.ttf
# Import mplfonts' font helper so matplotlib can render Chinese labels.
from mplfonts import use_font
# pick a CJK font here (change to whatever is installed locally)
#use_font('SIMSUN')
use_font('Noto Serif CJK SC')
# NOTE(review): `result` is built in an earlier fragment; its keys look like
# 4-char 'HHMM' time-bucket strings — confirm against the producing code.
axis_x=[i[:2]+'\n点\n'+i[2:] for i in result]
axis_y=[result[i] for i in result]
plt.bar(axis_x,axis_y)
plt.xlabel("时间段")
plt.ylabel("消息数量")
plt.title("蓝翼运动大众群")
plt.show()
# Bar chart of message counts per time-of-day bucket.
from mplfonts import use_font

# Use a CJK-capable font so the Chinese axis labels render correctly
# (swap in 'SIMSUN' or another locally installed font if preferred).
use_font('Noto Serif CJK SC')

# Turn each bucket key into a vertical "HH 点 MM" tick label and pair it
# with that bucket's message count.
axis_x = []
axis_y = []
for bucket in result:
    axis_x.append(bucket[:2] + '\n点\n' + bucket[2:])
    axis_y.append(result[bucket])

plt.bar(axis_x, axis_y)
plt.xlabel("时间段")
plt.ylabel("消息数量")
plt.title("蓝翼运动大众群")
plt.show()
🔥
聊天频率热图
import numpy as np
import matplotlib.pyplot as plt
# font helper so matplotlib can draw the Chinese tick labels
from mplfonts import use_font

use_font('SIMSUN')

# X axis: day of week.  Y axis: year + week number.
x_labels = ["周一", "周二", "周三", "周四", "周五", "周六", "周日"]
y_labels = [202327, 202328, 202329, 202330]
# message counts: one row per week, one column per weekday
values = np.array([
    [0, 0, 0, 0, 0, 38, 0],
    [7, 3, 0, 0, 28, 0, 31],
    [20, 0, 0, 16, 0, 0, 0],
    [0, 0, 78, 10, 7, 0, 0],
])

fig, ax = plt.subplots(figsize=(8, 3))
ax.set_xticks(np.arange(len(x_labels)))
ax.set_yticks(np.arange(len(y_labels)))
ax.set_xticklabels(x_labels)
ax.set_yticklabels(y_labels)
heat = ax.imshow(values, cmap=plt.cm.Greens)
ax.figure.colorbar(heat, ax=ax)
plt.show()
Y 轴:哪一年的哪一周,即年和周数
Value 数据:星期几发了多少条消息
# Count messages per (year, week): read the exported chat JSON and derive a
# YYYYWW-style integer key from every message timestamp.
import os
import json
import datetime
data = open('/Users/james/Desktop/WeChatDB/db2/Chat_55ce213c2ad742d8ea91bf2c5ee282fb.json').read()
data = json.loads(data)
statistics_dict = {}
for item in data:
unixtime = item.get('msgCreateTime')
if isinstance(unixtime, int):
# key format: year followed by the week number (strftime '%Y%W'); the +1
# appears to shift the zero-based %W count — TODO confirm intent
week = int(datetime.datetime.fromtimestamp(unixtime).strftime('%Y%W')) + 1
print(week)
202328: [7, 3, 0, 0, 28, 0, 31],
202329: [20, 0, 0, 16, 0, 0, 0],
202330: [0, 0, 78, 10, 7, 0, 0]
}
# Fragment of the per-week tally loop; `item` comes from the enclosing
# `for item in data:` shown earlier in the article.
unixtime = item.get('msgCreateTime')
if isinstance(unixtime, int):
# key format: year + week number (see the earlier fragment for the +1)
week = int(datetime.datetime.fromtimestamp(unixtime).strftime('%Y%W')) + 1
# initialise this week's row the first time the key is seen
if statistics_dict.get(week) is None:
# one slot per weekday
statistics_dict[week] = [0, 0, 0, 0, 0, 0, 0]
# bump the counter for this message's weekday.  NOTE(review): '%w' is
# 0 = Sunday, while the later x_labels start at 周一 (Monday) — the
# weekday-to-label alignment may be off by one; confirm.
day = int(datetime.datetime.fromtimestamp(unixtime).strftime('%w'))
statistics_dict[week][day] = statistics_dict[week][day] + 1
print(statistics_dict)
# Render the per-week weekday counts (`y_labels` and `values` come from the
# previous fragment) as a green heat map and save it to disk.
import numpy as np
import matplotlib.pyplot as plt
from mplfonts import use_font

use_font('SimHei')

x_labels = ["周一", "周二", "周三", "周四", "周五", "周六", "周日"]

fig, ax = plt.subplots(figsize=(15, 15))
ax.set_xticks(np.arange(len(x_labels)))
ax.set_yticks(np.arange(len(y_labels)))
ax.set_xticklabels(x_labels)
ax.set_yticklabels(y_labels)
heatmap = ax.imshow(values, cmap=plt.cm.Greens)
# Reference scale bar; comment the next line out if it is not wanted.
ax.figure.colorbar(heatmap, ax=ax)
plt.savefig('history.png')
# Print every colormap name matplotlib ships with, one per line
# (`json` is imported in an earlier fragment of the article).
import matplotlib.pyplot as plt
print(json.dumps(plt.colormaps(), indent=4))
# ['magma', 'inferno', 'plasma', 'viridis', 'cividis', 'twilight', 'twilight_shifted', 'turbo', 'Blues', 'BrBG', 'BuGn', 'BuPu', 'CMRmap', 'GnBu', 'Greens', 'Greys', 'OrRd', 'Oranges', 'PRGn', 'PiYG', 'PuBu', 'PuBuGn', 'PuOr', 'PuRd', 'Purples', 'RdBu', 'RdGy', 'RdPu', 'RdYlBu', 'RdYlGn', 'Reds', 'Spectral', 'Wistia', 'YlGn', 'YlGnBu', 'YlOrBr', 'YlOrRd', 'afmhot', 'autumn', 'binary', 'bone', 'brg', 'bwr', 'cool', 'coolwarm', 'copper', 'cubehelix', 'flag', 'gist_earth', 'gist_gray', 'gist_heat', 'gist_ncar', 'gist_rainbow', 'gist_stern', 'gist_yarg', 'gnuplot', 'gnuplot2', 'gray', 'hot', 'hsv', 'jet', 'nipy_spectral', 'ocean', 'pink', 'prism', 'rainbow', 'seismic', 'spring', 'summer', 'terrain', 'winter', 'Accent', 'Dark2', 'Paired', 'Pastel1', 'Pastel2', 'Set1', 'Set2', 'Set3', 'tab10', 'tab20', 'tab20b', 'tab20c', 'magma_r', 'inferno_r', 'plasma_r', 'viridis_r', 'cividis_r', 'twilight_r', 'twilight_shifted_r', 'turbo_r', 'Blues_r', 'BrBG_r', 'BuGn_r', 'BuPu_r', 'CMRmap_r', 'GnBu_r', 'Greens_r', 'Greys_r', 'OrRd_r', 'Oranges_r', 'PRGn_r', 'PiYG_r', 'PuBu_r', 'PuBuGn_r', 'PuOr_r', 'PuRd_r', 'Purples_r', 'RdBu_r', 'RdGy_r', 'RdPu_r', 'RdYlBu_r', 'RdYlGn_r', 'Reds_r', 'Spectral_r', 'Wistia_r', 'YlGn_r', 'YlGnBu_r', 'YlOrBr_r', 'YlOrRd_r', 'afmhot_r', 'autumn_r', 'binary_r', 'bone_r', 'brg_r', 'bwr_r', 'cool_r', 'coolwarm_r', 'copper_r', 'cubehelix_r', 'flag_r', 'gist_earth_r', 'gist_gray_r', 'gist_heat_r', 'gist_ncar_r', 'gist_rainbow_r', 'gist_stern_r', 'gist_yarg_r', 'gnuplot_r', 'gnuplot2_r', 'gray_r', 'hot_r', 'hsv_r', 'jet_r', 'nipy_spectral_r', 'ocean_r', 'pink_r', 'prism_r', 'rainbow_r', 'seismic_r', 'spring_r', 'summer_r', 'terrain_r', 'winter_r', 'Accent_r', 'Dark2_r', 'Paired_r', 'Pastel1_r', 'Pastel2_r', 'Set1_r', 'Set2_r', 'Set3_r', 'tab10_r', 'tab20_r', 'tab20b_r', 'tab20c_r']