# Local data folder; change as needed (the os module can be used to build
# the path instead of hard-coding it).
folder = 'D:/CuteHand/jr_novels/'
file = folder + 'names.txt'
with open(file) as f:
    # Strip the trailing newline from every line.
    data = [line.strip() for line in f]

# names.txt alternates lines: a novel title, then that novel's names.
novels = data[::2]
names = data[1::2]
novel_names = {k: v.split() for k, v in zip(novels, names)}

# Next, rank the characters of The Heaven Sword and Dragon Saber
# (倚天屠龙记) by how often they are mentioned.
novel = '倚天屠龙记'

# The names line is '、'-separated; remove any whitespace, then split on it.
namelist = ''.join(novel_names[novel]).split('、')

# Count occurrences in the novel's text.  `data` holds the lines of
# names.txt, so `data.count(name)` would be list.count() and report how many
# *lines* equal the name (practically always 0) instead of mentions.
with open(folder + '{}.txt'.format(novel)) as f:
    novel_text = f.read()

num = 10  # report the top 10
# Empty names are skipped: str.count('') returns len(text) + 1, which would
# put a meaningless bar at the top of the chart.
count = [[name, novel_text.count(name)] for name in namelist if name]
count.sort(key=lambda x: x[1])

top = count[-num:]
numbers = [x[1] for x in top]
names = [x[0] for x in top]
# Use the real number of bars; fewer than `num` names may be available and
# barh/set_yticklabels need matching lengths.
positions = range(len(top))

_, ax = plt.subplots()
ax.barh(positions, numbers, align='center')
ax.set_title('倚天屠龙记', fontsize=14)
ax.set_yticks(positions)
ax.set_yticklabels(names, fontsize=10)
plt.show()
# Add the kung fu and sect ("bang") name data: one entry per line of each
# file, with surrounding whitespace removed.
file = 'D:/CuteHand/jr_novels/'

kungfu_names = []
with open(file + "kungfu.txt") as f:
    for line in f:
        kungfu_names.append(line.strip())

bang_names = []
with open(file + "bangs.txt") as f:
    for line in f:
        bang_names.append(line.strip())
# Text-mining visualization functions.

# Find the ten most frequently mentioned characters in a novel.
def find_main_characters(novel):
    """Plot the most frequently mentioned characters of ``novel``.

    Reads ``names.txt`` (alternating lines: a novel title, then that novel's
    '、'-separated character names) and ``<novel>.txt`` from the data folder,
    counts how many times each name occurs in the novel text and draws the
    (up to) ten highest counts as a horizontal bar chart on a new matplotlib
    figure.  The figure is created but not shown; nothing is returned.

    Args:
        novel: Novel title. It is used both as the key into the table built
            from ``names.txt`` and as the base name of the text file.

    Raises:
        KeyError: If ``novel`` has no entry in ``names.txt``.
        OSError: If either data file cannot be opened.
    """
    folder = 'D:/CuteHand/jr_novels/'
    with open(folder + 'names.txt') as f:
        lines = [line.strip() for line in f]
    # Even lines are titles, odd lines are the matching name lists.
    novel_names = {k: v.split() for k, v in zip(lines[::2], lines[1::2])}

    with open(folder + '{}.txt'.format(novel)) as f:
        data = f.read()

    # Remove any whitespace inside the names line, then split on '、'.
    namelist = ''.join(novel_names[novel]).split('、')

    # Empty names are skipped: str.count('') returns len(data) + 1, which
    # would put a meaningless bar at the top of the chart.
    count = [[name, data.count(name)] for name in namelist if name]
    count.sort(key=lambda x: x[1])

    num = 10
    top = count[-num:]
    numbers = [x[1] for x in top]
    names = [x[0] for x in top]
    # Fewer than `num` names may exist; bar positions, ticks and labels must
    # all have the same length or matplotlib raises.
    positions = range(len(top))

    _, ax = plt.subplots()
    ax.barh(positions, numbers, align='center')
    ax.set_title(novel + "出现最多的十大人物", fontsize=16)
    ax.set_yticks(positions)
    ax.set_yticklabels(names, fontsize=14)
# Find the ten most frequently mentioned kung fu techniques in a novel.
def kungfu(novel):
    """Plot the most frequently mentioned kung fu names in ``novel``.

    Reads ``<novel>.txt`` from the data folder, counts how many times each
    entry of the module-level ``kungfu_names`` list occurs in the text and
    draws the (up to) ten highest counts as a horizontal bar chart on a new
    matplotlib figure.  The figure is created but not shown; nothing is
    returned.

    Args:
        novel: Novel title, used as the base name of the text file and in
            the chart title.

    Raises:
        OSError: If the novel's text file cannot be opened.
    """
    folder = 'D:/CuteHand/jr_novels/'
    with open(folder + '{}.txt'.format(novel)) as f:
        df = f.read()

    num = 10  # report the top 10
    # Empty entries (e.g. from blank lines in the names file) are skipped:
    # str.count('') returns len(df) + 1 and would dominate the ranking.
    count = [[name, df.count(name)] for name in kungfu_names if name]
    count.sort(key=lambda x: x[1])

    top = count[-num:]
    numbers = [x[1] for x in top]
    names = [x[0] for x in top]
    # Fewer than `num` entries may exist; bar positions, ticks and labels
    # must all have the same length or matplotlib raises.
    positions = range(len(top))

    _, ax = plt.subplots()
    ax.barh(positions, numbers, align='center')
    ax.set_title(novel + "出现最多的十大武功", fontsize=16)
    ax.set_yticks(positions)
    ax.set_yticklabels(names, fontsize=14)
# Find the ten most frequently mentioned sects ("bang") in a novel.
def bang(novel):
    """Plot the most frequently mentioned sect names in ``novel``.

    Reads ``<novel>.txt`` from the data folder, counts how many times each
    entry of the module-level ``bang_names`` list occurs in the text and
    draws the (up to) ten highest counts as a horizontal bar chart on a new
    matplotlib figure.  The figure is created but not shown; nothing is
    returned.

    Args:
        novel: Novel title, used as the base name of the text file and in
            the chart title.

    Raises:
        OSError: If the novel's text file cannot be opened.
    """
    folder = 'D:/CuteHand/jr_novels/'
    with open(folder + '{}.txt'.format(novel)) as f:
        df = f.read()

    num = 10  # report the top 10
    # Empty entries (e.g. from blank lines in the names file) are skipped:
    # str.count('') returns len(df) + 1 and would dominate the ranking.
    count = [[name, df.count(name)] for name in bang_names if name]
    count.sort(key=lambda x: x[1])

    top = count[-num:]
    numbers = [x[1] for x in top]
    names = [x[0] for x in top]
    # Fewer than `num` entries may exist; bar positions, ticks and labels
    # must all have the same length or matplotlib raises.
    positions = range(len(top))

    _, ax = plt.subplots()
    ax.barh(positions, numbers, align='center')
    ax.set_title(novel + "出现最多的十大门派", fontsize=16)
    ax.set_yticks(positions)
    ax.set_yticklabels(names, fontsize=14)
import gensim
import warnings

# Ignore UserWarning and FutureWarning messages attributed to gensim.
warnings.filterwarnings(
    action='ignore',
    category=UserWarning,
    module='gensim',
)
warnings.filterwarnings(
    action='ignore',
    category=FutureWarning,
    module='gensim',
)

import jieba

# Add every entry of every per-novel name list to jieba as a custom word.
# NOTE(review): entries are the whitespace-separated chunks of each names
# line; if a line is '、'-separated without spaces, a whole chunk is added
# as one word -- confirm against the format of names.txt.
for names in novel_names.values():
    for name in names:
        jieba.add_word(name)

# Load the kung fu and sect ("bang") name lists: one stripped entry per line.
file = 'D:/CuteHand/jr_novels/'

kungfu_names = []
with open(file + "kungfu.txt") as f:
    for line in f:
        kungfu_names.append(line.strip())

bang_names = []
with open(file + "bangs.txt") as f:
    for line in f:
        bang_names.append(line.strip())