1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
|
"""Word-frequency report for the text file ``cp.txt``.

Reads the file, lower-cases it, strips punctuation, drops a small set of
uninformative words and prints three views of the remaining word counts:
every word with its count, the (word, count) pairs sorted by count, and the
20 most frequent words.
"""
import string
from collections import Counter

# Words ignored by the report. The single letters / fragments ('t', 'm',
# 'd', 've') are what is left of contractions such as "don't" or "I've"
# once punctuation has been replaced by spaces.
STOP_WORDS = frozenset(
    {'a', 'an', 'the', 'i', 'do', 'am', 'you', 'no', 't', 'm', 'd', 've'}
)

# Maps every ASCII punctuation character to a space in a single pass.
_PUNCT_TO_SPACE = str.maketrans(
    string.punctuation, ' ' * len(string.punctuation)
)


def tokenize(text):
    """Return the lower-cased words of *text* with punctuation removed."""
    return text.lower().translate(_PUNCT_TO_SPACE).split()


def count_words(text, stop_words=STOP_WORDS):
    """Return a ``{word: occurrences}`` dict for *text*.

    Words listed in *stop_words* are skipped.  Counting happens on the full
    token list (not on a de-duplicated one), so the values are the real
    number of occurrences.
    """
    return dict(
        Counter(word for word in tokenize(text) if word not in stop_words)
    )


def rank_words(counts):
    """Return ``(word, count)`` pairs sorted by count, highest first.

    The sort is stable, so words with equal counts keep the order in which
    they appear in *counts*.
    """
    return sorted(counts.items(), key=lambda item: item[1], reverse=True)


def top_words(counts, limit=20):
    """Return up to *limit* ``[count, word]`` lists, most frequent first.

    Ties on count are broken by the word itself in descending order.  Fewer
    than *limit* entries are returned when there are not enough distinct
    words, instead of failing with an ``IndexError``.
    """
    hist = sorted(([count, word] for word, count in counts.items()),
                  reverse=True)
    return hist[:limit]


def main(path='cp.txt'):
    """Print the word-frequency report for the file at *path*."""
    # The file is opened with the platform default encoding, as before.
    with open(path, 'r') as f:
        content = f.read()

    data = count_words(content)

    for key, value in data.items():
        print(key, value, '次')
    print("============")

    print(rank_words(data))
    print("============")

    for entry in top_words(data):
        print(entry)


if __name__ == "__main__":
    main()
|