#word_freq.py
#make a frequency distribution of the words in a file.
"""Count how often each word occurs in a text file and print the result.

The script prompts for a file name, lowercases the text, discards every
character that is neither a letter a-z nor whitespace, counts the
remaining whitespace-separated words, and prints one "word count" pair
per line, sorted either alphabetically or by descending count.
"""
import re
from collections import Counter

#anything that is not a lowercase ASCII letter or whitespace is dropped
NON_WORD_CHARS = re.compile(r"[^a-z\s]")


def split_words(corpus):
    """Return the list of lowercase a-z words found in *corpus*.

    Characters other than letters and whitespace are deleted rather than
    treated as separators, so "don't" becomes "dont". All whitespace
    (newlines, tabs, carriage returns, ...) is kept until the split, so
    words separated by a tab are not fused into a single word.
    """
    cleaned = NON_WORD_CHARS.sub("", corpus.lower())
    #split() with no argument splits on any run of whitespace and
    #never yields empty strings
    return cleaned.split()


def count_words(corpus):
    """Return a Counter mapping each word in *corpus* to its frequency."""
    return Counter(split_words(corpus))


#sort key function. Sort on count
def sort_tuple_count(t):
    """Return the count (second item) of a (word, count) tuple."""
    return t[1]


def sort_word_counts(word_items, sort_how):
    """Return the (word, count) pairs of *word_items* in display order.

    sort_how == 'a' sorts alphabetically by word; any other value sorts
    by descending count. The sort is stable, so words with equal counts
    keep the relative order they had in *word_items*.
    """
    if sort_how == 'a':
        return sorted(word_items)
    return sorted(word_items, key=sort_tuple_count, reverse=True)


def main():
    """Prompt for a file and a sort order, then print the word counts."""
    fname = input("Name of existing text file to read: ")
    #the with-block closes the file even if reading raises
    with open(fname, "r") as inf:
        corpus = inf.read()

    histo = count_words(corpus)

    sort_how = input("Sort alphabetically (a) or descending numerically (n): ")
    for word, count in sort_word_counts(histo.items(), sort_how):
        print(word, count)


if __name__ == "__main__":
    main()