#word_freq.py
#make a frequency distribution of the words in a file.

import re

# Read the whole file up front. The context manager closes the file handle
# as soon as the read finishes (or fails), instead of leaving it open.
fname = input("Name of existing text file to read: ")
with open(fname, "r") as inf:
    corpus = inf.read()

# Normalise the text so only lowercase letters and space separators remain.
corpus = corpus.lower()
# Turn every whitespace character (newline, tab, ...) into a plain space
# first; otherwise the next step would delete it and glue the neighbouring
# words together.
corpus = re.sub(r"\s", " ", corpus)
# Delete everything that is not a lowercase letter or a space.
corpus = re.sub(r"[^a-z ]", "", corpus)

# split() with no argument treats a run of spaces as a single separator,
# so runs of spaces do not need to be collapsed beforehand.
word_list = corpus.split()

# Map each distinct word to the number of times it occurs.
histo_dict = {}
for word in word_list:
    histo_dict[word] = histo_dict.get(word, 0) + 1

# dict.items() is a dict_items view; copy it into a list so it can be sorted
# in place. Each element is a (word, count) tuple: element[0] is the word and
# element[1] is its count.
word_items_list = list(histo_dict.items())

# Key function for sorting (word, count) tuples by their count.
def sort_tuple_count(t):
    """Return the second element of *t* (the count of a (word, count) pair)."""
    count = t[1]
    return count

# Ask how to order the output. Surrounding whitespace and letter case are
# ignored, so answers such as "A" or "a " still select alphabetical order
# instead of silently falling through to the numeric sort.
sort_how = input("Sort alphabetically (a) or descending numerically (n): ")
if sort_how.strip().lower() == 'a':
    # Tuples compare element by element, so this orders by word first.
    word_items_list.sort()
else:
    # Any other answer sorts by count, highest count first.
    word_items_list.sort(key=sort_tuple_count, reverse=True)

# Print one "word count" line per distinct word.
for word, count in word_items_list:
    print(word, count)