'''
20210927

R. Dawes

Dictionaries - Part 3

Adds tkinter to create nicer output

Uses "stop words" to eliminate trivial words from the text
being analyzed

Introduces a set to contain the stop words
'''

import tkinter as tk


def letters_only(w):
   ''' strip every non-alphabetic character out of a string

      parameter:
         w - string to filter

      return : new string made of the characters of w for which
               str.isalpha() is true, kept in their original order
   '''
   return ''.join(ch for ch in w if ch.isalpha())

# main
#
# Count how often each non-stop word occurs in the text file and show the
# most frequent one in a tkinter window.

# build the set of stop words; every line of the file is reduced to its
# letters, so the file presumably holds one stop word per line
stop_words = set()
with open('Data/StopWords/StopWords.txt','r') as stop_words_file:
   for line in stop_words_file:
      # lower-case so entries compare equal to the lower-cased text words
      word = letters_only(line).lower()
      # a blank line reduces to '' - do not store that as a stop word
      if word:
         stop_words.add(word)

# tally every remaining word of the text; "with" closes the file even if
# an error occurs while reading
word_counts = {}
with open('Data/Journey_Centre_Earth.txt','r') as infile:
   for line in infile:
      for word in line.split():
         word = letters_only(word.lower())
         # tokens without any letters (numbers, "--", ...) reduce to ''
         # and must not be counted as a word; stop words are ignored too
         if word and word not in stop_words:
            word_counts[word] = word_counts.get(word, 0) + 1

# find the word with the highest count; on a tie the word met first in
# the dictionary wins, and an empty dictionary leaves '' and 0
most_frequent_word = ''
highest_frequency = 0
for w, c in word_counts.items():
   if c > highest_frequency:
      highest_frequency = c
      most_frequent_word = w


# use tkinter to show the most frequent word
window = tk.Tk()
window.geometry("1200x400")
window.title("Most frequent word")
# place the output in a Text widget
show_text = tk.Text(window,height=5,width=120,font=(None,20))
show_text.grid(row=0, column=0)
show_text.insert(tk.END, "The most frequent significant word in 'Journey to the Centre of the Earth'  is \n\n\t\t" +
                                         most_frequent_word+
                                         "\n\nwhich occurs "+str(highest_frequency)+" times")

# start tkinter
window.mainloop()