'''
20210927
R. Dawes
Dictionaries - Part 3

Adds tkinter to create nicer output
Uses "stop words" to eliminate trivial words from the text being analyzed
Introduces a set to contain the stop words
'''

from collections import Counter
import tkinter as tk

STOP_WORDS_PATH = 'Data/StopWords/StopWords.txt'
BOOK_PATH = 'Data/Journey_Centre_Earth.txt'


def letters_only(w):
    '''
    remove all non-alpha characters from a string
    parameter: w - string
    return   : string containing only the alpha characters in w
               (may be empty if w contains no letters at all)
    '''
    return ''.join(c for c in w if c.isalpha())


def load_stop_words(lines):
    '''
    build the set of stop words
    parameter: lines - iterable of strings, one stop word per line
                       (an open text file works)
    return   : set of lower-case, letters-only stop words
    '''
    stop_words = set()
    for line in lines:
        # lower-case so the entry can match the lower-cased text words
        word = letters_only(line).lower()
        # blank lines reduce to '' and are not real stop words
        if word:
            stop_words.add(word)
    return stop_words


def count_significant_words(lines, stop_words):
    '''
    count every word that is not a stop word
    parameter: lines      - iterable of strings (an open text file works)
               stop_words - collection of words to ignore
    return   : Counter mapping each significant word to its frequency
    '''
    word_counts = Counter()
    for line in lines:
        for raw_word in line.split():
            word = letters_only(raw_word.lower())
            # tokens with no letters (e.g. '--' or '1864') become '' and
            # must not be counted as a word
            if word and word not in stop_words:
                word_counts[word] += 1
    return word_counts


def most_frequent(word_counts):
    '''
    find the word with the highest count
    parameter: word_counts - dictionary mapping word -> frequency
    return   : (word, frequency) tuple; ('', 0) if there are no words.
               When several words tie, the first one encountered wins.
    '''
    ranked = Counter(word_counts).most_common(1)
    return ranked[0] if ranked else ('', 0)


def show_result(most_frequent_word, highest_frequency):
    '''
    use tkinter to show the most frequent word
    parameter: most_frequent_word - string to display
               highest_frequency  - number of occurrences of that word
    return   : None (blocks until the window is closed)
    '''
    window = tk.Tk()
    window.geometry("1200x400")
    window.title("Most frequent word")

    # place the output in a Text widget
    show_text = tk.Text(window, height=5, width=120, font=(None, 20))
    show_text.grid(row=0, column=0)
    show_text.insert(
        tk.END,
        "The most frequent significant word in "
        "'Journey to the Centre of the Earth' is \n\n\t\t"
        + most_frequent_word
        + "\n\nwhich occurs " + str(highest_frequency) + " times")

    # start tkinter
    window.mainloop()


def main():
    '''
    read the stop words and the book, then display the most frequent
    significant word
    '''
    # "with" guarantees both files are closed once they have been read
    with open(STOP_WORDS_PATH, 'r') as stop_words_file:
        stop_words = load_stop_words(stop_words_file)

    with open(BOOK_PATH, 'r') as infile:
        word_counts = count_significant_words(infile, stop_words)

    most_frequent_word, highest_frequency = most_frequent(word_counts)
    show_result(most_frequent_word, highest_frequency)


if __name__ == '__main__':
    main()