'''
20210927

R. Dawes

Dictionaries - Part 2
'''


def letters_only(w):
   ''' Remove all non-alpha characters from a string.

      parameter:
         w - string

      return : string containing only the alpha characters in w,
               in their original order (empty string if there are none)
   '''
   # str.join assembles the result in a single pass rather than
   # growing a string with repeated += concatenation
   return ''.join(c for c in w if c.isalpha())

# main

# Count how often each "significant" word (more than 3 letters) occurs
# in the text file, then report the most frequent one.
#       To run this demo, you will need to create the Data
#       directory and copy the txt file into it

# create an empty dictionary mapping word -> number of occurrences
word_counts = {}

# open the text file for reading; the with-statement guarantees the
# file is closed even if an error occurs while reading it
with open('Data/Journey_Centre_Earth.txt', 'r') as infile:
   # iterate through the lines of the file
   for line in infile:
      # split the line into a list, splitting on "white space",
      # and iterate through the words in the list
      for word in line.split():
         # convert the word to lower case and remove all punctuation
         word = letters_only(word.lower())
         # ignore short words
         if len(word) > 3:
            # get() supplies 0 for a word not seen before, so this either
            # increments the count or adds the word with a count of 1
            word_counts[word] = word_counts.get(word, 0) + 1

# find the word with the highest frequency; on a tie max() keeps the
# first such word in the dictionary, and the default covers the case
# where no significant words were found at all
most_frequent_word, highest_frequency = max(
   word_counts.items(), key=lambda item: item[1], default=('', 0))

if word_counts:
   # format() places the word directly between the quotes; passing the
   # pieces to print() separately would insert spaces around the word
   print("The most frequent significant word in 'Journey to the Centre of the Earth' "
         "is '{}' which appears {} times".format(most_frequent_word, highest_frequency))
else:
   print("No significant words were found in 'Journey to the Centre of the Earth'")