'''
20210927
R. Dawes
Dictionaries - Part 2

Counts how often each "significant" word (more than 3 letters) occurs
in a text file and reports the most frequent one.
'''

# To run this demo, you will need to create the Data
# directory and copy the txt file into it
DATA_FILE = 'Data/Journey_Centre_Earth.txt'


def letters_only(w):
    '''
    remove all non-alpha characters from a string
    parameter: w - string
    return : string containing only the alpha characters in w,
             in their original order
    '''
    return ''.join(c for c in w if c.isalpha())


def count_words(lines, min_length=4):
    '''
    build a dictionary of word frequencies
    parameters: lines - iterable of strings (e.g. an open text file)
                min_length - shortest word length that is counted;
                             the default of 4 ignores words of 3
                             letters or fewer
    return : dictionary mapping each lower-case, letters-only word
             to the number of times it occurs in lines
    '''
    word_counts = {}
    for line in lines:
        # split() with no argument splits on any run of "white space"
        for word in line.split():
            # lower-case first, then strip punctuation and digits
            word = letters_only(word.lower())
            # ignore short words (also drops words that had no letters)
            if len(word) >= min_length:
                word_counts[word] = word_counts.get(word, 0) + 1
    return word_counts


def most_frequent(word_counts):
    '''
    find the word with the highest frequency
    parameter: word_counts - dictionary mapping word -> count
                             (counts are expected to be positive)
    return : tuple (word, count); ('', 0) if the dictionary is empty.
             When several words share the highest count, the one
             that was added to the dictionary first is returned.
    '''
    return max(word_counts.items(),
               key=lambda item: item[1],
               default=('', 0))


def main():
    ''' read the data file and print its most frequent significant word '''
    # the with statement closes the file even if counting fails
    with open(DATA_FILE, 'r') as infile:
        word_counts = count_words(infile)

    most_frequent_word, highest_frequency = most_frequent(word_counts)

    # format() keeps the word tight inside its quotes; passing the
    # pieces to print() separately would pad the word with spaces
    print("The most frequent significant word in "
          "'Journey to the Centre of the Earth' is "
          "'{}' which appears {} times".format(most_frequent_word,
                                               highest_frequency))


# only run the demo when executed as a script, not when imported
if __name__ == '__main__':
    main()