"""Count word frequencies across a set of text files.

Every input file is read, all non-letter ASCII characters are blanked out,
the text is split into lower-case words, and the words are counted in five
groups (length 1, 2, 3, 4, and 5-or-more). Each group is written to
``project1Output.txt`` as ``word count`` lines, sorted by ascending count
and then alphabetically.
"""


def filterOutPunctuation(punctuationMarks, s):
    """Return the string s with every mark in punctuationMarks blanked out.

    punctuationMarks -- iterable of strings to remove
    s                -- the text to clean

    Each occurrence of a mark is replaced by a single space, so words that
    were separated only by punctuation stay separated.
    """
    for mark in punctuationMarks:
        s = s.replace(mark, " ")
    return s


def processWords(L):
    """Group the words in L by length and count how often each one occurs.

    L -- iterable of words (strings)

    Returns a list of five dictionaries: index 0 holds words of length 1,
    index 1 words of length 2, index 2 words of length 3, index 3 words of
    length 4, and index 4 all longer words. In each dictionary the words are
    the keys and their frequencies are the values.
    """
    wordDict = [{} for _ in range(5)]
    for word in L:
        # An empty string has no length bucket; without this guard it would
        # be indexed as -1 and silently counted among the longest words.
        if not word:
            continue
        bucket = wordDict[min(len(word), 5) - 1]
        bucket[word] = bucket.get(word, 0) + 1
    return wordDict


def makeLower(wordList):
    """Convert every word in wordList to lower case, in place.

    The list object itself is modified; nothing is returned.
    """
    wordList[:] = [word.lower() for word in wordList]


def _writeFrequencies(wordDicts, outputName):
    """Write each dictionary in wordDicts to outputName as 'word count' lines."""
    with open(outputName, "w") as out:
        for frequencies in wordDicts:
            # Sorting (count, word) pairs orders by frequency first and
            # breaks ties alphabetically by word.
            pairs = sorted((count, word) for word, count in frequencies.items())
            for count, word in pairs:
                out.write(word + " " + str(count) + "\n")


# Input text files processed by the main program.
fileNames = ["alice.txt", "carol.txt", "hyde.txt", "war.txt",
             "gulliver.txt", "treasure.txt"]

# Name of the report written by the main program.
outputFileName = "project1Output.txt"

# Every ASCII character below 127 that is not a letter A-Z or a-z. Digits and
# whitespace are included, so they are treated as separators as well. This is
# built as a real list so it can be reused for every input file.
punctuationMarks = [
    chr(code)
    for code in (list(range(0, ord("A")))
                 + list(range(ord("Z") + 1, ord("a")))
                 + list(range(ord("z") + 1, 127)))
]


def main():
    """Read all input files and write the word-frequency report."""
    L = []
    # Loop that processes all input text files
    for name in fileNames:
        with open(name) as f:
            bigString = f.read()  # read the entire text file in one go
        bigString = filterOutPunctuation(punctuationMarks, bigString)
        wordList = bigString.split()
        makeLower(wordList)
        L.extend(wordList)

    # Turn the raw word list into per-length dictionaries and report them
    _writeFrequencies(processWords(L), outputFileName)


if __name__ == "__main__":
    main()