lab 11

docx

School

Arizona State University *

*We aren’t endorsed by this school

Course

360

Subject

Computer Science

Date

Dec 6, 2023

Type

docx

Pages

6

Uploaded by ProfStraw17234

Report
Jordan Whitecar IFT 360 Lab 11 # -*- coding: utf-8 -*- """ Created on Mon Nov 27 15:36:53 2023 @author: jorda """ import string import re fp = open('text_only.csv', 'r') clean_text = [] for line in fp: #remove 's line = line.replace(' \'s', '') #string.punctuation contains all special characters. #this for loop removes all special characters for i in string.punctuation: line = line.replace(i,'') #removing special characters results in some extra white spaces. #replace mutliple white spaces with a single white space #use a regular expression (re) to define the multiple space pattern line = re.sub(' +', ' ', line) #remove the last two characters of the line which are: white space, new line break
line = line[:-2] #convert into all lower-case letters line = line.lower() #add line to cleaned data clean_text.append(line) unigrams = {} bigrams = {} trigrams = {} quadgrams = {} unigram_cnts = {} bigram_cnts = {} trigram_cnts = {} quadgram_cnts = {} #word suggestion indices one_wrd_sug = {} two_wrd_sug = {} thr_wrd_sug = {} for t in clean_text: #split the words in each sentence words = [i for i in t.split(' ')] #add single words to list of unigrams for w in words:
unigrams[w] = 0 unigram_cnts[w] = unigram_cnts.get(w,0)+1 #add every two successive words to the list of bigrams bi = zip( words[:-1] , words[1:] ) for b in bi: x=' '.join(b) bigrams[x] = 0 bigram_cnts[x] = bigram_cnts.get(x, 0)+1 #add entry to one word suggestion list if bigram_cnts[x]==1: if b[0] in one_wrd_sug: (one_wrd_sug[b[0]]).append(x) else: one_wrd_sug[b[0]] = [x] #add every three successive words to the list of trigrams tri = zip( words[:-2] , words[1:-1], words[2:] ) for t in tri: x = ' '.join(t) trigrams[x] = 0 trigram_cnts[x] = trigram_cnts.get(x, 0)+1 #add entry to two-word suggestion list if trigram_cnts[x]==1: k= t[0]+" "+t[1] if k in two_wrd_sug: (two_wrd_sug[k]).append(x) else:
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help
two_wrd_sug[k] = [x] #add every four successive words to the list of quadgrams quad = zip( words[:-3] , words[1:-2], words[2:-1], words[3:] ) for q in quad: x = ' '.join(q) quadgrams[x] = 0 quadgram_cnts[x] = quadgram_cnts.get(x, 0)+1 #add entry to three-word suggestion list if quadgram_cnts[x]==1: k= q[0]+" "+q[1] if k in thr_wrd_sug: (thr_wrd_sug[k]).append(x) else: thr_wrd_sug[k] = [x] #print some sample words for which a suggestion can be made thr_wrds=['goldman sachs said', 'china based operation',] print("These are some of the two-word phrases that the agent can suggest a word after:") for i in thr_wrds: print(i) print("==================================") #print some sample words for which a suggestion can be made two_wrds=['goldman sachs', 'similar to', 'deal includes', 'private equity', 'china and', 'the longterm', 'kind of'] print("These are some of the two-word phrases that the agent can suggest a word after:") for i in two_wrds: print(i)
print("==================================") one_wrd = ['longer', 'identity', 'know', 'protocol', 'realtime', 'motorola', 'highquality', 'defense'] print("These are some of the one-word phrases that the agent can suggest a word after:") for i in one_wrd: print(i) print("==================================") #ask the user to enter a word, output a couple of suggestions wrd = input('enter one word to give you a suggestion for the following word: ') if wrd in one_wrd_sug: print('Suggestions:') for i in one_wrd_sug[wrd]: print(i) #ask the user to enter two words, output a couple of suggestions wrd = input('enter two words to give you a suggestion for the following word: ') if wrd in two_wrd_sug: print('Suggestions:') for i in two_wrd_sug[wrd]: print(i) #ask the user to enter three words, output a couple of suggestions wrd = input('enter three words to give you a suggestion for the following word: ') if wrd in thr_wrd_sug: print('Suggestions:') for i in thr_wrd_sug[wrd]: print(i)
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help