#html-to-list1.py import urllib, stripTags url = 'https://en.wikisource.org/wiki/John_F._Kennedy%27s_Third_State_of_the_Union_Address' response = urllib.urlopen(url) html = response.read() text = stripTags.stripTags (html) wordlist = text.split() print((wordlist[0:120])) # save-webpage.py import urllib url = 'https://en.wikisource.org/wiki/John_F._Kennedy%27s_Third_State_of_the_Union_Address' response = urllib.urlopen(url) webContent = response.read() f = open('Kennedy_Third_SOTU.html', 'w') f.write(webContent) f.close # stripTags.py def stripTags (pageContents): startLoc = pageContents.find("<p>") endLoc = pageContents.rfind ("</p>") pageContents = pageContents[startLoc:endLoc] inside = 0 text = '' for char in pageContents: if char == '<': inside = 1 elif (inside == 1 and char == '>'): inside = 0 elif inside == 1: continue else: text += char return text

Database System Concepts
7th Edition
ISBN:9780078022159
Author:Abraham Silberschatz Professor, Henry F. Korth, S. Sudarshan
Publisher:McGraw-Hill Education
Chapter1: Introduction
Section: Chapter Questions
Problem 1PE
icon
Related questions
Question

Explain what is happening in html-to-list1.py in your own words

#html-to-list1.py
# Download a web page, strip its HTML tags, and print the first 120 words.
import urllib, stripTags

url = 'https://en.wikisource.org/wiki/John_F._Kennedy%27s_Third_State_of_the_Union_Address'

# NOTE: urllib.urlopen is the Python 2 API; on Python 3 the equivalent call
# is urllib.request.urlopen, and read() returns bytes that must be decoded.
response = urllib.urlopen(url)
html = response.read()

# Remove the markup, then split the remaining text on whitespace.
text = stripTags.stripTags(html)
wordlist = text.split()

print((wordlist[0:120]))
Transcribed Image Text:#html-to-list1.py import urllib, stripTags url = 'https://en.wikisource.org/wiki/John_F._Kennedy%27s_Third_State_of_the_Union_Address' response = urllib.urlopen(url) html = response.read() text = stripTags.stripTags (html) wordlist = text.split() print((wordlist[0:120]))
# save-webpage.py
# Download a web page and save its raw HTML to a local file.
import urllib

url = 'https://en.wikisource.org/wiki/John_F._Kennedy%27s_Third_State_of_the_Union_Address'

# NOTE: urllib.urlopen is the Python 2 API; on Python 3 the equivalent call
# is urllib.request.urlopen.
response = urllib.urlopen(url)
webContent = response.read()

# The with-block closes the file even if the write fails; the transcribed
# code only referenced f.close without calling it.
with open('Kennedy_Third_SOTU.html', 'w') as f:
    f.write(webContent)
# stripTags.py
def stripTags(pageContents):
    """Return the text of *pageContents* with HTML tags removed.

    Only the span from the first "<p>" up to (not including) the last
    "</p>" is considered; everything outside it is discarded. Within that
    span, every character from a '<' through the next '>' is dropped and
    all other characters are kept in order.

    If either marker is missing, str.find/str.rfind return -1 and that
    index is used in the slice as-is (no special handling).
    """
    startLoc = pageContents.find("<p>")
    endLoc = pageContents.rfind("</p>")
    pageContents = pageContents[startLoc:endLoc]

    inside = 0  # 1 while scanning between '<' and '>', otherwise 0
    text = ''
    for char in pageContents:
        if char == '<':
            inside = 1
        elif (inside == 1 and char == '>'):
            inside = 0
        elif inside == 1:
            # Character belongs to a tag; skip it.
            continue
        else:
            text += char
    return text
Transcribed Image Text:# save-webpage.py import urllib url = 'https://en.wikisource.org/wiki/John_F._Kennedy%27s_Third_State_of_the_Union_Address' response = urllib.urlopen(url) webContent = response.read() f = open('Kennedy_Third_SOTU.html', 'w') f.write(webContent) f.close # stripTags.py def stripTags (pageContents): startLoc = pageContents.find("<p>") endLoc = pageContents.rfind ("</p>") pageContents = pageContents[startLoc:endLoc] inside = 0 text = '' for char in pageContents: if char == '<': inside = 1 elif (inside == 1 and char == '>'): inside = 0 elif inside == 1: continue else: text += char return text
Expert Solution
trending now

Trending now

This is a popular solution!

steps

Step by step

Solved in 2 steps

Blurred answer
Knowledge Booster
Features of HTML
Learn more about
Need a deep-dive on the concept behind this application? Look no further. Learn more about this topic, computer-science and related others by exploring similar questions and additional content below.
Recommended textbooks for you
Database System Concepts
Database System Concepts
Computer Science
ISBN:
9780078022159
Author:
Abraham Silberschatz Professor, Henry F. Korth, S. Sudarshan
Publisher:
McGraw-Hill Education
Starting Out with Python (4th Edition)
Starting Out with Python (4th Edition)
Computer Science
ISBN:
9780134444321
Author:
Tony Gaddis
Publisher:
PEARSON
Digital Fundamentals (11th Edition)
Digital Fundamentals (11th Edition)
Computer Science
ISBN:
9780132737968
Author:
Thomas L. Floyd
Publisher:
PEARSON
C How to Program (8th Edition)
C How to Program (8th Edition)
Computer Science
ISBN:
9780133976892
Author:
Paul J. Deitel, Harvey Deitel
Publisher:
PEARSON
Database Systems: Design, Implementation, & Manag…
Database Systems: Design, Implementation, & Manag…
Computer Science
ISBN:
9781337627900
Author:
Carlos Coronel, Steven Morris
Publisher:
Cengage Learning
Programmable Logic Controllers
Programmable Logic Controllers
Computer Science
ISBN:
9780073373843
Author:
Frank D. Petruzella
Publisher:
McGraw-Hill Education