I need help shortening my code! What can I do to write cleaner code, or at least reduce the repeated paragraphs in my code by using a list or a loop? Can you revise my code and then give me example code? Here's the code I wrote. # importing the PyPDF2 module import PyPDF2 # This is to get the sentence from the pdf files library import re # creating a pdf file object and giving loaction of pdf file pdfFileobj=open('C:/Users/jalej/Downloads/new artcle.pdf','rb') # creating a pdf reader object pdfReader=PyPDF2.PdfFileReader(pdfFileobj) # creating a page object pageObj=pdfReader.getPage(0) # extracting text from page print(pageObj.extractText()) # finally closing the pdf file object pdfFileobj.close() from pdfminer.high_level import extract_text # extracting the text from page numbers 1-8 result = extract_text('C:/Users/jalej/Downloads/new artcle.pdf',page_numbers=[0,1,2,3,4,5,6,7]) print(result) text = pageObj.extractText() # Getting the MHz from the pdf file searched_parameter = 'MHz' number_of_ocurrences = result.count(searched_parameter) print(number_of_ocurrences) sentences = re.findall(r'([^.]+MHz+[^.](?:.\d+)?)', result) print(sentences) # Getting the KHz from the pdf file searched_parameter = 'KHz' number_of_ocurrences = result.count(searched_parameter) print(number_of_ocurrences) sentences = re.findall(r'([^.]+KHz+[^.](?:.\d+)?)', result) print(sentences) # Getting the keyword of frequency from the pdf file searched_parameter = 'center frequency' number_of_ocurrences = result.count(searched_parameter) print(number_of_ocurrences) sentences = re.findall(r'([^.]+center frequency+[^.](?:.\d+)?)', result) print(sentences) # Getting the Intensity from the pdf file searched_parameter = 'Isppa' number_of_ocurrences = result.count(searched_parameter) print(number_of_ocurrences) sentences = re.findall(r'([^.]+Isppa+[^.](?:.\d+)?)', result) print(sentences) # Getting the Intensity from the pdf file searched_parameter = 'W/cm^2' number_of_ocurrences = result.count(searched_parameter) 
print(number_of_ocurrences) sentences = re.findall(r'([^.]+ W/cm^2+[^.](?:.\d+)?)', result) print(sentences) # Getting the Intensity from the pdf file searched_parameter = 'I_spta' number_of_ocurrences = result.count(searched_parameter) print(number_of_ocurrences) sentences = re.findall(r'([^.]+I_spta+[^.](?:.\d+)?)', result) print(sentences) # Getting the Intensity from the pdf file searched_parameter = 'Intensity' number_of_ocurrences = result.count(searched_parameter) print(number_of_ocurrences) sentences = re.findall(r'([^.]+Intensity+[^.](?:.\d+)?)', result) print(sentences) # Getting the pressure from the pdf file searched_parameter = 'MPa' number_of_ocurrences = result.count(searched_parameter) print(number_of_ocurrences) sentences = re.findall(r'([^.]+MPa+[^.](?:.\d+)?)', result) print(sentences)
I need help shortening my code! What can I do to write cleaner code, or at least reduce the repeated paragraphs in my code by using a list or a loop? Can you revise my code and then give me example code? Here's the code I wrote.
# Extract text from a PDF article and, for each keyword of interest, print how
# many times it occurs followed by the sentence fragments that contain it.
#
# The eight copy-pasted "count + findall + print" paragraphs are replaced by a
# single loop over KEYWORDS; adding a new search term is now a one-line change.
import re

# Location of the PDF to analyse.
PDF_PATH = 'C:/Users/jalej/Downloads/new artcle.pdf'

# Zero-based page indices handed to pdfminer (the first eight pages).
PAGE_NUMBERS = list(range(8))

# Terms to search for, in the order they are reported.
KEYWORDS = [
    'MHz',               # frequency
    'KHz',               # frequency
    'center frequency',  # frequency
    'Isppa',             # intensity
    # NOTE(review): confirm the extracted text really spells this unit with
    # a '^'; PDF extraction may render the exponent differently.
    'W/cm^2',            # intensity
    'I_spta',            # intensity
    'Intensity',         # intensity
    'MPa',               # pressure
]


def find_sentences(text, keyword):
    """Return the fragments of *text* that contain *keyword*.

    Each fragment is the run of non-period characters leading up to the
    keyword, the keyword itself, one following non-period character and,
    when present, one more character followed by digits.

    The keyword is matched literally.  It is passed through ``re.escape``
    because regex metacharacters in a keyword would otherwise change the
    pattern: the ``^`` in ``'W/cm^2'`` is a start-of-string anchor and made
    that search impossible to satisfy.

    Args:
        text: The text to search.
        keyword: The literal term to look for.

    Returns:
        A list of matching fragments (empty when nothing matches).
    """
    pattern = r'([^.]+' + re.escape(keyword) + r'[^.](?:.\d+)?)'
    return re.findall(pattern, text)


def read_first_page(path):
    """Return the text of the first page of the PDF at *path* using PyPDF2."""
    # Imported here rather than at module level so the text-search helper
    # above can be imported without the PDF libraries being installed.
    import PyPDF2

    # The ``with`` block closes the file even if extraction raises.
    with open(path, 'rb') as pdf_file:
        # NOTE(review): PdfFileReader / getPage / extractText are the legacy
        # PyPDF2 names (removed in PyPDF2 3.0 in favour of PdfReader,
        # reader.pages[0] and page.extract_text()); switch if the installed
        # version requires it.
        reader = PyPDF2.PdfFileReader(pdf_file)
        return reader.getPage(0).extractText()


def read_pages(path, page_numbers):
    """Return the text of the given zero-based pages of the PDF via pdfminer."""
    from pdfminer.high_level import extract_text

    return extract_text(path, page_numbers=page_numbers)


def main():
    """Print the first page, the full extracted text, then the keyword report."""
    print(read_first_page(PDF_PATH))

    result = read_pages(PDF_PATH, PAGE_NUMBERS)
    print(result)

    # For every keyword: number of occurrences, then the matching fragments.
    for keyword in KEYWORDS:
        print(result.count(keyword))
        print(find_sentences(result, keyword))


if __name__ == '__main__':
    main()
data:image/s3,"s3://crabby-images/00039/00039eaf710a9765f6db01fc5b9812260bf5cade" alt=""
Step by step
Solved in 2 steps
data:image/s3,"s3://crabby-images/e0cbe/e0cbe7c1cfa79a285a06530332b315bcf077d9a4" alt="Blurred answer"
data:image/s3,"s3://crabby-images/60092/600925f3c879aa48326d2697cc12cbd501c16012" alt="Database System Concepts"
data:image/s3,"s3://crabby-images/b5b1d/b5b1d5cf4b4f0b9fa5f7299e517dda8c78973ae2" alt="Starting Out with Python (4th Edition)"
data:image/s3,"s3://crabby-images/861e9/861e9f01dc31d6a60742dd6c59ed7da7e28cd75d" alt="Digital Fundamentals (11th Edition)"
data:image/s3,"s3://crabby-images/60092/600925f3c879aa48326d2697cc12cbd501c16012" alt="Database System Concepts"
data:image/s3,"s3://crabby-images/b5b1d/b5b1d5cf4b4f0b9fa5f7299e517dda8c78973ae2" alt="Starting Out with Python (4th Edition)"
data:image/s3,"s3://crabby-images/861e9/861e9f01dc31d6a60742dd6c59ed7da7e28cd75d" alt="Digital Fundamentals (11th Edition)"
data:image/s3,"s3://crabby-images/134f1/134f1b748b071d72903e45f776c363a56b72169f" alt="C How to Program (8th Edition)"
data:image/s3,"s3://crabby-images/3a774/3a774d976e0979e81f9a09e78124a494a1b36d93" alt="Database Systems: Design, Implementation, & Manag…"
data:image/s3,"s3://crabby-images/307b2/307b272f255471d7f7dc31378bac8a580ae1c49c" alt="Programmable Logic Controllers"