I need help shorting my code! what can I do to write a cleaner code, or just reduce the paragraphs in my code by doing a list or loop? Can you revise my code and then give me an example code. Here's the code I did. # importing the PyPDF2 module import PyPDF2 # This is to get the sentence from the pdf files library import re # creating a pdf file object and giving loaction of pdf file pdfFileobj=open('C:/Users/jalej/Downloads/new artcle.pdf','rb') # creating a pdf reader object pdfReader=PyPDF2.PdfFileReader(pdfFileobj) # creating a page object pageObj=pdfReader.getPage(0) # extracting text from page print(pageObj.extractText()) # finally closing the pdf file object pdfFileobj.close() from pdfminer.high_level import extract_text # extracting the text from page numbers 1-8 result = extract_text('C:/Users/jalej/Downloads/new artcle.pdf',page_numbers=[0,1,2,3,4,5,6,7]) print(result) text = pageObj.extractText() # Getting the MHz from the pdf file searched_parameter = 'MHz' number_of_ocurrences = result.count(searched_parameter) print(number_of_ocurrences) sentences = re.findall(r'([^.]+MHz+[^.](?:.\d+)?)', result) print(sentences) # Getting the KHz from the pdf file searched_parameter = 'KHz' number_of_ocurrences = result.count(searched_parameter) print(number_of_ocurrences) sentences = re.findall(r'([^.]+KHz+[^.](?:.\d+)?)', result) print(sentences) # Getting the keyword of frequency from the pdf file searched_parameter = 'center frequency' number_of_ocurrences = result.count(searched_parameter) print(number_of_ocurrences) sentences = re.findall(r'([^.]+center frequency+[^.](?:.\d+)?)', result) print(sentences) # Getting the Intensity from the pdf file searched_parameter = 'Isppa' number_of_ocurrences = result.count(searched_parameter) print(number_of_ocurrences) sentences = re.findall(r'([^.]+Isppa+[^.](?:.\d+)?)', result) print(sentences) # Getting the Intensity from the pdf file searched_parameter = 'W/cm^2' number_of_ocurrences = result.count(searched_parameter) print(number_of_ocurrences) sentences = re.findall(r'([^.]+ W/cm^2+[^.](?:.\d+)?)', result) print(sentences) # Getting the Intensity from the pdf file searched_parameter = 'I_spta' number_of_ocurrences = result.count(searched_parameter) print(number_of_ocurrences) sentences = re.findall(r'([^.]+I_spta+[^.](?:.\d+)?)', result) print(sentences) # Getting the Intensity from the pdf file searched_parameter = 'Intensity' number_of_ocurrences = result.count(searched_parameter) print(number_of_ocurrences) sentences = re.findall(r'([^.]+Intensity+[^.](?:.\d+)?)', result) print(sentences) # Getting the pressure from the pdf file searched_parameter = 'MPa' number_of_ocurrences = result.count(searched_parameter) print(number_of_ocurrences) sentences = re.findall(r'([^.]+MPa+[^.](?:.\d+)?)', result) print(sentences)
I need help shorting my code! what can I do to write a cleaner code, or just reduce the paragraphs in my code by doing a list or loop? Can you revise my code and then give me an example code. Here's the code I did.
# importing the PyPDF2 module
import PyPDF2
# This is to get the sentence from the pdf files library
import re
# creating a pdf file object and giving loaction of pdf file
pdfFileobj=open('C:/Users/jalej/Downloads/new artcle.pdf','rb')
# creating a pdf reader object
pdfReader=PyPDF2.PdfFileReader(pdfFileobj)
# creating a page object
pageObj=pdfReader.getPage(0)
# extracting text from page
print(pageObj.extractText())
# finally closing the pdf file object
pdfFileobj.close()
from pdfminer.high_level import extract_text
# extracting the text from page numbers 1-8
result = extract_text('C:/Users/jalej/Downloads/new artcle.pdf',page_numbers=[0,1,2,3,4,5,6,7])
print(result)
text = pageObj.extractText()
# Getting the MHz from the pdf file
searched_parameter = 'MHz'
number_of_ocurrences = result.count(searched_parameter)
print(number_of_ocurrences)
sentences = re.findall(r'([^.]+MHz+[^.](?:.\d+)?)', result)
print(sentences)
# Getting the KHz from the pdf file
searched_parameter = 'KHz'
number_of_ocurrences = result.count(searched_parameter)
print(number_of_ocurrences)
sentences = re.findall(r'([^.]+KHz+[^.](?:.\d+)?)', result)
print(sentences)
# Getting the keyword of frequency from the pdf file
searched_parameter = 'center frequency'
number_of_ocurrences = result.count(searched_parameter)
print(number_of_ocurrences)
sentences = re.findall(r'([^.]+center frequency+[^.](?:.\d+)?)', result)
print(sentences)
# Getting the Intensity from the pdf file
searched_parameter = 'Isppa'
number_of_ocurrences = result.count(searched_parameter)
print(number_of_ocurrences)
sentences = re.findall(r'([^.]+Isppa+[^.](?:.\d+)?)', result)
print(sentences)
# Getting the Intensity from the pdf file
searched_parameter = 'W/cm^2'
number_of_ocurrences = result.count(searched_parameter)
print(number_of_ocurrences)
sentences = re.findall(r'([^.]+ W/cm^2+[^.](?:.\d+)?)', result)
print(sentences)
# Getting the Intensity from the pdf file
searched_parameter = 'I_spta'
number_of_ocurrences = result.count(searched_parameter)
print(number_of_ocurrences)
sentences = re.findall(r'([^.]+I_spta+[^.](?:.\d+)?)', result)
print(sentences)
# Getting the Intensity from the pdf file
searched_parameter = 'Intensity'
number_of_ocurrences = result.count(searched_parameter)
print(number_of_ocurrences)
sentences = re.findall(r'([^.]+Intensity+[^.](?:.\d+)?)', result)
print(sentences)
# Getting the pressure from the pdf file
searched_parameter = 'MPa'
number_of_ocurrences = result.count(searched_parameter)
print(number_of_ocurrences)
sentences = re.findall(r'([^.]+MPa+[^.](?:.\d+)?)', result)
print(sentences)

Step by step
Solved in 2 steps









