How to use Python to implement an alternative approximate match using the Levenshtein edit distance?

Database System Concepts
7th Edition
ISBN:9780078022159
Author:Abraham Silberschatz Professor, Henry F. Korth, S. Sudarshan
Publisher:McGraw-Hill Education
Chapter 1: Introduction
Section: Chapter Questions
Problem 1PE
icon
Related questions
Question

How to use Python to implement an alternative approximate match using the Levenshtein edit distance?

```plaintext
# Import necessary libraries
import argparse
import os
import logging
import Levenshtein

# Configure logging
# Module-level logger, registered under the name 'main_psearch'.
log = logging.getLogger('main_psearch')

# Constants
# NOTE(review): none of these four values is referenced in the code visible
# in this listing; presumably the search routine consumes them -- confirm.
MIN_WORD = 5                     # Minimum word size in bytes
MAX_WORD = 15                    # Maximum word size in bytes
PREDECESSOR_SIZE = 32            # Bytes to print before match found
WINDOW_SIZE = 128                # Total values to dump when match found

# Name: ParseCommandLine() Function
def ParseCommandLine():
    """Parse the command-line options and publish them as ``gl_args``.

    Registers the optional -v/--verbose flag plus the required
    -k/--keywords, -t/--srchTarget, -m/--chMatrix and -d/--editDistance
    options, stores the parsed namespace in the module-level ``gl_args``
    and then reports success through DisplayMessage().
    """
    global gl_args

    cli = argparse.ArgumentParser('Python Search')

    cli.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='enables printing of additional program messages',
    )
    cli.add_argument(
        '-k', '--keywords',
        required=True,
        type=ValidateFileRead,
        help='specify the file containing search words',
    )
    cli.add_argument(
        '-t', '--srchTarget',
        required=True,
        type=ValidateFileRead,
        help='specify the target file to search',
    )
    # NOTE(review): ValidateRead is not defined anywhere in the visible
    # listing (only ValidateFileRead is) -- confirm it exists elsewhere.
    cli.add_argument(
        '-m', '--chMatrix',
        required=True,
        type=ValidateRead,
        help='specify the weighted matrix file',
    )
    cli.add_argument(
        '-d', '--editDistance',
        required=True,
        type=ValidateRead,
        help='specify distance of word',
    )

    gl_args = cli.parse_args()
    DisplayMessage("Command Line processed: Successfully")

# Name: ValidateFileRead Function
def ValidateFileRead(theFile):
    """argparse ``type`` callback that accepts only an existing, readable file.

    Returns ``theFile`` unchanged when it names a regular file the process
    may read. Raises argparse.ArgumentTypeError with 'File does not exist'
    when the path is not a regular file, or with 'File is not readable'
    when read access is denied.
    """
    if os.path.isfile(theFile):
        # The path is a regular file; now make sure we are allowed to read it.
        if not os.access(theFile, os.R_OK):
            raise argparse.ArgumentTypeError('File is not readable')
        return theFile

    raise argparse.ArgumentTypeError('File does not exist')

# Name: DisplayMessage() Function
def DisplayMessage(msg):
    """Print ``msg`` only when the parsed arguments have ``verbose`` set.

    Reads the module-level ``gl_args`` namespace; always returns None.
    """
    if not gl_args.verbose:
        return
    print(msg)

# Name: SearchWords() Function
def SearchWords():
    """Load the search keywords from the file named by ``gl_args.keywords``.

    Every line of the keyword file, stripped of surrounding whitespace, is
    added to a set. If the file cannot be opened, read or decoded, the
    failure is logged through ``log`` and the program exits with status 1.

    NOTE(review): the visible listing stops after the load step, so the
    keyword set is neither returned nor used here -- presumably the search
    itself follows in code that is not shown.

    Raises:
        SystemExit: when the keyword file cannot be read.
    """
    # Local import: the file's import header does not bring in ``sys``,
    # so the original failure path raised NameError instead of exiting.
    import sys

    # Create an empty set of search words
    searchWords = set()
    # NOTE(review): unused in the visible part; kept for the code that
    # presumably follows -- confirm.
    editDistance = []

    # Attempt to open and read search words; ``with`` closes the handle
    # even when reading fails part-way through.
    try:
        with open(gl_args.keywords) as fileWords:
            for line in fileWords:
                searchWords.add(line.strip())
    except (OSError, UnicodeError):
        log.error('Keyword File Failure: ' + gl_args.keywords)
        # Non-zero status so callers/shells can detect the failure.
        sys.exit(1)
Transcribed Image Text:```plaintext # Import necessary libraries import argparse import os import logging import Levenshtein # Configure logging log = logging.getLogger('main_psearch') # Constants MIN_WORD = 5 # Minimum word size in bytes MAX_WORD = 15 # Maximum word size in bytes PREDECESSOR_SIZE = 32 # Bytes to print before match found WINDOW_SIZE = 128 # Total values to dump when match found # Name: ParseCommand() Function def ParseCommandLine(): # Setup command line argument parsing parser = argparse.ArgumentParser('Python Search') parser.add_argument('-v', '--verbose', help='enables printing of additional program messages', action='store_true') parser.add_argument('-k', '--keywords', type=ValidateFileRead, required=True, help='specify the file containing search words') parser.add_argument('-t', '--srchTarget', type=ValidateFileRead, required=True, help='specify the target file to search') parser.add_argument('-m', '--chMatrix', type=ValidateRead, required=True, help='specify the weighted matrix file') parser.add_argument('-d', '--editDistance', type=ValidateRead, required=True, help='specify distance of word') global gl_args gl_args = parser.parse_args() DisplayMessage("Command Line processed: Successfully") return # Name: ValidateFileRead Function def ValidateFileRead(theFile): # Validate if the path is a valid file if not os.path.isfile(theFile): raise argparse.ArgumentTypeError('File does not exist') # Validate the path is readable if os.access(theFile, os.R_OK): return theFile else: raise argparse.ArgumentTypeError('File is not readable') # Name: DisplayMessage() Function def DisplayMessage(msg): if gl_args.verbose: print(msg) return # Name: SearchWords() Function def SearchWords(): # Create an empty set of search words searchWords = set() editDistance = [] # Attempt to open and read search words try: fileWords = open(gl_args.keywords) for line in fileWords: searchWords.add(line.strip()) except: log.error('Keyword File Failure: ' + gl_args.keywords) 
sys.exit()
Expert Solution
Code Snippet

We will use recursion to implement this problem.

#Recursive Levenshtein Function
def Leven(s, t):
    """Return the Levenshtein edit distance between ``s`` and ``t``.

    The distance is the minimum number of single-character insertions,
    deletions and substitutions (each costing 1) needed to turn ``s``
    into ``t``.

    The recurrence is still evaluated recursively, but over prefix lengths
    and with memoisation, so each (i, j) pair is solved once. That makes
    the work proportional to len(s) * len(t) rather than exponential, and
    avoids copying a string slice on every call.

    The recursion depth grows with len(s) + len(t), so very long inputs can
    still hit Python's recursion limit.
    """
    from functools import lru_cache

    @lru_cache(maxsize=None)
    def distance(i, j):
        # Edit distance between the prefixes s[:i] and t[:j].
        if i == 0:
            return j  # insert the remaining j characters of t
        if j == 0:
            return i  # delete the remaining i characters of s

        # Substitution is free when the last characters already match.
        c = 0 if s[i - 1] == t[j - 1] else 1

        return min(distance(i - 1, j) + 1,       # deletion
                   distance(i, j - 1) + 1,       # insertion
                   distance(i - 1, j - 1) + c)   # substitution / match

    return distance(len(s), len(t))

# Demonstrate Leven() on two sample word pairs.
print(
    "minimum edit distance for the words pair of (intention, execution) is : ",
    Leven("intention", "execution"),
)
print(
    "minimum edit distance for the words pair of (confuse, refuse) is : ",
    Leven("confuse", "refuse"),
)
trending now

Trending now

This is a popular solution!

steps

Step by step

Solved in 2 steps with 2 images

Blurred answer
Knowledge Booster
Structured English
Learn more about
Need a deep-dive on the concept behind this application? Look no further. Learn more about this topic, computer-science and related others by exploring similar questions and additional content below.
Recommended textbooks for you
Database System Concepts
Database System Concepts
Computer Science
ISBN:
9780078022159
Author:
Abraham Silberschatz Professor, Henry F. Korth, S. Sudarshan
Publisher:
McGraw-Hill Education
Starting Out with Python (4th Edition)
Starting Out with Python (4th Edition)
Computer Science
ISBN:
9780134444321
Author:
Tony Gaddis
Publisher:
PEARSON
Digital Fundamentals (11th Edition)
Digital Fundamentals (11th Edition)
Computer Science
ISBN:
9780132737968
Author:
Thomas L. Floyd
Publisher:
PEARSON
C How to Program (8th Edition)
C How to Program (8th Edition)
Computer Science
ISBN:
9780133976892
Author:
Paul J. Deitel, Harvey Deitel
Publisher:
PEARSON
Database Systems: Design, Implementation, & Manag…
Database Systems: Design, Implementation, & Manag…
Computer Science
ISBN:
9781337627900
Author:
Carlos Coronel, Steven Morris
Publisher:
Cengage Learning
Programmable Logic Controllers
Programmable Logic Controllers
Computer Science
ISBN:
9780073373843
Author:
Frank D. Petruzella
Publisher:
McGraw-Hill Education