Q2

School

Georgia Institute Of Technology *

*We aren’t endorsed by this school

Course

6242

Subject

Information Systems

Date

Feb 20, 2024

Type

Pages

Uploaded by ChiefBoar2052

########################### DO NOT MODIFY THIS SECTION ########################## ################################################################################# import sqlite3 from sqlite3 import Error import csv ################################################################################# ## Change to False to disable Sample SHOW = False ############### SAMPLE CLASS AND SQL QUERY ########################### ###################################################################### class Sample(): def sample(self): try: connection = sqlite3.connect("sample") connection.text_factory = str except Error as e: print("Error occurred: " + str(e)) print('\033[32m' + "Sample: " + '\033[m') # Sample Drop table connection.execute("DROP TABLE IF EXISTS sample;") # Sample Create connection.execute("CREATE TABLE sample(id integer, name text);") # Sample Insert connection.execute("INSERT INTO sample VALUES (?,?)", ("1", "test_name")) connection.commit() # Sample Select cursor = connection.execute("SELECT * FROM sample;") print(cursor.fetchall()) ###################################################################### class HW2_sql(): ############### DO NOT MODIFY THIS SECTION ########################### ###################################################################### def create_connection(self, path): connection = None try: connection = sqlite3.connect(path) connection.text_factory = str except Error as e: print("Error occurred: " + str(e)) return connection def execute_query(self, connection, query): cursor = connection.cursor() try: if query == "": return "Query Blank" else: cursor.execute(query) connection.commit()

return "Query executed successfully" except Error as e: return "Error occurred: " + str(e) ###################################################################### ###################################################################### # GTusername [0 points] def GTusername(self): gt_username = "zhong61/zhong87" return gt_username # Part a.i Create Tables [2 points] def part_ai_1(self, connection): ############### EDIT SQL STATEMENT ################################### part_ai_1_sql = """ CREATE TABLE IF NOT EXISTS movies ( id INTEGER, title TEXT, score REAL ); """ ###################################################################### return self.execute_query(connection, part_ai_1_sql) def part_ai_2(self, connection): ############### EDIT SQL STATEMENT ################################### part_ai_2_sql = """ CREATE TABLE IF NOT EXISTS movie_cast ( movie_id INTEGER, cast_id INTEGER, cast_name TEXT, birthday TEXT, popularity REAL ); """ ###################################################################### return self.execute_query(connection, part_ai_2_sql) # Part a.ii Import Data [2 points] def part_aii_1(self, connection, path): movies_path = path # Path to movies.csv with open(movies_path, 'r') as file: for line in file: line = line.strip() first_comma = line.find(',') # Position of first comma last_comma = line.rfind(',') # Position of last comma # Extracting id, title, and score if first_comma != -1 and last_comma != -1 and first_comma != last_comma: id = line[:first_comma] title = line[first_comma + 1:last_comma] score = line[last_comma + 1:] connection.execute("INSERT INTO movies (id, title, score) VALUES (?, ?, ?)", (id, title, score))

connection.commit() # Return the count of rows in the movies table sql = "SELECT COUNT(id) FROM movies;" cursor = connection.execute(sql) return cursor.fetchall()[0][0] def part_aii_2(self, connection, path): with open(path, 'r') as file: for line in file: line = line.strip() # Find positions of the first two and last two commas first_comma = line.find(',') second_comma = line.find(',', first_comma + 1) last_comma = line.rfind(',') second_last_comma = line.rfind(',', 0, last_comma) # Extracting movie_id, cast_id, cast_name, birthday, popularity if first_comma != -1 and second_comma != -1 and last_comma != -1 and second_last_comma != -1 and second_comma < second_last_comma: movie_id = line[:first_comma] cast_id = line[first_comma + 1:second_comma] cast_name = line[second_comma + 1:second_last_comma] birthday = line[second_last_comma + 1:last_comma] popularity = line[last_comma + 1:] # Handle 'None' value in birthday if birthday == "None": birthday = None connection.execute( "INSERT INTO movie_cast (movie_id, cast_id, cast_name, birthday, popularity) VALUES (?, ?, ?, ?, ?)", (movie_id, cast_id, cast_name, birthday, popularity)) connection.commit() # Return the count of rows in the movie_cast table sql = "SELECT COUNT(cast_id) FROM movie_cast;" cursor = connection.execute(sql) return cursor.fetchall()[0][0] # Part a.iii Vertical Database Partitioning [5 points] def part_aiii(self, connection): # Create the cast_bio table part_aiii_sql = """ CREATE TABLE IF NOT EXISTS cast_bio ( cast_id INTEGER, cast_name TEXT, birthday TEXT, popularity REAL ); """ self.execute_query(connection, part_aiii_sql) # Insert unique records into cast_bio from movie_cast part_aiii_insert_sql = """

Your preview ends here

Eager to read complete document? Join bartleby learn and gain access to the full version

Access to all documents
Unlimited textbook solutions
24/7 expert homework help

INSERT INTO cast_bio (cast_id, cast_name, birthday, popularity) SELECT DISTINCT cast_id, cast_name, birthday, popularity FROM movie_cast; """ self.execute_query(connection, part_aiii_insert_sql) # Return the count of rows in the cast_bio table sql = "SELECT COUNT(cast_id) FROM cast_bio;" cursor = connection.execute(sql) return cursor.fetchall()[0][0] # Part b Create Indexes [1 points] def part_b_1(self, connection): # Create index on the id column in the movies table part_b_1_sql = """ CREATE INDEX IF NOT EXISTS movie_index ON movies (id); """ return self.execute_query(connection, part_b_1_sql) def part_b_2(self, connection): # Create index on the cast_id column in the movie_cast table part_b_2_sql = """ CREATE INDEX IF NOT EXISTS cast_index ON movie_cast (cast_id); """ return self.execute_query(connection, part_b_2_sql) def part_b_3(self, connection): # Create index on the cast_id column in the cast_bio table part_b_3_sql = """ CREATE INDEX IF NOT EXISTS cast_bio_index ON cast_bio (cast_id); """ return self.execute_query(connection, part_b_3_sql) # Part c Calculate a Proportion [3 points] def part_c(self, connection): # SQL query to calculate the proportion part_c_sql = """ SELECT printf("%.2f", (SELECT COUNT(*) FROM movies WHERE score BETWEEN 7 AND 20) * 100.0 / COUNT(*) ) FROM movies; """ cursor = connection.execute(part_c_sql) return cursor.fetchall()[0][0] # Part d Find the Most Prolific Actors [4 points] def part_d(self, connection): # SQL query to find the most prolific actors part_d_sql = """ SELECT cast_name, COUNT(*) as appearance_count FROM movie_cast WHERE popularity > 10 GROUP BY cast_name ORDER BY appearance_count DESC, cast_name LIMIT 5; """ cursor = connection.execute(part_d_sql)

return cursor.fetchall() # Part e Find the Highest Scoring Movies With the Least Amount of Cast [4 points] def part_e(self, connection): # SQL query to find the highest scoring movies favoring smaller cast size part_e_sql = """ SELECT m.title as movie_title, printf("%.2f", m.score) as score, COUNT(mc.cast_id) as cast_count FROM movies m JOIN movie_cast mc ON m.id = mc.movie_id GROUP BY m.id ORDER BY m.score DESC, cast_count ASC, m.title LIMIT 5; """ cursor = connection.execute(part_e_sql) return cursor.fetchall() # Part f Get High Scoring Actors [4 points] def part_f(self, connection): # SQL query to find the top ten cast members with the highest average movie scores part_f_sql = """ SELECT mc.cast_id, mc.cast_name, printf("%.2f", AVG(m.score)) as average_score FROM movie_cast mc JOIN movies m ON mc.movie_id = m.id WHERE m.score >= 25 GROUP BY mc.cast_id, mc.cast_name HAVING COUNT(m.id) >= 3 ORDER BY average_score DESC, mc.cast_name LIMIT 10; """ cursor = connection.execute(part_f_sql) return cursor.fetchall() # Part g Creating Views [6 points] def part_g(self, connection): # SQL statement to create the good_collaboration view part_g_sql = """ CREATE VIEW IF NOT EXISTS good_collaboration AS SELECT mc1.cast_id as cast_member_id1, mc2.cast_id as cast_member_id2, COUNT(mc1.movie_id) as movie_count, AVG(m.score) as average_movie_score FROM movie_cast mc1 JOIN movie_cast mc2 ON mc1.movie_id = mc2.movie_id AND mc1.cast_id < mc2.cast_id JOIN movies m ON mc1.movie_id = m.id GROUP BY mc1.cast_id, mc2.cast_id HAVING COUNT(mc1.movie_id) >= 2 AND AVG(m.score) >= 40; """ return self.execute_query(connection, part_g_sql)

def part_gi(self, connection): # SQL query to find the top 5 cast members with the highest collaboration scores part_g_i_sql = """ SELECT mc.cast_id, mc.cast_name, printf("%.2f", AVG(g.average_movie_score)) as collaboration_score FROM (SELECT cast_member_id1 as cast_id, average_movie_score FROM good_collaboration UNION ALL SELECT cast_member_id2 as cast_id, average_movie_score FROM good_collaboration) as g JOIN movie_cast mc ON mc.cast_id = g.cast_id GROUP BY mc.cast_id, mc.cast_name ORDER BY collaboration_score DESC, mc.cast_name LIMIT 5; """ cursor = connection.execute(part_g_i_sql) return cursor.fetchall() # Part h FTS [4 points] def part_h(self, connection, path): # Create the movie_overview FTS table using fts4 part_h_sql = """ CREATE VIRTUAL TABLE IF NOT EXISTS movie_overview USING fts4( id INTEGER, overview TEXT ); """ connection.execute(part_h_sql) # Import data into movie_overview from movie_overview.csv movie_overview_path = path # Assuming path is the full path to movie_overview.csv with open(movie_overview_path, 'r') as file: for line in file: id, overview = line.strip().split(',', 1) # Splitting only on the first comma connection.execute("INSERT INTO movie_overview (id, overview) VALUES (?, ?)", (id, overview)) connection.commit() # Count the number of rows in the movie_overview table sql = "SELECT COUNT(id) FROM movie_overview;" cursor = connection.execute(sql) return cursor.fetchall()[0][0] def part_hi(self, connection): # Count the number of movies whose overview contains the word 'fight' part_hi_sql = """ SELECT COUNT(*) FROM movie_overview WHERE overview MATCH 'fight';

Your preview ends here

Eager to read complete document? Join bartleby learn and gain access to the full version

Access to all documents
Unlimited textbook solutions
24/7 expert homework help

""" cursor = connection.execute(part_hi_sql) return cursor.fetchall()[0][0] def part_hii(self, connection): # Count the number of movies that contain the terms 'space' and 'program' # within no more than 5 intervening terms part_hii_sql = """ SELECT COUNT(*) FROM movie_overview WHERE overview MATCH 'space NEAR/5 program'; """ cursor = connection.execute(part_hii_sql) return cursor.fetchall()[0][0] if __name__ == "__main__": ########################### DO NOT MODIFY THIS SECTION ########################## ################################################################################# if SHOW == True: sample = Sample() sample.sample() print('\033[32m' + "Q2 Output: " + '\033[m') db = HW2_sql() try: conn = db.create_connection("Q2") except: print("Database Creation Error") try: conn.execute("DROP TABLE IF EXISTS movies;") conn.execute("DROP TABLE IF EXISTS movie_cast;") conn.execute("DROP TABLE IF EXISTS cast_bio;") conn.execute("DROP VIEW IF EXISTS good_collaboration;") conn.execute("DROP TABLE IF EXISTS movie_overview;") except Exception as e: print("Error in Table Drops") print(e) try: print('\033[32m' + "part ai 1: " + '\033[m' + str(db.part_ai_1(conn))) print('\033[32m' + "part ai 2: " + '\033[m' + str(db.part_ai_2(conn))) except Exception as e: print("Error in Part a.i") print(e) try: print('\033[32m' + "Row count for Movies Table: " + '\033[m' + str(db.part_aii_1(conn, "data/movies.csv"))) print('\033[32m' + "Row count for Movie Cast Table: " + '\033[m' + str( db.part_aii_2(conn, "data/movie_cast.csv"))) except Exception as e: print("Error in part a.ii") print(e) try:

print('\033[32m' + "Row count for Cast Bio Table: " + '\033[m' + str(db.part_aiii(conn))) except Exception as e: print("Error in part a.iii") print(e) try: print('\033[32m' + "part b 1: " + '\033[m' + db.part_b_1(conn)) print('\033[32m' + "part b 2: " + '\033[m' + db.part_b_2(conn)) print('\033[32m' + "part b 3: " + '\033[m' + db.part_b_3(conn)) except Exception as e: print("Error in part b") print(e) try: print('\033[32m' + "part c: " + '\033[m' + str(db.part_c(conn))) except Exception as e: print("Error in part c") print(e) try: print('\033[32m' + "part d: " + '\033[m') for line in db.part_d(conn): print(line[0], line[1]) except Exception as e: print("Error in part d") print(e) try: print('\033[32m' + "part e: " + '\033[m') for line in db.part_e(conn): print(line[0], line[1], line[2]) except Exception as e: print("Error in part e") print(e) try: print('\033[32m' + "part f: " + '\033[m') for line in db.part_f(conn): print(line[0], line[1], line[2]) except Exception as e: print("Error in part f") print(e) try: print('\033[32m' + "part g: " + '\033[m' + str(db.part_g(conn))) print("\033[32mRow count for good_collaboration view:\033[m", conn.execute("select count(*) from good_collaboration").fetchall()[0] [0]) print('\033[32m' + "part g.i: " + '\033[m') for line in db.part_gi(conn): print(line[0], line[1], line[2]) except Exception as e: print("Error in part g") print(e) try: print('\033[32m' + "part h: " + '\033[m' + str(db.part_h(conn, "data/movie_overview.csv")))

print('\033[32m' + "Count h.i: " + '\033[m' + str(db.part_hi(conn))) print('\033[32m' + "Count h.ii: " + '\033[m' + str(db.part_hii(conn))) except Exception as e: print("Error in part h") print(e) conn.close() ################################################################################# #################################################################################

Your preview ends here

Eager to read complete document? Join bartleby learn and gain access to the full version

Access to all documents
Unlimited textbook solutions
24/7 expert homework help

Q2

Related Documents