%config IPCompleter.greedy=True %env OPENBLAS_NUM_THREADS=1 import pandas as pd import implicit from sklearn.model_selection import train_test_split import scipy.sparse as sps import numpy as np import matplotlib.pyplot as plt import tqdm from collections import defaultdict ! ls -lh yandex_music ! head -n 5 yandex_music/artists.jsonl ...

Computer Networking: A Top-Down Approach (7th Edition)
7th Edition
ISBN:9780133594140
Author:James Kurose, Keith Ross
Publisher:PEARSON
Chapter1: Computer Networks And The Internet
Section: Chapter Questions
Problem R1RQ: What is the difference between a host and an end system? List several different types of end...
icon
Related questions
Question

%config IPCompleter.greedy=True
%env OPENBLAS_NUM_THREADS=1
import pandas as pd
import implicit
from sklearn.model_selection import train_test_split
import scipy.sparse as sps
import numpy as np
import matplotlib.pyplot as plt
import tqdm
from collections import defaultdict

# List the files in the dataset directory with human-readable sizes.
! ls -lh yandex_music

# Peek at the first five lines of the artists file (JSON Lines, judging by the extension).
! head -n 5 yandex_music/artists.jsonl

# Peek at the first five lines of the events CSV.
! head -n 5 yandex_music/events.csv

# Artist catalogue: one JSON record per line.
artists = pd.read_json("yandex_music/artists.jsonl", lines=True, orient="records")

# Listening events; the columns userId, artistId and plays are used later on.
events = pd.read_csv("yandex_music/events.csv")

# Top-10 artists by total number of plays: attach artist names to the events,
# sum the plays per name and keep the ten largest totals.
(
    events
    .merge(artists)
    .groupby("artistName")[["plays"]]
    .sum()
    .sort_values("plays", ascending=False)
    .head(10)
)

# Hold out 5% of the events for evaluation; the seed is fixed for reproducibility.
train, test = train_test_split(events, test_size=0.05, random_state=0)

# Prepare the matrix for the implicit library
# (https://implicit.readthedocs.io/en/latest/models.html):
# item_user is a sparse csr_matrix with rows = items (artists), columns = users
# and values = confidence.
# csr_matrix((data, (row_ind, col_ind)), [shape=(M, N)])
#     where ``data``, ``row_ind`` and ``col_ind`` satisfy the
#     relationship ``a[row_ind[k], col_ind[k]] = data[k]``.
#
# The shape is taken from the full ``events`` table instead of being inferred from
# ``train``: if the largest artistId / userId happens to fall into the test split,
# an inferred shape would be too small and the fitted model would have no factor
# row for that id, so indexing user/item factors for test rows would fail.
n_artists = int(events.artistId.max()) + 1
n_users = int(events.userId.max()) + 1

# Confidence grows logarithmically with the play count; every observed pair gets at least 1.
confidence = 1 + 10 * np.log2(train.plays + 1)

item_user_train = sps.csr_matrix(
    (confidence, (train.artistId, train.userId)),
    shape=(n_artists, n_users),
)
item_user_train

%%time
# Fit implicit-feedback ALS with 32 latent factors for 10 iterations (fixed seed)
# on item_user_train, whose rows are artists and whose columns are users.
# The %%time cell magic above must stay on the first line of the cell.
model = implicit.als.AlternatingLeastSquares(factors=32, iterations=10, random_state=0)
model.fit(item_user_train)

# Sanity check of the learned item factors: for a few well-known artists,
# print what model.similar_items returns for N=5 together with the scores.
target_artists = artists[artists.artistName.isin(['Coldplay', '50 Cent', 'AC/DC'])]

# artistId -> artistName lookup used to make the output readable.
artist_to_name = dict(zip(artists.artistId, artists.artistName))
for artist_id, artist_name in zip(target_artists.artistId, target_artists.artistName):
    print("#############", artist_name, "#############")
    neighbours = model.similar_items(artist_id, N=5)
    # Each result is unpacked as an (artist id, similarity score) pair.
    for neighbour_id, score in neighbours:
        print(artist_to_name[neighbour_id], "\t", score)

from sklearn.metrics import ndcg_score

NDCG for iALS

q2: Now let's replace global popularity with iALS prediction for each user.

Make sure to compute NDCG only for users with at least 2 different artists in the test set.

# Prediction for user 100 and item 200: a simple dot product of the user's
# and the item's latent factor vectors.
# NOTE(review): the indices are presumably the userId / artistId values used to
# build item_user_train — confirm before relying on them.
print(model.user_factors[100].dot(model.item_factors[200]))

# YOUR CODE HERE

Expert Solution
trending now

Trending now

This is a popular solution!

steps

Step by step

Solved in 2 steps

Blurred answer
Similar questions
Recommended textbooks for you
Computer Networking: A Top-Down Approach (7th Edi…
Computer Networking: A Top-Down Approach (7th Edi…
Computer Engineering
ISBN:
9780133594140
Author:
James Kurose, Keith Ross
Publisher:
PEARSON
Computer Organization and Design MIPS Edition, Fi…
Computer Organization and Design MIPS Edition, Fi…
Computer Engineering
ISBN:
9780124077263
Author:
David A. Patterson, John L. Hennessy
Publisher:
Elsevier Science
Network+ Guide to Networks (MindTap Course List)
Network+ Guide to Networks (MindTap Course List)
Computer Engineering
ISBN:
9781337569330
Author:
Jill West, Tamara Dean, Jean Andrews
Publisher:
Cengage Learning
Concepts of Database Management
Concepts of Database Management
Computer Engineering
ISBN:
9781337093422
Author:
Joy L. Starks, Philip J. Pratt, Mary Z. Last
Publisher:
Cengage Learning
Prelude to Programming
Prelude to Programming
Computer Engineering
ISBN:
9780133750423
Author:
VENIT, Stewart
Publisher:
Pearson Education
Sc Business Data Communications and Networking, T…
Sc Business Data Communications and Networking, T…
Computer Engineering
ISBN:
9781119368830
Author:
FITZGERALD
Publisher:
WILEY