import numpy as np import json img_codes = np.load("data/image_codes.npy") captions = json.load(open('data/captions_tokenized.json')) for img_i in range(len(captions)): for caption_i in range(len(captions[img_i])): sentence = captions[img_i][caption_i] captions[img_i][caption_i] = ["#START#"] + sentence.split(' ') + ["#END#"] class CaptionNet(nn.Module): def __init__(self, n_tokens=n_tokens, emb_size=128, lstm_units=256, cnn_feature_size=2048): """ A recurrent 'head' network for image captioning. See scheme above. """ super().__init__() # a layer that converts conv features to initial_h (h_0) and initial_c (c_0) self.cnn_to_h0 = nn.Linear(cnn_feature_size, lstm_units) self.cnn_to_c0 = nn.Linear(cnn_feature_size, lstm_units) # create embedding for input words. Use the parameters (e.g. emb_size). # YOUR CODE HERE # lstm: create a recurrent core of your network. Use either LSTMCell or just LSTM. # In the latter case (nn.LSTM), make sure batch_first=True # YOUR CODE HERE # create logits: linear layer that takes lstm hidden state as input and computes one number per token # YOUR CODE HERE def forward(self, image_vectors, captions_ix): """ Apply the network in training mode. :param image_vectors: torch tensor containing inception vectors. shape: [batch, cnn_feature_size] :param captions_ix: torch tensor containing captions as matrix. shape: [batch, word_i]. padded with pad_ix :returns: logits for next token at each tick, shape: [batch, word_i, n_tokens] """ self.lstm.flatten_parameters() initial_cell = self.cnn_to_c0(image_vectors) initial_hid = self.cnn_to_h0(image_vectors) # compute embeddings for captions_ix # YOUR CODE HERE # apply recurrent layer to captions_emb. # 1. initialize lstm state with initial_* from above # 2. feed it with captions. Mind the dimension order in docstring # 3. 
compute logits for next token probabilities # Note: if you used nn.LSTM, you can just give it (initial_cell[None], initial_hid[None]) as second arg # lstm_out should be lstm hidden state sequence of shape [batch, caption_length, lstm_units] # YOUR CODE HERE # compute logits from lstm_out # YOUR CODE HERE #for reference and more detail go to ---> https://colab.research.google.com/github/hse-aml/intro-to-dl-pytorch/blob/main/week06/week06_final_project_image_captioning.ipynb#scrollTo=x_wH1bYu-QK8
import numpy as np import json img_codes = np.load("data/image_codes.npy") captions = json.load(open('data/captions_tokenized.json')) for img_i in range(len(captions)): for caption_i in range(len(captions[img_i])): sentence = captions[img_i][caption_i] captions[img_i][caption_i] = ["#START#"] + sentence.split(' ') + ["#END#"] class CaptionNet(nn.Module): def __init__(self, n_tokens=n_tokens, emb_size=128, lstm_units=256, cnn_feature_size=2048): """ A recurrent 'head' network for image captioning. See scheme above. """ super().__init__() # a layer that converts conv features to initial_h (h_0) and initial_c (c_0) self.cnn_to_h0 = nn.Linear(cnn_feature_size, lstm_units) self.cnn_to_c0 = nn.Linear(cnn_feature_size, lstm_units) # create embedding for input words. Use the parameters (e.g. emb_size). # YOUR CODE HERE # lstm: create a recurrent core of your network. Use either LSTMCell or just LSTM. # In the latter case (nn.LSTM), make sure batch_first=True # YOUR CODE HERE # create logits: linear layer that takes lstm hidden state as input and computes one number per token # YOUR CODE HERE def forward(self, image_vectors, captions_ix): """ Apply the network in training mode. :param image_vectors: torch tensor containing inception vectors. shape: [batch, cnn_feature_size] :param captions_ix: torch tensor containing captions as matrix. shape: [batch, word_i]. padded with pad_ix :returns: logits for next token at each tick, shape: [batch, word_i, n_tokens] """ self.lstm.flatten_parameters() initial_cell = self.cnn_to_c0(image_vectors) initial_hid = self.cnn_to_h0(image_vectors) # compute embeddings for captions_ix # YOUR CODE HERE # apply recurrent layer to captions_emb. # 1. initialize lstm state with initial_* from above # 2. feed it with captions. Mind the dimension order in docstring # 3. 
compute logits for next token probabilities # Note: if you used nn.LSTM, you can just give it (initial_cell[None], initial_hid[None]) as second arg # lstm_out should be lstm hidden state sequence of shape [batch, caption_length, lstm_units] # YOUR CODE HERE # compute logits from lstm_out # YOUR CODE HERE #for reference and more detail go to ---> https://colab.research.google.com/github/hse-aml/intro-to-dl-pytorch/blob/main/week06/week06_final_project_image_captioning.ipynb#scrollTo=x_wH1bYu-QK8
Computer Networking: A Top-Down Approach (7th Edition)
7th Edition
ISBN:9780133594140
Author:James Kurose, Keith Ross
Publisher:PEARSON
Chapter1: Computer Networks And The Internet
Section: Chapter Questions
Problem R1RQ: What is the difference between a host and an end system? List several different types of end...
Related questions
Question
import numpy as np
import json
# Load the precomputed image feature vectors and the tokenized captions.
img_codes = np.load("data/image_codes.npy")

# Use a context manager so the file handle is closed deterministically.
# JSON text is UTF-8 by specification, so decode it explicitly rather than
# relying on the platform default encoding.
with open('data/captions_tokenized.json', encoding="utf-8") as captions_file:
    captions = json.load(captions_file)

# Replace every caption string, in place, with its list of space-separated
# tokens wrapped in the "#START#" / "#END#" boundary markers.
for image_captions in captions:
    for caption_i, sentence in enumerate(image_captions):
        image_captions[caption_i] = ["#START#"] + sentence.split(' ') + ["#END#"]
class CaptionNet(nn.Module):
    """A recurrent 'head' network for image captioning.

    Image feature vectors are projected into the initial LSTM state; the LSTM
    then reads embedded caption tokens and a linear layer turns each hidden
    state into one logit per vocabulary token.
    """

    def __init__(self, n_tokens=n_tokens, emb_size=128, lstm_units=256, cnn_feature_size=2048):
        """Build the layers of the captioning head.

        :param n_tokens: vocabulary size (number of embedding rows and output logits)
        :param emb_size: dimensionality of the token embeddings
        :param lstm_units: size of the LSTM hidden and cell states
        :param cnn_feature_size: dimensionality of the incoming image feature vectors
        """
        super().__init__()

        # layers that convert conv features to initial_h (h_0) and initial_c (c_0)
        self.cnn_to_h0 = nn.Linear(cnn_feature_size, lstm_units)
        self.cnn_to_c0 = nn.Linear(cnn_feature_size, lstm_units)

        # embedding for input words
        self.emb = nn.Embedding(n_tokens, emb_size)

        # recurrent core; batch_first=True so inputs/outputs are [batch, time, features]
        self.lstm = nn.LSTM(emb_size, lstm_units, batch_first=True)

        # linear layer that takes the lstm hidden state and computes one number per token
        self.logits = nn.Linear(lstm_units, n_tokens)

    def forward(self, image_vectors, captions_ix):
        """Apply the network in training mode.

        :param image_vectors: torch tensor containing inception vectors.
            shape: [batch, cnn_feature_size]
        :param captions_ix: torch tensor containing captions as matrix.
            shape: [batch, word_i]. padded with pad_ix
        :returns: logits for next token at each tick, shape: [batch, word_i, n_tokens]
        """
        self.lstm.flatten_parameters()

        initial_cell = self.cnn_to_c0(image_vectors)
        initial_hid = self.cnn_to_h0(image_vectors)

        # compute embeddings for captions_ix: [batch, word_i, emb_size]
        captions_emb = self.emb(captions_ix)

        # nn.LSTM takes its initial state as (h_0, c_0), each shaped
        # [num_layers, batch, lstm_units]; indexing with [None] adds the
        # leading num_layers axis. The hidden state must come first.
        # lstm_out is the hidden state sequence: [batch, caption_length, lstm_units]
        lstm_out, _ = self.lstm(captions_emb, (initial_hid[None], initial_cell[None]))

        # compute logits from lstm_out: [batch, caption_length, n_tokens]
        return self.logits(lstm_out)
# For reference and more detail, see: https://colab.research.google.com/github/hse-aml/intro-to-dl-pytorch/blob/main/week06/week06_final_project_image_captioning.ipynb#scrollTo=x_wH1bYu-QK8
Expert Solution
This question has been solved!
Explore an expertly crafted, step-by-step solution for a thorough understanding of key concepts.
This is a popular solution!
Trending now
This is a popular solution!
Step by step
Solved in 3 steps with 1 images
Recommended textbooks for you
Computer Networking: A Top-Down Approach (7th Edi…
Computer Engineering
ISBN:
9780133594140
Author:
James Kurose, Keith Ross
Publisher:
PEARSON
Computer Organization and Design MIPS Edition, Fi…
Computer Engineering
ISBN:
9780124077263
Author:
David A. Patterson, John L. Hennessy
Publisher:
Elsevier Science
Network+ Guide to Networks (MindTap Course List)
Computer Engineering
ISBN:
9781337569330
Author:
Jill West, Tamara Dean, Jean Andrews
Publisher:
Cengage Learning
Computer Networking: A Top-Down Approach (7th Edi…
Computer Engineering
ISBN:
9780133594140
Author:
James Kurose, Keith Ross
Publisher:
PEARSON
Computer Organization and Design MIPS Edition, Fi…
Computer Engineering
ISBN:
9780124077263
Author:
David A. Patterson, John L. Hennessy
Publisher:
Elsevier Science
Network+ Guide to Networks (MindTap Course List)
Computer Engineering
ISBN:
9781337569330
Author:
Jill West, Tamara Dean, Jean Andrews
Publisher:
Cengage Learning
Concepts of Database Management
Computer Engineering
ISBN:
9781337093422
Author:
Joy L. Starks, Philip J. Pratt, Mary Z. Last
Publisher:
Cengage Learning
Prelude to Programming
Computer Engineering
ISBN:
9780133750423
Author:
VENIT, Stewart
Publisher:
Pearson Education
Sc Business Data Communications and Networking, T…
Computer Engineering
ISBN:
9781119368830
Author:
FITZGERALD
Publisher:
WILEY