BPTT question for two separate LSTM layers! #15643
Unanswered
YooSungHyun
asked this question in
code help: NLP / ASR / TTS
Replies: 2 comments 1 reply
-
I think this is right:

```python
import math
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from pytorch_lightning import LightningModule, Trainer


class LSTMModel(LightningModule):
    """LSTM sequence-to-sequence model for testing TBPTT with automatic optimization."""

    def __init__(self, truncated_bptt_steps=2, input_size=1, hidden_size=8):
        super().__init__()
        torch.manual_seed(42)
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.lstm = torch.nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        self.lstm2 = torch.nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        self.linear = torch.nn.Linear(hidden_size * 2, 1)
        # self.truncated_bptt_steps is intentionally not set here, so TBPTT stays disabled
        self.automatic_optimization = True

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=0.01)

    def training_step(self, batch, batch_idx):
        x, y = batch
        # if hiddens is not None:
        #     hiddens1, hiddens2 = hiddens
        # else:
        #     hiddens1 = None
        #     hiddens2 = None
        pred1, _ = self.lstm(x)
        pred2, _ = self.lstm2(x)
        logits = torch.concat([pred1, pred2], dim=-1)
        linear = self.linear(logits)
        loss = F.mse_loss(linear, y)
        return {"loss": loss}

    def train_dataloader(self):
        dataset = TensorDataset(torch.rand(50, 2000, self.input_size), torch.rand(50, 2000, self.input_size))
        return DataLoader(dataset=dataset, batch_size=4)


model = LSTMModel(truncated_bptt_steps=100)
trainer = Trainer(
    default_root_dir="./",
    max_epochs=2,
    log_every_n_steps=2,
    enable_model_summary=False,
    enable_checkpointing=False,
)
trainer.fit(model)
```

The time sequence length is 2000. And with TBPTT enabled, threading both hidden states through `training_step`:

```python
import math
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from pytorch_lightning import LightningModule, Trainer


class LSTMModel(LightningModule):
    """LSTM sequence-to-sequence model for testing TBPTT with automatic optimization."""

    def __init__(self, truncated_bptt_steps=2, input_size=1, hidden_size=8):
        super().__init__()
        torch.manual_seed(42)
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.lstm = torch.nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        self.lstm2 = torch.nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        self.linear = torch.nn.Linear(hidden_size * 2, 1)
        self.truncated_bptt_steps = truncated_bptt_steps  # enables TBPTT: batches get split along the time dimension
        self.automatic_optimization = True

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=0.01)

    def training_step(self, batch, batch_idx, hiddens):
        x, y = batch
        if hiddens is not None:
            hiddens1, hiddens2 = hiddens
        else:
            hiddens1 = None
            hiddens2 = None
        pred1, hiddens1 = self.lstm(x, hiddens1)
        pred2, hiddens2 = self.lstm2(x, hiddens2)
        logits = torch.concat([pred1, pred2], dim=-1)
        linear = self.linear(logits)
        loss = F.mse_loss(linear, y)
        return {"loss": loss, "hiddens": (hiddens1, hiddens2)}

    def train_dataloader(self):
        dataset = TensorDataset(torch.rand(50, 2000, self.input_size), torch.rand(50, 2000, self.input_size))
        return DataLoader(dataset=dataset, batch_size=4)


model = LSTMModel(truncated_bptt_steps=100)
trainer = Trainer(
    default_root_dir="./",
    max_epochs=2,
    log_every_n_steps=2,
    enable_model_summary=False,
    enable_checkpointing=False,
)
trainer.fit(model)
```

and here is the result (look at the loss).
Also, I read down into the optimizer-step code, and I think `hiddens` is only used to cut the connection to past gradients; its type hint is `Any`, and the trainer itself does not use it in any calculation. The flow is just: check `hiddens` -> detach (and possibly move to CPU) -> pass it back in as a kwarg -> go on to the next `training_step`. Am I right?
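As a rough picture of that flow, here is an illustrative sketch only: it assumes the batch is split along the time dimension (dim 1, since `batch_first=True`) and that the returned hiddens are detached between segments; whether the optimizer steps once per segment is my assumption, and `run_tbptt_sketch` is a made-up name, not Lightning's actual loop:

```python
import torch


def run_tbptt_sketch(model, batch, batch_idx, steps, optimizer):
    """Illustrative sketch of a truncated-BPTT loop; not Lightning's real trainer code."""
    x, y = batch
    hiddens = None
    # split the full sequence along the time dimension into segments of `steps`
    for t in range(0, x.size(1), steps):
        segment = (x[:, t:t + steps], y[:, t:t + steps])
        out = model.training_step(segment, batch_idx, hiddens)
        optimizer.zero_grad()
        out["loss"].backward()  # backprop stops at the detached hiddens below
        optimizer.step()
        # detach the returned states so no gradient flows into past segments;
        # here hiddens is ((h1, c1), (h2, c2)), matching the example above
        hiddens = tuple(tuple(t_.detach() for t_ in h) for h in out["hiddens"])
```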
-
Returning the hiddens from just one of the LSTMs gives the same BPTT loss as returning both LSTMs' hiddens:

```python
import math
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from pytorch_lightning import LightningModule, Trainer


class LSTMModel(LightningModule):
    """LSTM sequence-to-sequence model for testing TBPTT with automatic optimization."""

    def __init__(self, truncated_bptt_steps=2, input_size=1, hidden_size=8):
        super().__init__()
        torch.manual_seed(42)
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.lstm = torch.nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        self.lstm2 = torch.nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        # note: output dim 50 does not match the target's last dim (1), so mse_loss will broadcast
        self.linear = torch.nn.Linear(hidden_size * 2, 50)
        self.truncated_bptt_steps = truncated_bptt_steps
        self.automatic_optimization = True

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=0.01)

    def training_step(self, batch, batch_idx, hiddens):
        x, y = batch
        # only the first LSTM carries state across TBPTT segments
        pred1, hiddens1 = self.lstm(x, hiddens)
        pred2, hiddens2 = self.lstm2(x)  # the second LSTM restarts from a zero state each segment
        logits = torch.concat([pred1, pred2], dim=-1)
        linear = self.linear(logits)
        loss = F.mse_loss(linear, y)
        return {"loss": loss, "hiddens": hiddens1}

    def train_dataloader(self):
        dataset = TensorDataset(torch.rand(50, 2000, self.input_size), torch.rand(50, 2000, self.input_size))
        return DataLoader(dataset=dataset, batch_size=4)


model = LSTMModel(truncated_bptt_steps=100)
trainer = Trainer(
    default_root_dir="./",
    max_epochs=2,
    log_every_n_steps=2,
    enable_model_summary=False,
    enable_checkpointing=False,
)
trainer.fit(model)
```

loss = 0.0831 ~ 0.0832
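Either return shape should be fine, because the detach between TBPTT segments is applied recursively to whatever structure `hiddens` holds. A minimal sketch of that idea (my own illustrative helper, not Lightning's code):

```python
import torch


def detach_hiddens(h):
    """Recursively detach every tensor in a nested tuple/list structure (illustrative)."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    if isinstance(h, (tuple, list)):
        return type(h)(detach_hiddens(x) for x in h)
    return h


# both payloads work, because detachment walks the whole structure:
single = detach_hiddens((torch.rand(1, 4, 8), torch.rand(1, 4, 8)))               # one LSTM's (h, c)
both = detach_hiddens(((torch.rand(1, 4, 8),) * 2, (torch.rand(1, 4, 8),) * 2))  # both LSTMs' states
```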
-
I am making an RNN Transducer. An RNN Transducer has two separate LSTM parts:
1. a transcription network (LSTM),
2. a prediction network (LSTM), whose outputs are concatenated and fed into
3. the joint network.
It looks like this:

So I am confused about how to use BPTT in `training_step`. I used `self.truncated_bptt_steps = 2`, my model's forward is like this, each of transnet and prednet takes its input like `lstm(inputs, prev_hidden_states)`, and my `training_step` looks like this. Does this code look right?