NLP 100 Exercise 2020, Chapter 9: RNN and CNN

Introduction

NLP 100 Exercise 2020

NLP 100 Exercise (言語処理100本ノック) is a collection of natural language processing problems published by Tohoku University.

It is very high-quality material and is used in corporate training programs and study groups.

The collection was revised in 2020. Reflecting recent developments, problems on deep neural networks were added (along with various smaller changes).

In this article, I work through NLP 100 Exercise 2020 in Python.

Many other approaches are possible, so please use this as just one example solution!

Back to the explanations of all 100 problems

80. Conversion to ID Numbers

We want to assign unique ID numbers to the words in the training data constructed in Problem 51. Assign IDs to the words that appear two or more times in the training data, giving 1 to the most frequent word, 2 to the second most frequent, and so on. Then implement a function that converts a given sequence of words into a sequence of ID numbers. The ID of every word that appears fewer than two times should be 0.

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer

# base is the path prefix to the data files created in Chapter 6
train = pd.read_csv(base+'train.txt', header=None, sep='\t')
valid = pd.read_csv(base+'valid.txt', header=None, sep='\t')
test = pd.read_csv(base+'test.txt', header=None, sep='\t')

# count word frequencies over the training titles; min_df=2 keeps only words seen at least twice
vectorizer = CountVectorizer(min_df=2)
train_title = train.iloc[:,1].str.lower()  # column 1 holds the headline
cnt = vectorizer.fit_transform(train_title).toarray()
sm = cnt.sum(axis=0)
idx = np.argsort(sm)[::-1]
words = np.array(vectorizer.get_feature_names())[idx]  # get_feature_names_out() on newer scikit-learn

# map each word to its frequency rank (1 = most frequent)
d = dict()
for i in range(len(words)):
    d[words[i]] = i + 1

def get_id(sentence):
    r = []
    for word in sentence:
        r.append(d.get(word, 0))  # out-of-vocabulary words get ID 0
    return r

def df2id(df):
    ids = []
    for i in df.iloc[:,1].str.lower():
        ids.append(get_id(i.split()))
    return ids

X_train = df2id(train)
X_valid = df2id(valid)
X_test = df2id(test)
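
As a quick check, get_id maps a tokenized headline to its ID sequence. The exact numbers depend on the fitted vocabulary, so the values below are illustrative:

print(get_id('us stocks rise'.split()))  # e.g. [11, 38, 109]; an unseen word would map to 0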

81. Prediction with an RNN

There is a word sequence x = (x1, x2, …, xT) represented by ID numbers, where T is the length of the sequence and xt ∈ ℝV is the one-hot representation of a word's ID number (V is the total number of words). Using a recurrent neural network (RNN), implement the following equations as a model that predicts category y from the word sequence x.
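
The equations, as given in the original exercise, are:

$$ \overrightarrow{h}_0 = 0, \qquad \overrightarrow{h}_t = \overrightarrow{\mathrm{RNN}}(\mathrm{emb}(x_t), \overrightarrow{h}_{t-1}), \qquad y = \mathrm{softmax}(W^{(yh)} \overrightarrow{h}_T + b^{(y)}) $$

where $\mathrm{emb}(x)$ is the word embedding, $\overrightarrow{h}_t \in \mathbb{R}^{d_h}$ is the hidden state at time $t$, $\overrightarrow{\mathrm{RNN}}(x, h)$ computes the next state from input $x$ and previous state $h$, and $W^{(yh)}$, $b^{(y)}$ map the final hidden state $\overrightarrow{h}_T$ to the category probabilities (4 categories here).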

import torch

dw = 300  # embedding dimension
dh = 50   # hidden-state dimension

class RNN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.emb = torch.nn.Embedding(len(words)+1, dw)
        self.rnn = torch.nn.RNN(dw, dh, batch_first=True)
        self.linear = torch.nn.Linear(dh, 4)
        self.softmax = torch.nn.Softmax(dim=1)
    def forward(self, x, h=None):
        x = self.emb(x)
        y, h = self.rnn(x, h)
        y = y[:,-1,:]  # hidden state at the last step
        y = self.linear(y)
        y = self.softmax(y)
        return y

82. Training with Stochastic Gradient Descent

Train the model constructed in Problem 81 using stochastic gradient descent (SGD). Train while reporting the loss and accuracy on the training data and on the evaluation data, and stop at an appropriate criterion (for example, 10 epochs).

import torch
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
# set up inline TensorBoard (Jupyter/Colab magics)
%load_ext tensorboard
!rm -rf ./runs
%tensorboard --logdir ./runs
writer = SummaryWriter()

max_len = 10
dw = 300
dh = 50
n_vocab = len(words) + 2  # vocabulary plus ID 0 (unknown) and PAD
PAD = len(words) + 1

class RNN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.emb = torch.nn.Embedding(n_vocab, dw, padding_idx=PAD)
        self.rnn = torch.nn.RNN(dw, dh, batch_first=True)
        self.linear = torch.nn.Linear(dh, 4)
        self.softmax = torch.nn.Softmax(dim=1)
    def forward(self, x, h=None):
        x = self.emb(x)
        y, h = self.rnn(x, h)
        y = y[:,-1,:]  # hidden state at the last step
        y = self.linear(y)
        # y = self.softmax(y)  # not needed here: torch.nn.CrossEntropyLoss() applies softmax internally
        return y

def list2tensor(data, max_len):
    new = []
    for d in data:
        if len(d) > max_len:
            d = d[:max_len]  # truncate long sequences
        else:
            d += [PAD] * (max_len - len(d))  # pad short ones
        new.append(d)
    return torch.tensor(new, dtype=torch.int64)

def accuracy(pred, label):
    pred = np.argmax(pred.data.numpy(), axis=1)
    label = label.data.numpy()
    return (pred == label).mean()


train = pd.read_csv(base+'train.txt', header=None, sep='\t')
valid = pd.read_csv(base+'valid.txt', header=None, sep='\t')
test = pd.read_csv(base+'test.txt', header=None, sep='\t')

X_train = df2id(train)  # df2id from Problem 80
X_valid = df2id(valid)
X_test = df2id(test)

X_train = list2tensor(X_train, max_len)
X_valid = list2tensor(X_valid, max_len)
X_test = list2tensor(X_test, max_len)

y_train = np.loadtxt(base+'y_train.txt')
y_train = torch.tensor(y_train, dtype=torch.int64)
y_valid = np.loadtxt(base+'y_valid.txt')
y_valid = torch.tensor(y_valid, dtype=torch.int64)
y_test = np.loadtxt(base+'y_test.txt')
y_test = torch.tensor(y_test, dtype=torch.int64)


model = RNN()
ds = TensorDataset(X_train, y_train)
# create the DataLoader; batch_size=1 means one update per example (plain SGD)
loader = DataLoader(ds, batch_size=1, shuffle=True)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-1)

for epoch in range(2):
    for xx, yy in loader:
        y_pred = model(xx)
        loss = loss_fn(y_pred, yy)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    with torch.no_grad():
        y_pred = model(X_train)
        loss = loss_fn(y_pred, y_train)
        writer.add_scalar('Loss/train', loss, epoch)
        writer.add_scalar('Accuracy/train', accuracy(y_pred, y_train), epoch)
        print(accuracy(y_pred, y_train))

        y_pred = model(X_valid)
        loss = loss_fn(y_pred, y_valid)
        writer.add_scalar('Loss/valid', loss, epoch)
        writer.add_scalar('Accuracy/valid', accuracy(y_pred, y_valid), epoch)
        print(accuracy(y_pred, y_valid))
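
The commented-out softmax above reflects that torch.nn.CrossEntropyLoss combines log-softmax and the negative log-likelihood, so the model should return raw logits during training. A minimal standalone check:

import torch

logits = torch.tensor([[2.0, 0.5, 0.1, -1.0]])
target = torch.tensor([0])
ce = torch.nn.CrossEntropyLoss()(logits, target)
manual = torch.nn.NLLLoss()(torch.log_softmax(logits, dim=1), target)
print(torch.allclose(ce, manual))  # True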

83. Mini-batching and Training on a GPU

Modify the code from Problem 82 so that the loss and gradient are computed over B examples at a time (choose a suitable value for B). Also, run the training on a GPU.

import torch
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
# set up inline TensorBoard (Jupyter/Colab magics)
%load_ext tensorboard
!rm -rf ./runs
%tensorboard --logdir ./runs
writer = SummaryWriter()

max_len = 10
dw = 300
dh = 50
n_vocab = len(words) + 2
PAD = len(words) + 1

class RNN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.emb = torch.nn.Embedding(n_vocab, dw, padding_idx=PAD)
        self.rnn = torch.nn.RNN(dw, dh, batch_first=True)
        self.linear = torch.nn.Linear(dh, 4)
        self.softmax = torch.nn.Softmax(dim=1)
    def forward(self, x, h=None):
        x = self.emb(x)
        y, h = self.rnn(x, h)
        y = y[:,-1,:]  # hidden state at the last step
        y = self.linear(y)
        # y = self.softmax(y)  # not needed: torch.nn.CrossEntropyLoss() applies softmax internally
        return y

def list2tensor(data, max_len):
    new = []
    for d in data:
        if len(d) > max_len:
            d = d[:max_len]
        else:
            d += [PAD] * (max_len - len(d))
        new.append(d)
    return torch.tensor(new, dtype=torch.int64)

def accuracy(pred, label):
    # move tensors back to the CPU before converting to NumPy
    pred = np.argmax(pred.data.to('cpu').numpy(), axis=1)
    label = label.data.to('cpu').numpy()
    return (pred == label).mean()

train = pd.read_csv(base+'train.txt', header=None, sep='\t')
valid = pd.read_csv(base+'valid.txt', header=None, sep='\t')
test = pd.read_csv(base+'test.txt', header=None, sep='\t')

X_train = df2id(train)
X_valid = df2id(valid)
X_test = df2id(test)

X_train = list2tensor(X_train, max_len)
X_valid = list2tensor(X_valid, max_len)
X_test = list2tensor(X_test, max_len)

y_train = np.loadtxt(base+'y_train.txt')
y_train = torch.tensor(y_train, dtype=torch.int64)
y_valid = np.loadtxt(base+'y_valid.txt')
y_valid = torch.tensor(y_valid, dtype=torch.int64)
y_test = np.loadtxt(base+'y_test.txt')
y_test = torch.tensor(y_test, dtype=torch.int64)


model = RNN()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
ds = TensorDataset(X_train.to(device), y_train.to(device))
# create the DataLoader; B = 1024 examples per mini-batch
loader = DataLoader(ds, batch_size=1024, shuffle=True)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-1)

for epoch in range(10):
    for xx, yy in loader:
        y_pred = model(xx)
        loss = loss_fn(y_pred, yy)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    with torch.no_grad():
        y_pred = model(X_train.to(device))
        loss = loss_fn(y_pred, y_train.to(device))
        writer.add_scalar('Loss/train', loss, epoch)
        writer.add_scalar('Accuracy/train', accuracy(y_pred, y_train), epoch)
        print(accuracy(y_pred, y_train))

        y_pred = model(X_valid.to(device))
        loss = loss_fn(y_pred, y_valid.to(device))
        writer.add_scalar('Loss/valid', loss, epoch)
        writer.add_scalar('Accuracy/valid', accuracy(y_pred, y_valid), epoch)
        print(accuracy(y_pred, y_valid))

84. Introducing Word Vectors

Initialize the word embeddings emb(x) with pretrained word vectors (for example, the word vectors trained on the Google News dataset of about 100 billion words), and train the model.

The embedding weights can be accessed as model.emb.weight (adjust the variable name to match your code). To treat a tensor as a learnable parameter, it must be wrapped in torch.nn.Parameter().
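
The code below assumes w2v holds the pretrained Google News vectors. They can be loaded with gensim, for example (a sketch; the file is the same one used in Chapter 7):

from gensim.models import KeyedVectors

# load the pretrained 300-dimensional Google News vectors
w2v = KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin.gz', binary=True)

Note that the `w2v.vocab` membership check below matches the gensim < 4.0 API; on gensim >= 4.0, use `w2v.key_to_index` instead.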

import torch
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()  # TensorBoard writer (the tensorboard extension was set up in Problem 82)

max_len = 10
dw = 300
dh = 50
n_vocab = len(words) + 2
PAD = len(words) + 1

class RNN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.emb = torch.nn.Embedding(n_vocab, dw, padding_idx=PAD)
        self.rnn = torch.nn.RNN(dw, dh, batch_first=True)
        self.linear = torch.nn.Linear(dh, 4)
        self.softmax = torch.nn.Softmax(dim=1)
    def forward(self, x, h=None):
        x = self.emb(x)
        y, h = self.rnn(x, h)
        y = y[:,-1,:]  # hidden state at the last step
        y = self.linear(y)
        # y = self.softmax(y)  # not needed: torch.nn.CrossEntropyLoss() applies softmax internally
        return y

def list2tensor(data, max_len):
    new = []
    for d in data:
        if len(d) > max_len:
            d = d[:max_len]
        else:
            d += [PAD] * (max_len - len(d))
        new.append(d)
    return torch.tensor(new, dtype=torch.int64)

def accuracy(pred, label):
    pred = np.argmax(pred.data.to('cpu').numpy(), axis=1)
    label = label.data.to('cpu').numpy()
    return (pred == label).mean()


train = pd.read_csv(base+'train.txt', header=None, sep='\t')
valid = pd.read_csv(base+'valid.txt', header=None, sep='\t')
test = pd.read_csv(base+'test.txt', header=None, sep='\t')

X_train = df2id(train)
X_valid = df2id(valid)
X_test = df2id(test)

X_train = list2tensor(X_train, max_len)
X_valid = list2tensor(X_valid, max_len)
X_test = list2tensor(X_test, max_len)

y_train = np.loadtxt(base+'y_train.txt')
y_train = torch.tensor(y_train, dtype=torch.int64)
y_valid = np.loadtxt(base+'y_valid.txt')
y_valid = torch.tensor(y_valid, dtype=torch.int64)
y_test = np.loadtxt(base+'y_test.txt')
y_test = torch.tensor(y_test, dtype=torch.int64)

model = RNN()
# print(model.emb.weight)
# copy the pretrained vectors into the embedding matrix before training
# (under no_grad so autograd does not object to the in-place writes)
with torch.no_grad():
    for k, v in d.items():
        if k in w2v.vocab:  # gensim < 4.0 API; on gensim >= 4.0 use `k in w2v.key_to_index`
            # v = np.random.randint(1, PAD)  # experiment: is a random word vector just as effective?
            model.emb.weight[v] = torch.tensor(w2v[k], dtype=torch.float32)
# print(model.emb.weight)
model.emb.weight = torch.nn.Parameter(model.emb.weight)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

ds = TensorDataset(X_train.to(device), y_train.to(device))
# create the DataLoader
loader = DataLoader(ds, batch_size=1024, shuffle=True)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-1)

for epoch in range(10):
    for xx, yy in loader:
        y_pred = model(xx)
        loss = loss_fn(y_pred, yy)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    with torch.no_grad():
        y_pred = model(X_train.to(device))
        loss = loss_fn(y_pred, y_train.to(device))
        writer.add_scalar('Loss/train', loss, epoch)
        writer.add_scalar('Accuracy/train', accuracy(y_pred, y_train), epoch)
        print(accuracy(y_pred, y_train))

        y_pred = model(X_valid.to(device))
        loss = loss_fn(y_pred, y_valid.to(device))
        writer.add_scalar('Loss/valid', loss, epoch)
        writer.add_scalar('Accuracy/valid', accuracy(y_pred, y_valid), epoch)
        print(accuracy(y_pred, y_valid))
# print(model.emb.weight)  # inspect the trained embeddings

85. Bidirectional and Multi-layer RNN

Encode the input text using both a forward and a backward RNN, and train the model.

Setting torch.nn.RNN(bidirectional=True) is all that is needed.
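
With bidirectional=True, the output at each step concatenates the forward and backward hidden states, which is why the layers below take 2*dh inputs. A standalone shape check:

import torch

rnn = torch.nn.RNN(300, 50, bidirectional=True, batch_first=True)
x = torch.zeros(8, 10, 300)  # (batch, seq_len, input_size)
y, h = rnn(x)
print(y.shape)  # torch.Size([8, 10, 100]) -- forward and backward states concatenated
print(h.shape)  # torch.Size([2, 8, 50])   -- final hidden state for each direction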

import torch
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()  # TensorBoard writer (the tensorboard extension was set up in Problem 82)

max_len = 10
dw = 300
dh = 50
n_vocab = len(words) + 2
PAD = len(words) + 1

class RNN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.emb = torch.nn.Embedding(n_vocab, dw, padding_idx=PAD)
        # three stacked bidirectional layers; each consumes the 2*dh-dimensional output of the previous one
        self.rnn1 = torch.nn.RNN(dw, dh, bidirectional=True, batch_first=True)
        self.rnn2 = torch.nn.RNN(2*dh, dh, bidirectional=True, batch_first=True)
        self.rnn3 = torch.nn.RNN(2*dh, dh, bidirectional=True, batch_first=True)
        self.linear = torch.nn.Linear(2*dh, 4)
        self.softmax = torch.nn.Softmax(dim=1)
    def forward(self, x, h=None):
        x = self.emb(x)
        y, h = self.rnn1(x, h)
        y, h = self.rnn2(y, h)
        y, h = self.rnn3(y, h)
        y = y[:,-1,:]  # hidden state at the last step
        y = self.linear(y)
        # y = self.softmax(y)  # not needed: torch.nn.CrossEntropyLoss() applies softmax internally
        return y

def list2tensor(data, max_len):
    new = []
    for d in data:
        if len(d) > max_len:
            d = d[:max_len]
        else:
            d += [PAD] * (max_len - len(d))
        new.append(d)
    return torch.tensor(new, dtype=torch.int64)

def accuracy(pred, label):
    pred = np.argmax(pred.data.to('cpu').numpy(), axis=1)
    label = label.data.to('cpu').numpy()
    return (pred == label).mean()


train = pd.read_csv(base+'train.txt', header=None, sep='\t')
valid = pd.read_csv(base+'valid.txt', header=None, sep='\t')
test = pd.read_csv(base+'test.txt', header=None, sep='\t')

X_train = df2id(train)
X_valid = df2id(valid)
X_test = df2id(test)

X_train = list2tensor(X_train, max_len)
X_valid = list2tensor(X_valid, max_len)
X_test = list2tensor(X_test, max_len)

y_train = np.loadtxt(base+'y_train.txt')
y_train = torch.tensor(y_train, dtype=torch.int64)
y_valid = np.loadtxt(base+'y_valid.txt')
y_valid = torch.tensor(y_valid, dtype=torch.int64)
y_test = np.loadtxt(base+'y_test.txt')
y_test = torch.tensor(y_test, dtype=torch.int64)

model = RNN()
# copy the pretrained vectors into the embedding matrix before training
# (under no_grad so autograd does not object to the in-place writes)
with torch.no_grad():
    for k, v in d.items():
        if k in w2v.vocab:  # gensim < 4.0 API; on gensim >= 4.0 use `k in w2v.key_to_index`
            model.emb.weight[v] = torch.tensor(w2v[k], dtype=torch.float32)
model.emb.weight = torch.nn.Parameter(model.emb.weight)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

ds = TensorDataset(X_train.to(device), y_train.to(device))
# create the DataLoader
loader = DataLoader(ds, batch_size=1024, shuffle=True)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-1)

for epoch in range(10):
    for xx, yy in loader:
        y_pred = model(xx)
        loss = loss_fn(y_pred, yy)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    with torch.no_grad():
        y_pred = model(X_train.to(device))
        loss = loss_fn(y_pred, y_train.to(device))
        writer.add_scalar('Loss/train', loss, epoch)
        writer.add_scalar('Accuracy/train', accuracy(y_pred, y_train), epoch)
        print(accuracy(y_pred, y_train))

        y_pred = model(X_valid.to(device))
        loss = loss_fn(y_pred, y_valid.to(device))
        writer.add_scalar('Loss/valid', loss, epoch)
        writer.add_scalar('Accuracy/valid', accuracy(y_pred, y_valid), epoch)
        print(accuracy(y_pred, y_valid))

86. Convolutional Neural Network (CNN)

There is a word sequence x = (x1, x2, …, xT) represented by ID numbers, where T is the length of the sequence and xt ∈ ℝV is the one-hot representation of a word's ID number (V is the total number of words). Using a convolutional neural network (CNN), implement a model that predicts category y from the word sequence x.

import torch
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.tensorboard import SummaryWriter

max_len = 10
dw = 300
dh = 50
n_vocab = len(words) + 2
PAD = len(words) + 1

class CNN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.emb = torch.nn.Embedding(n_vocab, dw, padding_idx=PAD)
        self.conv = torch.nn.Conv1d(dw, dh, 3, padding=1)  # in_channels: dw, out_channels: dh, kernel size 3
        self.relu = torch.nn.ReLU()
        self.pool = torch.nn.MaxPool1d(max_len)  # max-pool over the whole sequence
        self.linear = torch.nn.Linear(dh, 4)
        self.softmax = torch.nn.Softmax(dim=1)
    def forward(self, x, h=None):
        x = self.emb(x)         # (batch, seq_len, dw)
        x = x.permute(0, 2, 1)  # -> (batch, dw, seq_len); Conv1d expects channels before length
        x = self.conv(x)
        x = self.relu(x)
        x = self.pool(x)        # (batch, dh, 1)
        x = x.view(x.shape[0], x.shape[1])
        y = self.linear(x)
        y = self.softmax(y)     # probabilities over the 4 categories
        return y

def list2tensor(data, max_len):
    new = []
    for d in data:
        if len(d) > max_len:
            d = d[:max_len]
        else:
            d += [PAD] * (max_len - len(d))
        new.append(d)
    return torch.tensor(new, dtype=torch.int64)

def accuracy(pred, label):
    pred = np.argmax(pred.data.to('cpu').numpy(), axis=1)
    label = label.data.to('cpu').numpy()
    return (pred == label).mean()


train = pd.read_csv(base+'train.txt', header=None, sep='\t')
valid = pd.read_csv(base+'valid.txt', header=None, sep='\t')
test = pd.read_csv(base+'test.txt', header=None, sep='\t')

X_train = df2id(train)
X_valid = df2id(valid)
X_test = df2id(test)

X_train = list2tensor(X_train, max_len)
X_valid = list2tensor(X_valid, max_len)
X_test = list2tensor(X_test, max_len)

y_train = np.loadtxt(base+'y_train.txt')
y_train = torch.tensor(y_train, dtype=torch.int64)
y_valid = np.loadtxt(base+'y_valid.txt')
y_valid = torch.tensor(y_valid, dtype=torch.int64)
y_test = np.loadtxt(base+'y_test.txt')
y_test = torch.tensor(y_test, dtype=torch.int64)

model = CNN()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
with torch.no_grad():
    y_pred = model(X_train.to(device))
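
A note on the axis swap in forward(): view() cannot be used to go from (batch, seq_len, dw) to (batch, dw, seq_len), because view() only reinterprets the underlying memory and would silently scramble the data; permute() (or transpose()) is the correct operation. A small demonstration:

import torch

x = torch.arange(6).reshape(1, 2, 3)
print(x.view(1, 3, 2))     # reinterprets the buffer: not a transpose
print(x.permute(0, 2, 1))  # a true transpose of the last two dimensions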

87. Training a CNN with Stochastic Gradient Descent

Train the model constructed in Problem 86 using stochastic gradient descent (SGD). Train while reporting the loss and accuracy on the training data and on the evaluation data, and stop at an appropriate criterion (for example, 10 epochs).

import torch
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()  # TensorBoard writer (the tensorboard extension was set up in Problem 82)

max_len = 10
dw = 300
dh = 50
n_vocab = len(words) + 2
PAD = len(words) + 1

class CNN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.emb = torch.nn.Embedding(n_vocab, dw, padding_idx=PAD)
        self.conv = torch.nn.Conv1d(dw, dh, 3, padding=1)  # in_channels: dw, out_channels: dh
        self.relu = torch.nn.ReLU()
        self.pool = torch.nn.MaxPool1d(max_len)
        self.linear = torch.nn.Linear(dh, 4)
    def forward(self, x, h=None):
        x = self.emb(x)         # (batch, seq_len, dw)
        x = x.permute(0, 2, 1)  # -> (batch, dw, seq_len); Conv1d expects channels before length
        x = self.conv(x)
        x = self.relu(x)
        x = self.pool(x)
        x = x.view(x.shape[0], x.shape[1])
        y = self.linear(x)      # raw logits: torch.nn.CrossEntropyLoss() applies softmax internally
        return y

def list2tensor(data, max_len):
    new = []
    for d in data:
        if len(d) > max_len:
            d = d[:max_len]
        else:
            d += [PAD] * (max_len - len(d))
        new.append(d)
    return torch.tensor(new, dtype=torch.int64)

def accuracy(pred, label):
    pred = np.argmax(pred.data.to('cpu').numpy(), axis=1)
    label = label.data.to('cpu').numpy()
    return (pred == label).mean()


train = pd.read_csv(base+'train.txt', header=None, sep='\t')
valid = pd.read_csv(base+'valid.txt', header=None, sep='\t')
test = pd.read_csv(base+'test.txt', header=None, sep='\t')

X_train = df2id(train)
X_valid = df2id(valid)
X_test = df2id(test)

X_train = list2tensor(X_train, max_len)
X_valid = list2tensor(X_valid, max_len)
X_test = list2tensor(X_test, max_len)

y_train = np.loadtxt(base+'y_train.txt')
y_train = torch.tensor(y_train, dtype=torch.int64)
y_valid = np.loadtxt(base+'y_valid.txt')
y_valid = torch.tensor(y_valid, dtype=torch.int64)
y_test = np.loadtxt(base+'y_test.txt')
y_test = torch.tensor(y_test, dtype=torch.int64)

model = CNN()
# copy the pretrained vectors into the embedding matrix before training
# (under no_grad so autograd does not object to the in-place writes)
with torch.no_grad():
    for k, v in d.items():
        if k in w2v.vocab:  # gensim < 4.0 API; on gensim >= 4.0 use `k in w2v.key_to_index`
            model.emb.weight[v] = torch.tensor(w2v[k], dtype=torch.float32)

model.emb.weight = torch.nn.Parameter(model.emb.weight)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

ds = TensorDataset(X_train.to(device), y_train.to(device))
# create the DataLoader
loader = DataLoader(ds, batch_size=1024, shuffle=True)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-1)
for epoch in range(10):
    for xx, yy in loader:
        y_pred = model(xx)
        loss = loss_fn(y_pred, yy)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    with torch.no_grad():
        y_pred = model(X_train.to(device))
        loss = loss_fn(y_pred, y_train.to(device))
        writer.add_scalar('Loss/train', loss, epoch)
        writer.add_scalar('Accuracy/train', accuracy(y_pred, y_train), epoch)
        print(accuracy(y_pred, y_train))

        y_pred = model(X_valid.to(device))
        loss = loss_fn(y_pred, y_valid.to(device))
        writer.add_scalar('Loss/valid', loss, epoch)
        writer.add_scalar('Accuracy/valid', accuracy(y_pred, y_valid), epoch)
        print(accuracy(y_pred, y_valid))

88. Parameter Tuning

Modify the code from Problem 85 or Problem 87, tuning the network architecture and hyperparameters to build a high-performing category classifier.

I changed the optimizer to Adam.
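
Adam is used with its defaults below (learning rate 1e-3). If tuning further, the learning rate and weight decay are natural knobs to try; the values here are illustrative, not what the original run used:

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)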

import torch
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()  # TensorBoard writer (the tensorboard extension was set up in Problem 82)

max_len = 10
dw = 300
dh = 50
n_vocab = len(words) + 2
PAD = len(words) + 1

class CNN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.emb = torch.nn.Embedding(n_vocab, dw, padding_idx=PAD)
        self.conv = torch.nn.Conv1d(dw, dh, 3, padding=1)  # in_channels: dw, out_channels: dh
        self.relu = torch.nn.ReLU()
        self.pool = torch.nn.MaxPool1d(max_len)
        self.linear = torch.nn.Linear(dh, 4)
    def forward(self, x, h=None):
        x = self.emb(x)         # (batch, seq_len, dw)
        x = x.permute(0, 2, 1)  # -> (batch, dw, seq_len); Conv1d expects channels before length
        x = self.conv(x)
        x = self.relu(x)
        x = self.pool(x)
        x = x.view(x.shape[0], x.shape[1])
        y = self.linear(x)      # raw logits: torch.nn.CrossEntropyLoss() applies softmax internally
        return y

def list2tensor(data, max_len):
    new = []
    for d in data:
        if len(d) > max_len:
            d = d[:max_len]
        else:
            d += [PAD] * (max_len - len(d))
        new.append(d)
    return torch.tensor(new, dtype=torch.int64)

def accuracy(pred, label):
    pred = np.argmax(pred.data.to('cpu').numpy(), axis=1)
    label = label.data.to('cpu').numpy()
    return (pred == label).mean()


train = pd.read_csv(base+'train.txt', header=None, sep='\t')
valid = pd.read_csv(base+'valid.txt', header=None, sep='\t')
test = pd.read_csv(base+'test.txt', header=None, sep='\t')

X_train = df2id(train)
X_valid = df2id(valid)
X_test = df2id(test)

X_train = list2tensor(X_train, max_len)
X_valid = list2tensor(X_valid, max_len)
X_test = list2tensor(X_test, max_len)

y_train = np.loadtxt(base+'y_train.txt')
y_train = torch.tensor(y_train, dtype=torch.int64)
y_valid = np.loadtxt(base+'y_valid.txt')
y_valid = torch.tensor(y_valid, dtype=torch.int64)
y_test = np.loadtxt(base+'y_test.txt')
y_test = torch.tensor(y_test, dtype=torch.int64)

model = CNN()
# copy the pretrained vectors into the embedding matrix before training
# (under no_grad so autograd does not object to the in-place writes)
with torch.no_grad():
    for k, v in d.items():
        if k in w2v.vocab:  # gensim < 4.0 API; on gensim >= 4.0 use `k in w2v.key_to_index`
            model.emb.weight[v] = torch.tensor(w2v[k], dtype=torch.float32)

model.emb.weight = torch.nn.Parameter(model.emb.weight)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

ds = TensorDataset(X_train.to(device), y_train.to(device))
# create the DataLoader
loader = DataLoader(ds, batch_size=1024, shuffle=True)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())
for epoch in range(50):
    for xx, yy in loader:
        y_pred = model(xx)
        loss = loss_fn(y_pred, yy)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    with torch.no_grad():
        y_pred = model(X_train.to(device))
        loss = loss_fn(y_pred, y_train.to(device))
        writer.add_scalar('Loss/train', loss, epoch)
        writer.add_scalar('Accuracy/train', accuracy(y_pred, y_train), epoch)
        print(accuracy(y_pred, y_train))

        y_pred = model(X_valid.to(device))
        loss = loss_fn(y_pred, y_valid.to(device))
        writer.add_scalar('Loss/valid', loss, epoch)
        writer.add_scalar('Accuracy/valid', accuracy(y_pred, y_valid), epoch)
        print(accuracy(y_pred, y_valid))

89. Transfer Learning from a Pretrained Language Model

Starting from a pretrained language model (for example, BERT), build a model that classifies news article headlines into categories.

We fine-tune BERT, accessed through the transformers library. For background on BERT, the following page may be helpful:
BERTの理解に役立つ資料まとめ (a roundup of resources for understanding BERT, in Japanese)
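
The headlines are converted to BERT's subword IDs with tokenizer.encode, which also adds the special [CLS] and [SEP] tokens; this is what df2id below does per headline. A standalone sketch:

from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
print(tokenizer.encode('hello world', add_special_tokens=True))
# e.g. [101, 7592, 2088, 102] -- 101 and 102 are the [CLS] and [SEP] IDs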

!pip install transformers
import torch
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from transformers import *
import torch.nn as nn
import torch.nn.functional as F
writer = SummaryWriter()  # TensorBoard writer (the tensorboard extension was set up in Problem 82)

max_len = 15
PAD = 0       # BERT's [PAD] token has ID 0
n_unit = 768  # hidden size of bert-base

tokenizer_class = BertTokenizer
tokenizer = tokenizer_class.from_pretrained('bert-base-uncased')

def dfs_freeze(model):
    # recursively freeze all parameters
    for name, child in model.named_children():
        for param in child.parameters():
            param.requires_grad = False
        dfs_freeze(child)


class BertClassifier(nn.Module):
    def __init__(self, n_classes=4):
        super(BertClassifier, self).__init__()
        self.bert_model = BertModel.from_pretrained('bert-base-uncased')
        self.fc = nn.Linear(n_unit, n_classes)

    def forward(self, ids):
        seg_ids = torch.zeros_like(ids)  # treat the whole input as one segment
        attention_mask = (ids > 0)       # mask out the [PAD] positions
        # tuple outputs are the transformers 2.x/3.x behavior;
        # on transformers >= 4.0, pass return_dict=False or use .last_hidden_state
        last_hidden_state, _ = self.bert_model(input_ids=ids, token_type_ids=seg_ids, attention_mask=attention_mask)
        x = last_hidden_state[:,0,:]  # the [CLS] token
        logit = self.fc(x.view(-1, n_unit))
        return logit


def list2tensor(data, max_len):
    new = []
    for d in data:
        if len(d) > max_len:
            d = d[:max_len]
        else:
            d += [PAD] * (max_len - len(d))
        new.append(d)
    return torch.tensor(new, dtype=torch.int64)

def accuracy(pred, label):
    pred = np.argmax(pred.data.to('cpu').numpy(), axis=1)
    label = label.data.to('cpu').numpy()
    return (pred == label).mean()

def df2id(df):
    tokenized = df[1].apply((lambda x: tokenizer.encode(x, add_special_tokens=True)))
    return tokenized

train = pd.read_csv(base+'train.txt', header=None, sep='\t')
valid = pd.read_csv(base+'valid.txt', header=None, sep='\t')
test = pd.read_csv(base+'test.txt', header=None, sep='\t')

X_train = df2id(train)
X_valid = df2id(valid)
X_test = df2id(test)

X_train = list2tensor(X_train, max_len)
X_valid = list2tensor(X_valid, max_len)
X_test = list2tensor(X_test, max_len)

y_train = np.loadtxt(base+'y_train.txt')
y_train = torch.tensor(y_train, dtype=torch.int64)
y_valid = np.loadtxt(base+'y_valid.txt')
y_valid = torch.tensor(y_valid, dtype=torch.int64)
y_test = np.loadtxt(base+'y_test.txt')
y_test = torch.tensor(y_test, dtype=torch.int64)
model = BertClassifier()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# freeze BERT and train only the classification head
dfs_freeze(model)
model.fc.requires_grad_(True)

ds = TensorDataset(X_train.to(device), y_train.to(device))
# create the DataLoader (reduce batch_size if GPU memory is tight)
loader = DataLoader(ds, batch_size=1024, shuffle=True)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())
for epoch in range(30):
    print(epoch)
    for xx, yy in loader:
        y_pred = model(xx)
        loss = loss_fn(y_pred, yy)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    with torch.no_grad():
        y_pred = model(X_train.to(device))
        loss = loss_fn(y_pred, y_train.to(device))
        writer.add_scalar('Loss/train', loss, epoch)
        writer.add_scalar('Accuracy/train', accuracy(y_pred, y_train), epoch)
        print(accuracy(y_pred, y_train))

        y_pred = model(X_valid.to(device))
        loss = loss_fn(y_pred, y_valid.to(device))
        writer.add_scalar('Loss/valid', loss, epoch)
        writer.add_scalar('Accuracy/valid', accuracy(y_pred, y_valid), epoch)
        print(accuracy(y_pred, y_valid))

In Closing

Back to the explanations of all 100 problems

Article Information

  • Posted: May 14, 2020
  • Last updated: May 27, 2020