#!/usr/bin/env python3
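"""Train a small MLP character-level language model on lorem-ipsum words with tinygrad.

Each character is embedded, a fixed-size window of previous characters is flattened
through one hidden layer, and the network predicts the next character."""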
from tinygrad.tensor import Tensor
from tinygrad.helpers import dtypes
from tinygrad.nn import Linear, Embedding
from tinygrad.nn.optim import SGD
from tinygrad.jit import TinyJit
from extra.training import sparse_categorical_crossentropy
import random
import numpy as np
import numpy.random as npr

words = ['lorem', 'ipsum', 'dolor', 'sit', 'amet', 'consectetur', 'adipiscing',
         'elit', 'sed', 'do', 'eiusmod', 'tempor', 'incididunt', 'ut', 'labore', 'et',
         'dolore', 'magna', 'aliqua', 'ut', 'enim', 'ad', 'minim', 'veniam', 'quis',
         'nostrud', 'exercitation', 'ullamco', 'laboris', 'nisi', 'ut', 'aliquip', 'ex',
         'ea', 'commodo', 'consequat', 'duis', 'aute', 'irure', 'dolor', 'in',
         'reprehenderit', 'in', 'voluptate', 'velit', 'esse', 'cillum', 'dolore', 'eu',
         'fugiat', 'nulla', 'pariatur', 'excepteur', 'sint', 'occaecat', 'cupidatat',
         'non', 'proident', 'sunt', 'in', 'culpa', 'qui', 'officia', 'deserunt',
         'mollit', 'anim', 'id', 'est', 'laborum']
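# the vocabulary is '.' (index 0, the word boundary) plus 'a'-'z' (indices 1-26),
# i.e. 27 classes -- this is where vocab_size=27 below comes from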

def atoi(char):
  # map '.' (the word boundary marker) to 0 and 'a'..'z' to 1..26
  if char == '.':
    return 0
  assert char >= "a" and char <= "z"
  return ord(char) - ord("a") + 1

def itoa(char):
  # inverse of atoi: 0 -> '.', 1..26 -> 'a'..'z'
  charint = int(char)
  if charint == 0:
    return "."
  assert charint >= 1 and charint <= 26
  return chr(charint - 1 + ord('a'))

def dataset(words, block_size=3):
  # build (context window, next character) training pairs from every word
  X, Y = [], []
  for word in words:
    window = [0] * block_size  # sliding window context, initially all '.'
    for ch in word + ".":
      ix = atoi(ch)
      X.append(window)
      Y.append(ix)
      window = window[1:] + [ix]
  # Y holds integer class indices, so keep it as an integer array
  return np.array(X, dtype=np.float32), np.array(Y, dtype=np.int32)
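
# for example, the single word 'ad' with block_size=3 yields
#   X = [[0,0,0], [0,0,1], [0,1,4]]   (contexts '...', '..a', '.ad')
#   Y = [1, 4, 0]                     (targets 'a', 'd', '.')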

class Model:
  def __init__(self, emb_size=10, hidden_n=100, vocab_size=27, block_size=3):
    self.emb = Embedding(vocab_size, emb_size)
    self.l1 = Linear(emb_size * block_size, hidden_n)
    self.l2 = Linear(hidden_n, vocab_size)

  def __call__(self, x, training=True):
    if training:
      for p in self.parameters():
        p.requires_grad = True
    emb = self.emb(x)                                  # (batch, block_size, emb_size)
    h = self.l1(emb.reshape(emb.shape[0], -1)).tanh()  # flatten the context window, one hidden layer
    logits = self.l2(h)
    return logits

  def parameters(self):
    return [self.l1.weight, self.l1.bias, self.l2.weight, self.l2.bias, self.emb.weight]

@TinyJit
def jitloss(x, y):
  return sparse_categorical_crossentropy(x, y).realize()
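# note: TinyJit runs the wrapped function normally at first, then captures the compiled
# kernels and replays them for later calls with identically-shaped inputs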

block_size = 3
random.shuffle(words)
X, Y = dataset(words, block_size)
m = Model(block_size=block_size)
opt = SGD(m.parameters(), lr=0.1)

for step in range(1000):
  # sample a random minibatch of 32 (context, next-char) pairs
  batch_ix = npr.randint(0, X.shape[0], (32,))
  x_batch, y_batch = Tensor(X[batch_ix], requires_grad=False), Y[batch_ix]

  logits = m(x_batch)
  loss = jitloss(logits, y_batch)

  opt.zero_grad()
  loss.backward()
  # guard: stop if no gradient reached the parameters (e.g. if the JIT'd loss detached the graph)
  if m.parameters()[0].grad is None:
    break

  opt.step()

  if step % 100 == 0:
    print(f"Step {step+1} | Loss: {loss.numpy()}")
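
# A minimal sampling sketch, assuming the trained Model above and tinygrad's Tensor.softmax:
# generate one word autoregressively over the same block_size context used for training.
def sample(model, block_size=3, max_len=20):
  window = [0] * block_size  # start from the all-'.' padding context
  out = []
  for _ in range(max_len):
    logits = model(Tensor(np.array([window], dtype=np.float32)), training=False)
    probs = logits.softmax().numpy()[0].astype(np.float64)
    ix = int(npr.choice(len(probs), p=probs / probs.sum()))
    if ix == 0:  # index 0 is '.', the end-of-word marker
      break
    out.append(itoa(ix))
    window = window[1:] + [ix]
  return "".join(out)

# e.g. print(sample(m, block_size))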