main.py

import math


class Value:
    """A scalar node in a computation graph, with reverse-mode autodiff support."""

    def __init__(self, data: float, _children=(), _op="", label=""):
        self.data = data
        self._prev = set(_children)
        self._op = _op
        self.label = label
        self._backward = lambda: None
        self.grad = 0.0

    def backward(self):
        # Topologically order all of the children in the graph.
        topo = []
        visited = set()

        def build_topo(v):
            if v not in visited:
                visited.add(v)
                for child in v._prev:
                    build_topo(child)
                topo.append(v)

        build_topo(self)

        # Go one variable at a time and apply the chain rule to get its gradient.
        self.grad = 1.0
        for v in reversed(topo):
            v._backward()
        return topo

    def __repr__(self):
        return f"Value({self.data:.4E})"

    def __add__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(
            self.data + other.data,
            _children=(self, other),
            _op="+",
            label="",  # f"(+ {self.label} {other.label})",
        )

        def _backward():
            # d(out)/d(self) = d(out)/d(other) = 1, so the upstream gradient passes through.
            self.grad += out.grad
            other.grad += out.grad

        out._backward = _backward
        return out

    def __rmul__(self, other):  # other * self
        return self * other

    def __radd__(self, other):  # other + self
        return self + other

    def __mul__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(
            self.data * other.data,
            _children=(self, other),
            _op="*",
            label="",  # f"(* {self.label} {other.label})",
        )

        def _backward():
            # d(out)/d(self) = other.data and d(out)/d(other) = self.data.
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad

        out._backward = _backward
        return out

    def __rtruediv__(self, other):  # other / self = (self**-1) * other
        return self**-1 * other

    def __truediv__(self, other):  # self / other = self * other**-1
        return self * other**-1

    def __pow__(self, other):  # self**other
        assert isinstance(other, (int, float)), "only supports int and float powers"
        out = Value(
            self.data**other,
            (self,),
            _op=f"**{other}",
            label="",  # f"{self.label}**{other}",
        )

        def _backward():
            # Power rule: d(self**other)/d(self) = other * self**(other - 1).
            self.grad += (other * self.data ** (other - 1)) * out.grad

        out._backward = _backward
        return out

    def __neg__(self):  # -self
        return self * -1

    def __sub__(self, other):  # self - other
        return self + (-other)

    def __rsub__(self, other):  # other - self = -self + other
        return other + (-self)

    def exp(self):
        out = Value(
            data=math.exp(self.data),
            label="",  # f"exp({self.label})",
            _children=(self,),
            _op="exp",
        )

        def _backward():
            # d(exp(x))/dx = exp(x), which is exactly out.data.
            self.grad += out.grad * out.data

        out._backward = _backward
        return out

    def tanh(self):
        # https://en.wikipedia.org/wiki/Hyperbolic_functions
        x = self.data
        t = (math.exp(2 * x) - 1) / (math.exp(2 * x) + 1)
        out = Value(data=t, _children=(self,), _op="tanh", label="")

        def _backward():
            self.grad += out.grad * (1 - t**2)

        out._backward = _backward
        return out
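

# --- Usage sketch (illustrative addition, not part of the original file) ---
# A minimal example of how this Value class can be exercised: build a small
# expression, call backward() on the result, and read the accumulated
# gradients. The names a, b, c, and loss below are hypothetical.
if __name__ == "__main__":
    a = Value(2.0, label="a")
    b = Value(-3.0, label="b")
    c = a * b + a.tanh()       # forward pass builds the computation graph
    loss = (c - 1.0) ** 2
    loss.backward()            # seeds loss.grad = 1.0, then applies the chain rule
    print(loss)                # repr prints the data in scientific notation
    print(a.grad, b.grad)      # d(loss)/da and d(loss)/db; a's gradient accumulates via +=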