add normalizing and no-biasing features to DenseBroadcast

2018-06-24 12:18:30 +02:00 · 2018-06-24 12:18:30 +02:00 · 18e4376aae
commit 18e4376aae
parent f52fabc549
1 changed files with 22 additions and 5 deletions
--- a/nn.lua
+++ b/nn.lua
@ -600,22 +600,35 @@ function Dense:forward(X)
    return Y
 end

-function DenseBroadcast:init(dim)
+function DenseBroadcast:init(dim, norm_in, biasing)
    -- same as Dense but applies the same to every m of (m, n).
+    -- dim: number of output columns (must be a number).
+    -- norm_in: when truthy, coeffs use init_normal rather than init_he_normal,
+    --     and make_shape replaces the default scale self.c.
+    -- biasing: pass false to leave out the bias weights. nil keeps them,
+    --     so callers that pass only dim behave as before this change.
    Layer.init(self, "DenseBroadcast")
    assert(type(dim) == "number")
    self.dim = dim
-    self.coeffs = self:_new_weights(init_he_normal) -- should be normal, but...
+    self.norm_in = norm_in and true or false
+    -- store the flag: make_shape and forward branch on self.biasing,
+    -- which would otherwise never be assigned from the argument.
+    self.biasing = biasing ~= false
+    if self.norm_in then
+        self.coeffs = self:_new_weights(init_normal)
+    else
+        self.coeffs = self:_new_weights(init_he_normal)
+    end
+    if self.biasing then
        self.biases = self:_new_weights(init_zeros)
    end
+    self.c = 1.0
+end

 function DenseBroadcast:make_shape(parent)
    self.shape_in = parent.shape_out
    assert(#self.shape_in == 2)
    self.shape_out = {self.shape_in[1], self.dim}
    self.coeffs.shape = {self.shape_in[#self.shape_in], self.dim}
+    if self.biasing then -- init only creates self.biases when self.biasing is truthy
        self.biases.shape = {1, self.dim}
    end
+    if self.norm_in then -- otherwise self.c keeps the 1.0 assigned in init
+        self.c = 1 / sqrt(prod(self.shape_in)) -- NOTE(review): prod presumably spans both input dims, yet coeffs has only shape_in[2] rows; confirm the intended fan-in
+    end
+end

 function DenseBroadcast:forward(X)
    local bs = checkshape(X, self.shape_in)
@ -623,7 +636,11 @@ function DenseBroadcast:forward(X)
    local Y = self.cache

    dot(X, self.coeffs, 3, 1, Y)
-    for i, v in ipairs(Y) do Y[i] = v + self.biases[(i - 1) % self.dim + 1] end
+    if self.biasing then
+        for i, v in ipairs(Y) do Y[i] = self.c * v + self.biases[(i - 1) % self.dim + 1] end
+    elseif self.norm_in then
+        for i, v in ipairs(Y) do Y[i] = self.c * v end
+    end

    checkshape(Y, self.shape_out)
    return Y