# -*- coding: utf-8 -*-
import math
from .module import Module
from ...core import *
from ...functions import linear, tensordot
from ...autograd import Tensor
class Linear(Module):
    '''Applies a linear transformation to the incoming data.

    Model:
        y = x*w + b

    Args:
        in_features (int): size of each input sample
        out_features (int): size of each output sample
        bias (bool): whether to use bias. Default: True

    Shape:
        - Input: [N, *, in_features] where '*' means any number of additional dimensions.
        - Output: [N, *, out_features] where '*' means any number of additional dimensions.
    '''
    def __init__(self, in_features, out_features, bias=True):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.num_params += in_features*out_features
        bound = math.sqrt(1/in_features)
        self.weight = Tensor(np.random.uniform(-bound, bound, (in_features, out_features)))
        if bias:
            self.bias = Tensor(np.random.uniform(-bound, bound, (out_features,)))
            self.num_params += out_features
        else:
            self.bias = None
    def __repr__(self):
        return '{}({}, {}, bias={}) at 0x{:0{}X}'.format(
            self.__class__.__name__, self.in_features, self.out_features,
            self.bias is not None, id(self), 16)
    def forward(self, x):
        result = tensordot(x, self.weight)
        if self.bias is not None:
            result = result + self.bias
        if self.input_shape is None:
            self.input_shape = x.shape
        if self.output_shape is None:
            self.output_shape = result.shape
        return result
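# A minimal usage sketch for Linear. It assumes that Module dispatches
# __call__ to forward, as is conventional for this style of API:
#
#   fc = Linear(64, 10)
#   x = Tensor(np.random.randn(32, 64))  # batch of 32 samples
#   y = fc(x)                            # y.shape == (32, 10)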
class Bilinear(Module):
    '''Applies a bilinear transformation to the incoming data.

    Model:
        y = x1*w*x2 + b

    Args:
        in_features1 (int): size of each first input sample
        in_features2 (int): size of each second input sample
        out_features (int): size of each output sample
        bias (bool): whether to use bias. Default: True

    Shape:
        - Input1: [N, *, in_features1] where '*' means any number of additional dimensions.
        - Input2: [N, *, in_features2] where '*' means any number of additional dimensions.
        - Output: [N, *, out_features] where '*' means any number of additional dimensions.
    '''
    def __init__(self, in_features1, in_features2, out_features, bias=True):
        super().__init__()
        self.in_features1 = in_features1
        self.in_features2 = in_features2
        self.out_features = out_features
        self.num_params += in_features1*in_features2*out_features
        bound = math.sqrt(1/in_features1)
        self.weight = Tensor(np.random.uniform(-bound, bound, (out_features, in_features1, in_features2)))
        if bias:
            self.bias = Tensor(np.random.uniform(-bound, bound, (out_features,)))
            self.num_params += out_features
        else:
            self.bias = None
    def __repr__(self):
        return '{}({}, {}, {}, bias={}) at 0x{:0{}X}'.format(
            self.__class__.__name__, self.in_features1, self.in_features2,
            self.out_features, self.bias is not None, id(self), 16)
    def forward(self, x1, x2):
        # contract x2 with the last weight axis, then batch-multiply with x1;
        # for 2-D inputs this is y[n, k] = sum_{i, j} x1[n, i]*w[k, i, j]*x2[n, j]
        result = (x1.expand_dims(len(x1.shape)-1) @ tensordot(self.weight, x2.T, (2, 0)).T).squeeze(len(x1.shape)-1)
        if self.bias is not None:
            result = result + self.bias
        if self.input_shape is None:
            self.input_shape = [x1.shape, x2.shape]
        if self.output_shape is None:
            self.output_shape = result.shape
        return result
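# For reference, the contraction in Bilinear.forward is equivalent to the
# plain-NumPy sketch below for 2-D inputs (illustration only; the names are
# hypothetical):
#
#   x1_ = np.random.randn(8, 5)
#   x2_ = np.random.randn(8, 6)
#   w_ = np.random.randn(3, 5, 6)  # (out_features, in_features1, in_features2)
#   y_ = np.einsum('ni,kij,nj->nk', x1_, w_, x2_)  # shape (8, 3)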
class CLinear(Module):
    '''Applies a complex-valued linear transformation to the incoming data.

    Model:
        y = x*w + b

    Args:
        in_features (int): size of each input sample
        out_features (int): size of each output sample
        bias (bool): whether to use bias. Default: True

    Shape:
        - Input: [N, *, in_features] where '*' means any number of additional dimensions.
        - Output: [N, *, out_features] where '*' means any number of additional dimensions.
    '''
    def __init__(self, in_features, out_features, bias=True):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.num_params += in_features*out_features
        std = math.sqrt(1/in_features)
        self.weight = Tensor(np.random.normal(0, std, (in_features, out_features))
                             + 1j*np.random.normal(0, std, (in_features, out_features)), dtype='complex128')
        if bias:
            self.bias = Tensor(np.zeros(out_features, dtype='complex128'), dtype='complex128')
            self.num_params += out_features
        else:
            self.bias = None
    def __repr__(self):
        return '{}({}, {}, bias={}) at 0x{:0{}X}'.format(
            self.__class__.__name__, self.in_features, self.out_features,
            self.bias is not None, id(self), 16)
    def forward(self, x):
        result = tensordot(x, self.weight)
        if self.bias is not None:
            result = result + self.bias
        if self.input_shape is None:
            self.input_shape = x.shape
        if self.output_shape is None:
            self.output_shape = result.shape
        return result
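# A minimal usage sketch for CLinear with complex-valued input (same
# assumption as above that Module dispatches __call__ to forward):
#
#   fc = CLinear(16, 4)
#   x = Tensor(np.random.randn(8, 16) + 1j*np.random.randn(8, 16), dtype='complex128')
#   y = fc(x)  # complex-valued output of shape (8, 4)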
class NoisyLinear(Module):
    '''Applies a linear transformation with parametric noise added to its weights.

    Model:
        y = (w + sig_w*eps_w)*x + (b + sig_b*eps_b)

    Args:
        in_features (int): size of each input sample
        out_features (int): size of each output sample
        std_init (float): initial scale of the noise standard deviations. Default: 0.4
        factorised_noise (bool): whether to use factorised Gaussian noise
            instead of independent per-weight noise. Default: True

    Shape:
        - Input: [N, *, in_features] where '*' means any number of additional dimensions.
        - Output: [N, *, out_features] where '*' means any number of additional dimensions.

    Reference:
        https://arxiv.org/abs/1706.10295
    '''
    def __init__(self, in_features, out_features, std_init=0.4, factorised_noise=True):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.std_init = std_init
        self.factorised_noise = factorised_noise
        mu_range = 1.0 / math.sqrt(self.in_features)
        self.weight_mu = Tensor(np.random.uniform(-mu_range, mu_range, (out_features, in_features)))
        self.weight_sigma = Tensor(np.empty((out_features, in_features)))
        self.weight_sigma.fill(self.std_init / math.sqrt(self.in_features))
        self.weight_epsilon = Tensor(np.empty((out_features, in_features)), requires_grad=False)
        self.bias_mu = Tensor(np.random.uniform(-mu_range, mu_range, (out_features,)))
        self.bias_sigma = Tensor(np.empty(out_features))
        self.bias_sigma.fill(self.std_init / math.sqrt(self.out_features))
        self.bias_epsilon = Tensor(np.empty(out_features), requires_grad=False)
        self.sample_noise()
    def reset_params(self):
        mu_range = 1.0 / math.sqrt(self.in_features)
        self.weight_mu.data = np.random.uniform(-mu_range, mu_range, (self.out_features, self.in_features))
        self.weight_sigma.fill(self.std_init / math.sqrt(self.in_features))
        self.bias_mu.data = np.random.uniform(-mu_range, mu_range, (self.out_features,))
        self.bias_sigma.fill(self.std_init / math.sqrt(self.out_features))
    def _scale_noise(self, size):
        # f(x) = sign(x) * sqrt(|x|), applied element-wise
        x = np.random.randn(size)
        return np.sign(x)*np.sqrt(np.abs(x))
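    # With factorised Gaussian noise (see the reference), per-weight noise is
    # built from two noise vectors instead of being sampled independently:
    #   eps_w[i, j] = f(eps_out[i]) * f(eps_in[j]),  eps_b = f(eps_out),
    # where f(x) = sign(x)*sqrt(|x|) is _scale_noise above. This needs only
    # in_features + out_features noise samples per layer rather than
    # in_features*out_features + out_features.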
    def sample_noise(self):
        if self.factorised_noise:
            epsilon_in = self._scale_noise(self.in_features)
            epsilon_out = self._scale_noise(self.out_features)
            self.weight_epsilon.data = np.outer(epsilon_out, epsilon_in)
            self.bias_epsilon.data = epsilon_out
        else:
            self.weight_epsilon.data = np.random.randn(self.out_features, self.in_features)
            self.bias_epsilon.data = np.random.randn(self.out_features)
    def forward(self, inp):
        if self.training:
            weight = self.weight_mu + self.weight_sigma * self.weight_epsilon
            bias = self.bias_mu + self.bias_sigma * self.bias_epsilon
            return tensordot(inp, weight) + bias
        else:
            return tensordot(inp, self.weight_mu) + self.bias_mu
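# A minimal usage sketch for NoisyLinear in a training step. It assumes that
# Module dispatches __call__ to forward and exposes the `training` flag that
# forward checks above:
#
#   layer = NoisyLinear(128, 4)
#   layer.sample_noise()                         # draw fresh noise each step
#   q = layer(Tensor(np.random.randn(1, 128)))   # noisy weights while training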