# -*- coding: utf-8 -*-
"""**RosenPy: An Open Source Python Framework for Complex-Valued Neural Networks**.
*Copyright © A. A. Cruz, K. S. Mayer, D. S. Arantes*.
*License*
This file is part of RosenPy.
RosenPy is an open source framework distributed under the terms of the GNU General
Public License, as published by the Free Software Foundation, either version 3 of
the License, or (at your option) any later version. For additional information on
license terms, please open the Readme.md file.
RosenPy is distributed in the hope that it will be useful to every user, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with RosenPy. If not, see <http://www.gnu.org/licenses/>.
"""
from rosenpy.utils import act_func, init_func
class Optimizer:
    """
    Common base for every optimizer used by the neural network.

    It stores the hyperparameters shared by the concrete optimizers and the
    handle to the array backend. The update rule itself is supplied by
    subclasses through ``update_parameters``.
    """

    def __init__(self, beta=100, beta1=0.9, beta2=0.999, epsilon=1e-8):
        """
        Stores the hyperparameters and clears the remaining attributes.

        Parameters:
        -----------
        beta : float, optional
            Stored as ``self.beta``. Default is 100.
        beta1 : float, optional
            Stored as ``self.beta1``. Default is 0.9.
        beta2 : float, optional
            Stored as ``self.beta2``. Default is 0.999.
        epsilon : float, optional
            Small constant stored as ``self.epsilon``; subclasses add it to
            denominators to avoid division by zero. Default is 1e-8.
        """
        # Hyperparameters are kept exactly as passed in.
        self.beta, self.beta1, self.beta2, self.epsilon = beta, beta1, beta2, epsilon
        # Remaining slots start empty; the backend arrives later via set_module.
        self.vt = self.ut = self.xp = self.optimizer = None

    def set_module(self, xp):
        """
        Selects the array backend used for all matrix operations.

        Parameters:
        -----------
        xp : module
            The backend module (NumPy or CuPy).
        """
        self.xp = xp

    def update_parameters(self, parameters, gradients, learning_rate, epoch, mt, vt, ut):
        """
        Abstract update step; concrete optimizers must override it.

        Parameters:
        -----------
        parameters : tuple
            The parameters of the neural network.
        gradients : tuple
            The gradients of the loss function with respect to the parameters.
        learning_rate : tuple
            The learning rates for updating the parameters.
        epoch : int
            The current epoch number.
        mt : tuple
            The first moment estimates.
        vt : tuple
            The second moment estimates.
        ut : tuple
            The third moment estimates.

        Returns:
        --------
        tuple
            In subclasses: the updated parameters followed by the moment estimates.

        Raises:
        -------
        NotImplementedError
            Always, when called on the base class.
        """
        raise NotImplementedError("Subclasses must implement update_parameters method.")
class GradientDescent(Optimizer):
    """
    Plain gradient descent optimizer.

    Each parameter is moved by its learning rate times its gradient; no
    moment estimates are maintained.
    """

    def update_parameters(self, parameters, gradients, learning_rate, epoch, mt, vt, ut):
        """
        Applies one gradient descent step to every parameter.

        Parameters:
        -----------
        parameters : tuple
            The parameters of the neural network.
        gradients : tuple
            The gradients of the loss function with respect to the parameters.
        learning_rate : tuple
            The learning rates, one per parameter.
        epoch : int
            The current epoch number (not used in this optimizer).
        mt : tuple
            The first moment estimates (passed through unchanged).
        vt : tuple
            The second moment estimates (passed through unchanged).
        ut : tuple
            The third moment estimates (passed through unchanged).

        Returns:
        --------
        tuple
            The updated parameters followed by ``mt``, ``vt`` and ``ut``.
        """
        # The step is *added*, so the gradients presumably already carry the
        # descent sign -- NOTE(review): confirm against the caller.
        stepped = [
            weight + rate * grad
            for weight, grad, rate in zip(parameters, gradients, learning_rate)
        ]
        return (*stepped, mt, vt, ut)
class Adam(Optimizer):
    """
    Adam optimizer.

    Adaptive learning-rate method that keeps exponentially decayed averages
    of the gradient and of its squared magnitude, with bias correction.
    """

    def update_parameters(self, parameters, gradients, learning_rate, epoch, mt, vt, ut):
        """
        Applies one Adam step to every parameter.

        Parameters:
        -----------
        Same as the parent class. ``epoch`` is used as the exponent of the
        bias-correction terms, so a value of 0 makes them divide by zero.

        Returns:
        --------
        tuple
            The updated parameters, then the list of updated first moments,
            the list of updated second moments, and ``ut`` unchanged.
        """
        xp, eps = self.xp, self.epsilon
        b1, b2 = self.beta1, self.beta2
        new_params, new_mt, new_vt = [], [], []
        for weight, grad, rate, first, second in zip(parameters, gradients, learning_rate, mt, vt):
            # Decayed averages of the gradient and of |gradient|^2.
            first = b1 * first + (1 - b1) * grad
            second = b2 * second + (1 - b2) * (xp.abs(grad) ** 2)
            # Bias-corrected estimates.
            first_hat = first / (1 - b1 ** epoch)
            second_hat = second / (1 - b2 ** epoch)
            step = first_hat / (xp.sqrt(second_hat) + eps)
            new_params.append(weight + rate * step)
            new_mt.append(first)
            new_vt.append(second)
        return (*new_params, new_mt, new_vt, ut)
class CVAdam(Optimizer):
    """
    Complex-valued Adam optimizer.

    Variant of Adam in which the second moment is tracked separately for the
    real and imaginary components of the gradient, packed into one complex
    array (real part / imaginary part).
    """

    def update_parameters(self, parameters, gradients, learning_rate, epoch, mt, vt, ut):
        """
        Applies one complex-valued Adam step to every parameter.

        Parameters:
        -----------
        Same as the parent class. ``epoch`` is used as the exponent of the
        bias-correction terms, so a value of 0 makes them divide by zero.

        Returns:
        --------
        tuple
            The updated parameters, then the list of updated first moments,
            the list of updated second moments, and ``ut`` unchanged.
        """
        xp, eps = self.xp, self.epsilon
        b1, b2 = self.beta1, self.beta2
        new_params, new_mt, new_vt = [], [], []
        for weight, grad, rate, first, second in zip(parameters, gradients, learning_rate, mt, vt):
            first = b1 * first + (1 - b1) * grad
            # Squared real and imaginary components are accumulated independently.
            split_square = xp.real(grad) ** 2 + 1j * xp.imag(grad) ** 2
            second = b2 * second + (1 - b2) * split_square
            # Bias-corrected estimates.
            first_hat = first / (1 - b1 ** epoch)
            second_hat = second / (1 - b2 ** epoch)
            # Each component is normalised by its own second moment.
            re_step = xp.real(first_hat) / (xp.sqrt(xp.real(second_hat)) + eps)
            im_step = xp.imag(first_hat) / (xp.sqrt(xp.imag(second_hat)) + eps)
            new_params.append(weight + rate * (re_step + 1j * im_step))
            new_mt.append(first)
            new_vt.append(second)
        return (*new_params, new_mt, new_vt, ut)
class AMSGrad(Optimizer):
    """
    AMSGrad optimizer.

    Adam variant that normalises the step by the running maximum of the
    second-moment estimate instead of the estimate itself. No bias
    correction is applied in this implementation.
    """

    def update_parameters(self, parameters, gradients, learning_rate, epoch, mt, vt, ut):
        """
        Applies one AMSGrad step to every parameter.

        Parameters:
        -----------
        Same as the parent class. ``epoch`` is not used by this optimizer.

        Returns:
        --------
        tuple
            The updated parameters, then the lists of updated first moments,
            second moments and running maxima.
        """
        xp, eps = self.xp, self.epsilon
        b1, b2 = self.beta1, self.beta2
        new_params, new_mt, new_vt, new_ut = [], [], [], []
        for weight, grad, rate, first, second, peak in zip(
            parameters, gradients, learning_rate, mt, vt, ut
        ):
            first = b1 * first + (1 - b1) * grad
            second = b2 * second + (1 - b2) * (xp.abs(grad) ** 2)
            # Keep the element-wise largest second moment seen so far.
            peak = xp.maximum(peak, second)
            step = first / (xp.sqrt(peak) + eps)
            new_params.append(weight + rate * step)
            new_mt.append(first)
            new_vt.append(second)
            new_ut.append(peak)
        return (*new_params, new_mt, new_vt, new_ut)
class SAMSGrad(Optimizer):
    """
    SAMSGrad optimizer.

    AMSGrad variant whose denominator is the softplus of the square root of
    the running maximum second moment, ``softplus(beta * sqrt(u)) / beta``,
    instead of ``sqrt(u) + epsilon``.
    """

    def update_parameters(self, parameters, gradients, learning_rate, epoch, mt, vt, ut):
        """
        Updates the parameters using the SAMSGrad optimizer.

        Parameters:
        -----------
        Same as the parent class. ``epoch`` is not used by this optimizer.

        Returns:
        --------
        tuple
            The updated parameters, then the lists of updated first moments,
            second moments and running maxima.
        """
        xp = self.xp
        updated_parameters, updated_mt, updated_vt, updated_ut = [], [], [], []
        for p, g, lr, m, v, u in zip(parameters, gradients, learning_rate, mt, vt, ut):
            m = self.beta1 * m + (1 - self.beta1) * g
            v = self.beta2 * v + (1 - self.beta2) * (xp.abs(g) ** 2)
            # Magnitudes are compared, so u is always real and non-negative.
            u = xp.maximum(xp.abs(u), xp.abs(v))
            # logaddexp(0, x) == log(1 + exp(x)) but does not overflow for
            # large x; with the default beta=100 the naive form overflows as
            # soon as sqrt(u) exceeds roughly 7 and would zero the step.
            softplus = (1 / self.beta) * xp.logaddexp(0.0, self.beta * xp.sqrt(u))
            # Fixed: the original appended to the undefined name
            # `updated_params`, raising NameError on every call.
            updated_parameters.append(p + lr * (m / softplus))
            updated_mt.append(m)
            updated_vt.append(v)
            updated_ut.append(u)
        return tuple(updated_parameters + [updated_mt, updated_vt, updated_ut])
class CVAMSGrad(Optimizer):
    """
    Complex-valued AMSGrad optimizer.

    The second moment and its running maximum are tracked separately for the
    real and imaginary components of the gradient, packed into one complex
    array. No bias correction is applied in this implementation.
    """

    def update_parameters(self, parameters, gradients, learning_rate, epoch, mt, vt, ut):
        """
        Applies one complex-valued AMSGrad step to every parameter.

        Parameters:
        -----------
        Same as the parent class. ``epoch`` is not used by this optimizer.

        Returns:
        --------
        tuple
            The updated parameters, then the lists of updated first moments,
            second moments and running maxima.
        """
        xp, eps = self.xp, self.epsilon
        b1, b2 = self.beta1, self.beta2
        new_params, new_mt, new_vt, new_ut = [], [], [], []
        for weight, grad, rate, first, second, peak in zip(
            parameters, gradients, learning_rate, mt, vt, ut
        ):
            first = b1 * first + (1 - b1) * grad
            split_square = xp.real(grad) ** 2 + 1j * xp.imag(grad) ** 2
            second = b2 * second + (1 - b2) * split_square
            # Running maximum, taken per component on absolute values.
            peak_re = xp.maximum(xp.abs(xp.real(peak)), xp.abs(xp.real(second)))
            peak_im = xp.maximum(xp.abs(xp.imag(peak)), xp.abs(xp.imag(second)))
            peak = peak_re + 1j * peak_im
            # Each component is normalised by its own running maximum.
            re_step = xp.real(first) / (xp.sqrt(xp.real(peak)) + eps)
            im_step = xp.imag(first) / (xp.sqrt(xp.imag(peak)) + eps)
            new_params.append(weight + rate * (re_step + 1j * im_step))
            new_mt.append(first)
            new_vt.append(second)
            new_ut.append(peak)
        return (*new_params, new_mt, new_vt, new_ut)
class Adamax(Optimizer):
    """
    Adamax optimizer.

    Adam variant in which the second-moment estimate is replaced by a decayed
    running maximum of the gradient magnitude.
    """

    def update_parameters(self, parameters, gradients, learning_rate, epoch, mt, vt, ut):
        """
        Applies one Adamax step to every parameter.

        Parameters:
        -----------
        Same as the parent class. ``epoch`` is used as the exponent of the
        first-moment bias correction, so a value of 0 makes it divide by zero.

        Returns:
        --------
        tuple
            The updated parameters, then the list of updated first moments,
            the list of updated running maxima, and ``ut`` unchanged.
        """
        xp, eps = self.xp, self.epsilon
        b1, b2 = self.beta1, self.beta2
        new_params, new_mt, new_vt = [], [], []
        for weight, grad, rate, first, bound in zip(parameters, gradients, learning_rate, mt, vt):
            first = b1 * first + (1 - b1) * grad
            # Decayed maximum of the gradient magnitude.
            bound = xp.maximum(b2 * bound, xp.abs(grad))
            # The bias correction is folded into the learning rate.
            corrected_rate = rate / (1 - b1 ** epoch)
            new_params.append(weight + corrected_rate * first / (bound + eps))
            new_mt.append(first)
            new_vt.append(bound)
        return (*new_params, new_mt, new_vt, ut)
class CVAdamax(Optimizer):
    """
    Complex-valued Adamax optimizer.

    The decayed running maximum is tracked separately for the real and
    imaginary components of the gradient, packed into one complex array.
    """

    def update_parameters(self, parameters, gradients, learning_rate, epoch, mt, vt, ut):
        """
        Applies one complex-valued Adamax step to every parameter.

        Parameters:
        -----------
        Same as the parent class. ``epoch`` is used as the exponent of the
        first-moment bias correction, so a value of 0 makes it divide by zero.

        Returns:
        --------
        tuple
            The updated parameters, then the list of updated first moments,
            the list of updated running maxima, and ``ut`` unchanged.
        """
        xp, eps = self.xp, self.epsilon
        b1, b2 = self.beta1, self.beta2
        new_params, new_mt, new_vt = [], [], []
        for weight, grad, rate, first, bound in zip(parameters, gradients, learning_rate, mt, vt):
            first = b1 * first + (1 - b1) * grad
            # Decayed maximum, taken per component.
            bound_re = xp.maximum(b2 * xp.real(bound), xp.abs(xp.real(grad)))
            bound_im = xp.maximum(b2 * xp.imag(bound), xp.abs(xp.imag(grad)))
            bound = bound_re + 1j * bound_im
            re_step = xp.real(first) / (xp.real(bound) + eps)
            im_step = xp.imag(first) / (xp.imag(bound) + eps)
            # The bias correction is folded into the learning rate.
            corrected_rate = rate / (1 - b1 ** epoch)
            new_params.append(weight + corrected_rate * (re_step + 1j * im_step))
            new_mt.append(first)
            new_vt.append(bound)
        return (*new_params, new_mt, new_vt, ut)
class CVAdaGrad(Optimizer):
    """
    Complex-valued AdaGrad optimizer.

    Accumulates the squared real and imaginary components of the gradient
    separately (packed into one complex array) and scales each component of
    the step by the inverse square root of its own accumulator.
    """

    def update_parameters(self, parameters, gradients, learning_rate, epoch, mt, vt, ut):
        """
        Updates the parameters using the complex-valued AdaGrad optimizer.

        Parameters:
        -----------
        Same as the parent class. ``epoch`` is not used; ``vt`` and ``ut``
        are not read and are returned unchanged.

        Returns:
        --------
        tuple
            The updated parameters, then the list of updated accumulators,
            followed by ``vt`` and ``ut`` unchanged.
        """
        updated_parameters, updated_mt = [], []
        # `vt` is deliberately not zipped in: it is never used here, and
        # including it would truncate (or crash) the update whenever `vt` is
        # shorter than `parameters` or is None.
        for p, lr, g, m in zip(parameters, learning_rate, gradients, mt):
            m = m + (self.xp.real(g) ** 2 + 1j * self.xp.imag(g) ** 2)
            # epsilon sits inside the square root in this variant.
            real_part = self.xp.real(g) / self.xp.sqrt(self.xp.real(m) + self.epsilon)
            imag_part = self.xp.imag(g) / self.xp.sqrt(self.xp.imag(m) + self.epsilon)
            updated_parameters.append(p + lr * (real_part + 1j * imag_part))
            updated_mt.append(m)
        return tuple(updated_parameters + [updated_mt, vt, ut])
class AdaGrad(Optimizer):
    """
    AdaGrad optimizer.

    Accumulates the squared gradient magnitude and scales each step by the
    inverse square root of that accumulator.
    """

    def update_parameters(self, parameters, gradients, learning_rate, epoch, mt, vt, ut):
        """
        Applies one AdaGrad step to every parameter.

        Parameters:
        -----------
        Same as the parent class. ``epoch`` is not used; ``vt`` and ``ut``
        are returned unchanged.

        Returns:
        --------
        tuple
            The updated parameters, then the list of updated accumulators,
            followed by ``vt`` and ``ut`` unchanged.
        """
        xp, eps = self.xp, self.epsilon
        new_params, new_mt = [], []
        for weight, rate, grad, accum in zip(parameters, learning_rate, gradients, mt):
            # Running sum of |gradient|^2.
            accum = accum + (xp.abs(grad) ** 2)
            step = grad / (xp.sqrt(accum) + eps)
            new_params.append(weight + rate * step)
            new_mt.append(accum)
        return (*new_params, new_mt, vt, ut)
class RMSprop(Optimizer):
    """
    RMSprop optimizer.

    Keeps an exponentially decayed average of the squared gradient magnitude
    (decay factor ``beta1``) and scales each step by its inverse square root.
    """

    def update_parameters(self, parameters, gradients, learning_rate, epoch, mt, vt, ut):
        """
        Applies one RMSprop step to every parameter.

        Parameters:
        -----------
        Same as the parent class. ``epoch`` is not used; ``vt`` and ``ut``
        are returned unchanged.

        Returns:
        --------
        tuple
            The updated parameters, then the list of updated averages,
            followed by ``vt`` and ``ut`` unchanged.
        """
        xp, eps, decay = self.xp, self.epsilon, self.beta1
        new_params, new_mt = [], []
        for weight, grad, rate, avg_sq in zip(parameters, gradients, learning_rate, mt):
            avg_sq = avg_sq * decay + (1 - decay) * xp.abs(grad) ** 2
            scale = xp.sqrt(avg_sq) + eps
            new_params.append(weight + rate * grad / scale)
            new_mt.append(avg_sq)
        return (*new_params, new_mt, vt, ut)
class CVRMSprop(Optimizer):
    """
    Complex-valued RMSprop optimizer.

    The decayed average of the squared gradient is tracked separately for the
    real and imaginary components, packed into one complex array.
    """

    def update_parameters(self, parameters, gradients, learning_rate, epoch, mt, vt, ut):
        """
        Applies one complex-valued RMSprop step to every parameter.

        Parameters:
        -----------
        Same as the parent class. ``epoch`` is not used; ``vt`` and ``ut``
        are returned unchanged.

        Returns:
        --------
        tuple
            The updated parameters, then the list of updated averages,
            followed by ``vt`` and ``ut`` unchanged.
        """
        xp, eps, decay = self.xp, self.epsilon, self.beta1
        new_params, new_mt = [], []
        for weight, grad, rate, avg_sq in zip(parameters, gradients, learning_rate, mt):
            split_square = xp.real(grad) ** 2 + 1j * xp.imag(grad) ** 2
            avg_sq = avg_sq * decay + (1 - decay) * split_square
            # epsilon sits inside the square root in this variant.
            re_step = xp.real(grad) / (xp.sqrt(xp.real(avg_sq) + eps))
            im_step = xp.imag(grad) / (xp.sqrt(xp.imag(avg_sq) + eps))
            new_params.append(weight + rate * (re_step + 1j * im_step))
            new_mt.append(avg_sq)
        return (*new_params, new_mt, vt, ut)
class Nadam(Optimizer):
    """
    Nadam optimizer.

    Adam variant whose step direction blends the bias-corrected first moment
    with the bias-corrected current gradient.
    """

    def update_parameters(self, parameters, gradients, learning_rate, epoch, mt, vt, ut):
        """
        Applies one Nadam step to every parameter.

        Parameters:
        -----------
        Same as the parent class. ``epoch`` is used as the exponent of the
        bias-correction terms, so a value of 0 makes them divide by zero.

        Returns:
        --------
        tuple
            The updated parameters, then the list of updated first moments,
            the list of updated second moments, and ``ut`` unchanged.
        """
        xp, eps = self.xp, self.epsilon
        b1, b2 = self.beta1, self.beta2
        new_params, new_mt, new_vt = [], [], []
        for weight, grad, rate, first, second in zip(parameters, gradients, learning_rate, mt, vt):
            first = first * b1 + (1 - b1) * grad
            second = b2 * second + (1 - b2) * xp.abs(grad) ** 2
            # Gradient term is corrected with exponent epoch + 1, the moment
            # term with exponent epoch.
            grad_term = (1 - b1) * grad / (1 - b1 ** (epoch + 1))
            moment_term = b1 * first / (1 - b1 ** epoch)
            blended = grad_term + moment_term
            scale = xp.sqrt(second / (1 - b2 ** epoch)) + eps
            new_params.append(weight + rate * blended / scale)
            new_mt.append(first)
            new_vt.append(second)
        return (*new_params, new_mt, new_vt, ut)
class CVNadam(Nadam):
    """
    Complex-valued Nadam optimizer.

    The second moment is tracked separately for the real and imaginary
    components of the gradient, packed into one complex array.
    """

    def update_parameters(self, parameters, gradients, learning_rate, epoch, mt, vt, ut):
        """
        Applies one complex-valued Nadam step to every parameter.

        Parameters:
        -----------
        parameters : list of arrays
            The parameters of the neural network.
        gradients : list of arrays
            The gradients of the loss function with respect to the parameters.
        learning_rate : iterable
            The learning rates, one per parameter (iterated in lockstep with
            ``parameters``).
        epoch : int
            The current epoch number; used as the exponent of the
            bias-correction terms, so a value of 0 makes them divide by zero.
        mt : list of arrays
            The first moment estimates.
        vt : list of arrays
            The second moment estimates.
        ut : list of arrays
            The third moment estimates (returned unchanged).

        Returns:
        --------
        tuple
            The updated parameters, then the list of updated first moments,
            the list of updated second moments, and ``ut`` unchanged.
        """
        xp, eps = self.xp, self.epsilon
        b1, b2 = self.beta1, self.beta2
        new_params, new_mt, new_vt = [], [], []
        for weight, g, rate, first, second in zip(parameters, gradients, learning_rate, mt, vt):
            first = b1 * first + (1 - b1) * g
            split_square = xp.real(g) ** 2 + 1j * xp.imag(g) ** 2
            second = b2 * second + (1 - b2) * split_square
            # Gradient term is corrected with exponent epoch + 1, the moment
            # term with exponent epoch.
            grad_term = (1 - b1) * g / (1 - b1 ** (epoch + 1))
            moment_term = b1 * first / (1 - b1 ** epoch)
            blended = grad_term + moment_term
            second_hat = second / (1 - b2 ** epoch)
            # Each component is normalised by its own second moment.
            re_step = xp.real(blended) / (xp.sqrt(xp.real(second_hat)) + eps)
            im_step = xp.imag(blended) / (xp.sqrt(xp.imag(second_hat)) + eps)
            new_params.append(weight + rate * (re_step + 1j * im_step))
            new_mt.append(first)
            new_vt.append(second)
        return (*new_params, new_mt, new_vt, ut)