Source code for qubit.descriptors

import warnings
import math
import numpy as np
from qubit.data import atomnumber
from qubit.data import atomsymbol
import random

[docs]class Descriptor: """Parent class for all descriptors """
[docs] def generate(self): """Placeholder Raises: NotImplementedError """ raise NotImplementedError
[docs] def normalize(self): """Placeholder Raises: NotImplementedError """ raise NotImplementedError
[docs]class CoulombMatrix(Descriptor): """Provides functionality to generate the Coulomb Matrix (1). (1) Montavon, G.; Hansen, K.; Fazli, S.; Rupp, M.; Biegler, F.; Ziehe, A.; Tkatchenko, A.; Lilienfeld, A.; Müller, K.-R. Learning Invariant Representations of Molecules for Atomization Energy Prediction. In Advances in Neural Information Processing Systems; Pereira, F., Burges, C. J. C., Bottou, L., Weinberger, K. Q., Eds.; Curran Associates, Inc., 2012; Vol. 25. """
[docs] def generate(atoms, xyz, randomize=False): """Generates the Coulomb Matrix (1). (1) Montavon, G.; Hansen, K.; Fazli, S.; Rupp, M.; Biegler, F.; Ziehe, A.; Tkatchenko, A.; Lilienfeld, A.; Müller, K.-R. Learning Invariant Representations of Molecules for Atomization Energy Prediction. In Advances in Neural Information Processing Systems; Pereira, F., Burges, C. J. C., Bottou, L., Weinberger, K. Q., Eds.; Curran Associates, Inc., 2012; Vol. 25. Args: atoms (list): A list of atoms. xyz (2D list): A list of 3D coordinates. Returns: 2D list: The Coulomb Matrix. """ # determine the lenght of the molecule and atomnumbers n = len(atoms) if type(atoms[0]) == str: z = [atomnumber[atom] for atom in atoms] else: z = atoms # create an empty matrix cm = np.zeros((n, n)) # calculate the values, populate the array and return the coulomb matrix for i in range(n): for j in range(n): if i == j: cm[i][j] = 0.5 * z[i] ** 2.4 elif i < j: cm[i][j] = ( z[i] * z[j] / (np.linalg.norm(np.array(xyz[i]) - np.array(xyz[j]))) ) cm[j][i] = cm[i][j] if randomize: return CoulombMatrix.randomize(cm) else: return cm
[docs] def randomize(coulomb_matrix): """Randomizes the Coulomb Matrix as described in (1). (1) Montavon, G.; Hansen, K.; Fazli, S.; Rupp, M.; Biegler, F.; Ziehe, A.; Tkatchenko, A.; Lilienfeld, A.; Müller, K.-R. Learning Invariant Representations of Molecules for Atomization Energy Prediction. In Advances in Neural Information Processing Systems; Pereira, F., Burges, C. J. C., Bottou, L., Weinberger, K. Q., Eds.; Curran Associates, Inc., 2012; Vol. 25. Args: coulomb_matrix (2D list): The Coulomb Matrix as generated in :func:`~Qubit.descriptors.CoulombMatrix.generate` Returns: 2D list: The randomized Coulomb Matrix. """ # calculate the row normals of a coulomb matrix row_norms = np.array([np.linalg.norm(row) for row in coulomb_matrix], dtype=float) # draw random numbers from a normal distribution rand = np.random.RandomState() n = rand.normal(size=row_norms.size) # calcualte the permutation p = np.argsort(row_norms + n) # Permute row wise then coulomn wise return coulomb_matrix[p][:, p]
[docs] def pad_matrix(matrix, size): """Applies padding to a matrix. You can use this function to scale a matrix to a given size. The empty space is filled with zeros. Example: Can be used to pad the Coulomb Matrix. Args: matrix (2D np.array): Matrix to pad in a nested list format. size (int): The size to scale the matrix to. Returns: ndarray: The padded matrix. """ if isinstance(matrix, list): matrix = np.array(list) # calculate the wished size size = size - matrix.shape[0] # confirm the size isn't less than the default matrix size if size <= 0: m = matrix warnings.warn( "Trying to reduce the matrix size, default matrix size has been returned!", Warning, ) else: # pad the matrix m = np.pad(matrix, (0, size)) return m
[docs] def normalize(coulomb_matrix, phi=1, negative_dimensions=0, positive_dimensions=0): """Normalizes the Coulomb Matrix by tensorizing it. May require padding. This method is an adaption from (1). (1) Montavon, G.; Hansen, K.; Fazli, S.; Rupp, M.; Biegler, F.; Ziehe, A.; Tkatchenko, A.; Lilienfeld, A.; Müller, K.-R. Learning Invariant Representations of Molecules for Atomization Energy Prediction. In Advances in Neural Information Processing Systems; Pereira, F., Burges, C. J. C., Bottou, L., Weinberger, K. Q., Eds.; Curran Associates, Inc., 2012; Vol. 25. Args: coulomb_matrix (2D list): The Coulomb Matrix. phi (int, optional): Equivalent to an offset. Defaults to 1. negative_dimensions (int, optional): The amount of negative dimensions describing the tensor. Defaults to 0. positive_dimension (int, optional): The amount of positive dimensions describing the tensor. Defaults to 0. Returns: 2D list: Tensorized Coulomb Matrix. """ tensors = [] cm = np.array(coulomb_matrix) def sigmoid(x): return np.exp(x) / (1 + np.exp(x)) # generate negative layers for i in range(-negative_dimensions, 0): tensor = sigmoid((cm + (i * phi)) / phi) # i = negative here tensors.append(tensor) # generate base layer tensor = sigmoid(cm / phi) tensors.append(tensor) # generate negapositive layers for i in range(positive_dimensions): tensor = sigmoid((cm + (i * phi)) / phi) tensors.append(tensor) return np.array(tensors)
[docs]class CoulombVector(Descriptor): """Provides functionality to generate the Coulomb Vector. """
[docs] def generate(atoms, xyz): # determine the lenght of the molecule and atomnumbers n = len(atoms) if type(atoms[0]) == str: z = [atomnumber[atom] for atom in atoms] else: z = atoms # create an empty matrix cm = np.zeros((n, n+1)) # calculate the values, populate the array and return the coulomb matrix for i in range(n): for j in range(n+1): if i == j: cm[i][j] = 0.5 * z[i] ** 2.4 elif j == n: cm[i][j] = z[i] elif i < j: cm[i][j] = ( z[i] * z[j] / (np.linalg.norm(np.array(xyz[i]) - np.array(xyz[j]))) ) cm[j][i] = cm[i][j] return cm
def remove_atom(coulomb_vector): atom = coulomb_vector[-1] coulomb_vector = coulomb_vector[:-1] return atom, coulomb_vector
[docs] def randomize(coulomb_vector): """Randomizes the Coulomb Vector. """ random.shuffle(coulomb_vector) return coulomb_vector
[docs] def pad_vector(vector, size): """Applies padding to a vector. You can use this function to scale a vector to a given size. The empty space is filled with zeros. Example: Can be used to pad the Coulomb Vector. Args: matrix (2D np.array): Matrix to pad in a nested list format. size (int): The size to scale the matrix to. Returns: ndarray: The padded vector. """ if isinstance(vector, list): vector = np.asarray(vector) # calculate the wished size size = size - vector.size # confirm the size isn't less than the default matrix size if size <= 0: m = vector warnings.warn( "Trying to reduce the matrix size, default matrix size has been returned!", Warning, ) else: # pad the matrix m = np.pad(vector, (0, size)) return m
[docs] def normalize(coulomb_vector, phi=1, negative_dimensions=0, positive_dimensions=0): return CoulombMatrix.normalize( coulomb_vector, phi=phi, negative_dimensions=negative_dimensions, positive_dimensions=positive_dimensions)