# Source code for qubit.descriptors

import warnings
import math
import numpy as np
from qubit.data import atomnumber
from qubit.data import atomsymbol
import random

class Descriptor:
    """Parent class for all descriptors.

    Both methods are placeholders that unconditionally raise
    ``NotImplementedError``; concrete descriptors are expected to supply
    their own ``generate`` and ``normalize``.
    """

    def generate(self):
        """Placeholder for building a descriptor.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError

    def normalize(self):
        """Placeholder for normalizing a descriptor.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError

class CoulombMatrix(Descriptor):
    """Provides functionality to generate the Coulomb Matrix (1).

    Every method is a static method, so it can be called on the class
    (``CoulombMatrix.generate(atoms, xyz)``) as well as on an instance.

    (1) Montavon, G.; Hansen, K.; Fazli, S.; Rupp, M.; Biegler, F.; Ziehe, A.;
    Tkatchenko, A.; Lilienfeld, A.; Müller, K.-R.
    Learning Invariant Representations of Molecules for Atomization Energy Prediction.
    In Advances in Neural Information Processing Systems;
    Pereira, F., Burges, C. J. C., Bottou, L., Weinberger, K. Q., Eds.;
    Curran Associates, Inc., 2012; Vol. 25.
    """

    @staticmethod
    def generate(atoms, xyz, randomize=False):
        """Generates the Coulomb Matrix (1).

        Diagonal entries are ``0.5 * Z_i ** 2.4``; the off-diagonal entry
        ``(i, j)`` is ``Z_i * Z_j / |R_i - R_j|``, where ``Z`` is the atomic
        number and ``R`` the position of an atom.

        (1) Montavon, G.; Hansen, K.; Fazli, S.; Rupp, M.; Biegler, F.; Ziehe, A.;
        Tkatchenko, A.; Lilienfeld, A.; Müller, K.-R.
        Learning Invariant Representations of Molecules for Atomization Energy Prediction.
        In Advances in Neural Information Processing Systems;
        Pereira, F., Burges, C. J. C., Bottou, L., Weinberger, K. Q., Eds.;
        Curran Associates, Inc., 2012; Vol. 25.

        Args:
            atoms (list): The atoms, given as element symbols (looked up in
                ``atomnumber``) and/or as atomic numbers.
            xyz (2D list): One 3D coordinate per atom, in the order of ``atoms``.
            randomize (bool, optional): If True, the matrix is passed through
                :func:`randomize` before it is returned. Defaults to False.

        Returns:
            ndarray: The ``n x n`` Coulomb Matrix (``0 x 0`` for no atoms).
        """
        n = len(atoms)

        # Symbols are translated per atom, so empty and mixed inputs work too.
        z = [atomnumber[atom] if isinstance(atom, str) else atom
             for atom in atoms]

        cm = np.zeros((n, n))
        for i in range(n):
            cm[i, i] = 0.5 * z[i] ** 2.4
            # The matrix is symmetric: compute the upper triangle and mirror it.
            for j in range(i + 1, n):
                distance = np.linalg.norm(
                    np.asarray(xyz[i]) - np.asarray(xyz[j]))
                cm[i, j] = cm[j, i] = z[i] * z[j] / distance

        if randomize:
            return CoulombMatrix.randomize(cm)
        return cm

    @staticmethod
    def randomize(coulomb_matrix, seed=None):
        """Randomizes the Coulomb Matrix as described in (1).

        The rows are ordered by their norm after standard normal noise has
        been added to each norm; the same permutation is then applied to the
        rows and to the columns.

        (1) Montavon, G.; Hansen, K.; Fazli, S.; Rupp, M.; Biegler, F.; Ziehe, A.;
        Tkatchenko, A.; Lilienfeld, A.; Müller, K.-R.
        Learning Invariant Representations of Molecules for Atomization Energy Prediction.
        In Advances in Neural Information Processing Systems;
        Pereira, F., Burges, C. J. C., Bottou, L., Weinberger, K. Q., Eds.;
        Curran Associates, Inc., 2012; Vol. 25.

        Args:
            coulomb_matrix (2D list or ndarray): The Coulomb Matrix as generated
                in :func:`~qubit.descriptors.CoulombMatrix.generate`.
            seed (int, optional): Seed handed to ``np.random.RandomState`` to
                make the permutation reproducible. Defaults to None, which
                draws a fresh, unseeded random state.

        Returns:
            ndarray: The randomized Coulomb Matrix.
        """
        # Nested lists do not support the [:, p] indexing used below.
        cm = np.asarray(coulomb_matrix)

        row_norms = np.array([np.linalg.norm(row) for row in cm], dtype=float)

        # Noise drawn from a normal distribution perturbs the sort order.
        rng = np.random.RandomState(seed)
        noise = rng.normal(size=row_norms.size)
        p = np.argsort(row_norms + noise)

        # Permute row-wise, then column-wise.
        return cm[p][:, p]

    @staticmethod
    def pad_matrix(matrix, size):
        """Applies padding to a matrix.

        You can use this function to scale a matrix to a given size.
        The empty space is filled with zeros, appended after the existing
        entries along every axis.

        Example: Can be used to pad the Coulomb Matrix.

        Args:
            matrix (2D list or ndarray): Matrix to pad.
            size (int): The size to scale the matrix to.

        Returns:
            ndarray: The padded matrix. If ``size`` does not exceed the first
            dimension of ``matrix``, the matrix is returned unpadded; a
            warning is issued only when ``size`` is actually smaller.
        """
        matrix = np.asarray(matrix)

        # Number of zero rows/columns that have to be appended.
        extra = size - matrix.shape[0]

        if extra <= 0:
            # Padding to the current size is a no-op, not a reduction,
            # so only a genuinely smaller target triggers the warning.
            if extra < 0:
                warnings.warn(
                    "Trying to reduce the matrix size, default matrix size has been returned!",
                    Warning,
                )
            return matrix
        return np.pad(matrix, (0, extra))

    @staticmethod
    def normalize(coulomb_matrix, phi=1, negative_dimensions=0, positive_dimensions=0):
        """Normalizes the Coulomb Matrix by tensorizing it. May require padding.
        This method is an adaption from (1).

        One layer ``sigmoid((C + k * phi) / phi)`` is produced for every
        integer ``k`` from ``-negative_dimensions`` to ``positive_dimensions``
        (``k = 0`` being the base layer), in ascending order of ``k``.

        NOTE: positive layers use ``k = 1 .. positive_dimensions``; starting
        at ``k = 0`` would merely duplicate the base layer.

        (1) Montavon, G.; Hansen, K.; Fazli, S.; Rupp, M.; Biegler, F.; Ziehe, A.;
        Tkatchenko, A.; Lilienfeld, A.; Müller, K.-R.
        Learning Invariant Representations of Molecules for Atomization Energy Prediction.
        In Advances in Neural Information Processing Systems;
        Pereira, F., Burges, C. J. C., Bottou, L., Weinberger, K. Q., Eds.;
        Curran Associates, Inc., 2012; Vol. 25.

        Args:
            coulomb_matrix (2D list or ndarray): The Coulomb Matrix.
            phi (int, optional): Equivalent to an offset. Defaults to 1.
            negative_dimensions (int, optional): The amount of negative dimensions describing the tensor. Defaults to 0.
            positive_dimensions (int, optional): The amount of positive dimensions describing the tensor. Defaults to 0.

        Returns:
            ndarray: Tensorized Coulomb Matrix with
            ``negative_dimensions + 1 + positive_dimensions`` layers along
            the first axis.
        """
        cm = np.asarray(coulomb_matrix, dtype=float)

        def sigmoid(x):
            # tanh form of the logistic function: mathematically identical to
            # exp(x) / (1 + exp(x)) but does not overflow to nan for large x
            # (the diagonal 0.5 * Z ** 2.4 exceeds the exp() range quickly).
            return 0.5 * (1.0 + np.tanh(0.5 * x))

        # Negative counts yield no layers on that side, like an empty range.
        lowest = -max(negative_dimensions, 0)
        highest = max(positive_dimensions, 0)

        tensors = [sigmoid((cm + (k * phi)) / phi)
                   for k in range(lowest, highest + 1)]
        return np.array(tensors)

class CoulombVector(Descriptor):
    """Provides functionality to generate the Coulomb Vector.

    Every method is a static method, so it can be called on the class
    (``CoulombVector.generate(atoms, xyz)``) as well as on an instance.
    """

    @staticmethod
    def generate(atoms, xyz):
        """Generates one Coulomb Vector per atom.

        Row ``i`` holds the Coulomb Matrix row of atom ``i`` (diagonal
        ``0.5 * Z_i ** 2.4``, off-diagonal ``Z_i * Z_j / |R_i - R_j|``)
        followed by the atomic number ``Z_i`` in the last column.

        Args:
            atoms (list): The atoms, given as element symbols (looked up in
                ``atomnumber``) and/or as atomic numbers.
            xyz (2D list): One 3D coordinate per atom, in the order of ``atoms``.

        Returns:
            ndarray: Array of shape ``(n, n + 1)`` for ``n`` atoms.
        """
        n = len(atoms)

        # Symbols are translated per atom, so empty and mixed inputs work too.
        z = [atomnumber[atom] if isinstance(atom, str) else atom
             for atom in atoms]

        cv = np.zeros((n, n + 1))
        for i in range(n):
            cv[i, i] = 0.5 * z[i] ** 2.4
            # The extra last column carries the atomic number of the row's atom.
            cv[i, n] = z[i]
            # The Coulomb part is symmetric: compute the upper triangle and mirror it.
            for j in range(i + 1, n):
                distance = np.linalg.norm(
                    np.asarray(xyz[i]) - np.asarray(xyz[j]))
                cv[i, j] = cv[j, i] = z[i] * z[j] / distance
        return cv

    @staticmethod
    def remove_atom(coulomb_vector):
        """Splits the last entry off a Coulomb Vector.

        For a single row produced by :func:`generate` the last entry is the
        atomic number of that row's atom.

        Args:
            coulomb_vector (sequence): The vector to split.

        Returns:
            tuple: ``(last entry, everything before the last entry)``.
        """
        return coulomb_vector[-1], coulomb_vector[:-1]

    @staticmethod
    def randomize(coulomb_vector):
        """Randomizes the Coulomb Vector.

        The entries along the first axis are shuffled in place using the
        ``random`` module, and the same (now shuffled) object is returned.

        Args:
            coulomb_vector (list or ndarray): The data to shuffle.

        Returns:
            list or ndarray: ``coulomb_vector`` itself, shuffled.
        """
        # random.shuffle swaps items pairwise; on a multi-dimensional ndarray
        # the swapped rows are views, which duplicates rows and loses data.
        # Shuffling an index list draws the same permutation safely.
        order = list(range(len(coulomb_vector)))
        random.shuffle(order)

        if isinstance(coulomb_vector, np.ndarray):
            # Fancy indexing copies before the in-place assignment.
            coulomb_vector[:] = coulomb_vector[order]
        else:
            coulomb_vector[:] = [coulomb_vector[k] for k in order]
        return coulomb_vector

    @staticmethod
    def pad_vector(vector, size):
        """Applies padding to a vector.

        You can use this function to scale a vector to a given size.
        The empty space is filled with zeros, appended after the existing
        entries.

        Example: Can be used to pad the Coulomb Vector.

        Args:
            vector (list or ndarray): Vector to pad.
            size (int): The size to scale the vector to.

        Returns:
            ndarray: The padded vector. If ``size`` does not exceed the number
            of elements in ``vector``, the vector is returned unpadded; a
            warning is issued only when ``size`` is actually smaller.
        """
        vector = np.asarray(vector)

        # Number of zeros that have to be appended.
        extra = size - vector.size

        if extra <= 0:
            # Padding to the current size is a no-op, not a reduction,
            # so only a genuinely smaller target triggers the warning.
            if extra < 0:
                warnings.warn(
                    "Trying to reduce the matrix size, default matrix size has been returned!",
                    Warning,
                )
            return vector
        return np.pad(vector, (0, extra))

    @staticmethod
    def normalize(coulomb_vector, phi=1, negative_dimensions=0, positive_dimensions=0):
        """Normalizes the Coulomb Vector by tensorizing it.

        Delegates to ``CoulombMatrix.normalize`` with the same arguments.

        Args:
            coulomb_vector (list or ndarray): The Coulomb Vector.
            phi (int, optional): Equivalent to an offset. Defaults to 1.
            negative_dimensions (int, optional): The amount of negative dimensions describing the tensor. Defaults to 0.
            positive_dimensions (int, optional): The amount of positive dimensions describing the tensor. Defaults to 0.

        Returns:
            The value returned by ``CoulombMatrix.normalize``.
        """
        return CoulombMatrix.normalize(
            coulomb_vector,
            phi=phi,
            negative_dimensions=negative_dimensions,
            positive_dimensions=positive_dimensions)