Source code for qubit.descriptors

import warnings
import math
import numpy as np
from qubit.data import atomnumber
import random


class Descriptor:
    """Parent class for all descriptors.

    Subclasses are expected to override :meth:`generate` and
    :meth:`normalize`; the versions defined here only raise
    ``NotImplementedError``.
    """

    def generate(self, *args, **kwargs):
        """Placeholder to be overridden by subclasses.

        Arguments are accepted and ignored so that a subclass which has not
        overridden this method fails with ``NotImplementedError`` rather than
        with a ``TypeError`` about the number of arguments.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError

    def normalize(self, *args, **kwargs):
        """Placeholder to be overridden by subclasses.

        Arguments are accepted and ignored so that a subclass which has not
        overridden this method fails with ``NotImplementedError`` rather than
        with a ``TypeError`` about the number of arguments.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError

[docs]class CoulombVector(Descriptor):
    def randomize(coulomb_vector):
        random.shuffle(coulomb_vector)
        return coulomb_vector
    
[docs]    def normalize(self, coulomb_vector, phi=1, slope=0.7, negative_dimensions=0, positive_dimension=0):

        tensors = []
        cv = np.array(coulomb_vector)

        # generate negative layers
        for i in range(negative_dimensions):
            tensor = np.empty(len(cv))
            for i, x in enumerate(coulomb_vector):
                tensor[i] = round((1/2)+((1/2)*math.tanh(((x-(i*phi))/phi)*slope)))
            tensors.append(tensor)

        # generate base layer
        tensor = np.empty(len(cv))
        for i, x in enumerate(coulomb_vector):
            tensor[i] = round((1/2)+((1/2)*math.tanh((x/phi)*slope)))
        tensors.append(tensor)

        # generate negapositive layers
        for i in range(positive_dimension):
            tensor = np.empty(len(cv))
            for i, x in enumerate(coulomb_vector):
                tensor[i] = round((1/2)+((1/2)*math.tanh(((x+(i*phi))/phi)*slope)))
            tensors.append(tensor)
        return np.array(tensors)
    
    def pad_vector(vector, size):
        
        if isinstance(vector, list):
            vector = np.asarray(vector)
            
        # calculate the wished size
        size = size - vector.size
        
        # confirm the size isn't less than the default matrix size
        if size <= 0:
            m = vector
            warnings.warn(
                "Trying to reduce the matrix size, default matrix size has been returned!",
                Warning,
            )
        else:
            # pad the matrix
            m = np.pad(vector, (0, size))
        return m

[docs]class CoulombMatrix(Descriptor):
    """Provides functionality to generate the Coulomb Matrix (1).

    (1) Montavon, G.; Hansen, K.; Fazli, S.; Rupp, M.; Biegler, F.; Ziehe, A.;
    Tkatchenko, A.; Lilienfeld, A.; Müller, K.-R.
    Learning Invariant Representations of Molecules for Atomization Energy Prediction.
    In Advances in Neural Information Processing Systems;
    Pereira, F., Burges, C. J. C., Bottou, L., Weinberger, K. Q., Eds.;
    Curran Associates, Inc., 2012; Vol. 25.
    """

[docs]    def generate(self, atoms, xyz, randomize=False):
        """Generates the Coulomb Matrix (1).

        (1) Montavon, G.; Hansen, K.; Fazli, S.; Rupp, M.; Biegler, F.; Ziehe, A.;
        Tkatchenko, A.; Lilienfeld, A.; Müller, K.-R.
        Learning Invariant Representations of Molecules for Atomization Energy Prediction.
        In Advances in Neural Information Processing Systems;
        Pereira, F., Burges, C. J. C., Bottou, L., Weinberger, K. Q., Eds.;
        Curran Associates, Inc., 2012; Vol. 25.

        Args:
            atoms (list): A list of atoms.
            xyz (2D list): A list of 3D coordinates.

        Returns:
            2D list: The Coulomb Matrix.
        """
        # determine the lenght of the molecule and atomnumbers
        n = len(atoms)

        if type(atoms[0]) == str:
            z = [atomnumber[atom] for atom in atoms]
        else:
            z = atoms

        # create an empty matrix
        cm = np.zeros((n, n))

        # calculate the values, populate the array and return the coulomb matrix
        for i in range(n):
            for j in range(n):
                if i == j:
                    cm[i][j] = 0.5 * z[i] ** 2.4
                elif i < j:
                    cm[i][j] = (
                        z[i] * z[j] /
                        (np.linalg.norm(np.array(xyz[i]) - np.array(xyz[j])))
                    )
                    cm[j][i] = cm[i][j]

        if randomize:
            return self.randomize(cm)
        else:
            return cm

[docs]    def randomize(coulomb_matrix):
        """Randomizes the Coulomb Matrix as described in (1).

        (1) Montavon, G.; Hansen, K.; Fazli, S.; Rupp, M.; Biegler, F.; Ziehe, A.;
        Tkatchenko, A.; Lilienfeld, A.; Müller, K.-R.
        Learning Invariant Representations of Molecules for Atomization Energy Prediction.
        In Advances in Neural Information Processing Systems;
        Pereira, F., Burges, C. J. C., Bottou, L., Weinberger, K. Q., Eds.;
        Curran Associates, Inc., 2012; Vol. 25.

        Args:
            coulomb_matrix (2D list): The Coulomb Matrix as generated in :func:`~Qubit.descriptors.CoulombMatrix.generate`

        Returns:
            2D list: The randomized Coulomb Matrix.
        """
        # calculate the row normals of a coulomb matrix
        row_norms = np.array([np.linalg.norm(row)
                              for row in coulomb_matrix], dtype=float)

        # draw random numbers from a normal distribution
        rand = np.random.RandomState()
        n = rand.normal(size=row_norms.size)

        # calcualte the permutation
        p = np.argsort(row_norms + n)

        # Permute row wise then coulomn wise
        return coulomb_matrix[p][:, p]

[docs]    def pad_matrix(matrix, size):
        """Applies padding to a matrix.

        You can use this function to scale a matrix to a given size.
        The empty space is filled with zeros.

        Example: Can be used to pad the Coulomb Matrix.

        Args:
            matrix (2D np.array): Matrix to pad in a nested list format.
            size (int): The size to scale the matrix to.

        Returns:
            ndarray: The padded matrix.
        """
        if isinstance(matrix, list):
            matrix = np.array(list)

        # calculate the wished size
        size = size - matrix.shape[0]

        # confirm the size isn't less than the default matrix size
        if size <= 0:
            m = matrix
            warnings.warn(
                "Trying to reduce the matrix size, default matrix size has been returned!",
                Warning,
            )
        else:
            # pad the matrix
            m = np.pad(matrix, (0, size))
        return m

[docs]    def normalize(self, coulomb_matrix, phi=1, slope=0.7, negative_dimensions=0, positive_dimension=0):
        """Normalizes the Coulomb Matrix by tensorizing it. May require padding.
        This method is an adaption from (1).

        (1) Montavon, G.; Hansen, K.; Fazli, S.; Rupp, M.; Biegler, F.; Ziehe, A.;
        Tkatchenko, A.; Lilienfeld, A.; Müller, K.-R.
        Learning Invariant Representations of Molecules for Atomization Energy Prediction.
        In Advances in Neural Information Processing Systems;
        Pereira, F., Burges, C. J. C., Bottou, L., Weinberger, K. Q., Eds.;
        Curran Associates, Inc., 2012; Vol. 25.


        Args:
            coulomb_matrix (2D list): The Coulomb Matrix.
            phi (int, optional): Equivalent to an offset. Defaults to 1.
            slope (float, optional): The slope of the binarization function. Defaults to 0.7.
            negative_dimensions (int, optional): The amount of negative dimensions describing the tensor. Defaults to 0.
            positive_dimension (int, optional): The amount of positive dimensions describing the tensor. Defaults to 0.

        Returns:
            2D list: Tensorized Coulomb Matrix.
        """
        tensors = []
        cm = np.array(coulomb_matrix)

        # generate negative layers
        for i in range(negative_dimensions):
            tensor = np.empty([cm.shape[0], cm.shape[1]])
            for iy, y in enumerate(coulomb_matrix):
                for ix, x in enumerate(y):
                    tensor[ix, iy] = int(round((
                        1/2)+((1/2)*math.tanh(((x-(i*phi))/phi)*slope)), 0))
            tensors.append(tensor)

        # generate base layer
        tensor = np.empty([cm.shape[0], cm.shape[1]])
        for iy, y in enumerate(coulomb_matrix):
            for ix, x in enumerate(y):
                tensor[ix, iy] = int(round((1/2)+((1/2)*math.tanh((x/phi)*slope)), 0))
        tensors.append(tensor)

        # generate negapositive layers
        for i in range(positive_dimension):
            tensor = np.empty([cm.shape[0], cm.shape[1]])
            for iy, y in enumerate(coulomb_matrix):
                for ix, x in enumerate(y):
                    tensor[ix, iy] = int(round((
                        1/2)+((1/2)*math.tanh(((x+(i*phi))/phi)*slope)), 0))
            tensors.append(tensor)
        return np.array(tensors)