Source code for matador.fingerprints.pdf

# coding: utf-8
# Distributed under the terms of the MIT License.

""" This submodule defines classes for computing, combining
and convolving pair distribution functions.

"""


from itertools import combinations_with_replacement
import itertools
import copy
from math import ceil, copysign
import time

import numpy as np
import numba

from matador.utils.cell_utils import frac2cart, cart2volume
from matador.utils.cell_utils import standardize_doc_cell
from matador.utils.chem_utils import get_stoich
from matador.fingerprints.fingerprint import Fingerprint, FingerprintFactory


[docs]class PDF(Fingerprint): """ This class implements the calculation and comparison of pair distribution functions. Attributes: r_space (ndarray) : 1-D array containing real space grid gr (ndarray): 1-D array containing total PDF dr (float): real-space grid spacing in Å rmax (float): extent of real-space grid in Å label (str): structure label elem_gr (dict): dict with pairs of element symbol keys, containing 1-D arrays of projected PDFs (if calculated) number_density (float): number density for renormalisation and comparison with other PDFs kwargs (dict): arguments used to create PDF """ def __init__(self, doc, lazy=False, **kwargs): """ Initialise parameters and run PDF (unless lazy=True). Parameters: doc (dict) : matador document to calculate PDF of Keyword Arguments: dr (float) : bin width for PDF (Angstrom) (DEFAULT: 0.01) gaussian_width (float) : width of Gaussian smearing (Angstrom) (DEFAULT: 0.01) num_images (int/str) : number of unit cell images include in PDF calculation (DEFAULT: 'auto') max_num_images (int) : cutoff number of unit cells before crashing (DEFAULT: 50) rmax (float) : maximum distance cutoff for PDF (Angstrom) (DEFAULT: 15) projected (bool) : optionally calculate the element-projected PDF standardize (bool) : standardize cell before calculating PDF lazy (bool) : if True, calculator is not called when initializing PDF object timing (bool) : if True, print the total time taken to calculate the PDF """ prop_defaults = {'dr': 0.01, 'gaussian_width': 0.1, 'rmax': 15, 'num_images': 'auto', 'style': 'smear', 'debug': False, 'timing': False, 'low_mem': False, 'projected': True, 'max_num_images': 50, 'standardize': True} # read and store kwargs self.kwargs = prop_defaults self.kwargs.update({key: kwargs[key] for key in kwargs if kwargs[key] is not None}) # useful data for labelling self.spg = None structure = copy.deepcopy(doc) if self.kwargs.get('standardize'): structure = standardize_doc_cell(structure) self.spg = structure['space_group'] self.stoichiometry = structure.get('stoichiometry', get_stoich(structure['atom_types'])) # private variables self._num_images = self.kwargs.get('num_images') self._lattice = np.asarray(structure['lattice_cart']) self._poscart = np.asarray(frac2cart(structure['lattice_cart'], structure['positions_frac'])).reshape(-1, 3) self._types = structure['atom_types'] self._num_atoms = len(self._poscart) self._volume = cart2volume(self._lattice) self._image_vec = None # public variables self.rmax = self.kwargs.get('rmax') self.number_density = self._num_atoms / self._volume self.dr = self.kwargs.get('dr') self.r_space = None self.gr = None self.elem_gr = None self.label = None if self.kwargs.get('label'): self.label = self.kwargs["label"] elif 'text_id' in structure: self.label = ' '.join(structure['text_id']) if not lazy: if self.kwargs.get('timing'): start = time.time() self.calc_pdf() if self.kwargs.get('timing'): end = time.time() print('PDF calculated in {:.3f} s'.format(end - start))
[docs] def calc_pdf(self): """ Wrapper to calculate PDF with current settings. """ if self._image_vec is None: if self.kwargs.get('debug'): start = time.time() self._set_image_trans_vectors() if self.kwargs.get('debug'): end = time.time() print('Image vectors length = {}'.format(len(self._image_vec))) print('Set image trans vectors in {} s'.format(end - start)) if self.kwargs.get('projected'): if self.kwargs.get('debug'): start = time.time() if self.elem_gr is None: self._calc_projected_pdf() if self.kwargs.get('debug'): end = time.time() print('Calculated projected PDF {} s'.format(end - start)) if self.gr is None: self._calc_unprojected_pdf()
[docs] def calculate(self): """ Alias for `self.calc_pdf`. """ self.calc_pdf()
def _calc_distances(self, poscart, poscart_b=None): """ Calculate PBC distances with cdist. Parameters: poscart (numpy.ndarray): array of absolute atomic coordinates. Keyword Arguments: poscart_b (numpy.ndarray): absolute positions of a second type of atoms, where only A-B distances will be calculated. Returns: numpy.ndarray: pair d_ij matrix with values > rmax < 1e-12 removed. """ from matador.utils.cell_utils import calc_pairwise_distances_pbc return calc_pairwise_distances_pbc(poscart, self._image_vec, self._lattice, self.rmax, filter_zero=True, compress=True, poscart_b=poscart_b, debug=self.kwargs.get('debug')) def _calc_unprojected_pdf(self): """ Wrapper function to calculate distances and output a broadened and normalised PDF. Sets self.gr and self.r_space to G(r) and r respectively. """ if self.elem_gr is not None: self._calc_unprojected_pdf_from_projected() else: distances = self._calc_distances(self._poscart) self.r_space = np.arange(0, self.rmax + self.dr, self.dr) self.gr = self._get_broadened_normalised_pdf(distances, style=self.kwargs.get('style'), gaussian_width=self.kwargs.get('gaussian_width')) def _calc_projected_pdf(self): """ Calculate broadened and normalised element-projected PDF of a matador document. Sets self.elem_gr of e.g. Li2Zn3 to { ('Li', 'Li'): G_{Li-Li}(r), ('Li', 'Zn'): G_{Li-Zn}(r), ('Zn', 'Zn'): G_{Zn-Zn}(r) } """ # initalise dict of element pairs with correct keys style = self.kwargs.get('style') gw = self.kwargs.get('gaussian_width') self.r_space = np.arange(0, self.rmax + self.dr, self.dr) elem_gr = dict() for comb in combinations_with_replacement(set(self._types), 2): elem_gr[tuple(set(comb))] = np.zeros_like(self.r_space) for elem_type in elem_gr: poscart = [self._poscart[i] for i in range(len(self._poscart)) if self._types[i] == elem_type[0]] poscart_b = ([self._poscart[i] for i in range(len(self._poscart)) if self._types[i] == elem_type[1]] if len(elem_type) == 2 else None) distances = self._calc_distances(poscart, poscart_b=poscart_b) elem_gr[elem_type] = (len(elem_type) * self._get_broadened_normalised_pdf(distances, style=style, gaussian_width=gw)) self.elem_gr = elem_gr def _calc_unprojected_pdf_from_projected(self): """" Reconstruct full PDF from projected. """ self.gr = np.zeros_like(self.r_space) for key in self.elem_gr: self.gr += self.elem_gr[key] @staticmethod @numba.njit def _normalize_gr(gr, r_space, dr, num_atoms, number_density): """ Normalise a broadened PDF, ignoring the Gaussian magnitude. """ norm = 4 * np.pi * (r_space + dr)**2 * dr * num_atoms * number_density return np.divide(gr, norm) @staticmethod @numba.njit def _dist_hist(distances, r_space, dr): """ Bin the pair-wise distances according to the radial grid. Parameters: distances (numpy.ndarray): array of pair-wise distances. r_space (numpy.ndarray): radial grid dr (float): bin width. """ hist = np.zeros_like(r_space) for dij in distances: hist[ceil(dij / dr)] += 1 return hist def _get_broadened_normalised_pdf(self, distances, style='smear', gaussian_width=0.1): """ Broaden the values provided as distances and return G(r) and r_space of the normalised PDF. Parameters: distances (numpy.ndarray): distances used to calculate PDF Keyword arguments: style (str): either 'smear' or 'histogram' gaussian_width (float): smearing width in Angstrom^1/2 Returns: gr (np.ndarray): G(r), the PDF of supplied distances """ if style == 'histogram' or gaussian_width == 0: gr = self._dist_hist(distances, self.r_space, self.dr) else: # otherwise do normal smearing hist = self._dist_hist(distances, self.r_space, self.dr) gr = self._broadening_unrolled(hist, self.r_space, gaussian_width) gr = self._normalize_gr(gr, self.r_space, self.dr, self._num_atoms, self.number_density) return gr @staticmethod def _get_image_trans_vectors_auto(lattice, rmax, dr, max_num_images=50): """ Finds all "images" (integer 3-tuples, supercells) that have atoms within rmax + dr + longest LV of the parent lattice. Parameters: lattice (list): list of lattice vectors. rmax (float): maximum radial distance. dr (float): PDF bin width. Keyword arguments: max_num_images (int): the greatest integer multiple of LVs to search out to. Returns: list: list of int 3-tuples of cells, up to rmax or if max_num_images is exceeded, just up to 1 cell away. """ image_vec = set() # find longest combination of single LV's max_trans = 0 _lattice = np.asarray(lattice) products = list(itertools.product(range(-1, 2), repeat=3)) for prod in products: trans = prod @ _lattice length = np.sqrt(np.sum(trans**2)) if length > max_trans: max_trans = length unit_vector_lengths = np.sqrt(np.sum(_lattice**2, axis=1)) limits = [int((dr + rmax + max_trans) / length) for length in unit_vector_lengths] for ind, limit in enumerate(limits): if abs(limit) > max_num_images: limits[ind] = int(copysign(max_num_images, limit)) products = itertools.product(*(range(-lim, lim+1) for lim in limits)) for prod in products: trans = prod @ _lattice length = np.sqrt(np.sum(trans**2)) if length <= rmax + dr + max_trans: image_vec.add(prod) return image_vec def _set_image_trans_vectors(self): """ Sets self._image_vec to a list/generator of image translation vectors, based on self._num_images. If self._num_images is an integer, create all 3-member integer combinations up to the value. If self._num_images is 'auto', create all translation vectors up to length self.rmax. e.g. self._image_vec = [[1, 0, 1], [0, 1, 1], [1, 1, 1]]. """ if self._num_images == 'auto': self._image_vec = self._get_image_trans_vectors_auto( self._lattice, self.rmax, self.dr, max_num_images=self.kwargs.get('max_num_images') ) else: self._image_vec = list(itertools.product(range(-self._num_images, self._num_images + 1), repeat=3))
[docs] def get_sim_distance(self, pdf_b, projected=False): """ Return the similarity between two PDFs. """ return PDFOverlap(self, pdf_b, projected=projected).similarity_distance
[docs] def pdf(self): """ Return G(r) and the r_space for easy plotting. """ try: return (self.r_space, self.gr) except AttributeError: return (None, None)
[docs] def plot_projected_pdf(self, **kwargs): """ Plot projected PDFs. Keyword arguments: keys (list): plot only a subset of projections, e.g. [('K', )]. other_pdfs (list of PDF): other PDFs to plot. """ from matador.plotting.pdf_plotting import plot_projected_pdf plot_projected_pdf(self, **kwargs)
[docs] def plot(self, **kwargs): """ Plot PDFs. Keyword arguments: other_pdfs (list of PDF): other PDFs to add to the plot. """ from matador.plotting.pdf_plotting import plot_pdf plot_pdf(self, **kwargs)
[docs]class PDFFactory(FingerprintFactory): """ This class computes PDF objects from a list of structures, as concurrently as possible. The PDFs are stored under the `pdf` key inside each structure dict. Attributes: nprocs (int): number of concurrent processes. """ default_key = 'pdf' fingerprint = PDF
[docs]class PDFOverlap: """ Calculate the PDFOverlap between two PDF objects, pdf_a and pdf_b, with number density rescaling. Attributes: pdf_a (PDF): first PDF to compare. pdf_b (PDF): second PDF to compare. fine_dr (float): fine grid scale on which to compare. similarity_distance (float): "distance" between PDFs. overlap_int (float): the value of the overlap integral. """ def __init__(self, pdf_a, pdf_b, projected=False): """ Perform the overlap and similarity distance calculations. Parameters: pdf_a (PDF): first PDF to compare. pdf_b (PDF): second PDF to compare. Keyword arguments: projected : if True, attempt to use projected PDFs. """ self.pdf_a = pdf_a self.pdf_b = pdf_b self.fine_dr = self.pdf_a.dr / 2.0 # initialise with large number self.similarity_distance = 1e10 self.overlap_int = 0 if projected: if isinstance(pdf_a.elem_gr, dict) and isinstance(pdf_b.elem_gr, dict): self.projected_pdf_overlap() else: print('Projected PDFs missing, continuing with total.') self.pdf_overlap() else: self.pdf_overlap()
[docs] def pdf_overlap(self): """ Calculate the overlap of two PDFs via a simple meshed sum of their difference. """ self.overlap_int = 0 self.similarity_distance = 1e10 self.fine_space = np.arange(0, self.pdf_a.rmax, self.fine_dr) self.fine_gr_a = np.interp(self.fine_space, self.pdf_a.r_space, self.pdf_a.gr) self.fine_gr_b = np.interp(self.fine_space, self.pdf_b.r_space, self.pdf_b.gr) # scaling factor here is normalising to number density density_rescaling_factor = pow(self.pdf_b.number_density / (self.pdf_a.number_density), 1 / 3) rescale_factor = density_rescaling_factor self.fine_gr_a = np.interp(self.fine_space, rescale_factor * self.fine_space, self.fine_gr_a) self.fine_gr_a = self.fine_gr_a[:int(len(self.fine_space) * 0.75)] self.fine_gr_b = self.fine_gr_b[:int(len(self.fine_space) * 0.75)] self.fine_space = self.fine_space[:int(len(self.fine_space) * 0.75)] overlap_fn = self.fine_gr_a - self.fine_gr_b worst_case_overlap_int = np.trapz(np.abs(self.fine_gr_a), dx=self.pdf_a.dr/2.0) + \ np.trapz(np.abs(self.fine_gr_b), dx=self.pdf_b.dr/2.0) self.overlap_int = np.trapz(np.abs(overlap_fn), dx=self.pdf_a.dr / 2.0) self.similarity_distance = self.overlap_int / worst_case_overlap_int self.overlap_fn = overlap_fn
[docs] def projected_pdf_overlap(self): """ Calculate the overlap of two projected PDFs via a simple meshed sum of their difference. """ self.fine_space = np.arange(0, self.pdf_a.rmax, self.fine_dr) self.overlap_int = 0 self.similarity_distance = 1e10 elems = set(key for key in self.pdf_a.elem_gr) if elems != set(key for key in self.pdf_b.elem_gr): for key in self.pdf_b.elem_gr: elems.add(key) # pad out missing elements with zero PDFs for key in elems: if key not in self.pdf_a.elem_gr: self.pdf_a.elem_gr[key] = np.zeros_like(self.pdf_a.r_space) if key not in self.pdf_b.elem_gr: self.pdf_b.elem_gr[key] = np.zeros_like(self.pdf_b.r_space) self.fine_elem_gr_a, self.fine_elem_gr_b = dict(), dict() for key in elems: self.fine_elem_gr_a[key] = np.interp(self.fine_space, self.pdf_a.r_space, self.pdf_a.elem_gr[key]) self.fine_elem_gr_b[key] = np.interp(self.fine_space, self.pdf_b.r_space, self.pdf_b.elem_gr[key]) # scaling factor here is normalising to number density density_rescaling_factor = pow((self.pdf_b.number_density) / (self.pdf_a.number_density), 1 / 3) rescale_factor = density_rescaling_factor for key in elems: self.fine_elem_gr_a[key] = ( np.interp(self.fine_space, rescale_factor * self.fine_space, self.fine_elem_gr_a[key]) ) for key in elems: self.fine_elem_gr_a[key] = self.fine_elem_gr_a[key][:int(len(self.fine_space) * 0.75)] self.fine_elem_gr_b[key] = self.fine_elem_gr_b[key][:int(len(self.fine_space) * 0.75)] self.fine_space = self.fine_space[:int(len(self.fine_space) * 0.75)] for key in elems: overlap_fn = self.fine_elem_gr_a[key] - self.fine_elem_gr_b[key] worst_case_a = np.trapz(np.abs(self.fine_elem_gr_a[key]), dx=self.pdf_a.dr / 2.0) worst_case_b = np.trapz(np.abs(self.fine_elem_gr_b[key]), dx=self.pdf_b.dr / 2.0) worst_case_overlap = worst_case_a + worst_case_b overlap = np.trapz(np.abs(overlap_fn), dx=self.pdf_a.dr / 2.0) self.overlap_int += overlap / worst_case_overlap self.similarity_distance = self.overlap_int / len(elems)
[docs] def plot_diff(self): """ Simple plot for comparing two PDFs. """ from matador.plotting.pdf_plotting import plot_diff_overlap plot_diff_overlap(self)
[docs] def plot_projected_diff(self): """ Simple plot for comparing two projected PDFs. """ from matador.plotting.pdf_plotting import plot_projected_diff_overlap plot_projected_diff_overlap(self)
[docs]class CombinedProjectedPDF: """ Take some computed PDFs and add them together. """ def __init__(self, pdf_cursor): """ Create CombinedPDF object from list of PDFs. Parameters: pdf_cursor (:obj:`list` of :obj:`PDF`): list of PDF objects to combine. """ self.dr = min([pdf.dr for pdf in pdf_cursor]) self.rmax = min([pdf.rmax for pdf in pdf_cursor]) self.r_space = np.arange(0, self.rmax + self.dr, self.dr) self.label = 'Combined PDF' if any([not pdf.elem_gr for pdf in pdf_cursor]): raise RuntimeError('Projected PDFs not found.') keys = {key for pdf in pdf_cursor for key in pdf.elem_gr} self.elem_gr = {key: np.zeros_like(self.r_space) for key in keys} for pdf in pdf_cursor: for key in pdf.elem_gr: self.elem_gr[key] += np.interp(self.r_space, pdf.r_space, pdf.elem_gr[key])
[docs] def plot_projected_pdf(self): """ Plot the combined PDF. """ from matador.plotting.pdf_plotting import plot_projected_pdf plot_projected_pdf(self)