Source code for matador.hull.phase_diagram

# coding: utf-8
# Distributed under the terms of the MIT License.

""" This submodule implements the base PhaseDiagram creator that interfaces
with QueryConvexHull and EnsembleHull.


from traceback import print_exc
import bisect

import scipy.spatial
import numpy as np

from matador.utils.hull_utils import (
from matador.utils.chem_utils import get_formula_from_stoich
from matador.utils.cursor_utils import (

EPS = 1e-12

[docs]class PhaseDiagram: """This class encapsulates the actual phase data, e.g. the actual energy and compositions found to be stable. Attributes: structures (numpy.ndarray): the array passed to init used to make the hull, with the first (num_species-1) columns containing normalised concentrations, and the final column containing formation energy. convex_hull (scipy.spatial.ConvexHull): the actual convex hull returned by SciPy. formation_key (list): index/key specification of formation energy per atom from top level of each document. """ def __init__(self, cursor, formation_key, dimension): """Compute the convex hull of data passed, to retrieve hull distances and thus stable structures. Parameters: cursor (list[dict]): list of matador documents to make phase diagram from. formation_key (str or list): location of the formation energy inside each document, either a single key or iterable of keys to use with `recursive_get`. """ self._dimension = dimension self.cursor = cursor self.formation_key = formation_key structures = np.hstack( ( get_array_from_cursor(cursor, "concentration").reshape( len(cursor), dimension - 1 ), get_array_from_cursor(cursor, self.formation_key).reshape( len(cursor), 1 ), ) ) # define self._structure_slice as the filtered array of points actually used to create the convex hull # which can include/exclude points from the passed structures. This array is the one indexed by # vertices/simplices in ConvexHull if self._dimension == 3: # add a point "above" the hull # for simple removal of extraneous vertices (e.g. top of 2D hull) dummy_point = [0.333, 0.333, 1e5] # if ternary, use all structures, not just those with negative eform for compatibility reasons self._structure_slice = np.vstack((structures, dummy_point)) else: # filter out those with positive formation energy, to reduce expense computing hull self._structure_slice = structures[np.where(structures[:, -1] <= 0 + EPS)] # filter out "duplicates" in _structure_slice # this prevents breakages if no structures are on the hull and chempots are duplicated # but it might be faster to hardcode this case individually self._structure_slice = np.unique(self._structure_slice, axis=0) # if we only have the chempots (or worse) with negative formation energy, don't even make the hull if len(self._structure_slice) <= dimension: if len(self._structure_slice) < dimension: raise RuntimeError( "No chemical potentials on hull... either mysterious use of custom chempots, or worry!" ) self.convex_hull = FakeHull() else: try: self.convex_hull = scipy.spatial.ConvexHull(self._structure_slice) except scipy.spatial.qhull.QhullError: print(self._structure_slice) print("Error with QHull, plotting formation energies only...") print_exc() self.convex_hull = FakeHull() # remove vertices that have positive formation energy filtered_vertices = [ vertex for vertex in self.convex_hull.vertices if self._structure_slice[vertex, -1] <= 0 + EPS ] bad_simplices = set() for ind, simplex in enumerate(self.convex_hull.simplices): for vertex in simplex: if vertex not in filtered_vertices: bad_simplices.add(ind) filtered_simplices = [ simplex for ind, simplex in enumerate(self.convex_hull.simplices) if ind not in bad_simplices ] self.convex_hull = FakeHull() self.convex_hull.points = self._structure_slice self.convex_hull.vertices = list(filtered_vertices) self.convex_hull.simplices = list(filtered_simplices) self.hull_dist = self.get_hull_distances(structures, precompute=True) set_cursor_from_array(self.cursor, self.hull_dist, "hull_distance") self.structures = structures self.stable_structures = [ doc for doc in self.cursor if doc["hull_distance"] < EPS ] def __str__(self): """Print underlying phase diagram.""" return display_results( self.cursor, hull=True, colour=False, energy_key=self.formation_key, sort=False, return_str=True, )
[docs] def get_hull_distances(self, structures, precompute=False, **kwargs): """Returns array of distances to pre-computed binary or ternary hull, from array containing concentrations and energies. Parameters: structures (numpy.ndarray): N x n array of concentrations and enthalpies for N structures, with up to 2 columns of concentrations and the last column containing the structure's formation enthalpy. Keyword arguments: precompute (bool): whether or not to bootstrap hull distances from previously computed values at the same stoichiometry. Returns: numpy.ndarray: N-dim array storing distances to the hull for N structures, """ if precompute: # dict with formula keys, containing tuple of pre-computed enthalpy/atom and hull distance cached_formula_dists = dict() cache_hits = 0 cache_misses = 0 if isinstance(structures, list): structures = np.asarray(structures) # if only chem pots on hull, dist = energy if len(self._structure_slice) == self._dimension: hull_dist = np.ones((len(structures))) hull_dist = structures[:, -1] # if binary hull, do binary search elif self._dimension == 2: tie_line_comp = self._structure_slice[self.convex_hull.vertices, 0] tie_line_energy = self._structure_slice[self.convex_hull.vertices, -1] tie_line_comp = np.asarray(tie_line_comp) tie_line_energy = tie_line_energy[np.argsort(tie_line_comp)] tie_line_comp = tie_line_comp[np.argsort(tie_line_comp)] hull_dist = np.empty((len(structures))) hull_dist.fill(np.nan) if precompute: for ind, _ in enumerate(structures): formula = get_formula_from_stoich( self.cursor[ind]["stoichiometry"], sort=True, tex=False ) if formula in cached_formula_dists: hull_dist[ind] = ( structures[ind, -1] - cached_formula_dists[formula][0] + cached_formula_dists[formula][1] ) cache_hits += 1 else: i = bisect.bisect_left(tie_line_comp, structures[ind, 0]) gradient, intercept = vertices2line( [ [tie_line_comp[i - 1], tie_line_energy[i - 1]], [tie_line_comp[i], tie_line_energy[i]], ] ) # calculate hull_dist hull_dist[ind] = structures[ind, -1] - ( gradient * structures[ind, 0] + intercept ) cached_formula_dists[formula] = ( structures[ind, -1], hull_dist[ind], ) cache_misses += 1 else: for ind, _ in enumerate(structures): i = bisect.bisect_left(tie_line_comp, structures[ind, 0]) gradient, intercept = vertices2line( [ [tie_line_comp[i - 1], tie_line_energy[i - 1]], [tie_line_comp[i], tie_line_energy[i]], ] ) # calculate hull_dist hull_dist[ind] = structures[ind, -1] - ( gradient * structures[ind, 0] + intercept ) # if ternary, use barycentric coords elif self._dimension == 3: # loop through structures and find which plane they correspond to # using barycentric coordinates, if a formula has already been # computed then calculate delta relative to that and skip self.convex_hull.planes = [ [self._structure_slice[vertex] for vertex in simplex] for simplex in self.convex_hull.simplices ] structures_finished = [False] * len(structures) hull_dist = np.empty(len(structures)) hull_dist.fill(np.nan) cart_planes_inv = [] planes_height_fn = [] for ind, plane in enumerate(self.convex_hull.planes): cart_planes = barycentric2cart(plane).T cart_planes[-1, :] = 1 # if projection of triangle in 2D is a line, do binary search if np.linalg.det(cart_planes) == 0: cart_planes_inv.append(None) planes_height_fn.append(None) else: cart_planes_inv.append(np.linalg.inv(cart_planes)) planes_height_fn.append(vertices2plane(plane)) for idx, structure in enumerate(structures): for ind, plane in enumerate(self.convex_hull.planes): if cart_planes_inv[ind] is None: continue if ( precompute and get_formula_from_stoich( self.cursor[idx]["stoichiometry"], sort=True, tex=False ) in cached_formula_dists ): formula = get_formula_from_stoich( self.cursor[idx]["stoichiometry"], sort=True, tex=False ) if formula in cached_formula_dists: cache_hits += 1 hull_dist[idx] = ( structures[idx, -1] - cached_formula_dists[formula][0] + cached_formula_dists[formula][1] ) structures_finished[idx] = True elif is_point_in_triangle( structure, cart_planes_inv[ind], preprocessed_triangle=True ): structures_finished[idx] = True hull_dist[idx] = planes_height_fn[ind](structure) if precompute: cached_formula_dists[ get_formula_from_stoich( self.cursor[idx]["stoichiometry"], sort=True, tex=False, ) ] = (structure[-1], hull_dist[idx]) cache_misses += 1 break # mask values very close to 0 with 0 hull_dist[np.where(np.abs(hull_dist) < EPS)] = 0 failed_structures = [] for ind, structure in enumerate(structures_finished): if not structure: failed_structures.append(ind) if failed_structures: raise RuntimeError( "There were issues calculating the hull distance for {} structures.".format( len(failed_structures) ) ) # otherwise, set to zero until proper N-d distance can be implemented else: raise NotImplementedError( "Unable to compute {dimension}-dimensional hull distances (yet) " "consider breaking your phase diagram into a pseudo-ternary or pseudo-binary system." ) if np.isnan(hull_dist).any(): raise RuntimeError( f"Some hull distances failed, found NaNs at {np.isnan(hull_dist, where=True)}" ) return hull_dist