Source code for matador.utils.chem_utils

# coding: utf-8
# Distributed under the terms of the MIT License.

""" This submodule defines some useful chemical functions and
constants, with a focus on battery materials.

"""


import copy
import warnings

import numpy as np
from matador.data.constants import * # noqa

EPS = 1e-8


[docs]def get_iupac_ordering(): """ Stub for implementing IUPAC chemical ordering in formulae. """ raise NotImplementedError
[docs]def get_periodic_table(): """ Return some periodic table macros. """ periodic_table = dict() periodic_table['I'] = ['Li', 'Na', 'K', 'Rb', 'Cs', 'Fr'] periodic_table['II'] = ['Be', 'Mg', 'Ca', 'Sr', 'Ba', 'Ra'] periodic_table['III'] = ['B', 'Al', 'Ga', 'In', 'Tl'] periodic_table['IV'] = ['C', 'Si', 'Ge', 'Sn', 'Pb'] periodic_table['V'] = ['N', 'P', 'As', 'Sb', 'Bi'] periodic_table['VI'] = ['O', 'S', 'Se', 'Te', 'Po'] periodic_table['VII'] = ['F', 'Cl', 'Br', 'I', 'At'] periodic_table['Tran'] = ['Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg'] periodic_table['Lan'] = ['La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu'] periodic_table['Act'] = ['Ac', 'Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr'] periodic_table['X'] = [elem for group in periodic_table for elem in periodic_table[group]] return periodic_table
[docs]def get_molar_mass(elem): """ Returns molar mass of chosen element. """ import periodictable return periodictable.elements.symbol(elem).mass
[docs]def get_atomic_number(elem): """ Returns atomic number of chosen element. """ import periodictable return periodictable.elements.symbol(elem).number
[docs]def get_atomic_symbol(atomic_number): """ Returns elemental symbol from atomic number. """ import periodictable return periodictable.elements[atomic_number].symbol
[docs]def get_concentration(doc, elements, include_end=False): """ Returns x for A_x B_{1-x} or x,y for A_x B_y C_z, (x+y+z=1). Parameters: doc (list/dict): structure to evaluate OR matador-style stoichiometry. elements (list): list of element symbols to enforce ordering. Keyword arguments: include_end (bool): whether or not to return the final value, i.e. [x, y, z] rather than [x, y] in the above. Returns: list of float: concentrations of elements in given order. """ if isinstance(doc, dict): if doc.get('stoichiometry') is None: raise RuntimeError('No stoichiometry found.') stoich = doc['stoichiometry'] else: stoich = doc concs = [0.0] * (len(elements) - bool(not include_end)) for _, elem in enumerate(stoich): if (include_end and elem[0] in elements) or (not include_end and elem[0] in elements[:-1]): concs[elements.index(elem[0])] = elem[1] / float(get_atoms_per_fu(doc)) return concs
[docs]def get_num_intercalated(cursor): """ Return array of the number of intercalated atoms per host atom from a list of structures, of type defined by the first entry in the structures' concentration vectors. Parameters: cursor (list of dict): structures to evaluate. Returns: ndarray: number of intercalated ions in each structure. """ from .cursor_utils import get_array_from_cursor x = np.zeros((len(cursor))) comps = get_array_from_cursor(cursor, 'concentration') for idx, comp in enumerate(comps): if len(comp) > 1: comp = comp[0] if 1 - comp == 0: x[idx] = np.NaN else: x[idx] = comp / (1 - comp) return x
[docs]def get_binary_grav_capacities(x, m_B): """ Returns capacity in mAh/g from x/y in A_x B_y and m_B in a.m.u. """ x = np.array(x) if m_B != 0: return x * FARADAY_CONSTANT_Cpermol * Cperg_to_mAhperg / m_B return float('NaN')
[docs]def get_generic_grav_capacity(concs, elements): """ Returns gravimetric capacity of <elements[0]> in mAh/g of matador doc. """ tmp_concs = np.array(concs, copy=True) # if no Li, capacity = 0... # tmp_concs /= np.min(concs) x = tmp_concs[0] if x == 0: return 0.0 masses = dict() m_B = 0 for elem in elements: masses[elem] = get_molar_mass(elem) for ind, elem in enumerate(elements): if ind == 0: continue else: m_B += masses[elem] * tmp_concs[ind] Q = get_binary_grav_capacities(x, m_B) return Q
[docs]def get_binary_volumetric_capacity(initial_doc, final_doc): """ For initial (delithiated/sodiated) (single element) structure and final (maximally charged) binary structure, calculate the volumetric capacity. Parameters: initial_doc (dict): matador doc of delithiated phase final_doc (dict): matador doc of maximally lithiated phase Returns: volumetric_capacity (float): capacity in mAh/cm^3. """ assert len(initial_doc['stoichiometry']) == 1 assert len(final_doc['stoichiometry']) == 2 for _ion in final_doc['stoichiometry']: if _ion[0] != initial_doc['stoichiometry'][0][0]: ion = _ion[0] for species in final_doc['stoichiometry']: if species[0] == ion: num_ion = species[1] else: num_B = species[1] num_ions_per_initial_fu = num_ion / num_B volume_per_fu_cm3 = initial_doc['cell_volume'] * ANGSTROM_CUBED_TO_CENTIMETRE_CUBED / initial_doc['num_fu'] return (num_ions_per_initial_fu / volume_per_fu_cm3) * (ELECTRON_CHARGE * Cperg_to_mAhperg)
[docs]def get_atoms_per_fu(doc): """ Calculate and return the number of atoms per formula unit. Parameters: doc (list/dict): structure to evaluate OR matador-style stoichiometry. """ if 'stoichiometry' in doc: return sum([elem[1] for elem in doc['stoichiometry']]) return sum([elem[1] for elem in doc])
[docs]def get_formation_energy(chempots, doc, energy_key='enthalpy_per_atom'): """ From given chemical potentials, calculate the simplest formation energy per atom of the desired document. Note: recursive_get(doc, energy_key) MUST return an energy per atom for the target doc and the chemical potentials. Parameters: chempots (list of dict): list of chempot structures, must be unique. doc (dict): structure to evaluate. Keyword arguments: energy_key (str or list): name of energy field to use to calculate formation energy. Can use a list of keys/subkeys/indices to query nested dicts with `matador.utils.cursor_utils.recursive_get`. Returns: float: formation energy per atom. """ from matador.utils.cursor_utils import recursive_get # warn user if per_atom energy is not found if isinstance(energy_key, (list, tuple)) and not any(['per_atom' in str(key) for key in energy_key]) \ or (isinstance(energy_key, str) and 'per_atom' not in energy_key): warnings.warn('Requested energy key {} in get_formation_energy may' ' not be per atom, if so results will be incorrect.'.format(energy_key)) try: formation = recursive_get(doc, energy_key) except KeyError as exc: print('Doc {} missing key {}'.format(doc['source'], energy_key)) raise exc # see if num chempots has been set and try to reuse it if 'num_chempots' in doc: num_chempots = doc['num_chempots'] else: num_chempots = get_number_of_chempots(doc, chempots) num_atoms_per_fu = get_atoms_per_fu(doc) for ind, mu in enumerate(chempots): num_atoms_per_mu = get_atoms_per_fu(mu) try: mu_energy = recursive_get(mu, energy_key) except KeyError as exc: raise exc('Chemical potential {} missing key {}'.format(mu['source'], energy_key)) formation -= mu_energy * num_chempots[ind] * num_atoms_per_mu / num_atoms_per_fu return formation
[docs]def get_number_of_chempots(stoich, chempot_stoichs, precision=5): """ Return the required number of each (arbitrary) chemical potentials to construct one formula unit of the input stoichiometry. Uses least-squares as implemented by `numpy.linalg.lstsq` and rounds the output precision based on the `precision` kwarg. Parameters: stoich (list/dict): matador-style stoichiometry, e.g. [['Li', 3], ['P', 1]], or the full document. chempot_stoichs (list/dict): list of stoichiometries of the input chemical potentials, or the full documents. Keyword arguments: precision (int/None): number of decimal places to round answer to. None maintains the precision from `numpy.linalg.lstsq`. Returns: list: number of each chemical potential required to create 1 formula unit. Raises: RuntimeError: if the stoichiometry provided cannot be created with the given chemical potentials. """ # need to support dict, list and Crystal inputs try: stoich = stoich['stoichiometry'] except (TypeError, ValueError): pass try: chempot_stoichs = [mu['stoichiometry'] for mu in chempot_stoichs] except (TypeError, ValueError): pass # find all elements present in the chemical potentials elements = set() for mu in chempot_stoichs: for elem, _ in mu: elements.add(elem) elements = sorted(list(elements)) chempot_matrix = np.asarray([get_padded_composition(mu, elements) for mu in chempot_stoichs]) try: solution = np.asarray(get_padded_composition(stoich, elements)) except RuntimeError: raise RuntimeError('Stoichiometry {} could not be created from chemical potentials {}: missing chempot' .format(stoich, chempot_stoichs)) try: num_chempots, residuals, _, _ = np.linalg.lstsq(chempot_matrix.T, solution, rcond=None) except np.linalg.LinAlgError: raise RuntimeError('Stoichiometry {} could not be created from chemical potentials {}: numpy LinAlg error' .format(stoich, chempot_stoichs)) # check if lstsq actually found a "solution" if np.abs(np.sum(residuals)) > EPS: raise RuntimeError('Stoichiometry {} could not be created from chemical potentials {}' .format(stoich, chempot_stoichs)) # round output array based on user-specified precision if precision is not None: num_chempots[np.where(np.abs(num_chempots) < EPS)] = 0.0 for i, val in enumerate(num_chempots): if np.abs(val - round(val, precision)) < EPS: num_chempots[i] = round(val, precision) # check for sensible numbers in output if np.min(np.sign(num_chempots)) == -1: raise RuntimeError('Stoichiometry {} could not be created from chemical potentials {}' .format(stoich, chempot_stoichs)) return num_chempots.tolist()
[docs]def get_stoich(atom_types): """ Return integer stoichiometry from atom_types list. Parameters: atom_types (list): list of element symbols of each atom. Returns: list: matador-style stoichiometry, e.g. [['Li', 1], ['P', 2]]. """ from collections import defaultdict from math import gcd stoich = defaultdict(float) for atom in atom_types: if atom not in stoich: stoich[atom] = 0 stoich[atom] += 1 gcd_val = 0 for atom in atom_types: if gcd_val == 0: gcd_val = stoich[atom] else: gcd_val = gcd(stoich[atom], gcd_val) # convert stoichiometry to tuple for fryan temp_stoich = [] try: for key, value in stoich.items(): if float(value) / gcd_val % 1 != 0: temp_stoich.append([key, float(value) / gcd_val]) else: temp_stoich.append([key, value / gcd_val]) except AttributeError: for key, value in stoich.iteritems(): if float(value) / gcd_val % 1 != 0: temp_stoich.append([key, float(value) / gcd_val]) else: temp_stoich.append([key, value / gcd_val]) return sorted(temp_stoich)
[docs]def get_padded_composition(stoichiometry, elements): """ Return a list that contains how many of each species in elements exists in the given stoichiometry. e.g. for [['Li', 2], ['O', 1]] with elements ['O', 'Li', 'Ba'], this function will return [1, 2, 0]. Parameters: stoichiometry (list): matador-style stoichiometry, as above. elements (list): order of element labels to pick out. """ composition = [] for element in elements: if not isinstance(element, str): raise RuntimeError('Found invalid element symbol {}'.format(element)) for species in stoichiometry: if not isinstance(species, list): raise RuntimeError('Found invalid stoichiometry {}'.format(stoichiometry)) if species[0] == element: composition.append(species[1]) break elif species[0] not in elements: raise RuntimeError('Extra element {} in stoichiometry'.format(species[0])) else: composition.append(0) return composition
[docs]def get_ratios_from_stoichiometry(stoichiometry): """ Get a dictionary of pairwise atomic ratios. Parameters: stoichiometry (list): matador-style stoichiometry. Returns: dict: dictionary of pairwise ratios, e.g. for K8SnP4, ratio_dict = {'KSn': 8, 'KP': 2, 'SnP': 0.25, 'SnK': 0.125, 'PSn': 4, 'PK': 0.5}. """ ratio_dict = dict() for _, elem_i in enumerate(stoichiometry): for _, elem_j in enumerate(stoichiometry): if elem_j[0] != elem_i[0]: ratio_dict[elem_i[0]+elem_j[0]] = round(float(elem_i[1]) / elem_j[1], 3) return ratio_dict
[docs]def get_stoich_from_formula(formula: str, sort=True): """ Convert formula string, e.g. Li2TiP4 into a matador-style stoichiometry, e.g. [['Li', 2], ['Ti', 1], ['P', 4]]. Parameters: formula (str): chemical formula of compound Returns: list: sorted matador-style stoichiometry. """ from math import gcd import re parsed_elements = parse_element_string(formula, stoich=True) elements = [] fraction = [] for i, _ in enumerate(parsed_elements): if not bool(re.search(r'\d', parsed_elements[i])): elements.append(parsed_elements[i]) try: fraction.append(float(parsed_elements[i + 1])) except(ValueError, IndexError): fraction.append(1.0) gcd_val = 0 for frac in fraction: if gcd_val == 0: gcd_val = frac else: gcd_val = gcd(int(frac), int(gcd_val)) fraction = np.asarray(fraction) fraction /= gcd_val stoich = [[elements[ind], fraction[ind]] for ind, _ in enumerate(elements)] if sort: return sorted(stoich) return stoich
[docs]def parse_element_string(elements_str, stoich=False): """ Parse element query string with macros. Has to parse braces too, and throw an error if brackets are unmatched. e.g. Parameters: '[VII][Fe,Ru,Os][I]' Returns: ['[VII]', '[Fe,Ru,Os]', '[I]'] e.g.2 Parameters: '[VII]2[Fe,Ru,Os][I]' Returns: ['[VII]2', '[Fe,Ru,Os]', '[I]'] Parameters: elements_str: str, chemical formula, including macros. Keyword arguments: stoich: bool, parse as a stoichiometry, i.e. check for numbers Raises: RuntimeError: if the composition contains unmatched brackets. Returns: list: split list of elements contained in input """ import re valid = False for char in elements_str: if char not in ['[', ']', '{', '}', ',', ':'] and not char.isalnum(): raise RuntimeError('Illegal character {} detected in query.'.format(char)) valid = False for char in elements_str: if char.isupper(): valid = True break if not valid: raise RuntimeError('Composition must contain at least one upper case character.') elements = [elem for elem in re.split(r'([A-Z][a-z]*)', elements_str) if elem] if stoich: tmp_stoich = elements for ind, strng in enumerate(elements): if not any(char.isdigit() for char in strng): tmp_stoich[ind] = [strng] else: tmp_stoich[ind] = [elem for elem in re.split(r'([0-9]+)', strng) if elem] elements = [item for sublist in tmp_stoich for item in sublist] # split macros while '[' in elements or '{' in elements or '][' in elements or '}{' in elements or ']{' in elements or '}[' in elements: tmp_stoich = list(elements) cleaned = True while cleaned: for ind, tmp in enumerate(tmp_stoich): if tmp == '][': del tmp_stoich[ind] tmp_stoich.insert(ind, '[') tmp_stoich.insert(ind, ']') cleaned = True elif tmp == '}{': del tmp_stoich[ind] tmp_stoich.insert(ind, '{') tmp_stoich.insert(ind, '}') cleaned = True elif tmp == ']{': del tmp_stoich[ind] tmp_stoich.insert(ind, '{') tmp_stoich.insert(ind, ']') cleaned = True elif tmp == '}[': del tmp_stoich[ind] tmp_stoich.insert(ind, '[') tmp_stoich.insert(ind, '}') cleaned = True elif ind == len(tmp_stoich) - 1: cleaned = False for ind, tmp in enumerate(tmp_stoich): if tmp == '[': end_bracket = False while not end_bracket: if tmp_stoich[ind + 1] == ']': end_bracket = True tmp_stoich[ind] += tmp_stoich[ind + 1] del tmp_stoich[ind + 1] for ind, tmp in enumerate(tmp_stoich): if tmp == '{': end_bracket = False while not end_bracket: if tmp_stoich[ind + 1] == '}': end_bracket = True tmp_stoich[ind] += tmp_stoich[ind + 1] del tmp_stoich[ind + 1] if ']' in tmp_stoich: tmp_stoich.remove(']') if '}' in tmp_stoich: tmp_stoich.remove('}') if '' in tmp_stoich: tmp_stoich.remove('') elements = tmp_stoich for elem in elements: if '}[' in elem or ']{' in elem: raise RuntimeError('Unmatched brackets in query string') return elements
[docs]def get_root_source(source): """ Get the main file source from a doc's source list. Parameters: source (str/list/dict): contents of doc['source'] or the doc itself. Returns: str: "root" filename, e.g. if source = ['KP.cell', 'KP.param', 'KP_specific_structure.res'] then root = 'KP_specific_structure'. """ try: sources = copy.deepcopy(source['source']) except (KeyError, TypeError): sources = copy.deepcopy(source) if isinstance(source, str): return source src_list = set() for src in sources: if any([src.endswith(ext) for ext in ['.res', '.castep', '.history', '.history.gz', '.phonon', '.phonon_dos', '.bands', '.cif']]): src_list.add('.'.join(src.split('/')[-1].split('.')[0:-1])) elif 'OQMD' in src.upper(): src_list.add(src) elif 'MP-' in src.upper(): src_list.add(src) elif len(sources) == 1: src_list.add(src) elif src == 'command_line': src_list.add('command line') if len(src_list) > 1: raise RuntimeError('Ambiguous root source {}'.format(sources)) if len(src_list) < 1: raise RuntimeError('Unable to find root source from {}'.format(sources)) return list(src_list)[0]
[docs]def get_formula_from_stoich(stoich, elements=None, tex=False, sort=True, latex_sub_style=''): """ Get the chemical formula of a structure from its matador stoichiometry. Parameters: stoich (list): matador-style stoichiometry. Keyword arguments: elements (list): list of element symbols to enforce order. tex (bool): whether to print a LaTeX-compatibile string. latex_sub_style (str): a string to wrap subscripts in, e.g. r"\\mathrm" or r"\\text" (default is blank). Returns: str: the string representation of the chemical formula. """ form = '' if not isinstance(stoich, list): stoich = stoich.tolist() if sort: stoich = sorted(stoich) if elements is not None: for targ_elem in elements: for elem in stoich: if elem[0] == targ_elem: if elem[1] == 1: form += elem[0] elif int(elem[1]) != 0: if tex: if elem[1] % 1 == 0: elem[1] = int(elem[1]) form += r'{}$_{}{{{}}}$'.format(elem[0], latex_sub_style, elem[1]) else: form += elem[0] + str(int(elem[1])) assert form != '' else: for elem in stoich: if elem[1] == 1: form += elem[0] elif int(elem[1]) != 0: if tex: if elem[1] % 1 == 0: elem[1] = int(elem[1]) form += '{}$_{}{{{}}}$'.format(elem[0], latex_sub_style, elem[1]) else: form += elem[0] + str(int(elem[1])) return form