# coding: utf-8
"""
This module implements the `CurvatureAnalyze` class to perform curvature
analyses on molecular or periodic structures.
"""
import numpy as np
import pandas as pd
from pymatgen.core import Molecule, Structure
from pymatgen.core.bonds import obtain_all_bond_lengths
from .core import VertexAtom, TrivalentVertex, POAV1, POAV2
__author__ = "Germain Salvato-Vallverdu"
__copyright__ = "University of Pau and Pays Adour"
__email__ = "germain.vallverdu@univ-pau.fr"
__all__ = ["CurvatureAnalyzer"]
[docs]class CurvatureAnalyzer:
""" This class provides helpful methods to analyze the local curvature
on all atoms of a given structure. The structure is either a molecule or
a periodic structure. Once the structure is read, the class determines the
connectivity of the structure in order to define all vertices. The
connectivity is defined on a distance criterion.
"""
def __init__(self, structure, bond_tol=0.2, rcut=2.5, bond_order=None):
""" The class needs a pymatgen.Structure or pymatgen.Molecule object as
first argument. The other arguments are used to defined if two atoms are
bonded or not.
Args:
structure (Structure, Molecule): A Structure or Molecule pymatgen
objects
bond_tol (float): Tolerance used to determine if two atoms are
bonded. Look at `pymatgen.core.CovalentBond.is_bonded`.
rcut (float): Cutoff distance in case the bond is not not known
bond_order (dict): Not yet implemented
"""
if isinstance(structure, (Molecule, Structure)):
self.structure = structure
else:
raise TypeError("structure must a Molecule or Structure pymatgen"
" object. type(structure) is: " + str(type(structure)))
self.bond_tol = bond_tol
self.rcut = rcut
self.bond_order = bond_order
# compute distance matrix one time. You must call only one time
# structure.distance_matrix to save computational time
self._distance_matrix = self.structure.distance_matrix
# look for bonds and set vertices
self._vertices = []
self._bonds = set()
self._vertices_idx = []
self._get_vertex()
# fill a DataFrame with datas
self._data = pd.DataFrame([])
self._compute_data()
@property
def vertices(self):
""" List of vertices associated to each atom of the molecule """
return self._vertices
@property
def bonds(self):
""" Set of tuples of bonded atom index """
return self._bonds
@property
def vertices_idx(self):
r""" List of tuples of the indexes of atoms in each vetex. The first
index is atom A, the following are atoms of :math:`\star(A)`. """
return self._vertices_idx
@property
def data(self):
"""
Return a Data Frame that contains all the geometric and hybridization
data.
"""
return self._data
@property
def distance_matrix(self):
""" Returns the distance matrix between all atoms. For periodic
structures, this returns the nearest image distances. """
return self._distance_matrix
def _get_vertex(self):
""" Look for all vertex defined as atoms bonded to at least
3 neighbors and set up a list of VertexAtom object."""
vertices = list()
vertices_idx = list()
bonds = set()
errors = set()
for isite, site_i in enumerate(self.structure):
atom_A = site_i.coords
star_a = list()
vertex_idx = [isite]
for jsite, site_j in enumerate(self.structure):
if isite == jsite:
continue
# check if i and j are bonded
distance = self._distance_matrix[isite, jsite]
bonded = False
try:
# look for bond length database of pymatgen
# equivalent to CovalentBonds.is_bonded but avoid to compute
# two times the bond length
ref_distances = obtain_all_bond_lengths(site_i.specie,
site_j.specie)
# TODO: use ref_distances from a bond order
for rcut in ref_distances.values():
if distance < (1 + self.bond_tol) * rcut:
bonded = True
except ValueError as e:
errors.add(str(e))
bonded = distance <= self.rcut
# increment *(A) if i and j are bonded
if bonded:
star_a.append(site_j.coords)
vertex_idx.append(jsite)
bonds.add(tuple(sorted([isite, jsite])))
# set up VertexAtom objects
if len(star_a) >= 3:
vertices.append(VertexAtom(atom_A, star_a))
vertices_idx.append(tuple(vertex_idx))
else:
vertices.append(None)
vertices_idx.append(tuple(vertex_idx))
self._vertices = vertices
self._vertices_idx = vertices_idx
self._bonds = bonds
if errors:
print("errors\n", "\n".join(errors))
print("Default cutoff of {} was used for the above bond".format(self.rcut))
def _compute_data(self):
""" Compute geometric and hybridation data for all vertex in the
structure and store them in a DataFrame. """
data = list()
nan_array = np.empty(3)
nan_array.fill(np.nan)
for vertex, vertex_idx in zip(self.vertices, self.vertices_idx):
if vertex is None:
vdata = {}
vdata["n_star_A"] = 0
else:
if len(vertex.star_a) == 3:
vertex = TrivalentVertex(vertex.a, vertex.star_a)
try:
poav1 = POAV1(vertex=vertex)
poav2 = POAV2(vertex=vertex)
except ValueError as e:
print("Unable to compute all data.",
vertex.as_dict(radians=False))
print(e)
vdata = {
**poav1.as_dict(
radians=False, include_vertex=True, list_obj=True),
**poav2.as_dict(radians=False, list_obj=True)
}
else:
vdata = vertex.as_dict(radians=False, list_obj=True)
ia = vertex_idx[0]
vdata.update(atom_idx=ia, species=self.structure[ia].specie.symbol)
distances = [self.distance_matrix[ia, j] for j in vertex_idx[1:]]
vdata.update({"ave. neighb. dist.": np.mean(distances)})
data.append(vdata)
self._data = pd.DataFrame(data)
[docs] @staticmethod
def from_file(path, periodic=None):
""" Returns a CurvatureAnalyze object from the structure at the
given path. This method relies on the file format supported with
pymatgen Molecule and Structure classes.
Supported formats for periodic structure include CIF, POSCAR/CONTCAR,
CHGCAR, LOCPOT, vasprun.xml, CSSR, Netcdf and pymatgen’s JSON serialized
structures.
Supported formats for molecule include include xyz, gaussian input
(gjf|g03|g09|com|inp), Gaussian output (.out|and pymatgen’s JSON
serialized molecules.
Args:
path (str): Path to the structure file
periodic (bool): if True, assume that the file correspond to a
periodic structure. Default is None. The method tries to read
the file, first from the Molecule class and second from the
Structure class of pymatgen.
"""
if periodic is None:
# try to read as a molecule
try:
structure = Molecule.from_file(path)
except ValueError as e1:
print("Cannot read file as a molecule.")
# Try to read as a periodic structure
try:
structure = Structure.from_file(path)
except ValueError as e2:
print("Cannot read file as a periodic structure.")
print("Try as a molecule, error:", e1)
print("Try as a structure, error:", e2)
raise ValueError(
"Unable to load structure from file '%s'" % path)
elif periodic:
# Structure object
structure = Structure.from_file(path)
else:
# Molecule object
structure = Molecule.from_file(path)
print("Read structure, done.")
return CurvatureAnalyzer(structure)
[docs] def get_molecular_data(self):
"""
Set up a model data dictionnary that contains species, coordinates and
bonds of the structure. This dictionnary can be used as model data for
further visulization in bio-dash.
"""
# set up json file
model_data = {"atoms": [], "bonds": []}
# structure part
for iat, site in enumerate(self.structure):
name = "%s%d" % (site.specie.symbol, iat + 1)
model_data["atoms"].append({"name": name,
"serial": iat,
"element": site.specie.symbol,
"positions": site.coords.tolist()})
# bonds part
for bond in self.bonds:
iat, jat = bond
model_data["bonds"].append(
{"atom1_index": iat, "atom2_index": jat}
)
return model_data