"""
functions.py
A python package for analyzing and visualizing molecular files. For MolSSI Workshop.
Handles the primary functions
"""
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D # noqa: F401
[docs]def calculate_distance(rA, rB):
"""
Calculate the distance between two points.
Parameters
----------
rA, rB : np.ndarray
The coordinates of each point.
Returns
-------
dist : float
The distance between the two points.
Examples
--------
>>> r1 = np.array([0, 0, 0])
>>> r2 = np.array([0, 0.1, 0])
>>> calculate_distance(r1, r2)
0.1
"""
d = rA - rB
dist = np.linalg.norm(d)
return dist
def open_pdb(f_loc):
# This function reads in a pdb file and returns the atom names and coordinates.
with open(f_loc) as f:
data = f.readlines()
c = []
sym = []
for l in data:
if "ATOM" in l[0:6] or "HETATM" in l[0:6]:
sym.append(l[76:79].strip())
c2 = [float(x) for x in l[30:55].split()]
c.append(c2)
coords = np.array(c)
return sym, coords
atomic_weights = {
"H": 1.00784,
"C": 12.0107,
"N": 14.0067,
"O": 15.999,
"P": 30.973762,
"F": 18.998403,
"Cl": 35.453,
"Br": 79.904,
}
def open_xyz(file_location):
# Open an xyz file and return symbols and coordinates.
xyz_file = np.genfromtxt(fname=file_location, skip_header=2, dtype="unicode")
symbols = xyz_file[:, 0]
coords = xyz_file[:, 1:]
coords = coords.astype(np.float)
return symbols, coords
def write_xyz(file_location, symbols, coordinates):
# Write an xyz file given a file location, symbols, and coordinates.
num_atoms = len(symbols)
with open(file_location, "w+") as f:
f.write("{}\n".format(num_atoms))
f.write("XYZ file\n")
for i in range(num_atoms):
f.write(
"{}\t{}\t{}\t{}\n".format(
symbols[i], coordinates[i, 0], coordinates[i, 1], coordinates[i, 2]
)
)
def draw_molecule(coordinates, symbols, draw_bonds=None, save_location=None, dpi=300):
# Draw a picture of a molecule using matplotlib.
# Create figure
fig = plt.figure()
ax = fig.add_subplot(111, projection="3d")
# Get colors - based on atom name
colors = []
for atom in symbols:
colors.append(atom_colors[atom])
size = np.array(plt.rcParams["lines.markersize"] ** 2) * 200 / (len(coordinates))
ax.scatter(
coordinates[:, 0],
coordinates[:, 1],
coordinates[:, 2],
marker="o",
edgecolors="k",
facecolors=colors,
alpha=1,
s=size,
)
# Draw bonds
if draw_bonds:
for atoms, bond_length in draw_bonds.items():
atom1 = atoms[0]
atom2 = atoms[1]
ax.plot(
coordinates[[atom1, atom2], 0],
coordinates[[atom1, atom2], 1],
coordinates[[atom1, atom2], 2],
color="k",
)
# Save figure
if save_location:
plt.savefig(save_location, dpi=dpi, graph_min=0, graph_max=2)
return ax
def calculate_angle(rA, rB, rC, degrees=False):
# Calculate the angle between three points. Answer is given in radians by default, but can be given in degrees
# by setting degrees=True
AB = rB - rA
BC = rB - rC
theta = np.arccos(np.dot(AB, BC) / (np.linalg.norm(AB) * np.linalg.norm(BC)))
if degrees:
return np.degrees(theta)
else:
return theta
def bond_histogram(bond_list, save_location=None, dpi=300, graph_min=0, graph_max=2):
# Draw a histogram of bond lengths based on a bond_list (output from build_bond_list function)
lengths = []
for atoms, bond_length in bond_list.items():
lengths.append(bond_length)
bins = np.linspace(graph_min, graph_max)
fig = plt.figure()
ax = fig.add_subplot(111)
plt.xlabel("Bond Length (angstrom)")
plt.ylabel("Number of Bonds")
ax.hist(lengths, bins=bins)
# Save figure
if save_location:
plt.savefig(save_location, dpi=dpi)
return ax
def build_bond_list(coordinates, max_bond=1.5, min_bond=0):
"""
Calculate bonds in a molecule base on a distance criteria.
The pairwise distance between atoms is computed. If it is in the range
`min_bond` to `max_bond`, the atoms are counted as bonded.
Parameters
----------
coordinates : array-like
The coordinates of the atoms.
max_bond : float (optional)
The maximum distance for two points to be considered bonded. The default
is 1.5
min_bond : float (optional)
The minimum distance for two points to be considered bonded. The default
is 0.
Returns
-------
bonds : dict
A dictionary where the keys are tuples of the bonded atom indices, and the
associated values are the bond length.
"""
if min_bond < 0:
raise ValueError("Bond length can not be less than zero.")
if len(coordinates) < 1:
raise ValueError("Bond list can not be calculated for coordinate length less than 1.")
# Find the bonds in a molecule
bonds = {}
num_atoms = len(coordinates)
for atom1 in range(num_atoms):
for atom2 in range(atom1, num_atoms):
distance = calculate_distance(coordinates[atom1], coordinates[atom2])
if distance > min_bond and distance < max_bond:
bonds[(atom1, atom2)] = distance
return bonds
def calculate_molecular_mass(symbols):
"""Calculate the mass of a molecule.
Parameters
----------
symbols : list
A list of elements.
Returns
-------
mass : float
The mass of the molecule
"""
mass = 0
for atom in symbols:
mass += atomic_weights[atom]
return mass
[docs]def calculate_center_of_mass(symbols, coordinates):
"""Calculate the center of mass of a molecule.
The center of mass is weighted by each atom's weight.
Parameters
----------
symbols : list
A list of elements for the molecule
coordinates : np.ndarray
The coordinates of the molecule.
Returns
-------
center_of_mass: np.ndarray
The center of mass of the molecule.
Notes
-----
The center of mass is calculated with the formula
.. math:: \\vec{R}=\\frac{1}{M} \\sum_{i=1}^{n} m_{i}\\vec{r_{}i}
"""
total_mass = calculate_molecular_mass(symbols)
mass_array = np.zeros([len(symbols), 1])
for i in range(len(symbols)):
mass_array[i] = atomic_weights[symbols[i]]
center_of_mass = sum(coordinates * mass_array) / total_mass
return center_of_mass
atom_colors = {
"H": "white",
"C": "#D3D3D3",
"N": "#add8e6",
"O": "red",
"P": "#FFA500",
"F": "#FFFFE0",
"Cl": "#98FB98",
"Br": "#F4A460",
"S": "yellow",
}
[docs]def canvas(with_attribution=True):
"""
Placeholder function to show example docstring (NumPy format)
Replace this function and doc string for your own project
Parameters
----------
with_attribution : bool, Optional, default: True
Set whether or not to display who the quote is from
Returns
-------
quote : str
Compiled string including quote and optional attribution
"""
quote = "The code is but a canvas to our imagination."
if with_attribution:
quote += "\n\t- Adapted from Henry David Thoreau"
return quote
if __name__ == "__main__":
# Do something if this file is invoked on its own
print(canvas())