Source code for molecool.functions

"""
functions.py
A python package for analyzing and visualizing molecular files. For MolSSI Workshop.

Handles the primary functions
"""

import numpy as np
import matplotlib.pyplot as plt

from mpl_toolkits.mplot3d import Axes3D  # noqa: F401


[docs]def calculate_distance(rA, rB): """ Calculate the distance between two points. Parameters ---------- rA, rB : np.ndarray The coordinates of each point. Returns ------- dist : float The distance between the two points. Examples -------- >>> r1 = np.array([0, 0, 0]) >>> r2 = np.array([0, 0.1, 0]) >>> calculate_distance(r1, r2) 0.1 """ d = rA - rB dist = np.linalg.norm(d) return dist
def open_pdb(f_loc): # This function reads in a pdb file and returns the atom names and coordinates. with open(f_loc) as f: data = f.readlines() c = [] sym = [] for l in data: if "ATOM" in l[0:6] or "HETATM" in l[0:6]: sym.append(l[76:79].strip()) c2 = [float(x) for x in l[30:55].split()] c.append(c2) coords = np.array(c) return sym, coords atomic_weights = { "H": 1.00784, "C": 12.0107, "N": 14.0067, "O": 15.999, "P": 30.973762, "F": 18.998403, "Cl": 35.453, "Br": 79.904, } def open_xyz(file_location): # Open an xyz file and return symbols and coordinates. xyz_file = np.genfromtxt(fname=file_location, skip_header=2, dtype="unicode") symbols = xyz_file[:, 0] coords = xyz_file[:, 1:] coords = coords.astype(np.float) return symbols, coords def write_xyz(file_location, symbols, coordinates): # Write an xyz file given a file location, symbols, and coordinates. num_atoms = len(symbols) with open(file_location, "w+") as f: f.write("{}\n".format(num_atoms)) f.write("XYZ file\n") for i in range(num_atoms): f.write( "{}\t{}\t{}\t{}\n".format( symbols[i], coordinates[i, 0], coordinates[i, 1], coordinates[i, 2] ) ) def draw_molecule(coordinates, symbols, draw_bonds=None, save_location=None, dpi=300): # Draw a picture of a molecule using matplotlib. # Create figure fig = plt.figure() ax = fig.add_subplot(111, projection="3d") # Get colors - based on atom name colors = [] for atom in symbols: colors.append(atom_colors[atom]) size = np.array(plt.rcParams["lines.markersize"] ** 2) * 200 / (len(coordinates)) ax.scatter( coordinates[:, 0], coordinates[:, 1], coordinates[:, 2], marker="o", edgecolors="k", facecolors=colors, alpha=1, s=size, ) # Draw bonds if draw_bonds: for atoms, bond_length in draw_bonds.items(): atom1 = atoms[0] atom2 = atoms[1] ax.plot( coordinates[[atom1, atom2], 0], coordinates[[atom1, atom2], 1], coordinates[[atom1, atom2], 2], color="k", ) # Save figure if save_location: plt.savefig(save_location, dpi=dpi, graph_min=0, graph_max=2) return ax def calculate_angle(rA, rB, rC, degrees=False): # Calculate the angle between three points. Answer is given in radians by default, but can be given in degrees # by setting degrees=True AB = rB - rA BC = rB - rC theta = np.arccos(np.dot(AB, BC) / (np.linalg.norm(AB) * np.linalg.norm(BC))) if degrees: return np.degrees(theta) else: return theta def bond_histogram(bond_list, save_location=None, dpi=300, graph_min=0, graph_max=2): # Draw a histogram of bond lengths based on a bond_list (output from build_bond_list function) lengths = [] for atoms, bond_length in bond_list.items(): lengths.append(bond_length) bins = np.linspace(graph_min, graph_max) fig = plt.figure() ax = fig.add_subplot(111) plt.xlabel("Bond Length (angstrom)") plt.ylabel("Number of Bonds") ax.hist(lengths, bins=bins) # Save figure if save_location: plt.savefig(save_location, dpi=dpi) return ax def build_bond_list(coordinates, max_bond=1.5, min_bond=0): """ Calculate bonds in a molecule base on a distance criteria. The pairwise distance between atoms is computed. If it is in the range `min_bond` to `max_bond`, the atoms are counted as bonded. Parameters ---------- coordinates : array-like The coordinates of the atoms. max_bond : float (optional) The maximum distance for two points to be considered bonded. The default is 1.5 min_bond : float (optional) The minimum distance for two points to be considered bonded. The default is 0. Returns ------- bonds : dict A dictionary where the keys are tuples of the bonded atom indices, and the associated values are the bond length. """ if min_bond < 0: raise ValueError("Bond length can not be less than zero.") if len(coordinates) < 1: raise ValueError("Bond list can not be calculated for coordinate length less than 1.") # Find the bonds in a molecule bonds = {} num_atoms = len(coordinates) for atom1 in range(num_atoms): for atom2 in range(atom1, num_atoms): distance = calculate_distance(coordinates[atom1], coordinates[atom2]) if distance > min_bond and distance < max_bond: bonds[(atom1, atom2)] = distance return bonds def calculate_molecular_mass(symbols): """Calculate the mass of a molecule. Parameters ---------- symbols : list A list of elements. Returns ------- mass : float The mass of the molecule """ mass = 0 for atom in symbols: mass += atomic_weights[atom] return mass
[docs]def calculate_center_of_mass(symbols, coordinates): """Calculate the center of mass of a molecule. The center of mass is weighted by each atom's weight. Parameters ---------- symbols : list A list of elements for the molecule coordinates : np.ndarray The coordinates of the molecule. Returns ------- center_of_mass: np.ndarray The center of mass of the molecule. Notes ----- The center of mass is calculated with the formula .. math:: \\vec{R}=\\frac{1}{M} \\sum_{i=1}^{n} m_{i}\\vec{r_{}i} """ total_mass = calculate_molecular_mass(symbols) mass_array = np.zeros([len(symbols), 1]) for i in range(len(symbols)): mass_array[i] = atomic_weights[symbols[i]] center_of_mass = sum(coordinates * mass_array) / total_mass return center_of_mass
atom_colors = { "H": "white", "C": "#D3D3D3", "N": "#add8e6", "O": "red", "P": "#FFA500", "F": "#FFFFE0", "Cl": "#98FB98", "Br": "#F4A460", "S": "yellow", }
[docs]def canvas(with_attribution=True): """ Placeholder function to show example docstring (NumPy format) Replace this function and doc string for your own project Parameters ---------- with_attribution : bool, Optional, default: True Set whether or not to display who the quote is from Returns ------- quote : str Compiled string including quote and optional attribution """ quote = "The code is but a canvas to our imagination." if with_attribution: quote += "\n\t- Adapted from Henry David Thoreau" return quote
if __name__ == "__main__": # Do something if this file is invoked on its own print(canvas())