Skip to content

Q-Chem 6 + Misc Updates #770

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 43 commits into from
Mar 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
8a1be9b
Save scratch for ForceFW
samblau Jun 29, 2022
a419a87
Add grad file parsing + tests
samblau Jun 30, 2022
f40c0de
rename precise_gradients
samblau Jun 30, 2022
7dd65fd
avoid saving tons of scratch when parsing grads
samblau Jul 4, 2022
329bb4e
fix missing import
samblau Jul 4, 2022
a113f28
Merge branch 'main' into qchem
samblau Jul 17, 2022
c6f8e06
Q-Chem version 6
samblau Aug 1, 2022
d15a44e
fix testing against my pymatgen
samblau Aug 1, 2022
0eca0d7
Fix dft_level and tests with new defaults
samblau Aug 1, 2022
0455c12
remove custom max_displacement for now
samblau Aug 1, 2022
70e8036
New max_displacement, fix tests
samblau Aug 6, 2022
681d724
Merge branch 'main' into qchem
samblau Sep 11, 2022
4643cd1
expose save_scratch for FFopt FW
samblau Dec 2, 2022
e1f79e8
Parse HESS file during FFopt
samblau Dec 3, 2022
b04db6b
modify mem_total dynamically via node info
samblau Dec 3, 2022
737664b
bug fix
samblau Dec 3, 2022
67fd7a7
fix bug
samblau Dec 3, 2022
f98f588
further reduce memory w.r.t. node total
samblau Dec 3, 2022
95597bd
prev_hess
samblau Dec 15, 2022
4a1799b
fix missing import
samblau Dec 15, 2022
92cf2a2
write to writelines
samblau Dec 15, 2022
2e7e2e4
dealing with 132.0 files
samblau Dec 16, 2022
c282386
missing struct import
samblau Dec 16, 2022
7370ccf
allow 132.0.gz
samblau Dec 18, 2022
e3c756e
parse grad from 131.0 file as well plus new test
samblau Dec 23, 2022
3cb17a7
Fix tests given new parsing and features
samblau Dec 23, 2022
c142bf6
add hess parsing test
samblau Dec 23, 2022
e3ca65c
dipoles and gap_info into output for sp/force
samblau Dec 24, 2022
82d46cc
Avoid empty prev_hess
samblau Feb 6, 2023
34b9c8c
avoid freq parsing for single atoms
samblau Feb 26, 2023
4e2141c
debugging
samblau Feb 26, 2023
8c7d860
Merge branch 'main' into qchem
samblau Feb 28, 2023
96efb4a
Use main pymatgen to be sure
samblau Feb 28, 2023
379df8a
fix typo
samblau Feb 28, 2023
1f36034
Update pymatgen version
samblau Feb 28, 2023
b979209
pymatgen version
samblau Feb 28, 2023
7d7cfb1
Trying to debug parse_outputs taking so long
samblau Mar 7, 2023
8faacef
More logging statements
samblau Mar 7, 2023
5cf0aa4
more logger statements
samblau Mar 7, 2023
5d0a25d
Update pymatgen and custodian versions
samblau Mar 11, 2023
8cd5e05
typo
samblau Mar 11, 2023
bca7cd5
Merge branch 'main' into qchem
samblau Mar 13, 2023
30c1028
fix multiple_outputs drone issue
samblau Mar 13, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
export PATH=$HOME/miniconda3/bin:$PATH
conda create --name test_env python=3.8
source activate test_env
conda install -c conda-forge openbabel pymatgen
conda install -c conda-forge openbabel gcc_linux-64
pip install -r requirements-ci.txt
pip install .
no_output_timeout: 1h
Expand Down
17 changes: 9 additions & 8 deletions atomate/qchem/drones.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,9 @@ def generate_doc(self, dir_name, qcinput_files, qcoutput_files, multirun):
if d["output"]["job_type"] in ["freq", "frequency"]:
d["output"]["frequencies"] = d_calc_final["frequencies"]
# Note: for single-atom freq calcs, this key may not exist
d["output"]["frequency_modes"] = d_calc_final.get("frequency_mode_vectors", [])
d["output"]["frequency_modes"] = d_calc_final.get(
"frequency_mode_vectors", []
)
d["output"]["enthalpy"] = d_calc_final["total_enthalpy"]
d["output"]["entropy"] = d_calc_final["total_entropy"]
if d["input"]["job_type"] in ["opt", "optimization", "ts"]:
Expand Down Expand Up @@ -275,12 +277,11 @@ def generate_doc(self, dir_name, qcinput_files, qcoutput_files, multirun):
if d_calc_final["CDS_gradients"] is not None:
d["output"]["CDS_gradients"] = d_calc_final["CDS_gradients"][0]

if d["output"]["job_type"] == "force":
d["output"]["gradients"] = d_calc_final["gradients"][0]
if d_calc_final["pcm_gradients"] is not None:
d["output"]["pcm_gradients"] = d_calc_final["pcm_gradients"][0]
if d_calc_final["CDS_gradients"] is not None:
d["output"]["CDS_gradients"] = d_calc_final["CDS_gradients"][0]
if d["output"]["job_type"] in ["force", "sp"]:
d["output"]["dipoles"] = d_calc_final["dipoles"]
if "gap_info" in d_calc_final:
if d_calc_final["gap_info"] is not None:
d["output"]["gap_info"] = d_calc_final["gap_info"]

opt_trajectory = []
calcs = copy.deepcopy(d["calcs_reversed"])
Expand Down Expand Up @@ -451,7 +452,7 @@ def process_qchem_multirun(dir_name, input_files, output_files):
qchem_input_file = os.path.join(dir_name, input_files.get(key))
qchem_output_file = os.path.join(dir_name, output_files.get(key))
multi_out = QCOutput.multiple_outputs_from_file(
QCOutput, qchem_output_file, keep_sub_files=False
filename=qchem_output_file, keep_sub_files=False
)
multi_in = QCInput.from_multi_jobs_file(qchem_input_file)
for ii, out in enumerate(multi_out):
Expand Down
136 changes: 134 additions & 2 deletions atomate/qchem/firetasks/parse_outputs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import json
import os
import shutil
import struct
from monty.io import zopen

from fireworks import FiretaskBase, FWAction, explicit_serialize
from fireworks.utilities.fw_serializers import DATETIME_HANDLER
Expand Down Expand Up @@ -50,6 +53,8 @@ class QChemToDb(FiretaskBase):
"calc_loc",
"input_file",
"output_file",
"parse_grad_file",
"parse_hess_file",
"additional_fields",
"db_file",
"fw_spec_field",
Expand Down Expand Up @@ -84,13 +89,123 @@ def run_task(self, fw_spec):
multirun=multirun,
)

logger.info("Drone finished!")

# parse the GRAD file, if desired and if it is present
if self.get("parse_grad_file", False):
grad_file = None
if os.path.exists(os.path.join(calc_dir, "GRAD.gz")):
grad_file = os.path.join(calc_dir, "GRAD.gz")
elif os.path.exists(os.path.join(calc_dir, "GRAD")):
grad_file = os.path.join(calc_dir, "GRAD")
elif os.path.exists(os.path.join(calc_dir, "scratch/GRAD.gz")):
grad_file = os.path.join(calc_dir, "scratch/GRAD.gz")
elif os.path.exists(os.path.join(calc_dir, "scratch/GRAD")):
grad_file = os.path.join(calc_dir, "scratch/GRAD")
elif os.path.exists(os.path.join(calc_dir, "131.0.gz")):
grad_file = os.path.join(calc_dir, "131.0.gz")
elif os.path.exists(os.path.join(calc_dir, "131.0")):
grad_file = os.path.join(calc_dir, "131.0")
elif os.path.exists(os.path.join(calc_dir, "scratch/131.0.gz")):
grad_file = os.path.join(calc_dir, "scratch/131.0.gz")
elif os.path.exists(os.path.join(calc_dir, "scratch/131.0")):
grad_file = os.path.join(calc_dir, "scratch/131.0")

if grad_file is None:
task_doc["warnings"]["grad_file_missing"] = True
else:
logger.info("Parsing gradient file")
grad = []
if grad_file[-5:] == "131.0" or grad_file[-8:] == "131.0.gz":
tmp_grad_data = []
logger.info("Gradient file is binary")
with zopen(grad_file, mode="rb") as file:
binary = file.read()
for ii in range(int(len(binary) / 8)):
tmp_grad_data.append(
struct.unpack("d", binary[ii * 8 : (ii + 1) * 8])[0]
)
grad = []
for ii in range(int(len(tmp_grad_data) / 3)):
grad.append(
[
float(tmp_grad_data[ii * 3]),
float(tmp_grad_data[ii * 3 + 1]),
float(tmp_grad_data[ii * 3 + 2]),
]
)
else:
logger.info("Gradient file is text")
with zopen(grad_file, mode="rt", encoding="ISO-8859-1") as f:
lines = f.readlines()
for line in lines:
split_line = line.split()
if len(split_line) == 3:
grad.append(
[
float(split_line[0]),
float(split_line[1]),
float(split_line[2]),
]
)
task_doc["output"]["precise_gradients"] = grad
logger.info("Done parsing gradient. Now removing scratch directory")
if os.path.exists(os.path.join(calc_dir, "scratch")):
shutil.rmtree(os.path.join(calc_dir, "scratch"))

# parse the HESS file(s), if desired and if one or multiple are present
if self.get("parse_hess_file", False):
hess_files = []
for filename in os.listdir(calc_dir):
if "HESS" in filename:
hess_files.append(filename)
elif filename == "scratch":
for subfilename in os.listdir(os.path.join(calc_dir, "scratch")):
if "HESS" in subfilename:
hess_files.append("scratch/" + subfilename)
elif subfilename == "132.0" or subfilename == "132.0.gz":
hess_files.append("scratch/" + subfilename)

if len(hess_files) == 0:
task_doc["warnings"]["hess_file_missing"] = True
else:
logger.info("Parsing Hessian file")
hess_data = {}
for hess_name in hess_files:
if hess_name[-5:] == "132.0" or hess_name[-8:] == "132.0.gz":
logger.info("Hessian file is binary")
tmp_hess_data = []
with zopen(
os.path.join(calc_dir, hess_name), mode="rb"
) as file:
binary = file.read()
for ii in range(int(len(binary) / 8)):
tmp_hess_data.append(
struct.unpack("d", binary[ii * 8 : (ii + 1) * 8])[0]
)
hess_data[hess_name] = tmp_hess_data
else:
logger.info("Hessian file is text")
with zopen(
os.path.join(calc_dir, hess_name),
mode="rt",
encoding="ISO-8859-1",
) as f:
hess_data[hess_name] = f.readlines()
task_doc["output"]["hess_data"] = hess_data
logger.info("Done parsing Hessian. Now removing scratch directory")
if os.path.exists(os.path.join(calc_dir, "scratch")):
shutil.rmtree(os.path.join(calc_dir, "scratch"))

# Check for additional keys to set based on the fw_spec
logger.info("Updating fw_spec_field")
if self.get("fw_spec_field"):
task_doc.update(
{self.get("fw_spec_field"): fw_spec.get(self.get("fw_spec_field"))}
)

# Update fw_spec with final/optimized structure
logger.info("Updating charges in spec")
update_spec = {}
if task_doc.get("output").get("optimized_molecule"):
update_spec["prev_calc_molecule"] = task_doc["output"]["optimized_molecule"]
Expand All @@ -101,14 +216,17 @@ def run_task(self, fw_spec):
update_spec["prev_calc_esp"] = task_doc["output"]["ESP"]

# get the database connection
logger.info("Get database connection")
db_file = env_chk(self.get("db_file"), fw_spec)

# db insertion or taskdoc dump
if not db_file:
with open(os.path.join(calc_dir, "task.json"), "w") as f:
f.write(json.dumps(task_doc, default=DATETIME_HANDLER))
else:
logger.info("Connecting to QChemCalcDb")
mmdb = QChemCalcDb.from_db_file(db_file, admin=True)
logger.info("Starting task_doc insert")
t_id = mmdb.insert(task_doc)
logger.info(f"Finished parsing with task_id: {t_id}")

Expand Down Expand Up @@ -195,12 +313,26 @@ def run_task(self, fw_spec):
task_doc_clean["orig"]["molecule"]["spin_multiplicity"] = 1
task_doc_clean["output"]["initial_molecule"]["charge"] = 1
task_doc_clean["output"]["initial_molecule"]["spin_multiplicity"] = 1
task_doc_clean["output"]["initial_molecule"]["sites"] = [{'name': 'H', 'species': [{'element': 'H', 'occu': 1}], 'xyz': [0.0, 0.0, 0.0], 'properties': {}}]
task_doc_clean["output"]["initial_molecule"]["sites"] = [
{
"name": "H",
"species": [{"element": "H", "occu": 1}],
"xyz": [0.0, 0.0, 0.0],
"properties": {},
}
]
task_doc_clean["output"]["mulliken"] = [+1.000000]
task_doc_clean["output"]["resp"] = [+1.000000]
task_doc_clean["output"]["optimized_molecule"]["charge"] = 1
task_doc_clean["output"]["optimized_molecule"]["spin_multiplicity"] = 1
task_doc_clean["output"]["optimized_molecule"]["sites"] = [{'name': 'H', 'species': [{'element': 'H', 'occu': 1}], 'xyz': [0.0, 0.0, 0.0], 'properties': {}}]
task_doc_clean["output"]["optimized_molecule"]["sites"] = [
{
"name": "H",
"species": [{"element": "H", "occu": 1}],
"xyz": [0.0, 0.0, 0.0],
"properties": {},
}
]
task_doc_clean["output"]["final_energy"] = (
task_doc_2["output"]["final_energy"] - task_doc_1["output"]["final_energy"]
)
Expand Down
85 changes: 85 additions & 0 deletions atomate/qchem/firetasks/tests/test_parse_outputs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import os
import shutil
import unittest

from pymatgen.core import Molecule
from pymatgen.io.qchem.inputs import QCInput

from atomate.qchem.firetasks.parse_outputs import QChemToDb
from atomate.utils.testing import AtomateTest

from monty.serialization import loadfn

__author__ = "Samuel Blau"
__email__ = "[email protected]"

module_dir = os.path.dirname(os.path.abspath(__file__))


class TestParseOutputQChem_grads(AtomateTest):
def setUp(self, lpad=False):
super().setUp(lpad=False)

def tearDown(self):
pass

def test_parse_grad_good(self):
my_calc_dir = os.path.join(module_dir, "..", "..", "test_files","parse_grad_good")
ft = QChemToDb(calc_dir=my_calc_dir, parse_grad_file=True)
ft.run_task({})
task_doc = loadfn(os.path.join(my_calc_dir,"task.json"))
self.assertEqual(task_doc["output"]["final_energy"], -274.6893362188)
self.assertEqual(len(task_doc["output"]["precise_gradients"]), 10)
self.assertEqual(task_doc["output"]["precise_gradients"][0], [0.0090906486788787, 0.016150932052898, 0.0054568671405536])
self.assertEqual(task_doc["output"]["precise_gradients"][-1], [0.0014495621906601, -0.0018570062958895, 0.0012478282193499])
os.remove(os.path.join(my_calc_dir, "task.json"))

def test_parse_grad_131(self):
my_calc_dir = os.path.join(module_dir, "..", "..", "test_files","tmqm_grad_pcm")
ft = QChemToDb(calc_dir=my_calc_dir, parse_grad_file=True)
ft.run_task({})
task_doc = loadfn(os.path.join(my_calc_dir,"task.json"))
self.assertEqual(task_doc["output"]["final_energy"], -2791.8404057999)
self.assertEqual(len(task_doc["output"]["precise_gradients"]), 25)
self.assertEqual(task_doc["output"]["precise_gradients"][0], [-2.7425178677332305e-05, 1.8017443772144412e-06, -2.3689773769176685e-06])
self.assertEqual(task_doc["output"]["precise_gradients"][-1], [0.0028753270363098644, -0.000392640066359285, 0.004405091870637312])
os.remove(os.path.join(my_calc_dir, "task.json"))

def test_parse_grad_bad(self):
my_calc_dir = os.path.join(module_dir, "..", "..", "test_files","parse_grad_bad")
ft = QChemToDb(calc_dir=my_calc_dir, parse_grad_file=True)
ft.run_task({})
task_doc = loadfn(os.path.join(my_calc_dir,"task.json"))
self.assertEqual(task_doc["warnings"]["grad_file_missing"], True)
os.remove(os.path.join(my_calc_dir, "task.json"))


class TestParseOutputQChem_hess(AtomateTest):
def setUp(self, lpad=False):
os.makedirs(os.path.join(module_dir, "..", "..", "test_files", "freq_save_hess", "scratch"))
shutil.copyfile(
os.path.join(module_dir, "..", "..", "test_files", "freq_save_hess", "BUP_scratch", "132.0"),
os.path.join(module_dir, "..", "..", "test_files", "freq_save_hess", "scratch", "132.0"),
)
shutil.copyfile(
os.path.join(module_dir, "..", "..", "test_files", "freq_save_hess", "BUP_scratch", "HESS"),
os.path.join(module_dir, "..", "..", "test_files", "freq_save_hess", "scratch", "HESS"),
)
super().setUp(lpad=False)

def test_parse_hess(self):
my_calc_dir = os.path.join(module_dir, "..", "..", "test_files","freq_save_hess")
ft = QChemToDb(calc_dir=my_calc_dir, parse_hess_file=True)
ft.run_task({})
task_doc = loadfn(os.path.join(my_calc_dir,"task.json"))
self.assertEqual(task_doc["output"]["final_energy"], -151.3244603665)
self.assertEqual(task_doc["output"]["hess_data"]["scratch/132.0"][0],0.12636293260949633)
self.assertEqual(task_doc["output"]["hess_data"]["scratch/132.0"][-2],-0.2025032138024329)
self.assertEqual(task_doc["output"]["hess_data"]["scratch/HESS"][-2], ' -0.175476533300377 -0.202503213802433 0.205623571433770 \n')
os.remove(os.path.join(my_calc_dir, "task.json"))

def tearDown(self):
pass

if __name__ == "__main__":
unittest.main()
12 changes: 8 additions & 4 deletions atomate/qchem/firetasks/tests/test_run_calc.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,10 +543,14 @@ def test_RunQChemFake(self):
).data
this_out = QCOutput("mol.qout").data
for key in ref_out:
try:
self.assertEqual(ref_out[key], this_out[key])
except ValueError:
np.testing.assert_array_equal(ref_out[key], this_out[key])
if key == "dipoles":
self.assertEqual(ref_out[key]["total"], this_out[key]["total"])
np.testing.assert_array_equal(ref_out[key]["dipole"], this_out[key]["dipole"])
else:
try:
self.assertEqual(ref_out[key], this_out[key])
except ValueError:
np.testing.assert_array_equal(ref_out[key], this_out[key])
for filename in os.listdir(
os.path.join(module_dir, "..", "..", "test_files", "real_run")
):
Expand Down
Loading