Skip to content

Added protein-RNA docking example 1A1T #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38,406 changes: 38,406 additions & 0 deletions tutorials/0.9.3/rna_docking/1a1t.pdb

Large diffs are not rendered by default.

867 changes: 867 additions & 0 deletions tutorials/0.9.3/rna_docking/data/1A1T_A.pdb

Large diffs are not rendered by default.

651 changes: 651 additions & 0 deletions tutorials/0.9.3/rna_docking/data/1A1T_B.pdb

Large diffs are not rendered by default.

85 changes: 85 additions & 0 deletions tutorials/0.9.3/rna_docking/data/get_top_structures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#!/usr/bin/env python3
import os, re
import argparse
import pandas as pd
from io import StringIO

def parseResultsList(filepath: str) -> pd.DataFrame:
    """Parse a ``solutions.list`` file into a pandas data frame.

    Each line of the file is a row of space-separated fields.  The
    ``Coordinates`` field is a parenthesised, comma-separated list that itself
    contains spaces, so those inner spaces are removed first; every line can
    then be split on whitespace into exactly one token per column.

    Args:
        filepath: Path to the ``solutions.list`` file.

    Returns:
        A data frame with one row per solution, sorted by the ``Scoring``
        column in descending order.  The ``Coordinates`` column holds tuples
        of floats.
    """
    # Raw string: "\(" inside a normal literal is an invalid escape sequence
    # and triggers a warning on recent Python versions.
    parenthesised = re.compile(r"\(.*?\)")

    def _compact(match):
        # Drop the blanks inside "( ... )" so the group survives the split.
        return match.group(0).replace(" ", "")

    with open(filepath) as file:
        # str.split() with no argument also discards trailing blanks and the
        # newline, so a line ending in spaces cannot grow a phantom column.
        rows = ["\t".join(parenthesised.sub(_compact, line).split()) for line in file]

    df = pd.read_csv(StringIO("\n".join(rows) + "\n"), sep="\t")

    # "(x,y,z,...)" -> (x, y, z, ...)
    df["Coordinates"] = df["Coordinates"].apply(
        lambda text: tuple(float(value) for value in text[1:-1].split(","))
    )

    return df.sort_values("Scoring", ascending=False)

if __name__ == "__main__":
    # Moves the PDB files of the n best-scoring solutions into a
    # "top_structures" directory created next to the solutions file.

    parser = argparse.ArgumentParser()
    parser.add_argument("input_solutions_file")
    # type=int: without it a value given on the command line arrives as a
    # string, which DataFrame.head() cannot use as a row count.
    parser.add_argument("-n", type=int, default=10)
    args = parser.parse_args()

    solutionsFilePath = os.path.abspath(args.input_solutions_file)
    parentPath = os.path.dirname(solutionsFilePath)
    finalResultsPath = os.path.join(parentPath, "top_structures")

    # Parse first so that an unreadable or malformed solutions file does not
    # leave an empty output directory behind.
    df = parseResultsList(solutionsFilePath).head(n=args.n).reset_index(drop=True)

    os.mkdir(finalResultsPath)

    # After reset_index(drop=True) the row position is the rank (0 = best),
    # and it becomes the prefix of the moved file's name.
    for rank, (swarm, pdb_name) in enumerate(zip(df["Swarm"], df["PDB"])):
        source = os.path.join(parentPath, "swarm_%s" % swarm, pdb_name)
        target = os.path.join(
            finalResultsPath,
            "%s_%s" % (rank, os.path.basename(source)),
        )
        # NOTE: os.rename moves the file out of its swarm directory.
        os.rename(source, target)
74 changes: 74 additions & 0 deletions tutorials/0.9.3/rna_docking/data/purge_prime_H.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#!/usr/bin/env python3
import os
import argparse
from lightdock.pdbutil.PDBIO import read_atom_line

def _format_atom_name(atom_name):
"""Format ATOM name with correct padding"""
if len(atom_name) == 4:
return atom_name
else:
return " %s" % atom_name


def write_atom_line(atom, output):
    """Write one atom to *output* as a fixed-column PDB coordinate record.

    Args:
        atom: Object exposing ``number``, ``name``, ``alternative``,
            ``residue_name``, ``chain_id``, ``residue_number``,
            ``residue_insertion``, ``x``, ``y``, ``z``, ``occupancy``,
            ``b_factor`` and ``element``.  An object whose class is named
            ``HetAtom`` is written as a ``HETATM`` record, anything else as
            ``ATOM``.
        output: Writable text stream; receives one newline-terminated line.
    """
    if atom.__class__.__name__ == "HetAtom":
        atom_type = "HETATM"
    else:
        # The record name fills columns 1-6, so "ATOM" needs two trailing
        # blanks; a shorter literal would be right-justified by %6s and
        # start the line with a space.
        atom_type = "ATOM  "
    # Three blanks follow the insertion code (columns 28-30) so that the x
    # coordinate begins in column 31.
    line = "%6s%5d %-4s%-1s%3s%2s%4d%1s   %8.3f%8.3f%8.3f%6.2f%6.2f%12s\n" % (
        atom_type,
        atom.number,
        _format_atom_name(atom.name),
        atom.alternative,
        atom.residue_name,
        atom.chain_id,
        atom.residue_number,
        atom.residue_insertion,
        atom.x,
        atom.y,
        atom.z,
        atom.occupancy,
        atom.b_factor,
        atom.element,
    )
    output.write(line)

def _add_RNA_tag(atom):
"""Adds R tag to RNA residues"""
if not atom.residue_name.startswith("R"):
atom.residue_name = "R" + atom.residue_name

return atom

def _remove_RNA_tag(atom):
"""Removes R tag to RNA residues"""
if atom.residue_name.startswith("R"):
atom.residue_name = atom.residue_name[1:]

return atom


if __name__ == "__main__":
    # Copies a PDB file, leaving out the ATOM records named HO'3 or HO'5.
    # ATOM records that are kept are re-serialised through write_atom_line;
    # every other line is copied through unchanged.

    parser = argparse.ArgumentParser()
    parser.add_argument("input_pdb_file")
    parser.add_argument("output_pdb_file")
    args = parser.parse_args()

    purged_names = ("HO'3", "HO'5")

    with open(args.input_pdb_file) as ih, open(args.output_pdb_file, "w") as oh:
        for line in ih:
            # Text-mode files present every line ending as "\n" and translate
            # "\n" back on write, so "\n" is the terminator to strip and to
            # emit; writing os.linesep would produce "\r\r\n" on Windows.
            line = line.rstrip("\n")

            if line.startswith("ATOM "):
                atom = read_atom_line(line)

                if atom.name in purged_names:
                    continue

                write_atom_line(atom, oh)
            else:
                oh.write(line + "\n")
91 changes: 91 additions & 0 deletions tutorials/0.9.3/rna_docking/data/reduce_to_amber.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#!/usr/bin/env python3

"""
ATOM 23 H5' DG B 1 -15.347 -3.940 -5.934 1.00 0.00 H new
ATOM 24 H5'' DG B 1 -15.852 -4.851 -7.094 1.00 0.00 H new
ATOM 25 H4' DG B 1 -15.214 -3.222 -8.238 1.00 0.00 H new
ATOM 26 H3' DG B 1 -13.483 -5.075 -8.732 1.00 0.00 H new
ATOM 27 H2' DG B 1 -11.785 -4.224 -7.287 1.00 0.00 H new
ATOM 28 H2'' DG B 1 -11.314 -3.732 -8.690 1.00 0.00 H new
ATOM 29 H1' DG B 1 -12.430 -1.717 -8.491 1.00 0.00 H new
ATOM 30 H8 DG B 1 -10.897 -3.080 -5.447 1.00 0.00 H new
ATOM 31 H1 DG B 1 -10.070 2.951 -6.160 1.00 0.00 H new
ATOM 32 H21 DG B 1 -12.087 2.806 -8.716 1.00 0.00 H new
ATOM 33 H22 DG B 1 -11.235 3.696 -7.879 1.00 0.00 H new
"""

import os
import argparse
from lightdock.scoring.dna.data.amber import atoms_per_residue
from lightdock.pdbutil.PDBIO import read_atom_line


def _format_atom_name(atom_name):
"""Format ATOM name with correct padding"""
if len(atom_name) == 4:
return atom_name
else:
return " %s" % atom_name


def write_atom_line(atom, output):
    """Write one atom to *output* as a fixed-column PDB coordinate record.

    Args:
        atom: Object exposing ``number``, ``name``, ``alternative``,
            ``residue_name``, ``chain_id``, ``residue_number``,
            ``residue_insertion``, ``x``, ``y``, ``z``, ``occupancy``,
            ``b_factor`` and ``element``.  An object whose class is named
            ``HetAtom`` produces a ``HETATM`` record, anything else ``ATOM``.
        output: Writable text stream; receives one newline-terminated line.
    """
    # Record names are six columns wide: "ATOM" must be padded with two
    # trailing blanks, otherwise %6s right-justifies it.
    atom_type = "HETATM" if atom.__class__.__name__ == "HetAtom" else "ATOM  "
    fields = (
        atom_type,
        atom.number,
        _format_atom_name(atom.name),
        atom.alternative,
        atom.residue_name,
        atom.chain_id,
        atom.residue_number,
        atom.residue_insertion,
        atom.x,
        atom.y,
        atom.z,
        atom.occupancy,
        atom.b_factor,
        atom.element,
    )
    # The three blanks after the insertion code are columns 28-30; they make
    # the x coordinate start in column 31.
    line = "%6s%5d %-4s%-1s%3s%2s%4d%1s   %8.3f%8.3f%8.3f%6.2f%6.2f%12s\n" % fields
    output.write(line)


# Replacement names for hydrogen atoms, keyed by the name read from the input
# file.  The script's main section consults this table only for hydrogens
# whose name is not listed in atoms_per_residue for their residue.
# NOTE(review): presumably the keys follow the primed naming shown in the
# sample records at the top of this file -- confirm.
translation = {
    "H5'": "H5'1",
    "H5''": "H5'2",
    "H2'": "H2'1",
    "H2''": "H2'2",
    "HO2'": "HO'2",  # The three HO entries were added by Lucas in November 2022
    "HO3'": "HO'3",  # (see above)
    "HO5'": "HO'5"  # (see above)
}


if __name__ == "__main__":
    # Rewrites the ATOM records of a PDB file, renaming hydrogens through the
    # translation table where needed.  Lines that are not ATOM records are
    # not written to the output.

    parser = argparse.ArgumentParser()
    for positional in ("input_pdb_file", "output_pdb_file"):
        parser.add_argument(positional)
    args = parser.parse_args()

    with open(args.input_pdb_file) as ih, open(args.output_pdb_file, 'w') as oh:
        for line in ih:
            line = line.rstrip(os.linesep)
            if not line.startswith("ATOM "):
                continue

            atom = read_atom_line(line)

            # Residues without an entry in atoms_per_residue are reported
            # and skipped.
            if atom.residue_name not in atoms_per_residue:
                print(f"Not supported atom: {atom.residue_name}.{atom.name}")
                continue

            # Atoms already listed for the residue, and all non-hydrogens,
            # are written with their name untouched.
            if atom.name in atoms_per_residue[atom.residue_name] or not atom.is_hydrogen():
                write_atom_line(atom, oh)
                continue

            # Unlisted hydrogen: rename it, or report it when the table has
            # no entry for its name.
            try:
                atom.name = translation[atom.name]
                write_atom_line(atom, oh)
            except KeyError:
                print(f"Atom not found in mapping: {atom.residue_name}.{atom.name}")

77 changes: 77 additions & 0 deletions tutorials/0.9.3/rna_docking/data/retag_rna.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#!/usr/bin/env python3
import os
import argparse
from lightdock.pdbutil.PDBIO import read_atom_line

def _format_atom_name(atom_name):
"""Format ATOM name with correct padding"""
if len(atom_name) == 4:
return atom_name
else:
return " %s" % atom_name


def write_atom_line(atom, output):
    """Write one atom to *output* as a fixed-column PDB coordinate record.

    Args:
        atom: Object exposing ``number``, ``name``, ``alternative``,
            ``residue_name``, ``chain_id``, ``residue_number``,
            ``residue_insertion``, ``x``, ``y``, ``z``, ``occupancy``,
            ``b_factor`` and ``element``.  The record is ``HETATM`` when the
            object's class is named ``HetAtom`` and ``ATOM`` otherwise.
        output: Writable text stream; receives one newline-terminated line.
    """
    is_hetero = atom.__class__.__name__ == "HetAtom"
    # Both record names are exactly six characters (columns 1-6); "ATOM" is
    # padded on the right so %6s does not shift it.
    atom_type = "HETATM" if is_hetero else "ATOM  "
    # Columns 28-30 are blank, hence the three spaces before the x
    # coordinate, which starts in column 31.
    line = "%6s%5d %-4s%-1s%3s%2s%4d%1s   %8.3f%8.3f%8.3f%6.2f%6.2f%12s\n" % (
        atom_type,
        atom.number,
        _format_atom_name(atom.name),
        atom.alternative,
        atom.residue_name,
        atom.chain_id,
        atom.residue_number,
        atom.residue_insertion,
        atom.x,
        atom.y,
        atom.z,
        atom.occupancy,
        atom.b_factor,
        atom.element,
    )
    output.write(line)

def _add_RNA_tag(atom):
"""Adds R tag to RNA residues"""
if not atom.residue_name.startswith("R"):
atom.residue_name = "R" + atom.residue_name

return atom

def _remove_RNA_tag(atom):
"""Removes R tag to RNA residues"""
if atom.residue_name.startswith("R"):
atom.residue_name = atom.residue_name[1:]

return atom


if __name__ == "__main__":
    # Copies a PDB file while adding the `R` prefix to the residue name of
    # every ATOM record, or removing it when -r is given.  Lines that are not
    # ATOM records are copied through unchanged.

    parser = argparse.ArgumentParser()
    parser.add_argument("input_pdb_file", help="Input pdb file")
    parser.add_argument("output_pdb_file", help="Output pdb file")
    # store_true gives the same on/off flag as BooleanOptionalAction does for
    # a short-only option, without requiring Python 3.9+.
    parser.add_argument("-r", default=False, action="store_true", help="Removes `R` tag instead")
    args = parser.parse_args()

    # Choose the transformation once instead of testing the flag per line.
    retag = _remove_RNA_tag if args.r else _add_RNA_tag

    with open(args.input_pdb_file) as ih, open(args.output_pdb_file, "w") as oh:
        for line in ih:
            # Text-mode files present every line ending as "\n" and translate
            # "\n" back on write, so "\n" is the terminator to strip and to
            # emit; writing os.linesep would produce "\r\r\n" on Windows.
            line = line.rstrip("\n")

            if line.startswith("ATOM "):
                atom = retag(read_atom_line(line))
                write_atom_line(atom, oh)
            else:
                oh.write(line + "\n")
Binary file added tutorials/0.9.3/rna_docking/data/simulation.tgz
Binary file not shown.
Loading