-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdf_to_pdb.py
More file actions
113 lines (88 loc) · 3.83 KB
/
df_to_pdb.py
File metadata and controls
113 lines (88 loc) · 3.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
from Bio.PDB import PDBIO, Structure, Model, Chain, Residue, Atom
from Bio.PDB.Polypeptide import is_aa
import numpy as np
"""
Convert a DataFrame containing atom information into a PDB file.
"""
def convert_dataframe_to_pdb(data, output_pdb_file):
    """
    Converts a DataFrame containing atom information back into a PDB file.

    The rows are rebuilt into a Biopython structure (one model, one chain per
    distinct `chain` value, one residue per distinct `residue` value within a
    chain) and written with `PDBIO`. Every atom gets a B-factor of 0.0 and an
    occupancy of 1.0. Residues whose name is not a standard amino acid
    according to `is_aa(resname, standard=True)` are flagged as hetero
    residues.

    :param data: DataFrame with columns `atom_name`, `resname`, `residue`, `chain`, `x`, `y`, `z`, `element`.
    :param output_pdb_file: Path to save the output PDB file.
    """
    structure = Structure.Structure("converted_structure")

    # A single model (id 0) holds every chain.
    model = Model.Model(0)
    structure.add(model)

    # Running serial number handed to each Atom; counted across all chains.
    serial_number = 0

    for chain_id, chain_data in data.groupby("chain"):
        chain = Chain.Chain(chain_id)
        model.add(chain)

        for res_id, res_data in chain_data.groupby("residue"):
            # The residue name is taken from the first atom row of the group.
            resname = res_data["resname"].iloc[0]
            hetfield = " " if is_aa(resname, standard=True) else "H"
            icode = " "  # Insertion code

            # NOTE(review): Biopython documents the third Residue argument as
            # the segment ID; the chain ID is passed here, as before --
            # confirm this is intended.
            residue = Residue.Residue((hetfield, res_id, icode), resname, chain_id)
            chain.add(residue)

            for _, atom_data in res_data.iterrows():
                serial_number += 1
                atom_name = atom_data["atom_name"]
                coord = np.array(
                    [atom_data["x"], atom_data["y"], atom_data["z"]],
                    dtype=float,
                )
                bfactor = 0.0  # Default B-factor
                occupancy = 1.0  # Default occupancy

                # Atom requires an upper-case element symbol; a missing or
                # blank value becomes None so Biopython infers the element
                # from the atom name instead.
                element = atom_data["element"]
                if isinstance(element, str) and element.strip():
                    element = element.strip().upper()
                else:
                    element = None

                # The 7th positional argument of Atom is the serial number,
                # so the element has to be passed by keyword; passing it
                # positionally would discard it.
                atom = Atom.Atom(
                    atom_name,
                    coord,
                    bfactor,
                    occupancy,
                    " ",
                    atom_name,
                    serial_number,
                    element=element,
                )
                residue.add(atom)

    # Save the structure to a PDB file
    io = PDBIO()
    io.set_structure(structure)
    io.save(output_pdb_file)
def df_to_pdb(df, output_file, model_num=1):
    """
    Convert DataFrame to PDB format and write to file.

    One fixed-width ATOM record is written per row, wrapped in a
    MODEL/ENDMDL pair and followed by END. Atoms are numbered from 1 in row
    order, and every atom gets occupancy 1.00 and temperature factor 0.00.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing atomic coordinates with columns:
        atom_name, resname, residue, chain, x, y, z.
        An optional `element` column supplies the element symbol; when it is
        absent or blank, the first alphabetic character of the atom name is
        used instead. A `type` column, if present, is not read.
    output_file : str
        Path to output PDB file
    model_num : int
        Model number to use in PDB file
    """
    # MODEL record: the model serial number is right-justified in columns 11-14.
    lines = [f"MODEL     {int(model_num):4d}\n"]

    for serial, (_, row) in enumerate(df.iterrows(), start=1):
        name = str(row["atom_name"]).strip()

        # Prefer an explicit element column; otherwise fall back to the first
        # letter of the atom name (skipping leading digits such as in "1HB").
        element = row.get("element")
        if isinstance(element, str) and element.strip():
            element = element.strip().upper()
        else:
            element = next((ch.upper() for ch in name if ch.isalpha()), "")

        # Atom names shorter than four characters start in column 14 unless
        # the element symbol has two letters or the name begins with a digit.
        if len(name) < 4 and name[:1].isalpha() and len(element) < 2:
            name = " " + name

        resname = str(row["resname"]).strip()
        chain = str(row["chain"])

        lines.append(
            f"{'ATOM':<6s}"                 # 1-6   Record type
            f"{serial:5d}"                  # 7-11  Atom serial number
            f" "                            # 12    Blank
            f"{name:<4s}"                   # 13-16 Atom name
            f" "                            # 17    Alternate location
            f"{resname:>3s}"                # 18-20 Residue name
            f" "                            # 21    Blank
            f"{chain:1s}"                   # 22    Chain identifier
            f"{int(row['residue']):4d}"     # 23-26 Residue sequence number
            f" "                            # 27    Insertion code
            f"   "                          # 28-30 Blank
            f"{float(row['x']):8.3f}"       # 31-38 X coordinate
            f"{float(row['y']):8.3f}"       # 39-46 Y coordinate
            f"{float(row['z']):8.3f}"       # 47-54 Z coordinate
            f"{1.0:6.2f}"                   # 55-60 Occupancy
            f"{0.0:6.2f}"                   # 61-66 Temperature factor
            f"{'':10s}"                     # 67-76 Blank
            f"{element:>2s}"                # 77-78 Element symbol
            "\n"
        )

    # Footer
    lines.append("ENDMDL\n")
    lines.append("END\n")

    # Every line is formatted before the file is opened, so a bad row raises
    # without leaving a partially written file behind.
    with open(output_file, "w", encoding="utf-8") as f:
        f.writelines(lines)