Ticket #4342: ligand_utils.py

File ligand_utils.py, 11.8 KB (added by Tristan Croll, 5 years ago)

Added by email2trac

Line 
1def recluster_ligands(session, model):
2 from chimerax.atomic import Residue, Residues, concatenate
3 m = model
4 polymer = m.residues[m.residues.polymer_types!=Residue.PT_NONE]
5 non_polymer = m.residues[m.residues.polymer_types==Residue.PT_NONE]
6 # Ligands bound to polymeric residues should keep their existing chain IDs
7 unbound, bound_map = sort_bound_and_unbound_ligands(non_polymer)
8 chain_map, unclassified = cluster_unbound_ligands(m, unbound)
9 non_polymer.chain_ids = 'Xtmp'
10 m.renumber_residues(non_polymer, 0)
11 assign_bound_ligand_chain_ids_and_residue_numbers(m, bound_map)
12 assign_unbound_residues(m, chain_map, unclassified)
13
14def assign_unbound_residues(m, chain_map, unclassified):
15 from chimerax.atomic import Residue, concatenate
16 for cid, residues in chain_map.items():
17 first_ligand_number = next_available_ligand_number(m, cid)
18 residues.chain_ids = cid+'tmp'
19 residues = concatenate([residues[residues.names!='HOH'], residues[residues.names=='HOH']])
20 m.renumber_residues(residues, first_ligand_number)
21 residues.chain_ids=cid
22 if len(unclassified):
23 # Try reclassifying with a more permissive cutoff, now that all the other ligands
24 # are assigned to chains
25 chain_map, unclassified = cluster_unbound_ligands(m, unclassified, cutoff=8)
26 assign_unbound_residues(m, chain_map, [])
27 if len(unclassified):
28 session = m.session
29 warn_str = ('{} residues could not be automatically assigned to chains. '
30 'These have been given the chain ID UNK.').format(len(unclassified))
31 session.logger.warning(warn_str)
32 unclassified.chain_ids='UNK'
33 m.renumber_residues(unclassified, 1)
34
35def find_duplicate_residues(m):
36 seen=set()
37 duplicates=[]
38 for r in reversed(m.residues):
39 sig = (r.chain_id, r.number)
40 if sig in seen:
41 duplicates.append(r)
42 seen.add(sig)
43 return duplicates
44
45
46
47_metal_weight=5
48_carbon_weight=0.5
49
50def cluster_unbound_ligands(model, unbound, cutoff=5):
51 from chimerax.geometry import find_close_points
52 from chimerax.atomic import Residue, Residues
53 from collections import defaultdict
54 import numpy
55 m = model
56 chain_ids = m.residues.unique_chain_ids
57 other_residues = m.residues.subtract(unbound)
58 #polymeric = m.residues[m.residues.polymer_types!=Residue.PT_NONE]
59 ligand_atoms = unbound.atoms[unbound.atoms.element_names != 'H']
60 chain_map = {}
61 for cid in chain_ids:
62 cres = other_residues[other_residues.chain_ids==cid]
63 catoms = cres.atoms[cres.atoms.element_names!='H']
64 ci, li = find_close_points(catoms.coords, ligand_atoms.coords, cutoff)
65 close_ligand_atoms = ligand_atoms[li]
66 weights = numpy.ones(len(close_ligand_atoms), numpy.double)
67 weights[close_ligand_atoms.element_names == 'C'] = _carbon_weight
68 weights[close_ligand_atoms.elements.is_metal] = _metal_weight
69 chain_map[cid] = Weighted_Counter([a.residue for a in close_ligand_atoms], weights)
70 unclassified = []
71 closest_chain_map = defaultdict(list)
72 for r in unbound:
73 max_atoms = 0
74 closest = None
75 for cid in chain_ids:
76 close = chain_map[cid].get(r, None)
77 if close is not None:
78 if close > max_atoms:
79 closest = cid
80 max_atoms = close
81 if closest is not None:
82 closest_chain_map[closest].append(r)
83 else:
84 unclassified.append(r)
85 return {cid: Residues(residues) for cid, residues in closest_chain_map.items()}, Residues(unclassified)
86
87def create_merged_residue(residues, base_residue=None, new_name=None):
88 if base_residue is None:
89 base_residue = residues[0]
90 if base_residue not in residues:
91 raise TypeError('base_residue must be a member of residues!')
92 if new_name is None:
93 new_name = base_residue.name
94 from chimerax.atomic import Residues
95 from chimerax.atomic.struct_edit import add_atom, add_bond
96 residues = Residues(residues)
97 seen = set()
98 for r in residues:
99 for n in r.neighbors:
100 seen.add(n)
101 if not all([r in seen for r in residues]):
102 raise TypeError('All residues must be connected through covalent bonds!')
103 atoms = residues.atoms
104 other_residues = residues.subtract(Residues([base_residue]))
105 bonds = atoms.intra_bonds
106 external_bonds = []
107 for r in seen:
108 if r not in other_residues:
109 for n in r.neighbors:
110 if n in residues:
111 external_bonds.extend(r.bonds_between(n))
112 next_atom_number = highest_atom_number(base_residue.atoms)+1
113
114 atom_map = {}
115 other_atoms = other_residues.atoms
116 # Do heavy atoms first, so we can assign hydrogen names based on them
117 other_heavy_atoms = other_atoms[other_atoms.element_names!='H']
118 for a in other_heavy_atoms:
119 name = a.element.name + str(next_atom_number)
120 next_atom_number += 1
121 atom_map[a] = add_atom(name, a.element, base_residue, a.coord, occupancy=a.occupancy, bfactor=a.bfactor)
122 # Add the hydrogens
123 for a, na in atom_map.items():
124 base_number = na.name[1:]
125 i=1
126 for n in a.neighbors:
127 if n.element.name=='H':
128 name = 'H'+base_number+str(i)
129 i += 1
130 add_atom(name, n.element, base_residue, n.coord, bonded_to=na, occupancy=n.occupancy, bfactor=n.bfactor)
131 # Add all internal bonds
132 for a, na in atom_map.items():
133 for n in a.neighbors:
134 nn = atom_map.get(n, None)
135 if nn is not None and nn not in na.neighbors:
136 add_bond(na, nn)
137 # Now we just need to add the bonds between base_residue and the merged portion, and any
138 # other bonds between the merged portion and the wider model. To avoid valence errors we'll need
139 # to first delete the existing bonds.
140 for b in external_bonds:
141 atoms = b.atoms
142 if atoms[1] in atom_map.keys():
143 atoms = atoms[::-1]
144 na = atom_map[atoms[0]]
145 a = atoms[1]
146 b.delete()
147 add_bond(na, a)
148 other_residues.delete()
149 base_residue.name=new_name
150
151
152
153
154
155def highest_atom_number(atoms):
156 import re
157 atoms = atoms[atoms.element_names!='H']
158 highest=0
159 for a in atoms:
160 name = a.name
161 number = re.findall(r'\d+', name)
162 if len(number):
163 num = int(number[0])
164 if num > highest:
165 highest=num
166 return highest
167
168
169
170
171
172
173
174
175class Weighted_Counter(dict):
176 def __init__(self, elements, weights):
177 super().__init__(self)
178 for e, w in zip(elements, weights):
179 v = self.get(e, 0)
180 v += w
181 self[e] = v
182
183def next_available_ligand_number(m, chain_id):
184 from chimerax.atomic import Residue
185 cres = m.residues[m.residues.chain_ids==chain_id]
186 lres = cres[cres.polymer_types==Residue.PT_NONE]
187 if len(lres):
188 return max(lres.numbers)+1
189 pres = cres[cres.polymer_types!=Residue.PT_NONE]
190 if len(pres):
191 max_polymeric_residue_number = max(pres.numbers)
192 else:
193 return 0
194 first_ligand_number = round(max_polymeric_residue_number+1000,-3)
195 if first_ligand_number - max_polymeric_residue_number < 100:
196 first_ligand_number += 1000
197 return first_ligand_number
198
199known_sugars = ["NGC","NGA","RM4","FCB","GLC","GCS","GTR","GAL","BDR","RIB","BDF","BGC","BXX","XYZ","FUL","FUB","Z6H","PSV","A2G","LDY","RIP","SHD","NDG","ARB","SOE","SLB","BM3","LFR","Z6J","GIV","PA1","ABE","AHR","XXR","ARA","AFD","ADA","IDR","MAN","RAM","32O","NAG","GUP","T6T","G6D","FUC","Z9N","BMA","XYP","FCA","BDP","TYV","BM7","GCU","GXL","XYS","HSY","LXC","FRU","WOO","ALL","0MK","SIA","BXY","64K","GL0","GLA"]
200def assign_bound_ligand_chain_ids_and_residue_numbers(m, bound_map):
201 # The wwPDB steering committee has dictated that for protein-linked glycans,
202 # the following rules apply:
203 # - if the modelled portion of the glycan is a single residue, it should have
204 # the same chain ID as the protein.
205 # - if more than one residue, it should have a unique chain ID.
206 from chimerax.atomic import Residues, Residue, concatenate
207 import numpy
208 for cid, bound_residues in bound_map.items():
209 first_ligand_number = next_available_ligand_number(m, cid)
210 new_glycan_cid = []
211 assign_to_chain = []
212 groups = independent_groupings(bound_residues)
213 for g in groups:
214 if len(g)==1:
215 assign_to_chain.append(g)
216 elif any (r.name in known_sugars for r in g):
217 new_glycan_cid.append(g)
218 else:
219 assign_to_chain.append(g)
220 new_glycan_cid = list(sorted(new_glycan_cid, key=lambda g:linked_polymer_residue(g).number))
221 assign_to_chain = list(sorted(assign_to_chain, key=lambda g:linked_polymer_residue(g).number))
222 for i,g in enumerate(new_glycan_cid):
223 gid = cid+'glyc'+str(i)
224 residues = sort_glycan_residues(g)
225 residues.chain_ids=gid
226 m.renumber_residues(residues, 1)
227 if len(assign_to_chain):
228 assign_to_chain = concatenate([Residues(g) for g in assign_to_chain])
229 assign_to_chain.chain_ids = 'XXtmp'
230 m.renumber_residues(assign_to_chain, first_ligand_number)
231 assign_to_chain.chain_ids = cid
232
233
234def sort_glycan_residues(residues):
235 from chimerax.atomic import Residues
236 polymer_stem_res = linked_polymer_residue(residues)
237 for r in polymer_stem_res.neighbors:
238 if r in residues:
239 break
240 order = [r]
241 def _sort_walk(r, residues, order):
242 bonds = [r.bonds_between(n)[0] for n in r.neighbors if n in residues and n not in order]
243 ordered_bonds = []
244 for b in bonds:
245 atoms = b.atoms
246 if atoms[0].residue == r:
247 ordered_bonds.append(atoms)
248 else:
249 ordered_bonds.append(atoms[::-1])
250 ordered_bonds = list(sorted(ordered_bonds, key=lambda b:
251 (int(b[0].name[-1]))))
252 for b in ordered_bonds:
253 next_r = b[1].residue
254 order.append(next_r)
255 _sort_walk(next_r, residues, order)
256 _sort_walk(r, residues, order)
257 return Residues(order)
258
259
260
261
262def linked_polymer_residue(residue_group):
263 from chimerax.atomic import Residue
264 for r in residue_group:
265 for n in r.neighbors:
266 if n.polymer_type != Residue.PT_NONE:
267 return n
268 return None
269
270def independent_groupings(residues):
271 residues = list(residues)
272 groups = []
273 while len(residues) > 0:
274 r = residues[0]
275 connected = set([r])
276 walk_tree(r, residues, connected)
277 groups.append(connected)
278 for r in connected:
279 residues.remove(r)
280
281 return groups
282
283def walk_tree(r, residues, connected):
284 for n in r.neighbors:
285 if n in connected or n not in residues:
286 continue
287 connected.add(n)
288 walk_tree(n, residues, connected)
289
290def sort_bound_and_unbound_ligands(residues):
291 unbound = []
292 from collections import defaultdict
293 bound_map = defaultdict(set)
294 for r in residues:
295 if not len(r.neighbors):
296 unbound.append(r)
297 cid = bound_to_polymer(r)
298 if cid is not None:
299 bound_map[cid].add(r)
300 else:
301 unbound.append(r)
302 from chimerax.atomic import Residues
303 return (Residues(unbound), {cid: Residues(rset) for cid, rset in bound_map.items()})
304
305def bound_to_polymer(residue, seen=None):
306 from chimerax.atomic import Residue
307 if seen is None:
308 seen=set()
309 seen.add(residue)
310 for n in residue.neighbors:
311 if n in seen:
312 continue
313 if n.polymer_type!=Residue.PT_NONE:
314 return n.chain_id
315 seen.add(n)
316 return bound_to_polymer(n, seen)
317 return None
318
319