Context Navigation

Back to Ticket #4342

Ticket #4342: ligand_utils.py

File ligand_utils.py, 11.8 KB (added by Tristan Croll, 5 years ago)
Added by email2trac

Line
1	def recluster_ligands(session, model):
2	from chimerax.atomic import Residue, Residues, concatenate
3	m = model
4	polymer = m.residues[m.residues.polymer_types!=Residue.PT_NONE]
5	non_polymer = m.residues[m.residues.polymer_types==Residue.PT_NONE]
6	# Ligands bound to polymeric residues should keep their existing chain IDs
7	unbound, bound_map = sort_bound_and_unbound_ligands(non_polymer)
8	chain_map, unclassified = cluster_unbound_ligands(m, unbound)
9	non_polymer.chain_ids = 'Xtmp'
10	m.renumber_residues(non_polymer, 0)
11	assign_bound_ligand_chain_ids_and_residue_numbers(m, bound_map)
12	assign_unbound_residues(m, chain_map, unclassified)
13
14	def assign_unbound_residues(m, chain_map, unclassified):
15	from chimerax.atomic import Residue, concatenate
16	for cid, residues in chain_map.items():
17	first_ligand_number = next_available_ligand_number(m, cid)
18	residues.chain_ids = cid+'tmp'
19	residues = concatenate([residues[residues.names!='HOH'], residues[residues.names=='HOH']])
20	m.renumber_residues(residues, first_ligand_number)
21	residues.chain_ids=cid
22	if len(unclassified):
23	# Try reclassifying with a more permissive cutoff, now that all the other ligands
24	# are assigned to chains
25	chain_map, unclassified = cluster_unbound_ligands(m, unclassified, cutoff=8)
26	assign_unbound_residues(m, chain_map, [])
27	if len(unclassified):
28	session = m.session
29	warn_str = ('{} residues could not be automatically assigned to chains. '
30	'These have been given the chain ID UNK.').format(len(unclassified))
31	session.logger.warning(warn_str)
32	unclassified.chain_ids='UNK'
33	m.renumber_residues(unclassified, 1)
34
35	def find_duplicate_residues(m):
36	seen=set()
37	duplicates=[]
38	for r in reversed(m.residues):
39	sig = (r.chain_id, r.number)
40	if sig in seen:
41	duplicates.append(r)
42	seen.add(sig)
43	return duplicates
44
45
46
47	_metal_weight=5
48	_carbon_weight=0.5
49
50	def cluster_unbound_ligands(model, unbound, cutoff=5):
51	from chimerax.geometry import find_close_points
52	from chimerax.atomic import Residue, Residues
53	from collections import defaultdict
54	import numpy
55	m = model
56	chain_ids = m.residues.unique_chain_ids
57	other_residues = m.residues.subtract(unbound)
58	#polymeric = m.residues[m.residues.polymer_types!=Residue.PT_NONE]
59	ligand_atoms = unbound.atoms[unbound.atoms.element_names != 'H']
60	chain_map = {}
61	for cid in chain_ids:
62	cres = other_residues[other_residues.chain_ids==cid]
63	catoms = cres.atoms[cres.atoms.element_names!='H']
64	ci, li = find_close_points(catoms.coords, ligand_atoms.coords, cutoff)
65	close_ligand_atoms = ligand_atoms[li]
66	weights = numpy.ones(len(close_ligand_atoms), numpy.double)
67	weights[close_ligand_atoms.element_names == 'C'] = _carbon_weight
68	weights[close_ligand_atoms.elements.is_metal] = _metal_weight
69	chain_map[cid] = Weighted_Counter([a.residue for a in close_ligand_atoms], weights)
70	unclassified = []
71	closest_chain_map = defaultdict(list)
72	for r in unbound:
73	max_atoms = 0
74	closest = None
75	for cid in chain_ids:
76	close = chain_map[cid].get(r, None)
77	if close is not None:
78	if close > max_atoms:
79	closest = cid
80	max_atoms = close
81	if closest is not None:
82	closest_chain_map[closest].append(r)
83	else:
84	unclassified.append(r)
85	return {cid: Residues(residues) for cid, residues in closest_chain_map.items()}, Residues(unclassified)
86
87	def create_merged_residue(residues, base_residue=None, new_name=None):
88	if base_residue is None:
89	base_residue = residues[0]
90	if base_residue not in residues:
91	raise TypeError('base_residue must be a member of residues!')
92	if new_name is None:
93	new_name = base_residue.name
94	from chimerax.atomic import Residues
95	from chimerax.atomic.struct_edit import add_atom, add_bond
96	residues = Residues(residues)
97	seen = set()
98	for r in residues:
99	for n in r.neighbors:
100	seen.add(n)
101	if not all([r in seen for r in residues]):
102	raise TypeError('All residues must be connected through covalent bonds!')
103	atoms = residues.atoms
104	other_residues = residues.subtract(Residues([base_residue]))
105	bonds = atoms.intra_bonds
106	external_bonds = []
107	for r in seen:
108	if r not in other_residues:
109	for n in r.neighbors:
110	if n in residues:
111	external_bonds.extend(r.bonds_between(n))
112	next_atom_number = highest_atom_number(base_residue.atoms)+1
113
114	atom_map = {}
115	other_atoms = other_residues.atoms
116	# Do heavy atoms first, so we can assign hydrogen names based on them
117	other_heavy_atoms = other_atoms[other_atoms.element_names!='H']
118	for a in other_heavy_atoms:
119	name = a.element.name + str(next_atom_number)
120	next_atom_number += 1
121	atom_map[a] = add_atom(name, a.element, base_residue, a.coord, occupancy=a.occupancy, bfactor=a.bfactor)
122	# Add the hydrogens
123	for a, na in atom_map.items():
124	base_number = na.name[1:]
125	i=1
126	for n in a.neighbors:
127	if n.element.name=='H':
128	name = 'H'+base_number+str(i)
129	i += 1
130	add_atom(name, n.element, base_residue, n.coord, bonded_to=na, occupancy=n.occupancy, bfactor=n.bfactor)
131	# Add all internal bonds
132	for a, na in atom_map.items():
133	for n in a.neighbors:
134	nn = atom_map.get(n, None)
135	if nn is not None and nn not in na.neighbors:
136	add_bond(na, nn)
137	# Now we just need to add the bonds between base_residue and the merged portion, and any
138	# other bonds between the merged portion and the wider model. To avoid valence errors we'll need
139	# to first delete the existing bonds.
140	for b in external_bonds:
141	atoms = b.atoms
142	if atoms[1] in atom_map.keys():
143	atoms = atoms[::-1]
144	na = atom_map[atoms[0]]
145	a = atoms[1]
146	b.delete()
147	add_bond(na, a)
148	other_residues.delete()
149	base_residue.name=new_name
150
151
152
153
154
155	def highest_atom_number(atoms):
156	import re
157	atoms = atoms[atoms.element_names!='H']
158	highest=0
159	for a in atoms:
160	name = a.name
161	number = re.findall(r'\d+', name)
162	if len(number):
163	num = int(number[0])
164	if num > highest:
165	highest=num
166	return highest
167
168
169
170
171
172
173
174
175	class Weighted_Counter(dict):
176	def __init__(self, elements, weights):
177	super().__init__(self)
178	for e, w in zip(elements, weights):
179	v = self.get(e, 0)
180	v += w
181	self[e] = v
182
183	def next_available_ligand_number(m, chain_id):
184	from chimerax.atomic import Residue
185	cres = m.residues[m.residues.chain_ids==chain_id]
186	lres = cres[cres.polymer_types==Residue.PT_NONE]
187	if len(lres):
188	return max(lres.numbers)+1
189	pres = cres[cres.polymer_types!=Residue.PT_NONE]
190	if len(pres):
191	max_polymeric_residue_number = max(pres.numbers)
192	else:
193	return 0
194	first_ligand_number = round(max_polymeric_residue_number+1000,-3)
195	if first_ligand_number - max_polymeric_residue_number < 100:
196	first_ligand_number += 1000
197	return first_ligand_number
198
199	known_sugars = ["NGC","NGA","RM4","FCB","GLC","GCS","GTR","GAL","BDR","RIB","BDF","BGC","BXX","XYZ","FUL","FUB","Z6H","PSV","A2G","LDY","RIP","SHD","NDG","ARB","SOE","SLB","BM3","LFR","Z6J","GIV","PA1","ABE","AHR","XXR","ARA","AFD","ADA","IDR","MAN","RAM","32O","NAG","GUP","T6T","G6D","FUC","Z9N","BMA","XYP","FCA","BDP","TYV","BM7","GCU","GXL","XYS","HSY","LXC","FRU","WOO","ALL","0MK","SIA","BXY","64K","GL0","GLA"]
200	def assign_bound_ligand_chain_ids_and_residue_numbers(m, bound_map):
201	# The wwPDB steering committee has dictated that for protein-linked glycans,
202	# the following rules apply:
203	# - if the modelled portion of the glycan is a single residue, it should have
204	# the same chain ID as the protein.
205	# - if more than one residue, it should have a unique chain ID.
206	from chimerax.atomic import Residues, Residue, concatenate
207	import numpy
208	for cid, bound_residues in bound_map.items():
209	first_ligand_number = next_available_ligand_number(m, cid)
210	new_glycan_cid = []
211	assign_to_chain = []
212	groups = independent_groupings(bound_residues)
213	for g in groups:
214	if len(g)==1:
215	assign_to_chain.append(g)
216	elif any (r.name in known_sugars for r in g):
217	new_glycan_cid.append(g)
218	else:
219	assign_to_chain.append(g)
220	new_glycan_cid = list(sorted(new_glycan_cid, key=lambda g:linked_polymer_residue(g).number))
221	assign_to_chain = list(sorted(assign_to_chain, key=lambda g:linked_polymer_residue(g).number))
222	for i,g in enumerate(new_glycan_cid):
223	gid = cid+'glyc'+str(i)
224	residues = sort_glycan_residues(g)
225	residues.chain_ids=gid
226	m.renumber_residues(residues, 1)
227	if len(assign_to_chain):
228	assign_to_chain = concatenate([Residues(g) for g in assign_to_chain])
229	assign_to_chain.chain_ids = 'XXtmp'
230	m.renumber_residues(assign_to_chain, first_ligand_number)
231	assign_to_chain.chain_ids = cid
232
233
234	def sort_glycan_residues(residues):
235	from chimerax.atomic import Residues
236	polymer_stem_res = linked_polymer_residue(residues)
237	for r in polymer_stem_res.neighbors:
238	if r in residues:
239	break
240	order = [r]
241	def _sort_walk(r, residues, order):
242	bonds = [r.bonds_between(n)[0] for n in r.neighbors if n in residues and n not in order]
243	ordered_bonds = []
244	for b in bonds:
245	atoms = b.atoms
246	if atoms[0].residue == r:
247	ordered_bonds.append(atoms)
248	else:
249	ordered_bonds.append(atoms[::-1])
250	ordered_bonds = list(sorted(ordered_bonds, key=lambda b:
251	(int(b[0].name[-1]))))
252	for b in ordered_bonds:
253	next_r = b[1].residue
254	order.append(next_r)
255	_sort_walk(next_r, residues, order)
256	_sort_walk(r, residues, order)
257	return Residues(order)
258
259
260
261
262	def linked_polymer_residue(residue_group):
263	from chimerax.atomic import Residue
264	for r in residue_group:
265	for n in r.neighbors:
266	if n.polymer_type != Residue.PT_NONE:
267	return n
268	return None
269
270	def independent_groupings(residues):
271	residues = list(residues)
272	groups = []
273	while len(residues) > 0:
274	r = residues[0]
275	connected = set([r])
276	walk_tree(r, residues, connected)
277	groups.append(connected)
278	for r in connected:
279	residues.remove(r)
280
281	return groups
282
283	def walk_tree(r, residues, connected):
284	for n in r.neighbors:
285	if n in connected or n not in residues:
286	continue
287	connected.add(n)
288	walk_tree(n, residues, connected)
289
290	def sort_bound_and_unbound_ligands(residues):
291	unbound = []
292	from collections import defaultdict
293	bound_map = defaultdict(set)
294	for r in residues:
295	if not len(r.neighbors):
296	unbound.append(r)
297	cid = bound_to_polymer(r)
298	if cid is not None:
299	bound_map[cid].add(r)
300	else:
301	unbound.append(r)
302	from chimerax.atomic import Residues
303	return (Residues(unbound), {cid: Residues(rset) for cid, rset in bound_map.items()})
304
305	def bound_to_polymer(residue, seen=None):
306	from chimerax.atomic import Residue
307	if seen is None:
308	seen=set()
309	seen.add(residue)
310	for n in residue.neighbors:
311	if n in seen:
312	continue
313	if n.polymer_type!=Residue.PT_NONE:
314	return n.chain_id
315	seen.add(n)
316	return bound_to_polymer(n, seen)
317	return None
318
319

Download in other formats:

Original Format