| 1 | def recluster_ligands(session, model):
|
|---|
| 2 | from chimerax.atomic import Residue, Residues, concatenate
|
|---|
| 3 | m = model
|
|---|
| 4 | polymer = m.residues[m.residues.polymer_types!=Residue.PT_NONE]
|
|---|
| 5 | non_polymer = m.residues[m.residues.polymer_types==Residue.PT_NONE]
|
|---|
| 6 | # Ligands bound to polymeric residues should keep their existing chain IDs
|
|---|
| 7 | unbound, bound_map = sort_bound_and_unbound_ligands(non_polymer)
|
|---|
| 8 | chain_map, unclassified = cluster_unbound_ligands(m, unbound)
|
|---|
| 9 | non_polymer.chain_ids = 'Xtmp'
|
|---|
| 10 | m.renumber_residues(non_polymer, 0)
|
|---|
| 11 | assign_bound_ligand_chain_ids_and_residue_numbers(m, bound_map)
|
|---|
| 12 | assign_unbound_residues(m, chain_map, unclassified)
|
|---|
| 13 |
|
|---|
| 14 | def assign_unbound_residues(m, chain_map, unclassified):
|
|---|
| 15 | from chimerax.atomic import Residue, concatenate
|
|---|
| 16 | for cid, residues in chain_map.items():
|
|---|
| 17 | first_ligand_number = next_available_ligand_number(m, cid)
|
|---|
| 18 | residues.chain_ids = cid+'tmp'
|
|---|
| 19 | residues = concatenate([residues[residues.names!='HOH'], residues[residues.names=='HOH']])
|
|---|
| 20 | m.renumber_residues(residues, first_ligand_number)
|
|---|
| 21 | residues.chain_ids=cid
|
|---|
| 22 | if len(unclassified):
|
|---|
| 23 | # Try reclassifying with a more permissive cutoff, now that all the other ligands
|
|---|
| 24 | # are assigned to chains
|
|---|
| 25 | chain_map, unclassified = cluster_unbound_ligands(m, unclassified, cutoff=8)
|
|---|
| 26 | assign_unbound_residues(m, chain_map, [])
|
|---|
| 27 | if len(unclassified):
|
|---|
| 28 | session = m.session
|
|---|
| 29 | warn_str = ('{} residues could not be automatically assigned to chains. '
|
|---|
| 30 | 'These have been given the chain ID UNK.').format(len(unclassified))
|
|---|
| 31 | session.logger.warning(warn_str)
|
|---|
| 32 | unclassified.chain_ids='UNK'
|
|---|
| 33 | m.renumber_residues(unclassified, 1)
|
|---|
| 34 |
|
|---|
| 35 | def find_duplicate_residues(m):
|
|---|
| 36 | seen=set()
|
|---|
| 37 | duplicates=[]
|
|---|
| 38 | for r in reversed(m.residues):
|
|---|
| 39 | sig = (r.chain_id, r.number)
|
|---|
| 40 | if sig in seen:
|
|---|
| 41 | duplicates.append(r)
|
|---|
| 42 | seen.add(sig)
|
|---|
| 43 | return duplicates
|
|---|
| 44 |
|
|---|
| 45 |
|
|---|
| 46 |
|
|---|
| 47 | _metal_weight=5
|
|---|
| 48 | _carbon_weight=0.5
|
|---|
| 49 |
|
|---|
| 50 | def cluster_unbound_ligands(model, unbound, cutoff=5):
|
|---|
| 51 | from chimerax.geometry import find_close_points
|
|---|
| 52 | from chimerax.atomic import Residue, Residues
|
|---|
| 53 | from collections import defaultdict
|
|---|
| 54 | import numpy
|
|---|
| 55 | m = model
|
|---|
| 56 | chain_ids = m.residues.unique_chain_ids
|
|---|
| 57 | other_residues = m.residues.subtract(unbound)
|
|---|
| 58 | #polymeric = m.residues[m.residues.polymer_types!=Residue.PT_NONE]
|
|---|
| 59 | ligand_atoms = unbound.atoms[unbound.atoms.element_names != 'H']
|
|---|
| 60 | chain_map = {}
|
|---|
| 61 | for cid in chain_ids:
|
|---|
| 62 | cres = other_residues[other_residues.chain_ids==cid]
|
|---|
| 63 | catoms = cres.atoms[cres.atoms.element_names!='H']
|
|---|
| 64 | ci, li = find_close_points(catoms.coords, ligand_atoms.coords, cutoff)
|
|---|
| 65 | close_ligand_atoms = ligand_atoms[li]
|
|---|
| 66 | weights = numpy.ones(len(close_ligand_atoms), numpy.double)
|
|---|
| 67 | weights[close_ligand_atoms.element_names == 'C'] = _carbon_weight
|
|---|
| 68 | weights[close_ligand_atoms.elements.is_metal] = _metal_weight
|
|---|
| 69 | chain_map[cid] = Weighted_Counter([a.residue for a in close_ligand_atoms], weights)
|
|---|
| 70 | unclassified = []
|
|---|
| 71 | closest_chain_map = defaultdict(list)
|
|---|
| 72 | for r in unbound:
|
|---|
| 73 | max_atoms = 0
|
|---|
| 74 | closest = None
|
|---|
| 75 | for cid in chain_ids:
|
|---|
| 76 | close = chain_map[cid].get(r, None)
|
|---|
| 77 | if close is not None:
|
|---|
| 78 | if close > max_atoms:
|
|---|
| 79 | closest = cid
|
|---|
| 80 | max_atoms = close
|
|---|
| 81 | if closest is not None:
|
|---|
| 82 | closest_chain_map[closest].append(r)
|
|---|
| 83 | else:
|
|---|
| 84 | unclassified.append(r)
|
|---|
| 85 | return {cid: Residues(residues) for cid, residues in closest_chain_map.items()}, Residues(unclassified)
|
|---|
| 86 |
|
|---|
| 87 | def create_merged_residue(residues, base_residue=None, new_name=None):
|
|---|
| 88 | if base_residue is None:
|
|---|
| 89 | base_residue = residues[0]
|
|---|
| 90 | if base_residue not in residues:
|
|---|
| 91 | raise TypeError('base_residue must be a member of residues!')
|
|---|
| 92 | if new_name is None:
|
|---|
| 93 | new_name = base_residue.name
|
|---|
| 94 | from chimerax.atomic import Residues
|
|---|
| 95 | from chimerax.atomic.struct_edit import add_atom, add_bond
|
|---|
| 96 | residues = Residues(residues)
|
|---|
| 97 | seen = set()
|
|---|
| 98 | for r in residues:
|
|---|
| 99 | for n in r.neighbors:
|
|---|
| 100 | seen.add(n)
|
|---|
| 101 | if not all([r in seen for r in residues]):
|
|---|
| 102 | raise TypeError('All residues must be connected through covalent bonds!')
|
|---|
| 103 | atoms = residues.atoms
|
|---|
| 104 | other_residues = residues.subtract(Residues([base_residue]))
|
|---|
| 105 | bonds = atoms.intra_bonds
|
|---|
| 106 | external_bonds = []
|
|---|
| 107 | for r in seen:
|
|---|
| 108 | if r not in other_residues:
|
|---|
| 109 | for n in r.neighbors:
|
|---|
| 110 | if n in residues:
|
|---|
| 111 | external_bonds.extend(r.bonds_between(n))
|
|---|
| 112 | next_atom_number = highest_atom_number(base_residue.atoms)+1
|
|---|
| 113 |
|
|---|
| 114 | atom_map = {}
|
|---|
| 115 | other_atoms = other_residues.atoms
|
|---|
| 116 | # Do heavy atoms first, so we can assign hydrogen names based on them
|
|---|
| 117 | other_heavy_atoms = other_atoms[other_atoms.element_names!='H']
|
|---|
| 118 | for a in other_heavy_atoms:
|
|---|
| 119 | name = a.element.name + str(next_atom_number)
|
|---|
| 120 | next_atom_number += 1
|
|---|
| 121 | atom_map[a] = add_atom(name, a.element, base_residue, a.coord, occupancy=a.occupancy, bfactor=a.bfactor)
|
|---|
| 122 | # Add the hydrogens
|
|---|
| 123 | for a, na in atom_map.items():
|
|---|
| 124 | base_number = na.name[1:]
|
|---|
| 125 | i=1
|
|---|
| 126 | for n in a.neighbors:
|
|---|
| 127 | if n.element.name=='H':
|
|---|
| 128 | name = 'H'+base_number+str(i)
|
|---|
| 129 | i += 1
|
|---|
| 130 | add_atom(name, n.element, base_residue, n.coord, bonded_to=na, occupancy=n.occupancy, bfactor=n.bfactor)
|
|---|
| 131 | # Add all internal bonds
|
|---|
| 132 | for a, na in atom_map.items():
|
|---|
| 133 | for n in a.neighbors:
|
|---|
| 134 | nn = atom_map.get(n, None)
|
|---|
| 135 | if nn is not None and nn not in na.neighbors:
|
|---|
| 136 | add_bond(na, nn)
|
|---|
| 137 | # Now we just need to add the bonds between base_residue and the merged portion, and any
|
|---|
| 138 | # other bonds between the merged portion and the wider model. To avoid valence errors we'll need
|
|---|
| 139 | # to first delete the existing bonds.
|
|---|
| 140 | for b in external_bonds:
|
|---|
| 141 | atoms = b.atoms
|
|---|
| 142 | if atoms[1] in atom_map.keys():
|
|---|
| 143 | atoms = atoms[::-1]
|
|---|
| 144 | na = atom_map[atoms[0]]
|
|---|
| 145 | a = atoms[1]
|
|---|
| 146 | b.delete()
|
|---|
| 147 | add_bond(na, a)
|
|---|
| 148 | other_residues.delete()
|
|---|
| 149 | base_residue.name=new_name
|
|---|
| 150 |
|
|---|
| 151 |
|
|---|
| 152 |
|
|---|
| 153 |
|
|---|
| 154 |
|
|---|
| 155 | def highest_atom_number(atoms):
|
|---|
| 156 | import re
|
|---|
| 157 | atoms = atoms[atoms.element_names!='H']
|
|---|
| 158 | highest=0
|
|---|
| 159 | for a in atoms:
|
|---|
| 160 | name = a.name
|
|---|
| 161 | number = re.findall(r'\d+', name)
|
|---|
| 162 | if len(number):
|
|---|
| 163 | num = int(number[0])
|
|---|
| 164 | if num > highest:
|
|---|
| 165 | highest=num
|
|---|
| 166 | return highest
|
|---|
| 167 |
|
|---|
| 168 |
|
|---|
| 169 |
|
|---|
| 170 |
|
|---|
| 171 |
|
|---|
| 172 |
|
|---|
| 173 |
|
|---|
| 174 |
|
|---|
| 175 | class Weighted_Counter(dict):
|
|---|
| 176 | def __init__(self, elements, weights):
|
|---|
| 177 | super().__init__(self)
|
|---|
| 178 | for e, w in zip(elements, weights):
|
|---|
| 179 | v = self.get(e, 0)
|
|---|
| 180 | v += w
|
|---|
| 181 | self[e] = v
|
|---|
| 182 |
|
|---|
| 183 | def next_available_ligand_number(m, chain_id):
|
|---|
| 184 | from chimerax.atomic import Residue
|
|---|
| 185 | cres = m.residues[m.residues.chain_ids==chain_id]
|
|---|
| 186 | lres = cres[cres.polymer_types==Residue.PT_NONE]
|
|---|
| 187 | if len(lres):
|
|---|
| 188 | return max(lres.numbers)+1
|
|---|
| 189 | pres = cres[cres.polymer_types!=Residue.PT_NONE]
|
|---|
| 190 | if len(pres):
|
|---|
| 191 | max_polymeric_residue_number = max(pres.numbers)
|
|---|
| 192 | else:
|
|---|
| 193 | return 0
|
|---|
| 194 | first_ligand_number = round(max_polymeric_residue_number+1000,-3)
|
|---|
| 195 | if first_ligand_number - max_polymeric_residue_number < 100:
|
|---|
| 196 | first_ligand_number += 1000
|
|---|
| 197 | return first_ligand_number
|
|---|
| 198 |
|
|---|
| 199 | known_sugars = ["NGC","NGA","RM4","FCB","GLC","GCS","GTR","GAL","BDR","RIB","BDF","BGC","BXX","XYZ","FUL","FUB","Z6H","PSV","A2G","LDY","RIP","SHD","NDG","ARB","SOE","SLB","BM3","LFR","Z6J","GIV","PA1","ABE","AHR","XXR","ARA","AFD","ADA","IDR","MAN","RAM","32O","NAG","GUP","T6T","G6D","FUC","Z9N","BMA","XYP","FCA","BDP","TYV","BM7","GCU","GXL","XYS","HSY","LXC","FRU","WOO","ALL","0MK","SIA","BXY","64K","GL0","GLA"]
|
|---|
| 200 | def assign_bound_ligand_chain_ids_and_residue_numbers(m, bound_map):
|
|---|
| 201 | # The wwPDB steering committee has dictated that for protein-linked glycans,
|
|---|
| 202 | # the following rules apply:
|
|---|
| 203 | # - if the modelled portion of the glycan is a single residue, it should have
|
|---|
| 204 | # the same chain ID as the protein.
|
|---|
| 205 | # - if more than one residue, it should have a unique chain ID.
|
|---|
| 206 | from chimerax.atomic import Residues, Residue, concatenate
|
|---|
| 207 | import numpy
|
|---|
| 208 | for cid, bound_residues in bound_map.items():
|
|---|
| 209 | first_ligand_number = next_available_ligand_number(m, cid)
|
|---|
| 210 | new_glycan_cid = []
|
|---|
| 211 | assign_to_chain = []
|
|---|
| 212 | groups = independent_groupings(bound_residues)
|
|---|
| 213 | for g in groups:
|
|---|
| 214 | if len(g)==1:
|
|---|
| 215 | assign_to_chain.append(g)
|
|---|
| 216 | elif any (r.name in known_sugars for r in g):
|
|---|
| 217 | new_glycan_cid.append(g)
|
|---|
| 218 | else:
|
|---|
| 219 | assign_to_chain.append(g)
|
|---|
| 220 | new_glycan_cid = list(sorted(new_glycan_cid, key=lambda g:linked_polymer_residue(g).number))
|
|---|
| 221 | assign_to_chain = list(sorted(assign_to_chain, key=lambda g:linked_polymer_residue(g).number))
|
|---|
| 222 | for i,g in enumerate(new_glycan_cid):
|
|---|
| 223 | gid = cid+'glyc'+str(i)
|
|---|
| 224 | residues = sort_glycan_residues(g)
|
|---|
| 225 | residues.chain_ids=gid
|
|---|
| 226 | m.renumber_residues(residues, 1)
|
|---|
| 227 | if len(assign_to_chain):
|
|---|
| 228 | assign_to_chain = concatenate([Residues(g) for g in assign_to_chain])
|
|---|
| 229 | assign_to_chain.chain_ids = 'XXtmp'
|
|---|
| 230 | m.renumber_residues(assign_to_chain, first_ligand_number)
|
|---|
| 231 | assign_to_chain.chain_ids = cid
|
|---|
| 232 |
|
|---|
| 233 |
|
|---|
| 234 | def sort_glycan_residues(residues):
|
|---|
| 235 | from chimerax.atomic import Residues
|
|---|
| 236 | polymer_stem_res = linked_polymer_residue(residues)
|
|---|
| 237 | for r in polymer_stem_res.neighbors:
|
|---|
| 238 | if r in residues:
|
|---|
| 239 | break
|
|---|
| 240 | order = [r]
|
|---|
| 241 | def _sort_walk(r, residues, order):
|
|---|
| 242 | bonds = [r.bonds_between(n)[0] for n in r.neighbors if n in residues and n not in order]
|
|---|
| 243 | ordered_bonds = []
|
|---|
| 244 | for b in bonds:
|
|---|
| 245 | atoms = b.atoms
|
|---|
| 246 | if atoms[0].residue == r:
|
|---|
| 247 | ordered_bonds.append(atoms)
|
|---|
| 248 | else:
|
|---|
| 249 | ordered_bonds.append(atoms[::-1])
|
|---|
| 250 | ordered_bonds = list(sorted(ordered_bonds, key=lambda b:
|
|---|
| 251 | (int(b[0].name[-1]))))
|
|---|
| 252 | for b in ordered_bonds:
|
|---|
| 253 | next_r = b[1].residue
|
|---|
| 254 | order.append(next_r)
|
|---|
| 255 | _sort_walk(next_r, residues, order)
|
|---|
| 256 | _sort_walk(r, residues, order)
|
|---|
| 257 | return Residues(order)
|
|---|
| 258 |
|
|---|
| 259 |
|
|---|
| 260 |
|
|---|
| 261 |
|
|---|
| 262 | def linked_polymer_residue(residue_group):
|
|---|
| 263 | from chimerax.atomic import Residue
|
|---|
| 264 | for r in residue_group:
|
|---|
| 265 | for n in r.neighbors:
|
|---|
| 266 | if n.polymer_type != Residue.PT_NONE:
|
|---|
| 267 | return n
|
|---|
| 268 | return None
|
|---|
| 269 |
|
|---|
| 270 | def independent_groupings(residues):
|
|---|
| 271 | residues = list(residues)
|
|---|
| 272 | groups = []
|
|---|
| 273 | while len(residues) > 0:
|
|---|
| 274 | r = residues[0]
|
|---|
| 275 | connected = set([r])
|
|---|
| 276 | walk_tree(r, residues, connected)
|
|---|
| 277 | groups.append(connected)
|
|---|
| 278 | for r in connected:
|
|---|
| 279 | residues.remove(r)
|
|---|
| 280 |
|
|---|
| 281 | return groups
|
|---|
| 282 |
|
|---|
| 283 | def walk_tree(r, residues, connected):
|
|---|
| 284 | for n in r.neighbors:
|
|---|
| 285 | if n in connected or n not in residues:
|
|---|
| 286 | continue
|
|---|
| 287 | connected.add(n)
|
|---|
| 288 | walk_tree(n, residues, connected)
|
|---|
| 289 |
|
|---|
| 290 | def sort_bound_and_unbound_ligands(residues):
|
|---|
| 291 | unbound = []
|
|---|
| 292 | from collections import defaultdict
|
|---|
| 293 | bound_map = defaultdict(set)
|
|---|
| 294 | for r in residues:
|
|---|
| 295 | if not len(r.neighbors):
|
|---|
| 296 | unbound.append(r)
|
|---|
| 297 | cid = bound_to_polymer(r)
|
|---|
| 298 | if cid is not None:
|
|---|
| 299 | bound_map[cid].add(r)
|
|---|
| 300 | else:
|
|---|
| 301 | unbound.append(r)
|
|---|
| 302 | from chimerax.atomic import Residues
|
|---|
| 303 | return (Residues(unbound), {cid: Residues(rset) for cid, rset in bound_map.items()})
|
|---|
| 304 |
|
|---|
| 305 | def bound_to_polymer(residue, seen=None):
|
|---|
| 306 | from chimerax.atomic import Residue
|
|---|
| 307 | if seen is None:
|
|---|
| 308 | seen=set()
|
|---|
| 309 | seen.add(residue)
|
|---|
| 310 | for n in residue.neighbors:
|
|---|
| 311 | if n in seen:
|
|---|
| 312 | continue
|
|---|
| 313 | if n.polymer_type!=Residue.PT_NONE:
|
|---|
| 314 | return n.chain_id
|
|---|
| 315 | seen.add(n)
|
|---|
| 316 | return bound_to_polymer(n, seen)
|
|---|
| 317 | return None
|
|---|
| 318 |
|
|---|
| 319 |
|
|---|