| 1 |
|
|---|
| 2 | def merge_fragment(target_model, residues, chain_id=None, renumber_from=None,
|
|---|
| 3 | anchor_n=None, anchor_c=None, transform=None):
|
|---|
| 4 | '''
|
|---|
| 5 | Copy the atoms from a fragment into the current model, optionally reassigning
|
|---|
| 6 | chain ID and numbers. If alternate conformers are present, only the active
|
|---|
| 7 | one will be copied.
|
|---|
| 8 |
|
|---|
| 9 | * Args:
|
|---|
| 10 |
|
|---|
| 11 | - target_model: the model to copy into
|
|---|
| 12 | - residues: the residues to be copied. Isotropic B-factors will be
|
|---|
| 13 | copied, but aniso_u records will be ignored.
|
|---|
| 14 | - chain_id: if provided, all residues must be from one chain. The copied
|
|---|
| 15 | residues will be given this chain ID.
|
|---|
| 16 | - renumber_from: if provided, all residues will be renumbered with an
|
|---|
| 17 | offset of (lowest residue number - renumber_from)
|
|---|
| 18 | - anchor_n: an amino acid residue or None. If provided, the first amino
|
|---|
| 19 | acid residue in the fragment will be linked to this one. Throws an
|
|---|
| 20 | error if anchor_n has another residue linked at its C-terminus.
|
|---|
| 21 | - anchor_c: an amino acid residue or None. If provided, the last amino
|
|---|
| 22 | acid residue in the fragment will be linked to this one. Throws an
|
|---|
| 23 | error if anchor_c has another residue linked at its C-terminus.
|
|---|
| 24 | - transform: a Place or None. If provided, the atoms will be placed at
|
|---|
| 25 | the transformed coordinates.
|
|---|
| 26 | '''
|
|---|
| 27 | from chimerax.core.errors import UserError
|
|---|
| 28 | us = residues.unique_structures
|
|---|
| 29 | if len(us) != 1:
|
|---|
| 30 | raise UserError('All residues to be copied must be from the same model!')
|
|---|
| 31 | if (chain_id or anchor_n or anchor_c or (renumber_from is not None)) \
|
|---|
| 32 | and len(residues.unique_chain_ids) != 1:
|
|---|
| 33 | raise UserError('If reassigning chain ID, renumbering or specifying '
|
|---|
| 34 | 'N- and/or C-terminal anchors, all residues to be copied must be '
|
|---|
| 35 | 'from a single chain!')
|
|---|
| 36 | from chimerax.atomic import Residue, Residues, Atoms
|
|---|
| 37 | if (anchor_n or anchor_c):
|
|---|
| 38 | protein_residues = residues[residues.polymer_types==Residue.PT_AMINO]
|
|---|
| 39 | if not len(protein_residues):
|
|---|
| 40 | raise UserError('N- and/or C-terminal anchors were specified, but '
|
|---|
| 41 | 'the copied selection does not contain any amino acid residues!')
|
|---|
| 42 |
|
|---|
| 43 | import numpy
|
|---|
| 44 | fm = us[0]
|
|---|
| 45 | m = target_model
|
|---|
| 46 | tpbg = m.pseudobond_group('missing structure')
|
|---|
| 47 | residues = Residues(sorted(residues, key=lambda r: (r.chain_id, r.number, r.insertion_code)))
|
|---|
| 48 | atoms = residues.atoms
|
|---|
| 49 | coords = atoms.coords
|
|---|
| 50 | atom_map = {}
|
|---|
| 51 | if renumber_from:
|
|---|
| 52 | offset = residues[0].number - renumber_from
|
|---|
| 53 | else:
|
|---|
| 54 | offset = 0
|
|---|
| 55 |
|
|---|
| 56 | def new_residue_number(r):
|
|---|
| 57 | if r in residues:
|
|---|
| 58 | return r.number-offset
|
|---|
| 59 | return r.number
|
|---|
| 60 | def new_chain_id(r):
|
|---|
| 61 | if r in residues and chain_id:
|
|---|
| 62 | return chain_id
|
|---|
| 63 | return r.chain_id
|
|---|
| 64 | merged_residues = m.residues.merge(residues)
|
|---|
| 65 | merged_residues = Residues(sorted(merged_residues,
|
|---|
| 66 | key=lambda r: (new_chain_id(r), new_residue_number(r), r.insertion_code)
|
|---|
| 67 | ))
|
|---|
| 68 | new_residue_mask = numpy.in1d(merged_residues, residues)
|
|---|
| 69 | new_residue_indices = numpy.argwhere(new_residue_mask).ravel()
|
|---|
| 70 | existing_residue_mask = numpy.in1d(merged_residues, m.residues)
|
|---|
| 71 | existing_residue_indices = numpy.argwhere(existing_residue_mask).ravel()
|
|---|
| 72 |
|
|---|
| 73 | insertion_point_map = {}
|
|---|
| 74 |
|
|---|
| 75 | if chain_id is not None:
|
|---|
| 76 | cids = [chain_id]
|
|---|
| 77 | else:
|
|---|
| 78 | cids = residues.unique_chain_ids
|
|---|
| 79 | for cid in cids:
|
|---|
| 80 | existing_residues = m.residues[m.residues.chain_ids==cid]
|
|---|
| 81 | if not len(existing_residues):
|
|---|
| 82 | insertion_point_map[cid] = None
|
|---|
| 83 | continue
|
|---|
| 84 | existing_residue_numbers = numpy.array([str(r.number)+r.insertion_code for r in existing_residues])
|
|---|
| 85 | cres = residues[residues.chain_ids==cid]
|
|---|
| 86 | new_residue_numbers = numpy.array([str(r.number-offset)+r.insertion_code for r in cres])
|
|---|
| 87 |
|
|---|
| 88 | duplicate_flags = numpy.in1d(new_residue_numbers, existing_residue_numbers)
|
|---|
| 89 | if numpy.any(duplicate_flags):
|
|---|
| 90 | dup_residues = cres[duplicate_flags]
|
|---|
| 91 | err_str = ('The requested merge could not be completed because the '
|
|---|
| 92 | 'following residues in chain {} (after applying any renumbering) '
|
|---|
| 93 | 'will have the same residue numbers as existing residues in '
|
|---|
| 94 | 'the target: {}'
|
|---|
| 95 | ).format(cid, ', '.join(str(r.number)+r.insertion_code for r in dup_residues))
|
|---|
| 96 | raise UserError(err_str)
|
|---|
| 97 |
|
|---|
| 98 | chain_mask = (numpy.array([new_chain_id(r) for r in merged_residues]) == cid)
|
|---|
| 99 | new_r_in_chain_mask = numpy.logical_and(chain_mask, new_residue_mask)
|
|---|
| 100 | insertion_point_map[cid] = None
|
|---|
| 101 | if numpy.any(new_r_in_chain_mask):
|
|---|
| 102 | last_new_r_index = numpy.argwhere(new_r_in_chain_mask)[-1]
|
|---|
| 103 | greater_indices = existing_residue_indices[existing_residue_indices > last_new_r_index]
|
|---|
| 104 | if len(greater_indices):
|
|---|
| 105 | insertion_point_map[cid] = merged_residues[greater_indices[0]]
|
|---|
| 106 |
|
|---|
| 107 |
|
|---|
| 108 | current_residue = None
|
|---|
| 109 |
|
|---|
| 110 | first_index = new_residue_indices[0]
|
|---|
| 111 | if first_index > 0:
|
|---|
| 112 | prev_res = merged_residues[first_index-1]
|
|---|
| 113 | else:
|
|---|
| 114 | prev_res = None
|
|---|
| 115 | prev_new_res = None
|
|---|
| 116 |
|
|---|
| 117 | for merged_index, r in zip(new_residue_indices, residues):
|
|---|
| 118 | if chain_id:
|
|---|
| 119 | cid = chain_id
|
|---|
| 120 | else:
|
|---|
| 121 | cid = r.chain_id
|
|---|
| 122 | precedes = insertion_point_map[cid]
|
|---|
| 123 | insertion_code = r.insertion_code
|
|---|
| 124 | if insertion_code=='':
|
|---|
| 125 | insertion_code = ' '
|
|---|
| 126 | nr = m.new_residue(r.name, cid, r.number-offset, insert=insertion_code,
|
|---|
| 127 | precedes=precedes)
|
|---|
| 128 | nr.ribbon_hide_backbone = r.ribbon_hide_backbone
|
|---|
| 129 | nr.ribbon_display = r.ribbon_display
|
|---|
| 130 | nr.ribbon_color = r.ribbon_color
|
|---|
| 131 | nr.ss_type = r.ss_type
|
|---|
| 132 |
|
|---|
| 133 | for a in r.atoms:
|
|---|
| 134 | na = atom_map[a] = m.new_atom(a.name, a.element)
|
|---|
| 135 | na.coord = a.coord
|
|---|
| 136 | na.bfactor = a.bfactor
|
|---|
| 137 | na.aniso_u6 = a.aniso_u6
|
|---|
| 138 | na.occupancy = a.occupancy
|
|---|
| 139 | na.draw_mode = a.draw_mode
|
|---|
| 140 | na.color = a.color
|
|---|
| 141 | na.display = a.display
|
|---|
| 142 | nr.add_atom(na)
|
|---|
| 143 | for n in a.neighbors:
|
|---|
| 144 | nn = atom_map.get(n, None)
|
|---|
| 145 | if nn is not None:
|
|---|
| 146 | m.new_bond(na, nn)
|
|---|
| 147 |
|
|---|
| 148 |
|
|---|
| 149 | if prev_res is not None:
|
|---|
| 150 | if (
|
|---|
| 151 | r.polymer_type==Residue.PT_AMINO and prev_res not in nr.neighbors
|
|---|
| 152 | and prev_res.chain_id == cid
|
|---|
| 153 | and prev_res.polymer_type == Residue.PT_AMINO):
|
|---|
| 154 | if precedes is not None:
|
|---|
| 155 | if precedes.chain_id == cid and precedes.polymer_type == Residue.PT_AMINO:
|
|---|
| 156 | ratoms = prev_res.atoms.merge(precedes.atoms)
|
|---|
| 157 | tpbg.pseudobonds[tpbg.pseudobonds.between_atoms(ratoms)].delete()
|
|---|
| 158 | pc = prev_res.find_atom('C')
|
|---|
| 159 | nn = nr.find_atom('N')
|
|---|
| 160 | if pc and nn:
|
|---|
| 161 | tpbg.new_pseudobond(pc, nn)
|
|---|
| 162 | if precedes is not None and precedes.polymer_type==Residue.PT_AMINO and precedes.chain_id==cid:
|
|---|
| 163 | nc = nr.find_atom('C')
|
|---|
| 164 | pn = precedes.find_atom('N')
|
|---|
| 165 | if nc and pn:
|
|---|
| 166 | tpbg.new_pseudobond(nc, pn)
|
|---|
| 167 | prev_res = nr
|
|---|
| 168 | new_atoms = Atoms(list(atom_map.values()))
|
|---|
| 169 | if transform is not None:
|
|---|
| 170 | # Using Atoms.transform() rather than simply transforming the coords,
|
|---|
| 171 | # because this also correctly transforms any anisotropic B-factors.
|
|---|
| 172 | new_atoms.transform(transform)
|
|---|
| 173 | if anchor_n:
|
|---|
| 174 | anchor_atom = anchor_n.find_atom('C')
|
|---|
| 175 | link_atom = atom_map[protein_residues[0].find_atom('N')]
|
|---|
| 176 | _remove_excess_terminal_atoms(anchor_atom)
|
|---|
| 177 | _remove_excess_terminal_atoms(link_atom)
|
|---|
| 178 | m.new_bond(anchor_atom, link_atom)
|
|---|
| 179 | for r in new_atoms.unique_residues:
|
|---|
| 180 | tpbg.pseudobonds[tpbg.pseudobonds.between_atoms(anchor_n.atoms.merge(r.atoms))].delete()
|
|---|
| 181 | if anchor_c:
|
|---|
| 182 | anchor_atom = anchor_c.find_atom('N')
|
|---|
| 183 | link_atom = atom_map[protein_residues[-1].find_atom('C')]
|
|---|
| 184 | _remove_excess_terminal_atoms(anchor_atom)
|
|---|
| 185 | _remove_excess_terminal_atoms(link_atom)
|
|---|
| 186 | m.new_bond(anchor_atom, link_atom)
|
|---|
| 187 | for r in new_atoms.unique_residues:
|
|---|
| 188 | tpbg.pseudobonds[tpbg.pseudobonds.between_atoms(anchor_c.atoms.merge(r.atoms))].delete()
|
|---|
| 189 | new_atoms.displays=True
|
|---|
| 190 | return new_atoms
|
|---|
| 191 |
|
|---|
| 192 |
|
|---|
| 193 | def _remove_excess_terminal_atoms(atom):
|
|---|
| 194 | if atom.name=='C':
|
|---|
| 195 | for n in atom.neighbors:
|
|---|
| 196 | if n.name=='OXT':
|
|---|
| 197 | n.delete()
|
|---|
| 198 | elif atom.name=='N':
|
|---|
| 199 | from chimerax.atomic import Atoms
|
|---|
| 200 | neighbors = Atoms(atom.neighbors)
|
|---|
| 201 | hydrogens = neighbors[neighbors.element_names == 'H']
|
|---|
| 202 | if not len(hydrogens):
|
|---|
| 203 | return
|
|---|
| 204 | h = hydrogens[hydrogens.names=='H']
|
|---|
| 205 | if len(h) == 1:
|
|---|
| 206 | hydrogens.subtract(h).delete()
|
|---|
| 207 | return
|
|---|
| 208 | elif len(hydrogens)>1:
|
|---|
| 209 | hydrogens[1:].delete()
|
|---|
| 210 | hydrogens[0].name='H'
|
|---|