Ticket #3084: merge.py

File merge.py, 8.9 KB (added by Tristan Croll, 5 years ago)

Added by email2trac

Line 
1
2def merge_fragment(target_model, residues, chain_id=None, renumber_from=None,
3 anchor_n=None, anchor_c=None, transform=None):
4 '''
5 Copy the atoms from a fragment into the current model, optionally reassigning
6 chain ID and numbers. If alternate conformers are present, only the active
7 one will be copied.
8
9 * Args:
10
11 - target_model: the model to copy into
12 - residues: the residues to be copied. Isotropic B-factors will be
13 copied, but aniso_u records will be ignored.
14 - chain_id: if provided, all residues must be from one chain. The copied
15 residues will be given this chain ID.
16 - renumber_from: if provided, all residues will be renumbered with an
17 offset of (lowest residue number - renumber_from)
18 - anchor_n: an amino acid residue or None. If provided, the first amino
19 acid residue in the fragment will be linked to this one. Throws an
20 error if anchor_n has another residue linked at its C-terminus.
21 - anchor_c: an amino acid residue or None. If provided, the last amino
22 acid residue in the fragment will be linked to this one. Throws an
23 error if anchor_c has another residue linked at its C-terminus.
24 - transform: a Place or None. If provided, the atoms will be placed at
25 the transformed coordinates.
26 '''
27 from chimerax.core.errors import UserError
28 us = residues.unique_structures
29 if len(us) != 1:
30 raise UserError('All residues to be copied must be from the same model!')
31 if (chain_id or anchor_n or anchor_c or (renumber_from is not None)) \
32 and len(residues.unique_chain_ids) != 1:
33 raise UserError('If reassigning chain ID, renumbering or specifying '
34 'N- and/or C-terminal anchors, all residues to be copied must be '
35 'from a single chain!')
36 from chimerax.atomic import Residue, Residues, Atoms
37 if (anchor_n or anchor_c):
38 protein_residues = residues[residues.polymer_types==Residue.PT_AMINO]
39 if not len(protein_residues):
40 raise UserError('N- and/or C-terminal anchors were specified, but '
41 'the copied selection does not contain any amino acid residues!')
42
43 import numpy
44 fm = us[0]
45 m = target_model
46 tpbg = m.pseudobond_group('missing structure')
47 residues = Residues(sorted(residues, key=lambda r: (r.chain_id, r.number, r.insertion_code)))
48 atoms = residues.atoms
49 coords = atoms.coords
50 atom_map = {}
51 if renumber_from:
52 offset = residues[0].number - renumber_from
53 else:
54 offset = 0
55
56 def new_residue_number(r):
57 if r in residues:
58 return r.number-offset
59 return r.number
60 def new_chain_id(r):
61 if r in residues and chain_id:
62 return chain_id
63 return r.chain_id
64 merged_residues = m.residues.merge(residues)
65 merged_residues = Residues(sorted(merged_residues,
66 key=lambda r: (new_chain_id(r), new_residue_number(r), r.insertion_code)
67 ))
68 new_residue_mask = numpy.in1d(merged_residues, residues)
69 new_residue_indices = numpy.argwhere(new_residue_mask).ravel()
70 existing_residue_mask = numpy.in1d(merged_residues, m.residues)
71 existing_residue_indices = numpy.argwhere(existing_residue_mask).ravel()
72
73 insertion_point_map = {}
74
75 if chain_id is not None:
76 cids = [chain_id]
77 else:
78 cids = residues.unique_chain_ids
79 for cid in cids:
80 existing_residues = m.residues[m.residues.chain_ids==cid]
81 if not len(existing_residues):
82 insertion_point_map[cid] = None
83 continue
84 existing_residue_numbers = numpy.array([str(r.number)+r.insertion_code for r in existing_residues])
85 cres = residues[residues.chain_ids==cid]
86 new_residue_numbers = numpy.array([str(r.number-offset)+r.insertion_code for r in cres])
87
88 duplicate_flags = numpy.in1d(new_residue_numbers, existing_residue_numbers)
89 if numpy.any(duplicate_flags):
90 dup_residues = cres[duplicate_flags]
91 err_str = ('The requested merge could not be completed because the '
92 'following residues in chain {} (after applying any renumbering) '
93 'will have the same residue numbers as existing residues in '
94 'the target: {}'
95 ).format(cid, ', '.join(str(r.number)+r.insertion_code for r in dup_residues))
96 raise UserError(err_str)
97
98 chain_mask = (numpy.array([new_chain_id(r) for r in merged_residues]) == cid)
99 new_r_in_chain_mask = numpy.logical_and(chain_mask, new_residue_mask)
100 insertion_point_map[cid] = None
101 if numpy.any(new_r_in_chain_mask):
102 last_new_r_index = numpy.argwhere(new_r_in_chain_mask)[-1]
103 greater_indices = existing_residue_indices[existing_residue_indices > last_new_r_index]
104 if len(greater_indices):
105 insertion_point_map[cid] = merged_residues[greater_indices[0]]
106
107
108 current_residue = None
109
110 first_index = new_residue_indices[0]
111 if first_index > 0:
112 prev_res = merged_residues[first_index-1]
113 else:
114 prev_res = None
115 prev_new_res = None
116
117 for merged_index, r in zip(new_residue_indices, residues):
118 if chain_id:
119 cid = chain_id
120 else:
121 cid = r.chain_id
122 precedes = insertion_point_map[cid]
123 insertion_code = r.insertion_code
124 if insertion_code=='':
125 insertion_code = ' '
126 nr = m.new_residue(r.name, cid, r.number-offset, insert=insertion_code,
127 precedes=precedes)
128 nr.ribbon_hide_backbone = r.ribbon_hide_backbone
129 nr.ribbon_display = r.ribbon_display
130 nr.ribbon_color = r.ribbon_color
131 nr.ss_type = r.ss_type
132
133 for a in r.atoms:
134 na = atom_map[a] = m.new_atom(a.name, a.element)
135 na.coord = a.coord
136 na.bfactor = a.bfactor
137 na.aniso_u6 = a.aniso_u6
138 na.occupancy = a.occupancy
139 na.draw_mode = a.draw_mode
140 na.color = a.color
141 na.display = a.display
142 nr.add_atom(na)
143 for n in a.neighbors:
144 nn = atom_map.get(n, None)
145 if nn is not None:
146 m.new_bond(na, nn)
147
148
149 if prev_res is not None:
150 if (
151 r.polymer_type==Residue.PT_AMINO and prev_res not in nr.neighbors
152 and prev_res.chain_id == cid
153 and prev_res.polymer_type == Residue.PT_AMINO):
154 if precedes is not None:
155 if precedes.chain_id == cid and precedes.polymer_type == Residue.PT_AMINO:
156 ratoms = prev_res.atoms.merge(precedes.atoms)
157 tpbg.pseudobonds[tpbg.pseudobonds.between_atoms(ratoms)].delete()
158 pc = prev_res.find_atom('C')
159 nn = nr.find_atom('N')
160 if pc and nn:
161 tpbg.new_pseudobond(pc, nn)
162 if precedes is not None and precedes.polymer_type==Residue.PT_AMINO and precedes.chain_id==cid:
163 nc = nr.find_atom('C')
164 pn = precedes.find_atom('N')
165 if nc and pn:
166 tpbg.new_pseudobond(nc, pn)
167 prev_res = nr
168 new_atoms = Atoms(list(atom_map.values()))
169 if transform is not None:
170 # Using Atoms.transform() rather than simply transforming the coords,
171 # because this also correctly transforms any anisotropic B-factors.
172 new_atoms.transform(transform)
173 if anchor_n:
174 anchor_atom = anchor_n.find_atom('C')
175 link_atom = atom_map[protein_residues[0].find_atom('N')]
176 _remove_excess_terminal_atoms(anchor_atom)
177 _remove_excess_terminal_atoms(link_atom)
178 m.new_bond(anchor_atom, link_atom)
179 for r in new_atoms.unique_residues:
180 tpbg.pseudobonds[tpbg.pseudobonds.between_atoms(anchor_n.atoms.merge(r.atoms))].delete()
181 if anchor_c:
182 anchor_atom = anchor_c.find_atom('N')
183 link_atom = atom_map[protein_residues[-1].find_atom('C')]
184 _remove_excess_terminal_atoms(anchor_atom)
185 _remove_excess_terminal_atoms(link_atom)
186 m.new_bond(anchor_atom, link_atom)
187 for r in new_atoms.unique_residues:
188 tpbg.pseudobonds[tpbg.pseudobonds.between_atoms(anchor_c.atoms.merge(r.atoms))].delete()
189 new_atoms.displays=True
190 return new_atoms
191
192
193def _remove_excess_terminal_atoms(atom):
194 if atom.name=='C':
195 for n in atom.neighbors:
196 if n.name=='OXT':
197 n.delete()
198 elif atom.name=='N':
199 from chimerax.atomic import Atoms
200 neighbors = Atoms(atom.neighbors)
201 hydrogens = neighbors[neighbors.element_names == 'H']
202 if not len(hydrogens):
203 return
204 h = hydrogens[hydrogens.names=='H']
205 if len(h) == 1:
206 hydrogens.subtract(h).delete()
207 return
208 elif len(hydrogens)>1:
209 hydrogens[1:].delete()
210 hydrogens[0].name='H'