# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14 |
|
---|
"""Singularity launch script for the AlphaFold Singularity image."""
16 |
|
---|
import os
import pathlib
import shlex
import signal
from typing import Tuple

from absl import app
from absl import flags
from absl import logging
25 |
|
---|
# --- GPU selection -----------------------------------------------------------
flags.DEFINE_boolean(
    name='use_gpu',
    default=True,
    help='Enable NVIDIA runtime to run with GPUs.')
# Defaults to the SGE_GPU environment variable, falling back to '0'.
flags.DEFINE_string(
    name='gpu_devices',
    default=os.getenv('SGE_GPU', '0'),
    help=('Comma separated list GPU identifiers to set environment variable '
          'CUDA_VISIBLE_DEVICES.'))

# --- Prediction targets ------------------------------------------------------
flags.DEFINE_list(
    name='fasta_paths',
    default=None,
    help=('Paths to FASTA files, each containing a prediction '
          'target that will be folded one after another. If a FASTA file '
          'contains multiple sequences, then it will be folded as a multimer. '
          'Paths should be separated by commas. All FASTA paths must have a '
          'unique basename as the basename is used to name the output '
          'directories for each prediction.'))
flags.DEFINE_list(
    name='is_prokaryote_list',
    default=None,
    help=('Optional for multimer system, not used by the '
          'single chain system. This list should contain a boolean for each '
          'fasta specifying true where the target complex is from a '
          'prokaryote, and false where it is not, or where the origin is '
          'unknown. These values determine the pairing method for the MSA.'))

# --- Filesystem locations ----------------------------------------------------
flags.DEFINE_string(
    name='output_dir',
    default='/tmp/alphafold',
    help='Path to a directory that will store the results.')
flags.DEFINE_string(
    name='data_dir',
    default='/wynton/group/databases/alphafold_CASP14',
    help=('Path to directory with supporting data: AlphaFold parameters and '
          'genetic and template databases. Set to the target of '
          'download_all_databases.sh.'))
flags.DEFINE_string(
    name='singularity_image_path',
    default=('/wynton/home/ferrin/goddard/alphafold_singularity/'
             'alphafold21.sif'),
    help='Path to the AlphaFold singularity image.')

# --- AlphaFold run configuration --------------------------------------------
flags.DEFINE_string(
    name='max_template_date',
    default='2100-01-01',
    help=('Maximum template release date to consider (ISO-8601 format: '
          'YYYY-MM-DD). Important if folding historical test sets.'))
flags.DEFINE_enum(
    name='db_preset',
    default='full_dbs',
    enum_values=['full_dbs', 'reduced_dbs'],
    help=('Choose preset MSA database configuration - smaller genetic '
          'database config (reduced_dbs) or full genetic database config '
          '(full_dbs)'))
flags.DEFINE_enum(
    name='model_preset',
    default='monomer',
    enum_values=['monomer', 'monomer_casp14', 'monomer_ptm', 'multimer'],
    help=('Choose preset model configuration - the monomer model, the '
          'monomer model with extra ensembling, monomer model with pTM head, '
          'or multimer model'))
flags.DEFINE_boolean(
    name='benchmark',
    default=False,
    help=('Run multiple JAX model evaluations to obtain a timing that '
          'excludes the compilation time, which should be more indicative of '
          'the time required for inferencing many proteins.'))
flags.DEFINE_boolean(
    name='use_precomputed_msas',
    default=False,
    help=('Whether to read MSAs that have been written to disk. WARNING: This '
          'will not check if the sequence, database or configuration have '
          'changed.'))
74 |
|
---|
# Parsed flag values; populated by absl when app.run() parses the command line.
FLAGS = flags.FLAGS

# Container-side root directory for bind-mount targets. The Singularity code
# path in this file forwards host paths unchanged, so no live code in this
# section reads the constant; it is retained so the name stays defined.
_ROOT_MOUNT_DIRECTORY = '/mnt/'
88 |
|
---|
def main(argv):
  """Builds and prints the Singularity command line for an AlphaFold run.

  The AlphaFold arguments are derived from this script's flags and from the
  database layout expected under FLAGS.data_dir. The assembled
  `env ... singularity run ...` command is printed to stdout; this function
  does not execute it.

  Args:
    argv: Positional command-line arguments left after absl flag parsing. Only
      the program name is accepted.

  Raises:
    app.UsageError: If extra positional arguments are supplied, or if
      FLAGS.data_dir equals, or lies inside, the directory two levels above
      this file.
  """
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')

  data_dir = FLAGS.data_dir

  # Reject a data directory located at or below the grandparent directory of
  # this script (treated by the error message as the AlphaFold repository).
  alphafold_path = pathlib.Path(__file__).parent.parent
  data_dir_path = pathlib.Path(data_dir)
  if alphafold_path == data_dir_path or alphafold_path in data_dir_path.parents:
    raise app.UsageError(
        f'The download directory {FLAGS.data_dir} should not be a subdirectory '
        'in the AlphaFold repository directory. If it is, the Docker build is '
        'slow since the large databases are copied during the image creation.')

  # (flag name, path) pairs forwarded to AlphaFold, in emission order. Edit an
  # individual entry if that database lives somewhere other than data_dir.
  database_paths = [
      # Uniref90 database for use by JackHMMER.
      ('uniref90_database_path',
       os.path.join(data_dir, 'uniref90', 'uniref90.fasta')),
      # MGnify database for use by JackHMMER.
      ('mgnify_database_path',
       os.path.join(data_dir, 'mgnify', 'mgy_clusters_2018_12.fa')),
      ('data_dir', data_dir),
      # Directory with template mmCIF structures, each named <pdb_id>.cif.
      ('template_mmcif_dir',
       os.path.join(data_dir, 'pdb_mmcif', 'mmcif_files')),
      # File mapping obsolete PDB IDs to their replacements.
      ('obsolete_pdbs_path',
       os.path.join(data_dir, 'pdb_mmcif', 'obsolete.dat')),
  ]

  if FLAGS.model_preset == 'multimer':
    database_paths.extend([
        # Uniprot database for use by JackHMMER.
        ('uniprot_database_path',
         os.path.join(data_dir, 'uniprot', 'uniprot.fasta')),
        # PDB seqres database for use by hmmsearch.
        ('pdb_seqres_database_path',
         os.path.join(data_dir, 'pdb_seqres', 'pdb_seqres.txt')),
    ])
  else:
    # PDB70 database for use by HHsearch.
    database_paths.append(
        ('pdb70_database_path', os.path.join(data_dir, 'pdb70', 'pdb70')))

  if FLAGS.db_preset == 'reduced_dbs':
    # Small BFD database for use by JackHMMER.
    database_paths.append(
        ('small_bfd_database_path',
         os.path.join(data_dir, 'small_bfd',
                      'bfd-first_non_consensus_sequences.fasta')))
  else:
    database_paths.extend([
        # Uniclust30 database for use by HHblits.
        ('uniclust30_database_path',
         os.path.join(data_dir, 'uniclust30', 'uniclust30_2018_08',
                      'uniclust30_2018_08')),
        # BFD database for use by HHblits.
        ('bfd_database_path',
         os.path.join(
             data_dir, 'bfd',
             'bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt')),
    ])

  # FASTA paths are forwarded exactly as given on the command line.
  command_args = [f'--fasta_paths={",".join(FLAGS.fasta_paths)}']
  command_args.extend(
      f'--{name}={path}' for name, path in database_paths if path)

  # NOTE(review): FLAGS.output_dir is not consulted; AlphaFold is always pointed
  # at ./output under the current working directory, which is bound into the
  # container below. Confirm whether --output_dir should be honoured instead.
  output_target_path = os.path.abspath('output')

  command_args.extend([
      f'--output_dir={output_target_path}',
      f'--max_template_date={FLAGS.max_template_date}',
      f'--db_preset={FLAGS.db_preset}',
      f'--model_preset={FLAGS.model_preset}',
      f'--benchmark={FLAGS.benchmark}',
      f'--use_precomputed_msas={FLAGS.use_precomputed_msas}',
      '--logtostderr',
  ])

  if FLAGS.is_prokaryote_list:
    command_args.append(
        f'--is_prokaryote_list={",".join(FLAGS.is_prokaryote_list)}')

  env_vars = {
      'CUDA_VISIBLE_DEVICES': FLAGS.gpu_devices,
      # The following flags allow us to make predictions on proteins that
      # would typically be too long to fit into GPU memory.
      'TF_FORCE_UNIFIED_MEMORY': '1',
      'XLA_PYTHON_CLIENT_MEM_FRACTION': '4.0',
  }
  env_assignments = [f'{key}={value}' for key, value in env_vars.items()]

  print('Running Alphafold with args:\n%s\nenvironment:\n%s'
        % ('\n'.join(command_args), '\n'.join(env_assignments)))

  # Assemble the command as a word list and shell-quote every word, so paths
  # containing spaces or shell metacharacters survive being pasted into a shell.
  cmd_words = ['env', *env_assignments, 'singularity', 'run']
  if FLAGS.use_gpu:
    # --nv is only requested when GPU use is enabled via --use_gpu.
    cmd_words.append('--nv')
  cmd_words.extend([
      '-B', FLAGS.data_dir,
      '-B', os.getcwd(),
      FLAGS.singularity_image_path,
      *command_args,
  ])
  cmd = ' '.join(shlex.quote(word) for word in cmd_words)

  # The command is only printed here; nothing in this function runs it.
  print(cmd)
243 |
|
---|
if __name__ == '__main__':
  # fasta_paths is declared with a default of None, so require it explicitly
  # before handing control to absl.
  flags.mark_flag_as_required('fasta_paths')
  app.run(main)