1 | #!/usr/bin/env python3
|
---|
2 | import os
|
---|
3 | import shutil
|
---|
4 | import argparse
|
---|
5 |
|
---|
def is_well_formatted(pdb_path):
    """
    Return True if no atom serial number (columns 7–11) is repeated
    among the ATOM/HETATM records of this PDB file.

    Args:
        pdb_path: Path of the PDB file to inspect.

    Returns:
        False as soon as a serial field fails to parse as an integer or
        repeats a serial already seen in the file; True otherwise
        (including for a file with no ATOM/HETATM records at all).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    seen = set()
    # Only the fixed-width serial field is inspected, so undecodable bytes
    # elsewhere in the file (e.g. in a REMARK) must not abort the scan.
    # errors='replace' maps them to U+FFFD, which still fails int() -- and
    # therefore still rejects the file -- if it lands in the serial columns.
    with open(pdb_path, 'r', encoding='utf-8', errors='replace') as f:
        for line in f:
            if not line.startswith(('ATOM ', 'HETATM')):
                continue
            try:
                # Columns 7-11 (1-based) hold the serial: slice [6:11].
                serial = int(line[6:11])
            except ValueError:
                # malformed line or non-integer serial — reject
                return False
            if serial in seen:
                return False
            seen.add(serial)
    return True
|
---|
24 |
|
---|
def main():
    """
    Command-line entry point: filter the ``.pdb`` files of one directory.

    Parses a single positional argument, ``pdb_dir``. Every regular file in
    that directory whose name ends in ``.pdb`` (case-insensitive) is checked
    with ``is_well_formatted``; files that fail are moved into the ``bad``
    subdirectory of ``pdb_dir`` and one status line per file is printed.

    Exits with an argparse usage error (status 2) when ``pdb_dir`` is not an
    existing directory.
    """
    parser = argparse.ArgumentParser(
        description="Keep only well-formed PDBs (no duplicate atom serials)."
    )
    parser.add_argument('pdb_dir',
                        help="Directory containing .pdb files to filter")
    args = parser.parse_args()
    pdb_dir = os.path.abspath(args.pdb_dir)

    # Validate before makedirs(): otherwise a mistyped path would be created
    # on the spot (with an empty bad/ inside) and the run would look fine.
    if not os.path.isdir(pdb_dir):
        parser.error(f"not a directory: {pdb_dir}")

    bad_dir = os.path.join(pdb_dir, 'bad')
    os.makedirs(bad_dir, exist_ok=True)

    # listdir() order is arbitrary; sort so the report is reproducible.
    for fn in sorted(os.listdir(pdb_dir)):
        if not fn.lower().endswith('.pdb'):
            continue
        full = os.path.join(pdb_dir, fn)
        if not os.path.isfile(full):
            # e.g. a directory that happens to be named *.pdb; opening it
            # would raise instead of being filtered.
            continue
        # NOTE(review): the leading 'â' in both messages looks like a
        # mis-decoded status symbol -- confirm the intended character.
        if not is_well_formatted(full):
            print(f"â malformed: {fn}")
            shutil.move(full, os.path.join(bad_dir, fn))
        else:
            print(f"â OK: {fn}")
|
---|
46 |
|
---|
# Run the filter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
---|
49 |
|
---|