#!/usr/bin/env python3
import os
import shutil
import argparse

def is_well_formatted(pdb_path):
    """
    Return True if no atom serial number (columns 7–11) is repeated
    among the ATOM/HETATM records of the PDB file at *pdb_path*.

    Serials are tracked across the whole file; MODEL/ENDMDL boundaries
    are not treated specially. A record whose serial field cannot be
    parsed as an integer makes the file count as malformed (False).
    A file with no ATOM/HETATM records at all returns True.

    Bytes that cannot be decoded are replaced instead of raising, so a
    stray non-text byte in an unrelated record (e.g. a REMARK line) does
    not abort the check.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    seen = set()
    # errors='replace' keeps the default encoding but turns undecodable
    # bytes into U+FFFD. If such a byte lands inside a serial field the
    # int() call below fails and the file is rejected, as intended.
    with open(pdb_path, 'r', errors='replace') as f:
        for line in f:
            if not line.startswith(('ATOM  ', 'HETATM')):
                continue
            try:
                serial = int(line[6:11])
            except ValueError:
                # malformed line or non-integer serial → reject
                return False
            if serial in seen:
                return False
            seen.add(serial)
    return True

def main():
    """
    Command-line entry point: quarantine malformed PDB files.

    Takes one positional argument, ``pdb_dir``. Every regular file in
    that directory whose name ends in ``.pdb`` (case-insensitive) is
    checked with ``is_well_formatted``; files that fail are moved into
    ``<pdb_dir>/bad`` and the rest are left in place. One status line is
    printed per file, in sorted filename order.

    Exits with an argparse usage error if ``pdb_dir`` is not an existing
    directory.
    """
    parser = argparse.ArgumentParser(
        description="Keep only well-formed PDBs (no duplicate atom serials)."
    )
    parser.add_argument('pdb_dir',
                        help="Directory containing .pdb files to filter")
    args = parser.parse_args()
    pdb_dir = os.path.abspath(args.pdb_dir)

    # Validate first: makedirs below would otherwise silently create a
    # mistyped directory and the run would "succeed" on nothing.
    if not os.path.isdir(pdb_dir):
        parser.error(f"not a directory: {pdb_dir}")

    bad_dir = os.path.join(pdb_dir, 'bad')
    os.makedirs(bad_dir, exist_ok=True)

    # sorted() makes the report order deterministic across platforms.
    for fn in sorted(os.listdir(pdb_dir)):
        if not fn.lower().endswith('.pdb'):
            continue
        full = os.path.join(pdb_dir, fn)
        # Skip subdirectories / broken symlinks that merely end in ".pdb";
        # trying to open them would crash the whole batch.
        if not os.path.isfile(full):
            continue
        if is_well_formatted(full):
            print(f"✓ OK:        {fn}")
        else:
            print(f"✗ malformed: {fn}")
            shutil.move(full, os.path.join(bad_dir, fn))

# Run the filter only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

