def clusterModels(modelList): from EnsembleMatch.distmat import DistanceMatrix from chimera import match # First we need to construct a full distance matrix, including # possibly identical elements fulldm = DistanceMatrix(len(modelList)) sameAs = {} atoms = [ match._coordArray(m.sortedAtoms()) for m in modelList ] for i in range(len(modelList)): mi = modelList[i] if mi in sameAs: continue ai = atoms[i] for j in range(i + 1, len(modelList)): aj = atoms[j] m = match.Match(ai, aj) if m.rms <= 0: # Detect identical elements and track them mj = modelList[j] sameAs[mj] = mi fulldm.set(i, j, m.rms) # Now we reduce the distance matrix by removing identical elements if not sameAs: # No identical elements, just use the whole thing dm = fulldm models = modelList else: models = [] indexMap = [] for i, mi in enumerate(modelList): if mi in sameAs: # Skip identical element continue models.append(mi) indexMap.append(i) dm = DistanceMatrix(len(models)) for i in range(len(models)): im = indexMap[i] for j in range(i + 1, len(models)): jm = indexMap[j] dm.set(i, j, fulldm.get(im, jm)) # Next we run the clustering algorithm from EnsembleMatch.nmrclust import NMRClust nmrc = NMRClust(dm) # Next reformat the results into something useful. # "clusterInfo" is a list of whose elements are 2-tuples of # (cluster representative model, list of models in cluster) clusterInfo = [] for cid, c in enumerate(nmrc.clusters): members = c.members() mList = [] for member in members: m = models[member] m.clusterId = cid m.clusterRep = 0 mList.append(m) rep = nmrc.representative(c) m = models[rep] m.clusterRep = len(members) clusterInfo.append((m, mList)) for mj, mi in sameAs.iteritems(): mj.clusterId = mi.clusterId mj.clusterRep = 0 rep, members = clusterInfo[mi.clusterId] rep.clusterRep += 1 members.append(mj) return clusterInfo # Here's an example on how you can use the function above. # Run this script with: # chimera --nogui --silent cluster.py # where cluster.py is the name of this file if __name__ == "chimeraOpenSandbox": import chimera # Use following line to fetch from RCSB models = chimera.openModels.open("1n9u", type="PDBID") # Use following line for your own data file # models = chimera.openModels.open("1N9U.pdb", type="PDB") ci = clusterModels(models) print len(models), "models ->", len(ci), "clusters" for rep, members in ci: print rep.oslIdent(), "(%d member)" % len(members) for m in members: print '\t', m.oslIdent()