import pandas as pd
from Bio.PDB import PDBParser
from Bio.PDB.SASA import ShrakeRupley
import Bio.PDB


def _float_or_nan(value):
    """Convert *value* to float, mapping ``None`` or blank text to NaN."""
    if value is None:
        return float("nan")
    if isinstance(value, str):
        value = value.strip()
        if not value:
            return float("nan")
    return float(value)


def parsePDB(path):
    """Read the ATOM/HETATM records of a PDB file into a per-atom DataFrame.

    The file is parsed twice: once with Biopython (to compute per-atom
    solvent accessibility with ``ShrakeRupley`` and per-residue phi/psi with
    ``Polypeptide.get_phi_psi_list``) and once as fixed-column text to pull
    the raw record fields.

    Parameters
    ----------
    path : str
        Full path to the PDB file.

    Returns
    -------
    pandas.DataFrame
        One row per ATOM/HETATM line with the columns ``type``,
        ``atomnumber``, ``atom``, ``resn``, ``chain``, ``resi``, ``coords``
        (``[x, y, z]`` list), ``occupancy``, ``bfactor``, ``sasa``, ``kdh``,
        ``phi`` and ``psi``. Values that cannot be determined (no SASA entry
        for the atom, residue name not in the hydropathy table, undefined
        dihedral, blank occupancy/B-factor field) are float NaN.

    Notes
    -----
    SASA is always looked up in the first model (``struct[0]``). Residues
    are addressed by chain and residue number only; insertion codes are not
    taken into account.
    """
    nan = float("nan")

    # Per-residue hydropathy table keyed by lower-case residue name
    # (the original comment calls it "kdh" -- presumably Kyte-Doolittle
    # hydropathy; confirm before relying on that name).
    kdh_dict = {
        "asp": -3.5,
        "glu": -3.5,
        "asn": -3.5,
        "gln": -3.5,
        "lys": -3.9,
        "arg": -4.5,
        "his": -3.2,
        "gly": -0.4,
        "pro": -1.6,
        "ser": -0.8,
        "thr": -0.7,
        "cys": 2.5,
        "met": 1.9,
        "mse": 1.9,
        "ala": 1.8,
        "val": 4.2,
        "ile": 4.5,
        "leu": 3.8,
        "phe": 2.8,
        "trp": -0.9,
        "tyr": -1.3,
    }

    column_names = ["type", "atomnumber", "atom", "resn", "chain", "resi", "coords", "occupancy", "bfactor", "sasa", "kdh", "phi", "psi"]

    # Column-oriented accumulator: one list per output column.
    t = {name: [] for name in column_names}

    # Calculate SASA for every atom of the structure.
    # from: https://biopython.org/docs/dev/api/Bio.PDB.SASA.html
    struct = PDBParser(QUIET=1).get_structure("pdb", path)
    sr = ShrakeRupley()
    sr.compute(struct, level="S")

    # Calculate phi/psi from the structure parsed above (previously a
    # hard-coded file name was parsed here instead of ``path``).
    # Layout: dihedral[chain_id][residue_number] -> (phi, psi).
    # Initialised up front so a structure without models cannot leave the
    # name undefined; later models overwrite earlier ones for the same key.
    dihedral = {}
    for model in struct:
        for chain in model:
            poly = Bio.PDB.Polypeptide.Polypeptide(chain)
            per_residue = dihedral.setdefault(str(chain.id), {})
            for residue, angles in zip(poly, poly.get_phi_psi_list()):
                per_residue[residue.id[1]] = angles

    # Read the fixed-column record fields from the file itself.
    with open(path) as f:
        for line in f:
            if not line.startswith(("ATOM", "HETATM")):
                continue

            atom_name = line[12:16].strip()
            resn = line[17:20].strip()
            # Keep the raw chain character for lookups: Biopython stores a
            # blank chain ID as " ", so stripping it would never match.
            chain_id = line[21]
            resi = int(line[22:26])

            # SASA for this atom; best effort, NaN when the atom cannot be
            # addressed this way (e.g. hetero residues carry a different
            # residue id in Biopython).
            try:
                sasa = struct[0][chain_id][resi][atom_name].sasa
            except (KeyError, AttributeError):
                sasa = nan

            # Dihedral angles for this atom (a residue property, repeated
            # on every atom row). Undefined angles become float NaN so the
            # columns stay numeric like ``sasa`` and ``kdh``.
            angles = dihedral.get(chain_id, {}).get(resi, (None, None))
            phi = _float_or_nan(angles[0])
            psi = _float_or_nan(angles[1])

            # Hydropathy value for the residue, NaN for unknown names.
            kdh = kdh_dict.get(resn.lower(), nan)

            atom = [
                line[0:6].strip(),  # record type (ATOM / HETATM)
                int(line[6:11]),  # atom number
                atom_name,  # atom
                resn,  # resn
                chain_id.strip(),  # chain
                resi,  # resi
                [float(line[30:38]), float(line[38:46]), float(line[46:54])],  # coords
                _float_or_nan(line[54:60]),  # occupancy
                _float_or_nan(line[60:66]),  # bfactor
                sasa,  # sasa
                kdh,  # kdh
                phi,  # phi
                psi,  # psi
            ]

            # Append the record to the per-column lists.
            for name, val in zip(column_names, atom):
                t[name].append(val)

    df = pd.DataFrame(t, columns=column_names)
    return df

def _export_example(pdb_file="1trn.pdb", csv_file="result.csv"):
    """Parse *pdb_file*, write the table to *csv_file*, print and return it."""
    table = parsePDB(pdb_file)
    table.to_csv(csv_file)
    print(table)
    return table


# Runs at import/execution time, exactly as before; ``x`` stays a
# module-level name holding the parsed DataFrame.
x = _export_example()