import xlrd
# Load the "Plasmids" sheet of the master list and build two lookups keyed by
# "JSO<number>": dict2 maps to the selection text, dict3 to the description.
wb = xlrd.open_workbook("JSO_MasterList_121212.xls")
sh = wb.sheet_by_name(u'Plasmids')

# Columns 0, 1 and 4 are used as plasmid number, description and selection.
# The first row of each column is dropped (presumably a header row -- confirm
# against the workbook). Slicing is used instead of pop(0) so an empty sheet
# yields empty lists rather than raising IndexError.
plasmid_number = sh.col_values(0)[1:]
plasmid_descrip = sh.col_values(1)[1:]
plasmid_selection = sh.col_values(4)[1:]

plasmidnum = []
plasmidsel = []
plasmiddesc = []
# Walk the three columns in lockstep so every plasmid is paired with the cells
# from its own row.  The previous list.index() lookup searched for the
# int-converted value, which rescanned the column for every row, always
# resolved duplicate numbers to the first matching row, and could fail or pick
# the wrong row when the cell was text or a non-integral number.
for number, description, selection in zip(
        plasmid_number, plasmid_descrip, plasmid_selection):
    if number == '':
        # Rows without a plasmid number are skipped.
        continue
    plasmidnum.append("JSO" + str(int(number)))
    plasmidsel.append(str(selection))
    plasmiddesc.append(str(description))

dict2 = dict(zip(plasmidnum, plasmidsel))
dict3 = dict(zip(plasmidnum, plasmiddesc))
from Bio import SeqIO
# Read every record of JSO_Plasmids.fasta and build dict1: record id -> sequence
# string.  The intermediate lists keep their original names (`id`, `seq`) in
# case other code reads them; NOTE that `id` shadows the builtin of that name.
id = []
seq = []
# The with-block guarantees the file is closed even if parsing raises.
# "rU" is the Python 2 universal-newline read mode (removed in Python 3.11).
with open("JSO_Plasmids.fasta", "rU") as handle:
    for record in SeqIO.parse(handle, "fasta"):
        id.append(str(record.id))
        seq.append(str(record.seq))
# If the same id occurs more than once, the last sequence read wins.
dict1 = dict(zip(id, seq))
# Tag each plasmid by keywords found in its lower-cased description.
# dict4 receives a keyword from the first group and dict5 one from the second
# group (they are read back as `prom` and `gene` when the merged rows are
# built).  The keywords are tested in the order listed and every match
# overwrites the previous one, so the LAST matching keyword of a group is the
# one that is kept.
promoter_keywords = ("tdh3", "adh1", "gal1", "tpk2", "cyc1", "nop7")
gene_keywords = ("msn2", "crz1", "tod6", "dot6", "sfp1", "stb3")

dict4 = {}
dict5 = {}
for plasmid, description in dict3.items():
    lowered = description.lower()
    for keyword in promoter_keywords:
        if keyword in lowered:
            dict4[plasmid] = keyword
    for keyword in gene_keywords:
        if keyword in lowered:
            dict5[plasmid] = keyword
# Build fluorescence: record id -> sequence string for every record in
# fluorescent_sequences.fasta.
fluorescence = {}
# The original ended with `fluor.close` (no call parentheses), which only
# referenced the method and never closed the file; the with-block closes it
# reliably, including when parsing raises.
with open("fluorescent_sequences.fasta", "rU") as fluor:
    for record in SeqIO.parse(fluor, "fasta"):
        fluorescence[str(record.id)] = str(record.seq)
# dict6 maps a plasmid id to the name of a fluorescent sequence that occurs as
# an exact substring of the plasmid's sequence.  When several fluorescent
# sequences match, the one visited last while iterating `fluorescence` is kept.
dict6 = {}
for plasmid_id, plasmid_seq in dict1.items():
    for fluor_name, fluor_seq in fluorescence.items():
        if fluor_seq in plasmid_seq:
            dict6[plasmid_id] = fluor_name
# Assemble one row per plasmid listed in dict3:
#   [id, description, selection, sequence, dict4 tag, dict5 tag, dict6 tag]
# Any value missing from dict1/dict4/dict5/dict6 is filled with the literal
# string "none".  dict2 is indexed directly (no fallback), exactly as before.
merged_list = [
    [
        plasmid,
        description,
        dict2[plasmid],
        dict1.get(plasmid, "none"),
        dict4.get(plasmid, "none"),
        dict5.get(plasmid, "none"),
        dict6.get(plasmid, "none"),
    ]
    for plasmid, description in dict3.items()
]
import csv
# Write every merged row to compiled_info_plasmid.txt using the csv "excel"
# dialect.  The file was previously never closed, so buffered rows were only
# flushed whenever the interpreter happened to release the handle; the
# with-block closes (and flushes) it deterministically.
# "wb" is the mode Python 2's csv module expects; running under Python 3 would
# require text mode with newline="" instead.
with open("compiled_info_plasmid.txt", "wb") as resultfile:
    wr = csv.writer(resultfile, dialect="excel")
    wr.writerows(merged_list)