拆分PDB结构,使每条链成为一个单独的pdb结构
说明:parse_pdb_split_chain()函数用于按链拆分PDB结构,并为每条链保留其对应的alpha-helix(HELIX记录)、beta-sheet(SHEET记录)以及LINK记录。不足之处:由于暂时没有遇到含有TURN记录的PDB结构,目前尚未提取对应的turn信息。
import gzip
import os
import re

# One generic pattern per record type, listed in the order the records are
# written to each per-chain file.  Group 1 captures the chain ID.
#
# Each line is classified once with these generic patterns instead of building
# a chain-specific regex per chain: splicing the chain letter into the pattern
# lets the regex engine backtrack into a different tokenisation and claim
# records that belong to another chain (e.g. "ATOM     12  OXT ALA B" can be
# read as serial "1", atom "2", residue "OXT", chain "A").
_RECORD_PATTERNS = (
    ('HELIX', re.compile(r'HELIX\s+\w+\s*\w+\s*[A-Z]{3,4}\s*(\w)\s*.+')),
    ('SHEET', re.compile(r'SHEET\s+\w+\s*\w+\s*\w+\s*[A-Z]{3,4}\s*(\w)\s*.+')),
    ('LINK', re.compile(r'LINK\s+\w+\s*\w+\s*(\w)\s*.+')),
    ('ATOM', re.compile(r'ATOM\s+\d+\s*\w+\s*[A-Z]{3,4}\s*(\w)\s*.+')),
)


def parse_pdb_split_chain(pdbgzFile: str, outpath: str) -> None:
    """Split a gzip-compressed PDB file into one PDB file per chain.

    For every chain ID found on an ATOM record, a file named
    ``<PDBID>_<chain>.pdb`` is written into ``outpath``.  Each file contains,
    in this order, the HELIX, SHEET, LINK and ATOM records assigned to that
    chain (each group in input order), followed by ``TER`` and ``END`` lines.
    HELIX and SHEET records are assigned by their initial-residue chain, LINK
    records by the chain of their first partner.  TURN records are not
    extracted.

    Args:
        pdbgzFile: Path to the gzip-compressed PDB file.  The PDB ID is taken
            from the base name up to the first ``.``, with its first three
            characters dropped and the rest upper-cased
            (``pdb1abc.ent.gz`` -> ``1ABC``).
        outpath: Existing directory that receives the per-chain files.  A
            trailing path separator is optional.

    Returns:
        None.  The result is the set of files written to ``outpath``.
    """
    with gzip.open(pdbgzFile, 'rb') as pdbF:
        pdbcontent = pdbF.read().decode()

    # chain ID -> record type -> matching lines, in input order.
    by_chain = {}
    # Matching line by line (anchored at column 1) keeps ``\s*`` from running
    # across a newline and gluing two records together; it also keeps a final
    # line that has no trailing newline.
    for line in pdbcontent.split('\n'):
        for record_type, pattern in _RECORD_PATTERNS:
            hit = pattern.match(line)
            if hit:
                by_chain.setdefault(hit.group(1), {}) \
                        .setdefault(record_type, []).append(line)
                break

    # Assumes the archive follows the "pdbXXXX.ent.gz" naming scheme: the
    # first three characters of the stem are dropped unconditionally.
    pdb_id = os.path.basename(pdbgzFile).split('.')[0][3:].upper()

    # Sorted so chains are processed in a reproducible order.
    for chain in sorted(by_chain):
        records = by_chain[chain]
        # Only chains that own at least one ATOM record get an output file.
        if 'ATOM' not in records:
            continue
        outfile = os.path.join(outpath, pdb_id + '_' + chain + '.pdb')
        with open(outfile, 'w') as outF:
            for record_type, _ in _RECORD_PATTERNS:
                outF.writelines(
                    entry + '\n' for entry in records.get(record_type, ())
                )
            outF.write('TER\n')
            outF.write('END\n')
pdbgzFile:指的是PDB的压缩文件。
outpath: 指的是结果的输出目录。
输出文件的命名方式是:PDBID_chainID.pdb