# This module handles input and output of PDB files.
# It deals only with the file format, not with the
# structure of any objects in such files beyond
# atoms and residues.
#
# Written by: Konrad Hinsen
# Last revision: 1996-3-5
#

"""This module provides classes that represent PDB files and
configurations contained in PDB files. It provides access to PDB files
on two levels: low-level (line by line) and high-level (residues and
atoms).

Class PDBFile:
--------------

Creation:

  PDBFile(filename, mode)
    mode is 'r' for reading and 'w' for writing, default is 'r'.
    If the filename ends with '.Z' or '.gz', it is automatically
    compressed or uncompressed using 'compress' or 'gzip'. The
    filename may contain the abbreviations '~' or '~user' to refer
    to home directories.

Low-level access methods:

  readLine()
    returns the next non-blank line. The return value is a tuple
    whose first element contains the line type identifier. For
    lines of type 'ATOM' and 'HETATM', the remaining fields are
    encoded as numbers and strings; the position is returned as
    an object of class Vector. For other lines, the second element
    of the tuple contains the part of the line after the type field
    as a single string. At the end of the file, ('END', '') is
    returned.

  writeComment(text)
    writes the supplied text into one or several comment lines.
    Each line of the text is prefixed with 'REMARK' and written
    to the file.

  nextResidue(name)
    must be called in between writeAtom calls to indicate the
    beginning of a new residue. The argument is the residue
    identifier.

  writeAtom(name, position)
    writes a HETATM line for an atom with the given name and
    position. The position is given as an instance of class Vector.

High-level access methods:

  readConfiguration()
    reads all ATOM and HETATM lines, ignoring anything else.
    The result is a list of Residue objects (see below).

  readSequence()
    returns a list of residue identifiers.

  writeConfiguration(conf)
    writes the supplied configuration (a list of Residue objects)
    to the file. This method can be called several times with parts
    of a configuration; however, each residue must be written in a
    single call.

General methods:

  close()
    *must* be called after writing to a file, should be called
    after reading to avoid memory leaks.


Class PDBConfiguration:
-----------------------

Creation:

  PDBConfiguration(filename)
    to generate a configuration by reading a file (see PDBFile).

  PDBConfiguration(residue_list)
    to generate a configuration from a given list of Residue
    objects (see below). Another PDBConfiguration may be given
    instead of a list; its residue list is then copied.

  PDBConfiguration()
    to generate an empty configuration.

Access (assuming c is an instance of PDBConfiguration):

  len(c) returns the number of Residues in the configuration.

  c[i] returns residue number i.

  c.writeToFile(filename) writes the configuration to a file.

  c.addResidue(residue) adds a residue to the configuration.


Class Residue:
--------------

Creation:

  Residue(name, atoms)
    to generate a residue with identifier 'name' and atoms
    from the list 'atoms', whose elements must be instances
    of class Atom (see below).

  Residue(name)
    to generate an empty residue with a given identifier.

Access (assuming r is an instance of Residue):

  len(r) returns the number of atoms in the residue.

  r[i] returns atom number i.

  r.addAtom(atom) adds an atom to the residue.


Class Atom:
-----------

Creation:

  Atom(name, position)
    to generate an atom with identifier 'name' at a given position.
    The position must be an instance of class Vector.

Modification methods:

  moveBy(distance)
    adds the given distance (instance of class Vector) to the
    position.

  moveTo(position)
    changes the position.


Example:
--------

  conf = PDBConfiguration('example.pdb')
  print conf
  for residue in conf:
      for atom in residue:
          print atom
  new_conf = PDBConfiguration(conf)
  new_conf.writeToFile('test.pdb')
"""

from TextFile import TextFile
from Vector import Vector
import string


def _float_or_zero(field):
    """Convert a fixed-width numeric field to a float.

    A blank or missing field (as in truncated ATOM/HETATM records that
    omit occupancy or temperature factor) yields 0.0; any other
    non-numeric content still raises ValueError.
    """
    field = field.strip()
    if not field:
        return 0.
    return float(field)


class Residue:

    """A named, ordered collection of Atom objects."""

    def __init__(self, name, atoms = None):
        self.name = name
        # A missing or empty atom list always gets a fresh list.
        if atoms:
            self.atoms = atoms
        else:
            self.atoms = []

    def __len__(self):
        return len(self.atoms)

    def __getitem__(self, item):
        return self.atoms[item]

    def __str__(self):
        s = 'Residue ' + self.name + ':\n'
        for atom in self.atoms:
            s = s + ' ' + repr(atom) + '\n'
        return s

    __repr__ = __str__

    def addAtom(self, atom):
        """Append 'atom' to the residue."""
        self.atoms.append(atom)


class Atom:

    """A named atom at a position (an instance of class Vector)."""

    def __init__(self, name, position):
        self.name = name
        self.position = position

    def __str__(self):
        return 'Atom ' + self.name + ' at ' + str(self.position)

    __repr__ = __str__

    def moveBy(self, distance):
        """Add 'distance' to the current position."""
        self.position = self.position + distance

    def moveTo(self, position):
        """Replace the current position by 'position'."""
        self.position = position


class PDBFile:

    """A PDB file opened for reading or for writing."""

    def __init__(self, filename, mode = 'r'):
        self.file = TextFile(filename, mode)
        self.atom_num = 0
        self.abs_res_num = 0
        # close() appends an END record only to files opened for writing.
        self.output = mode[0].lower() == 'w'

    def readLine(self):
        """Return the next non-blank line as a tuple.

        For 'ATOM' and 'HETATM' records the tuple is
        (type, serial number, atom name, residue name, residue number,
        position, occupancy, temperature factor). For any other record
        it is (type, rest of the line). At the end of the file
        ('END', '') is returned.
        """
        while True:
            line = self.file.readline()
            if not line:
                return ('END', '')
            line = line.strip()
            if line:
                break
        record_type = line[:6].strip()
        # All slice offsets below are relative to the text that follows
        # the six-character record name.
        line = line[6:]
        if record_type == 'ATOM' or record_type == 'HETATM':
            return (record_type,
                    int(line[:5]),
                    line[6:10].strip(),
                    line[11:15].strip(),
                    int(line[16:20]),
                    Vector(float(line[24:32]),
                           float(line[32:40]),
                           float(line[40:48])),
                    # Occupancy and temperature factor are often left
                    # out; treat blank fields as 0.
                    _float_or_zero(line[48:54]),
                    _float_or_zero(line[54:60]))
        else:
            return (record_type, line)

    def readConfiguration(self):
        """Read all ATOM and HETATM records up to the end of the file.

        Returns a list of Residue objects; a new residue is started
        whenever the residue number changes.
        """
        # None cannot collide with any residue number read from a file,
        # so the first atom always opens a residue.
        resnum = None
        residue = None
        conf = []
        while True:
            line = self.readLine()
            if line[0] == 'END':
                break
            if line[0] == 'ATOM' or line[0] == 'HETATM':
                if residue is None or line[4] != resnum:
                    resnum = line[4]
                    residue = Residue(line[3])
                    conf.append(residue)
                residue.addAtom(Atom(line[2], line[5]))
        return conf

    def readSequence(self):
        """Return the residue identifiers of all ATOM records.

        One identifier is returned per residue; as in
        readConfiguration, a new residue starts whenever the residue
        number changes, so numbering that restarts (e.g. for a second
        chain) does not drop residues.
        """
        resnum = None
        sequence = []
        while True:
            line = self.readLine()
            if line[0] == 'END':
                break
            if line[0] == 'ATOM':
                if line[4] != resnum:
                    resnum = line[4]
                    sequence.append(line[3])
        return sequence

    def writeComment(self, text):
        """Write each line of 'text' as a REMARK record."""
        while text:
            eol = text.find('\n')
            if eol == -1:
                eol = len(text)
            self.file.write('REMARK %s \n' % text[:eol])
            text = text[eol+1:]

    def nextResidue(self, resid):
        """Start a new residue with identifier 'resid'.

        Must be called before the first writeAtom call and whenever
        a new residue begins.
        """
        self.resid = resid
        self.abs_res_num = self.abs_res_num + 1

    def writeAtom(self, name, position):
        """Write a HETATM record for an atom.

        'name' is the atom name and 'position' a sequence of three
        coordinates (normally a Vector). The record uses the fixed
        column layout that readLine parses: serial number in columns
        7-11, atom name in 13-16, residue name in 18-20, residue
        number in 23-26, coordinates in 31-54, and occupancy and
        temperature factor (always written as 0.00) in 55-66.
        """
        self.atom_num = self.atom_num + 1
        # Names that do not start with a two-letter element symbol
        # are shifted right by one column.
        if name[0:2] not in self.two_letter_elements:
            name = ' ' + name
        fields = (self.atom_num,
                  name.ljust(4)[0:4],
                  self.resid.ljust(3)[0:3],
                  self.abs_res_num,
                  position[0], position[1], position[2],
                  0., 0.)
        self.file.write('HETATM%5d %s %s  %4d    '
                        '%8.3f%8.3f%8.3f%6.2f%6.2f\n' % fields)

    two_letter_elements = ['HE', 'LI', 'BE', 'NE', 'NA', 'MG', 'AL',
                           'SI', 'CL', 'AR', 'CA', 'SC', 'TI', 'CR',
                           'MN', 'FE', 'CO', 'NI', 'CU', 'ZN', 'GA',
                           'GE', 'AS', 'SE', 'BR', 'KR', 'RB', 'SR',
                           'ZR', 'NB', 'MO', 'TC', 'RU', 'RH', 'PD',
                           'AG', 'CD', 'IN', 'SN', 'SB', 'TE', 'XE',
                           'CS', 'BA', 'HF', 'TA', 'RE', 'OS', 'IR',
                           'PT', 'AU', 'HG', 'TL', 'PB', 'BI', 'PO',
                           'AT', 'RN', 'FR', 'RA', 'RF', 'HA', 'SG',
                           'NS', 'HS', 'MT', 'LA', 'CE', 'PR', 'ND',
                           'PM', 'SM', 'EU', 'GD', 'TB', 'DY', 'HO',
                           'ER', 'TM', 'YB', 'LU', 'AC', 'TH', 'PA',
                           'NP', 'PU', 'AM', 'CM', 'BK', 'CF', 'ES',
                           'FM', 'MD', 'NO', 'LR']

    def writeConfiguration(self, data):
        """Write a sequence of Residue objects, one residue at a time."""
        for residue in data:
            self.nextResidue(residue.name)
            for atom in residue.atoms:
                self.writeAtom(atom.name, atom.position)

    def close(self):
        """Close the file, first writing an END record in write mode."""
        if self.output:
            self.file.write('END\n')
        self.file.close()


class PDBConfiguration:

    """A list of residues that can be read from or written to a file."""

    def __init__(self, conf = None):
        if isinstance(conf, str):
            # A string is a file name: read the configuration from it.
            pdb_file = PDBFile(conf)
            self.residues = pdb_file.readConfiguration()
            pdb_file.close()
        elif isinstance(conf, PDBConfiguration):
            # Copy the residue list so that addResidue works on the new
            # configuration and does not modify the original one.
            self.residues = list(conf.residues)
        elif conf:
            self.residues = conf
        else:
            self.residues = []

    def __len__(self):
        return len(self.residues)

    def __getitem__(self, item):
        return self.residues[item]

    def __str__(self):
        return ''.join([str(residue) for residue in self.residues])

    def writeToFile(self, filename):
        """Write the configuration to the file 'filename'."""
        pdb_file = PDBFile(filename, 'w')
        pdb_file.writeConfiguration(self.residues)
        pdb_file.close()

    def addResidue(self, residue):
        """Append 'residue' to the configuration."""
        self.residues.append(residue)