#!/usr/local/bin/nawk -f
#
# car2pdbatom.awk - generate PDB ATOM records from a car file,
# renaming atoms where necessary.
#
# Known chemical species: H N C O S L (lone pair)
#
# Every input line that has exactly 9 fields, with fields 2, 3 and 4 being
# real numbers, is treated as an atom record:
#   $1 = atom name, $2 $3 $4 = x y z, $5 = residue name, $6 = residue number
# One ATOM line is written per atom record, followed by a final TER line.

# iupac(atomname)
#   Return 1 if atomname is accepted as IUPAC conformant, else 0.
#   Accepted: a species letter, optionally followed by one of A B G D E Z H
#   and an optional digit 1 or 2; or the literal name "OXT".
function iupac(atomname) {
    if (atomname ~ /^[HCNOSL]([ABGDEZH][12]?)?$/ || atomname == "OXT")
        return(1)
    return(0)
}

# check(atomname)
#   Return 1 if atomname is exactly 3 characters long and starts with a
#   species letter, else 0.  Such a name is printed right-justified in the
#   4-column atom name field, so its species letter lands in the second
#   column of that field.
function check(atomname) {
    if (length(atomname) == 3 && match(atomname, "[HCNOSL]") == 1)
        return(1)
    return(0)
}

# species(atomname)
#   Return the first species letter (H C N O S L) found anywhere in atomname.
#   If there is none, report the problem on stderr and exit with status 1
#   (awk still runs the END rule after exit).
#   i is a local variable.
function species(atomname,    i) {
    i = match(atomname, "[HCNOSL]")
    if (i == 0) {
        # diagnostics go to stderr so they do not end up inside the PDB output
        printf("car2pdbatom.awk: %s is not of species H C N O S\n", atomname) | "cat 1>&2"
        exit(1)
    }
    return(substr(atomname, i, 1))
}

# isreal(s)
#   Return 1 if s looks like a real number: optional minus sign, digits,
#   a mandatory decimal point, digits, and an optional exponent written as
#   "E" followed by an optional minus sign and digits.  Else return 0.
function isreal(s) {
    if (s ~ /^-?[0-9]+\.[0-9]+(E-?[0-9]+)?$/)
        return(1)
    return(0)
}

BEGIN {
    # NOTE(review): POSIX leaves FILENAME undefined inside BEGIN, so this
    # test may never be true on some awk implementations.  It is kept as in
    # the original so that reading from standard input keeps working.
    if (FILENAME == "-") {
        print "usage:car2pdbatom.awk "
        exit
    }

    # Fixed-column layout of a PDB ATOM record:
    #   columns  1- 6  record name "ATOM  "
    #   columns  7-11  atom number
    #   columns 13-16  atom name
    #   columns 18-20  residue name
    #   columns 23-26  residue number
    #   columns 31-54  x y z coordinates (8.3 each)
    #   columns 55-60  occupancy
    #   columns 61-66  temperature factor
    output_format = "ATOM  %5d %4s %3s  %4s    %8.3f%8.3f%8.3f%6.2f%6.2f \n"

    # first atom number minus one
    atomnumber = 0
}

# atom record: 9 fields, with fields 2, 3 and 4 being real numbers
NF == 9 && isreal($2) && isreal($3) && isreal($4) {
    atomnumber++

    atomname      = $1
    residuename   = substr($5, 1, 3)    # residue name is truncated to 3 characters
    residuenumber = $6
    x = $2 + 0.0
    y = $3 + 0.0
    z = $4 + 0.0
    occupancy   = 1.00
    temperature = 0.00

    # atom count within residue (an unset array element counts from zero)
    count[residuenumber]++

    # A name is kept if it is IUPAC conformant or is a 3-character name that
    # starts with its species letter; otherwise it is rebuilt from the
    # species letter followed by the running atom count within the residue.
    if (!iupac(atomname) && !check(atomname))
        atomname = species(atomname) count[residuenumber]

    # Left-justify names shorter than 3 characters in a 3-character field;
    # the %4s in output_format then leaves the first name column blank.
    # Longer names pass through unchanged.
    atomname = sprintf("%-3s", atomname)

    # output pdb atom record
    printf(output_format, atomnumber, atomname, residuename, residuenumber, x, y, z, occupancy, temperature)
}

END {
    print "TER"
}