CCL Home Page
Up Directory CCL g9xinp2xyz
#!/usr/local/bin/perl -w

# this takes Gaussian input file with geometry given as
# Zmat, and converts it to the XYZ format (cartesians)
# This is an educational script, hence many comments.
# The script is a filter, i.e., should be used as
#   cat name.inp | g9xinp2xyz.pl > name.xyz
# where name.inp is the Gaussian input file and
# name.xyz is a file with cartesian coordinates a la XMol
# It is assumed that G9x input file has a format:
#   % part (e.g., checkpoint specs)
#   # keywords/links part
#     empty line
#   title lines
#     empty line
#   charge multiplicity line
#   zmatrix lines
#     empty line or "Variables"
#   symbolic variables for Z-matrix (if ZMAT has symbolic parameters)
#     empty line or "Constants"
#   symbolic constants for Z-matriz (if all references were not resolved yet)
#     empty line or line starting with "-"
#
#  Written by Jan Labanowski in July 1997 
#

$keywords = "";
while($line = ) { # read next lines of keywords (route section)
  chop($line);
  if($line =~ /^\s*\%/) {
    next;
    }
  elsif($line =~ /\S/) {
    $keywords .= ' ' . $line;
    }
  elsif(($line !~ /\S/) && ($keywords eq "")) { # if empty line, but no # yet
    next;
    }
  else {
    last;
    }
  }

$title = "";
while($line = ) { # read title line(s)
  chop($line);
  if($line =~ /\S/) {
    $title .= $line . ' ';
    }
  else {
    last;
    }
  }

$line = ;   # this should be charge multiplicity line
chop($line);


$natoms = 0;    #number of atoms found initialized
while ($line = ) {  # read molecule description lines
  chop($line);
  $line =~ tr/a-z/A-Z/;
  $line =~ s/^\s+//;   # get rid of front and back spaces
  $line =~ s/$\s+//;
  if(($line !~ /\S/) ||
     ($line =~ /variab/i) ||
     ($line =~ /consta/i)) {  # if blank line or end of Zmat
    last;              # exit the loop
    }
  if($natoms == 0) {   # first atom is only an atomic symbol
    $symbol[$natoms] = $line;
    # no parameters necessary for atom 1
    ($bf[$natoms], $af[$natoms], $tf[$natoms]) =  (1, 1, 1);
    }
  else {
    # split the line into individual fields:
    (@fields) =  split(/\s+/, $line);   # split line into fields

    # assumed most standard z-matrix form, so check number of entries
    if((($#fields != 2) && ($natoms == 1)) &&
       (($#fields != 4) && ($natoms == 2)) &&
       (($#fields != 6) && ($natoms >= 3))) {
      printf STDERR "Error in Z-Matrix for atom %d\n", $natoms+1;
      exit(2);   # abort 
      }

    for($i = $#fields+1; $i <8; $i++) {  # fill up with empty strings
      $fields[$i] = "";
      }

    # parse fields  symbol_i  j  d_ij   k   theta_ijk  l  phi_ijkl 
    ($symbol[$natoms],  # atom symbol
     $bondat[$natoms],  # bonded atom
     $blen[$natoms],    # bond length
     $angat[$natoms],   # atom in an angle
     $angle[$natoms],   # angle
     $torat[$natoms],   # last atom in torsion angle
     $tors[$natoms])    # torsion angle
                          = (@fields);
    # initialize flags for required parameters to "NotFound" status (i.e., 0)
    if($natoms == 1) {
      ($bf[$natoms], $af[$natoms], $tf[$natoms]) =  (0, 1, 1);
      }
    elsif($natoms == 2) {
      ($bf[$natoms], $af[$natoms], $tf[$natoms]) =  (0, 0, 1);
      }
    else {
      ($bf[$natoms], $af[$natoms], $tf[$natoms]) =  (0, 0, 0);
      }
    
    # check if parameters are given as numbers (i.e., sign, possibly followed
    # by digits, possibly followed by period, possibly followed by digits
    # and containing at least one digit total. Set flag if number.
    if(($blen[$natoms] =~ /^[-+]?[0-9]*\.?[0-9]*$/) &&
       ($blen[$natoms] =~ /[0-9]+/)) {
      $bf[$natoms] = 1;
      }
    if(($angle[$natoms] =~ /^[-+]?[0-9]*\.?[0-9]*$/) &&
       ($angle[$natoms] =~ /[0-9]+/)) {
      $af[$natoms] = 1;
      }
    if(($tors[$natoms] =~ /^[-+]?[0-9]*\.?[0-9]*$/) &&
       ($tors[$natoms] =~ /[0-9]+/)) {
      $tf[$natoms] = 1;
      }

    # Atom numbers must be all digits
    if(($bondat[$natoms] !~ /^[0-9]+$/) ||
       (($angat[$natoms] !~ /^[0-9]+$/) && ($natoms > 1)) ||
       (($torat[$natoms] !~ /^[0-9]+$/) && ($natoms > 2))) {
      printf STDERR "Error in atom definition for %d\n", $i+1;
      exit(2); 
      }
    }
  $natoms++;
  }

# check any symbolic parameters are given in the Z-mat
$parms_given = 0;
for($i = 0; $i < $natoms; $i++) {
  if(($bf[$i] == 0) || 
     ($af[$i] == 0) || 
     ($tf[$i] == 0)) {
    $parms_given = 1;     # symbolic parameters present
    last;
    }
  }

if($parms_given == 1) { # read in symbolic parameters (variables) when present
  while($line = ) {
    $line =~ s/^\s//;        # trailing blanks out!
    $line =~ s/\s$//;
    $line =~ tr/a-z/A-Z/;    # convert to uppercase
    if(($line !~ /\S/) ||
       ($line =~ /const/i)) {  # if blank line or end of Variables
      last;              # exit the loop
      }

    ($name, $value) = @fields = split(/[ =,]+/, $line);  # retrieve fields
    if($#fields != 1) {
      die "Wrong format on Variables assignment line\n";
      }
    if(($value !~ /^[-+]?[0-9]*\.?[0-9]*$/) ||
       ($value !~ /[0-9]+/)) {
      die "Wrong number format of Variables assignment line\n";
      }


    # substitute parameter in Z-Matrix. The symbolic parameter name is
    # allowed to be preceded with a sign, so take this into account
    $parameter_found = 0;
    for($i = 0; $i < $natoms; $i++) {
      if($bf[$i] == 0) {
        if($blen[$i] =~ /^([-+]?)$name$/) {
          $sign = $1 . "1.0";
          $blen[$i] = $sign * $value;
          $parameter_found = 1;
          $bf[$i] = 1;
          }
        }

      if($af[$i] == 0) {
        if($angle[$i] =~ /^([-+]?)$name$/) {
          $sign = $1 . "1.0";
          $angle[$i] = $sign * $value;
          $parameter_found = 1;
          $af[$i] = 1;
          }
        }

      if($tf[$i] == 0) {
        if($tors[$i] =~ /^([-+]?)$name$/) {
          $sign = $1 . "1.0";
          $tors[$i] = $sign * $value;
          $parameter_found = 1;
          $tf[$i] = 1;
          }
        }
      }
    # parameter, if given, must appear at least once in Z-Matrix
    if($parameter_found == 0) {
      die "Parameter $name = $value does not appear in Z-Matrix\n";
      }

    }
  }

# check if all symbolic paramemters were reslolved
$parms_given = 0;
for($i = 0; $i < $natoms; $i++) {
  if(($bf[$i] == 0) || 
     ($af[$i] == 0) || 
     ($tf[$i] == 0)) {
    $parms_given = 1;    # symbolic parameters present
    last;
    }
  }
# if all parameters are not resloved, read in the Constants section
if($parms_given == 1) { # read in symbolic parameters (variables) when present
  while($line = ) {
    $line =~ s/^\s//;        # trailing blanks out!
    $line =~ s/\s$//;
    $line =~ tr/a-z/A-Z/;    # convert to uppercase
    if($line =~ /const/i) {
      next;
      }
    if(($line !~ /\S/) ||
       ($line =~ /^\s*\-/)) {  # if blank line or - at the front
      last;              # exit the loop
      }
    ($name, $value) = @fields = split(/[ =,]+/, $line);  # retrieve fields
    if($#fields != 1) {
      die "Wrong format of Constants assignment line\n";
      }
    if(($value !~ /^[-+]?[0-9]*\.?[0-9]*$/) ||
       ($value !~ /[0-9]+/)) {
      die "Wrong number format on Constants assignment line\n";
      }

    # substitute parameter in Z-Matrix. The symbolic parameter name is
    # allowed to be preceded with a sign, so take this into account
    $parameter_found = 0;
    for($i = 0; $i < $natoms; $i++) {
      if($bf[$i] == 0) {
        if($blen[$i] =~ /^([-+]?)$name$/) {
          $sign = $1 . "1.0";
          $blen[$i] = $sign * $value;
          $parameter_found = 1;
          $bf[$i] = 1;
          }
        }

      if($af[$i] == 0) {
        if($angle[$i] =~ /^([-+]?)$name$/) {
          $sign = $1 . "1.0";
          $angle[$i] = $sign * $value;
          $parameter_found = 1;
          $af[$i] = 1;
          }
        }

      if($tf[$i] == 0) {
        if($tors[$i] =~ /^([-+]?)$name$/) {
          $sign = $1 . "1.0";
          $tors[$i] = $sign * $value;
          $parameter_found = 1;
          $tf[$i] = 1;
          }
        }
      }

    # parameter, if given, must appear at least once in Z-Matrix
    if($parameter_found == 0) {
      die "Parameter $name = $value does not appear in Z-Matrix\n";
      }
    }
  }

# check if all symbolic paramemters were reslolved
$parms_given = 0;
for($i = 0; $i < $natoms; $i++) {
  if(($bf[$i] == 0) || 
     ($af[$i] == 0) || 
     ($tf[$i] == 0)) {
    $parms_given = 1;    # symbolic parameters present
    last;
    }
  }
if($parms_given == 1) {
  print STDERR
  "The follwing symbolic parameters are not resolved in Z-matrix:\n";
  for($i = 0; $i < $natoms; $i++) {
    if($bf[$i] == 0) {
      printf STDERR "   %s for bond length for atom %d\n", $blen[$i], $i;
      }
    if($af[$i] == 0) {
      printf STDERR "   %s for angle value for atom %d\n", $angle[$i], $i;
      }
    if($tf[$i] == 0) {
      printf STDERR "   %s for torsion for atom %d\n", $tors[$i], $i;
      }
    }
  exit(2);
  }


# output cartesian coordinates in XYZ XMol format
print STDOUT "$natoms\n";
print STDOUT "$title\n";
for ($i = 0; $i < $natoms; $i++) {
  oneatom($i+1, $bondat[$i], $blen[$i], $angat[$i], $angle[$i],
          $torat[$i], $tors[$i]);
  # note that XMol chooses quite interesting orientation
  # of the molecule
  printf STDOUT "%-3s  %12.6f  %12.6f  %12.6f\n",
    $symbol[$i],  $Zcoor[$i+1],  $Xcoor[$i+1], -$Ycoor[$i+1];
  }



# -- Takes Z-mat line, i.e., 
#        atomnumber, ibond, bondlen, iangle, bondangle, itors, torsangle
#    and updates NONLOCAL arrays Xcoor, Ycoor, Zcoor which hold cartesian
#    coordinates. It is assumed that atom numbering starts from 1
#    (not from 0 as usually in C and perl)
#    USES nonlocal array Xcoor, Ycoor, Zcoor where it stores coordinates
#    It is assumed that angles are given in DEGREEs and bondlen in the same
#    units in which cartesian coordinates will be given.
#                  
#             ibond -- iangle
#              /             \ 
#          atnum             itors
#
#  CAUTIONS -- contrary to the UNIX/PERL/C tradition, the
#  arrays XYZ start from subscript 1 (not 0), and XYZ[0] are left unused,
#  in other words, array element numbers correspond to atom numbers.

sub oneatom {
  # variables are declared local, so they do not interfere with
  # variables outside this subroutine
  local ($atnum,
         $ibond, $bondlen,
         $iangle, $bondangle,
         $itors, $torsangle);
  local ($deg2r);
  local ($rinv, $aux);
  local ($xa, $ya, $za);
  local ($xb, $yb, $zb);
  local ($xc, $yc, $zc);
  local ($xx, $yy, $zz);

  $atnum = $_[0];

  if($atnum == 2) {
    ($atnum, $ibond, $bondlen) = @_;
    }
  elsif ($atnum == 3) {
    ($atnum, $ibond, $bondlen, $iangle, $bondangle) = @_;
    }
  elsif ($atnum > 3) {
    ($atnum, $ibond, $bondlen, $iangle, $bondangle, $itors, $torsangle) = @_;
    }

  $deg2r = atan2(1.0, 1.0)*4.0/180.0;

  # first atom in the origin of coordinate system      
  if($atnum == 1) {
    $Xcoor[1] = 0.0;
    $Ycoor[1] = 0.0;
    $Zcoor[1] = 0.0;
    return;
    }

  # second atom on Z-axis, bondlen from the first atom
  if($atnum == 2) {
    $Xcoor[2] = 0.0;
    $Ycoor[2] = 0.0;
    $Zcoor[2] = $bondlen;
    return;
    }

  # bonded atom coordinates
  $x0 = $Xcoor[$ibond];
  $y0 = $Ycoor[$ibond];
  $z0 = $Zcoor[$ibond];

  if($atnum == 3) {    # third atom placed on the Oxz plane
    $Xcoor[3] = $x0+$bondlen*sin($bondangle*$deg2r);
    $Ycoor[3] = 0.0;
    $aux = $bondlen*cos($bondangle*$deg2r);
    if($ibond == 1) {
      $Zcoor[3] = $z0+$aux;
      }
    elsif ($ibond == 2) {
      $Zcoor[3] = $z0-$aux;
      }
    return;
    }


  # vector from j-->k
  $xx = $Xcoor[$iangle] - $Xcoor[$ibond];
  $yy = $Ycoor[$iangle] - $Ycoor[$ibond];
  $zz = $Zcoor[$iangle] - $Zcoor[$ibond];

  # inverted vector length
  $rinv = 1.0/sqrt($xx*$xx + $yy*$yy + $zz*$zz);

  # unit vector ibond--->iangle
  $xa = $xx*$rinv;
  $ya = $yy*$rinv;
  $za = $zz*$rinv;

  # vector from ibond--->itors  
  $xb = $Xcoor[$itors] - $Xcoor[$ibond];
  $yb = $Ycoor[$itors] - $Ycoor[$ibond];
  $zb = $Zcoor[$itors] - $Zcoor[$ibond];

# unit vector ibond--->itors
  $xc = $ya*$zb - $za*$yb;
  $yc = $za*$xb - $xa*$zb;
  $zc = $xa*$yb - $ya*$xb;

  $rinv = 1.0/sqrt($xc*$xc + $yc*$yc + $zc*$zc);

  $xc = $xc*$rinv;
  $yc = $yc*$rinv;
  $zc = $zc*$rinv;

  $xb = $yc*$za - $zc*$ya;
  $yb = $zc*$xa - $xc*$za;
  $zb = $xc*$ya - $yc*$xa;

  $zz = $bondlen*cos($bondangle*$deg2r);
  $xx = $bondlen*sin($bondangle*$deg2r)*cos($torsangle*$deg2r);
  $yy = $bondlen*sin($bondangle*$deg2r)*sin($torsangle*$deg2r);

  $Xcoor[$atnum] = $x0 + $xa*$zz + $xb*$xx + $xc*$yy;
  $Ycoor[$atnum] = $y0 + $ya*$zz + $yb*$xx + $yc*$yy;
  $Zcoor[$atnum] = $z0 + $za*$zz + $zb*$xx + $zc*$yy;

  return;
  }


Modified: Sun Jul 13 16:00:00 1997 GMT
Page accessed 9751 times since Sat Apr 17 21:22:59 1999 GMT