#!/usr/bin/perl
#
# should work with MacPERL and Perl on UNIX or MacOSX
#                  without adjusting
#
# USAGE:
$usage = " xyz2xyzBQ_0.2UNIX.pl <filename> \n";
#
# DEMO xyz input:
# 6
# COMENT xyz2xyzBQ_0.2UNIX.pl
# C   1.20  1.20 0.00 X
# C   1.00 -1.00 0.00 X
# C  -1.00  1.00 0.00 X
# C  -1.00 -1.00 0.00 X
# H   2.0   2.00 0.00
# H  -2.0  -2.00 0.00
#
# ... should produce
# 7
# COMENT xyz2xyzBQ_0.2UNIX.pl
# BQ    0.050000   0.050000   0.000000
#  C    1.200000   1.200000   0.000000
#  C    1.000000  -1.000000   0.000000
#  C   -1.000000   1.000000   0.000000
#  C   -1.000000  -1.000000   0.000000
# H     2.000000   2.000000   0.000000
# H    -2.000000  -2.000000   0.000000
#
# FUNCTION: read xyz file with marks on Atoms forming aromatic system
# FUNCTION: write xyz file with additional GhostAtoms, BQ.
#REM file name may be given as option or in a dialogue
#
# VERSION: 0.3  Alk Dransfeld 2002-12-16
$version = "xyz2xyzBQ V. 0.3 2002-12-16  by A. Dransfeld";
# HISTORY: V 0.2 2000-05-20  by A. Dransfeld";
# HISTORY: V 0.1 1996-03-22 Alk Dransfeld
#
$verbose = 1;
#
# ... MAIN at end ...
#--START subroutines required - - - - - - - - - -
#
# REQUIRED SUB set_platform
# options ()
#
# REQUIRED SUB read_my_file
# options ( $ file_name ,  $platform )
#
# REQUIRED SUB xyz2xyzBQ
# options ( _string_of_xyz_file_content_ )
#
#--END subroutines required - - - - - - - - - -
#
#__Setings and Defaults
#
# if no filename try to use
 $fn_xyz_default = 'test.xyz';
#
 $fn_xyzBQ = 'nics.xyz';
#
# ------- SUBROUTINES(start) ----------------------
#
if($verbose > 10){print "Loading SUBroutine set_platform \n";}
#_  SUBROUTINE
sub set_platform {
   # SUB_NAME:       set_platform
   # SUB_AUTHOR:     A. Dransfeld
   # SUB_VERSION:    0.2 1999-01-12
   # SUB_FUNCTION:   find out the platform UNIX/LINUX or MacOS
   # SUB_RETURNS:    $platf = the platform "MacOS" or "UNIX"
   # SUB_RETURNS:    or a message starting with "ERROR ..."
   #REM setting the $dir_separator is NOT included
   #GLOBAL: $verbose , $mode
   local ($assumed_platf) = @_;
   # check the assumed platform ... if it is provided
   local ($platf);
   local ($msg);
   local ($flag) = "FLAG-SUB_set_platform";
   #
   $_ = `pwd`; # the path comprises one ore more dir_separator's
   #
   if(defined $assumed_platf) {
      if($assumed_platf eq "MacOS") {
         #REM with dir_separator  = ":"
         if(!/\:/) { # in the path of the working dir
            if($verbose > 0) {
print "WARNING it is very UNLIKELY \nthat you are ";
print "running the script under MacOS. \nTherefore, ";
print "the platform specification is switched ";
print "to UNIX/LINUX\n";
            }
            $platf = "UNIX"; #REM with dir_separator  = "/";
         } else {
            $platf = $assumed_platf;
         }
#        #_END if(!/\:/)
      }
#     #_END if($assumed_platf eq "MacOS")
      if($assumed_platf eq "UNIX" ){
         #REM with dir_separator  = "/";
         if(!/\//) {  # in the path of the working dir
            if($verbose > 0) {
print "WARNING it is very UNLIKELY \nthat you are ";
print "running the script under UNIX/LINUX. \nTherefore,";
print "the platform specification is switched to MacOS\n";
            }
            $platf = "MacOS"; #REM with dir_separator  = ":"
         } else {
            $platf = $assumed_platf;
         }
#        #_END if(!/$dir_separator/)
      }
#     #_END if($assumed_platf eq "UNIX")
   } else {
      # $platform is undefined -> attempt to set automatically
      # the path comprises one or more platformspecific
      # dir_separator's
      if (/\:/) { #REM std File separator in MacOS
         #REM may include / BUT as character of MacOS file/folder NAME
         if($verbose > 0) {print "..assuming platform = MacOS\n";}
         $platf = "MacOS";
      } elsif (/\//) {
         if (/\>/) { # forbidden for UNIX file path
            $msg = "ERROR the chevron, >,\nis NOT a legal character";
            $msg=$msg."in a UNIX file name OR path";
            return $msg;
         }
         if (/\:/) { # forbidden for UNIX file path
            $msg = "ERROR the dots, :,\nis NOT a legal character";
            $msg=$msg."in a UNIX file name OR path";
            return $msg;
         }
         if (/^\-/) { # forbidden for UNIX file path
            $msg = "ERROR the minus, -, at head\nis NOT legal ";
            $msg=$msg."in a UNIX file name OR path";
            return $msg;
         }
         if($verbose > 0) {
            print "..assuming platform = UNIX/LINUX\n";
         }
         $platf = "UNIX"; #REM with dir_separator  = "/";
      } else {
         if($mode eq "interactive") {
            print "ERROR platform specification \[MacOS OR UNIX\]\n";
            print "could NOT be set automatically by SHARCivar\n";
            print "PLEASE enter either UNIX or MacOS \nHERE:>";
            $_ = <>;chomp;
            if ($_ eq "MacOS") {
               $platf = "MacOS";
            } elsif ( ($_ eq "UNIX") || ($_ eq "LINUX") ) {
               $platf = "UNIX";
            } else {
               return "ERROR could not figure out the platform";
            }
         } else {
            return "ERROR could not figure out the platform";
         }
#        #_END if($mode eq "interactive")
      }
#     #_END if(/ _specific_dir_separator_ /)
   }
   #_END if(defined $assumed_platf)
}
#_END sub set_platform
#
if($verbose > 10){print "Loading SUBroutine read_my_file \n";}
#_  SUBROUTINE ----------------- ========== - - - - - -
sub read_my_file {
   # SUB_NAME:       read_my_file
   # SUB_AUTHOR:     A. Dransfeld
   # SUB_VERSION:    0.1 1999-03-30
   # SUB_FUNCTION:   find out the name of the file to read and read it
   # SUB_INPUT:      nothing
   # SUB_RETURNS:    read file content is returned as  ONE string
   # SUB_RETURNS:    or return a message starting with "ERROR ..."
   # SUB REQUIRES global $platform
   #GLOBAL: $verbose
   local ($msg , $n_arguments);
   local ($xyz_file_content);
   #
   local ($flag) = "FLAG-SUB_read_my_file";
#test print "HALoo $flag\n";
   if ( ($platform eq "MacOS") || ($platform eq "UNIX")) {
      #REM fine then lets go on
   } else {
      print "The platfrom MUST be define as MacOS OR UNIX\nBYE\n";
      exit;
   }
   if ($platform eq "UNIX") {
      $n_arguments = $#ARGV;
   } else {
      $n_arguments = -1; # MacOS has different Option handling
   }
#test print "Number of provided arguments >$n_arguments\<\n";
   if ($n_arguments >= 0) {
      $fn = $ARGV[0]; #REM save the first for later
   } else {
      print "enter >q< to quit ...\n";
      print "enter >h< for help ..\n";
      print "Please Enter xyz File Name :>";
      $fn = <STDIN>; chop($fn);$fn =~ s/^\s+//; $fn =~ s/\s+$//;
      if($fn eq "q") {
         return "ERROR quit sequence requested";
      }
      if($fn eq "h") {
print "The atoms which are assigned to the AROMATIC system\n";
print "have three coordinates in Angstroem AND additionally\n";
print "a letter, e.g. X, as 4th coordinate\n";
print "The atoms which are NOT part of the AROMATIC system\n";
print "have ONY three coordinates in Angstroem\n";
return "ERROR help was required";
      }
      if($fn eq "") {$fn = $fn_xyz_default;};
   }
   #_END if ($n_arguments >= 0)
#
   if (-T $fn) {
      print "Adding NICS ghost to XYZ in file >$fn\<\n";
      print "Result should be in $fn_xyzBQ\n";
   } else {
      $msg="ERROR the File >$fn\<\ncan NOT be used\n";
      $msg=$msg."(may be file name lags extension \.xyz)\nBYE\n";
      return $msg;
   }
   #_END if (-T $fn)
   $/ = "NEVEROCCURS"; open(OUTPUT,"<$fn");  # assume it is readable
   $xyz_file_content = <OUTPUT>; close (OUTPUT); $/ = "\n";
   return $xyz_file_content; # "So far so good\n";
}
#_END sub read_my_file
#
if($verbose > 10){print "Loading SUBroutine xyz2xyzBQ \n";}
#_  SUBROUTINE ----------------- ========== - - - - - -
sub xyz2xyzBQ {
   # SUB_NAME:       xyz2xyzBQ
   # SUB_AUTHOR:     A. Dransfeld
   # SUB_VERSION:    V. 0.2 2000-05-20
   # SUB_FUNCTION:   transform xyz format+marked atoms
   # SUB_FUNCTION:   to xyz+NICS-ghost
   # SUB_INPUT:      content of xyz file
   # SUB_RETURNS:    content of xyz+NICSprobe, BQ, coordinates
   # SUB_RETURNS:    or return a message starting with "ERROR ..."
   #GLOBAL: $verbose
   local ($xyz) = $_[0];
   local (@lines, $n_atoms);
   local ($sum_atoms)  = 0;
   local ($sum_points) = 0;
   local (%a);
   local (%x,%y,%z);    # x, y and z coordinate of the real atoms
   local ($x_angs) = 0; # x, y and z coordinate of the additional BQ
   local ($y_angs) = 0; # all in Angstroem
   local ($z_angs) = 0;
   local ($ai , $xi , $yi , $zi);
   local ($xyzBQ);
   #
   local ($flag) = "FLAG-SUB_xyz2xyzBQ";
#test print "HALoo $flag\n $xyz \n<<END of XYZ>>\n";
   #_ parse file in lines
   @lines = split(/\n/, $xyz);
   #
   $n_atoms     = $lines[0];
   $n_atoms =~ s/\s+//g; #pre- OR proceeding Blanks
   $commet_line =  $lines[1];
   if($verbose > 9){print "Comments >$commet_line\<\n";};
   #
   $max_lines = $n_atoms + 1; #REM indexes start at 0 NOT at 1
   foreach ($i=2;$i<=$max_lines;++$i){
      $_ = $lines[$i];
      s/\,/ /g; # replace commas by blanks
      s/\s+$//; # erase preceeding blanks
#test print "Reading 1+$i > $_\<\n";
      #REM differnetiate between MARKED and unMARKED atoms
      #
if(/(\D{1,2})\s+(\-*\d*\.\d+)\s+(\-*\d*\.\d+)\s+(\-*\d*\.\d+)$/){
         #
         $a{$i} = $1; $x{$i} = $2; $y{$i} = $3; $z{$i} = $4;
#test print "Atom >$1\<\n xx $2 yy $3 zz $4\n";
         ++$sum_atoms; # NO ++ $sum_points
         #
#patch_2000-05-20,AD(start)
#_Assuming that ALL numbers are exponential, when the FIRST is expon.
      } elsif (/(\D{1,2})\s+(\-*\d*\.\d+E[\+|\-]\d{2})\s+/){
         $postmatch = $'; $a{$i} = $1; $x{$i} = 0+$2;
#test print "First EXPONENTIEL $1 $2 $x{$i}\nPostmatch $postmatch\n";
         @words = split(/\s+/,$postmatch); # $n_words = $#words;
#test print "N_words = $n_words\n";
         $y{$i} = 0+$words[0]; $z{$i} = 0+$words[1];
         if($#words > 1) {  # consider as 'aromatic atom'
            $x_angs = $x_angs + $x{$i};
            $y_angs = $y_angs + $y{$i};
            $z_angs = $z_angs + $z{$i};
            ++$sum_atoms; ++$sum_points;
         } else { # only y- and z- coord. of 'normal atom'
            ++$sum_atoms; # NO ++ $sum_points
         }
      } elsif (/\-*\d*\.\d+E[\+|\-]\d{2}/){
         $msg = "ERROR mixture of exponential and NONexp Numbers";
         return $msg;
#patch_2000-05-20,AD(end)
         #
      } else {
         @words = split;
         $a{$i} = $words[0];
$x{$i} = 0+$words[1]; $y{$i} = 0+$words[2]; $z{$i} = 0+$words[3];
         $x_angs = $x_angs + $x{$i};
         $y_angs = $y_angs + $y{$i};
         $z_angs = $z_angs + $z{$i};
         ++$sum_atoms; ++$sum_points;
      }
   }
   #_END foreach($i=2;($n_atoms+2)<$i;++$i)
#_assembling A + B + C
   if ($sum_points > 3) {
      ++$n_atoms; # additional one ghost
      #
      #_A)__ N atoms, comment
      $xyzBQ = "$n_atoms\n";
      $xyzBQ=$xyzBQ."$commet_line\n";
      #
      #_B)__ additional ghost
#test print "sumX $x_angs sumY $y_angs sumZ $z_angs\n";
      $ai = "BQ";
      $xi = $x_angs / $sum_points ;
      $yi = $y_angs / $sum_points ;
      $zi = $z_angs / $sum_points ;
      #REM $b = sprintf("%.2f",$a) ;
      $linei = sprintf("%2s  %10.6f %10.6f %10.6f\n",$ai,$xi,$yi,$zi);
      $xyzBQ=$xyzBQ."$linei";
      if($verbose > 3){
         print "ASSEMBLING the xyzBQ file\n";
         if($verbose > 6){
            print "using $sum_points atoms forming the\n";
            print "potentially aromatic system.\n";
            if ($verbose > 9){
               print "The NICS probe is added at position:\n";
               print "X= $xi Y= $yi Z= $zi\n";
            }
         }
      }
      #_END chatting
   } else {
      #IGNORE the fly shit
      print "Sorry\n ";
      print " EIHER not enough (<4) atoms marked ";
      print "(4. \'coordinate\' = letter X)\n        ";
      print "for construction of the NICS probe in xyz file\n";
      print " OR xyz file is nonexisten/empty\n";
      print "$version\nBYE\n";
      return "ERROR not enough atoms marked";
   }
   #_END if ($sum_points > 3)
   #
   #_C) real atoms
   foreach ($i=2;$i<=$max_lines;++$i){
      $ai = $a{$i};
      $xi = $x{$i}; $yi = $y{$i}; $zi = $z{$i};
      $linei = sprintf("%2s  %10.6f %10.6f %10.6f\n",$ai,$xi,$yi,$zi);
      $xyzBQ=$xyzBQ."$linei";
   }
   #_END foreach($i=2;($n_atoms+2)<$i;++$i)
   #
   return $xyzBQ;
}
#_END sub
# ------------------ SUBROUTINES(end) ------------------------
#
# === MAIN ===
#--
$platform = &set_platform();
#--
#test print "MessagE_from_SuB_set_platform:\nXX>$platform\<XX\n";
if (substr($platform,0,5) eq "ERROR") {
   print "Sorry, I MUST know the platform\!\n...EXITing script\n";
   exit;
} elsif ($platform eq "MacOS"){ $dir_separator = ":";
} elsif ($platform eq "UNIX" ){ $dir_separator = "/";
} # else never should occur
#--
$file_content = &read_my_file();
if (substr($file_content,0,5) eq "ERROR") {
   print $file_content;
   exit;
}
#--
#test print "MessagE_from_SuB_read_my_file >$file_content\<\n";
#test $file_content = "42\nCommoment\nAL 0.1 2.3 4.5\n";
#--
$file_contentBQ = &xyz2xyzBQ($file_content);
#--
#test print "MessagE_from_SuB_xyz2xyzBQ >$file_contentBQ\<\n";
#ADD2 check for ERROR... -> noOUTfile mentinoed if ERR
#test
 print "Result IS:\n$file_contentBQ\n---\n";
#
open(OUT,">$fn_xyzBQ") || die "Can not oopen $fn_xyzBQ\n";
print OUT "$file_contentBQ\n";
close(OUT);
print "Result should be in File > $fn_xyzBQ <\n";
#
print "Finished this $version\nwith VERBOSE >$verbose\<\n";

