CCL Home Page
Up Directory CCL xyz2bs.c
/* Written by Jan Labanowski (jkl@ccl.net) in 1997 for the common good.
 * converts files in the Xmol (XYZ) format to the format required by xbs
 * just compile by giving the following command:
 *    cc -o xyz2bs xyz2bs.c -lm
 * and then use as
 *    xyz2bs myfile.xyz
 * which will create myfile.bs and eventually myfile.mv (if more than one frame
 */

#include 
#include 
#include 
#include 
#include 

#define BONDSCALE 0.1   /* bond radius is BONDSCALE * sum of rvdw for atoms */

#define ATOMSCALE 0.3   /* atom radius = ATOMSCALE * rvdw */

#define BONDMINLENGTH 0.1 /* bond not showed if shorter than BONDMINLENGTH
                             times sum of covalent radii */

#define BONDMAXLENGTH 1.5 /* bond not showed if longer than bondmaxlenghth
                             times sum of covalent radii */

typedef struct ATOM_DAT {  /* structure to hold data about atom types */
  char symb[4];           /* chemical symbol */
  float rvdw;             /* van der Waals radius */
  float rcov;             /* covalent radius */
  float r;                /* Red color intensity in RGB values (0 to 1) */
  float g;                /* Green color intensity in RGB values (0 to 1) */
  float b;                /* Blue color intensity in RGB values (0 to 1) */
  } ATOM_DAT;

static int n_at_types = 0;


/*symb rvdw rcov    R      G      B */
static ATOM_DAT atom_data[] = {
"C",  1.65, 0.77, 0.184, 0.310, 0.310,
"H",  1.25, 0.32, 0.973, 0.973, 1.000,
"Li", 1.86, 1.23, 0.698, 0.133, 0.133,
"B",  0.00, 0.82, 0.000, 0.392, 0.000,
"N",  1.50, 0.75, 0.000, 0.749, 1.000,
"O",  1.35, 0.73, 0.804, 0.361, 0.361,
"F",  1.40, 0.72, 0.933, 0.910, 0.667,
"Na", 1.12, 1.54, 0.941, 0.973, 1.000,
"Mg", 0.87, 1.36, 0.133, 0.545, 0.133,
"Al", 1.70, 1.18, 0.184, 0.310, 0.310,
"Si", 1.93, 1.16, 0.933, 0.910, 0.667,
"P",  1.75, 1.06, 1.000, 0.647, 0.000,
"S",  1.85, 1.02, 0.678, 1.000, 0.184,
"Cl", 1.80, 0.99, 0.000, 0.392, 0.000,
"K",  1.44, 2.03, 1.000, 0.078, 0.576,
"Ca", 1.18, 1.74, 0.184, 0.310, 0.310,
"Sc", 1.37, 1.44, 0.184, 0.310, 0.310,
"Ti", 1.37, 1.32, 0.184, 0.310, 0.310,
"V",  1.37, 1.22, 0.184, 0.310, 0.310,
"Cr", 1.37, 1.18, 0.184, 0.310, 0.310,
"Mn", 1.37, 1.17, 0.184, 0.310, 0.310,
"Fe", 0.90, 1.17, 1.000, 0.647, 0.000,
"Co", 0.88, 1.16, 0.737, 0.561, 0.561,
"Ni", 0.69, 1.15, 0.737, 0.561, 0.561,
"Cu", 0.72, 1.17, 0.737, 0.561, 0.561,
"Zn", 0.74, 1.25, 0.737, 0.561, 0.561,
"Ga", 1.37, 1.26, 0.737, 0.561, 0.561,
"Br", 1.95, 1.14, 0.737, 0.561, 0.561,
"Rb", 1.58, 2.16, 0.737, 0.561, 0.561,
"Ag", 1.27, 1.34, 0.184, 0.310, 0.310,
"I",  2.15, 1.33, 0.627, 0.125, 0.941,
"Cs", 1.84, 2.35, 0.737, 0.561, 0.561,
"Au", 1.37, 1.34, 0.933, 0.910, 0.667,
"Bq", 1.00, 0.00, 1.000, 0.078, 0.576,
"X",  1.00, 0.00, 1.000, 0.078, 0.576,
"Xx", 1.00, 0.00, 1.000, 0.078, 0.576,
"LST",0.00, 0.00, 0.000, 0.000, 0.000}; /* do not delete this line ever !!! */


/* ====================================================================== */
/* given the chemical symbol, returns the element of atom_data table,
 *  or -1 if match was not found
 */
int find_type(symbol)
char *symbol;
{
 int i, type;

 for(i = 0; i < n_at_types; i++) {
   if(strcmp(symbol, atom_data[i].symb) == 0) {
     return(i);
     }
   }
 return(-1);
}


/* ====================================================================== */
/* skips blanks at the beginning of the string and moves chars to the left,
 *  Returns number of blanks found
 */
int skip_front_blanks(str)
char *str;
{
 int i, k;
 k = 0;
 while((isspace(str[k]) != 0) && (str[k] != '\0')) k++;
 
 if(k != 0) {
   i = 0;
   while(str[i] = str[i+k]) i++;
   }
 return(k);
}  

/* ====================================================================== */
/* skips blanks at the end of the string and returns number of blanks found
 */
int skip_end_blanks(str)
char *str;
{
 int i, k, l;
 l = strlen(str);
 k = l-1;
 for(k = l-1; k >= 0; k--) {
   if(isspace(str[k]) == 0) {
     break;
     }
   }
 str[++k] = '\0';
 return(l-k);
 }

/* ====================================================================== */
/* extract a word (i.e., sequence of nonblank characters) from the string,
 * starting at character pointer by start. start is then automatically
 * advanced to point at the next character after the word. Returns length
 * of extracted word, or zero if nothing extracted
 */
int extract_word(str, k, word)
char *str, *word;
int *k;
{
 int i, j;
 char ch;
 i = *k;
 word[0] = '\0';
 j = 0;
 while((ch = str[i++]) != '\0') {
   if(isspace(ch) == 0) {
     word[j++] = ch;
     word[j] = '\0';
     if(isspace(str[i]) != 0) {
       *k = i;
       return(j);
       }
     }
   }
  *k = i-1;
  return(j);
}

/* ====================================================================== */
main(argc, argv)
int argc;
char **argv;
{
 char inpname[200], outname[200], outmove[200];
 FILE *inpf, *outbs, *outmv;
 int i, j, k, l, n, n_spec, n_frames, n_atoms, n_bonds, ord, ord1, ord2, saved;
 int at_ord[10000];
 int spec_ord[100];
 float d, dx, dy, dz, dcov;
 float b_min[100], b_max[100], b_rad[100], b_col[100];
 float x[10000], y[10000], z[10000];
 int b_at1[10000], b_at2[10000];
 unsigned ip;
 char *pch;
 char line[500];
 char sym[3];
 char *title[1000];
 char fields[20][20];


 /* find how many atom types are available */
 n_at_types = 0;
 while(strcmp(atom_data[n_at_types++].symb, "LST") != 0);

 if(argc != 2) {
   fprintf(stderr, "Usage: xyz2bs filename.[xyz]\n");
   exit (2);
   }

 strcpy(inpname, argv[1]);  /* copy file name to inpfilename */
 skip_front_blanks(inpname);
 skip_end_blanks(inpname);
 strcpy(outname, inpname);  /* copy file name to inpfilename */
 strcpy(outmove, inpname);  /* copy file name to inpfilename */
 

 pch = strchr(inpname, '.');   /* find position of a period */
 if(pch == NULL) {
   pch = strchr(inpname, '\0');
   }

 ip = pch - inpname;  /* point at period */
 strcpy(inpname + ip, ".xyz");  /* input file */
 strcpy(outname + ip, ".bs");   /* output file for 1st frame */
 strcpy(outmove + ip, ".mv");   /* output file for frames 2 to last */

 if(((inpf = fopen(inpname, "r")) == NULL) &&
    ((inpf = fopen(argv[1], "r")) == NULL)) {
   fprintf(stderr,"Could open neither %s nor %s\n", inpname, argv[1]);
   exit(2);
   }

 n = -1;
 n_spec = -1;
 n_atoms = 1000;
 n_frames = -1;
 while (fgets(line, 490, inpf) != NULL) {
   n++;
   skip_front_blanks(line);
   skip_end_blanks(line);
fprintf(stderr,"%d: |%s|\n", n, line);

   if((n == 0) || (n == (n_frames+1)*(n_atoms + 2))) {
     if(line[0] == '\0') {  /* terminate, empty line where atom count */
       n--;
       break;
       }
     sscanf(line, "%d", &k); /* get number of atoms */
fprintf(stderr, "Nat=%d\n", k);
     if((n_frames > 0) && (k != n_atoms)) {
       fprintf(stderr,
       "Number of atoms in frame %d does not match previous frame\n",
       n_frames + 1);
       exit(2);
       }
     n_atoms = k;
     n_frames++;
     }
   else if((n == 1) || (n == n_frames*(n_atoms + 2) + 1)) { /* get titles */
     l = strlen(line) + 1;
     title[n_frames] = malloc(sizeof(char)*l);
     strcpy(title[n_frames], line);    /* save title */
fprintf(stderr,"title[%d]=|%s|\n", n_frames, line);
     }
   else {
     k = n % (n_atoms + 2) - 1;        /* atom number */
     l = 0;
     j = 0;
     while(extract_word(line, &j, fields[l]) != 0) {
fprintf(stderr,"Word[%d]=|%s|\n", l, fields[l]);
       l++; /* extract columns */
       }
     if(l < 4) {   /* if line has less columns than symbol and coordinates */
       fprintf(stderr, 
       "Wrong line in XYZ file for atom %d in frame %d\n", k, n_frames + 1);
       exit(2);
       }
     sym[0] = fields[0][0];             /* get atom symbol */;
     sym[1] = fields[0][1];
     sym[2] = '\0';
     if(isalpha(sym[0]) == 0) {
       fprintf(stderr, "Wrong atom symbol at line %d\n", n);
       exit(2);
       }
     if(isalpha(sym[1]) == 0) {
       sym[1] = '\0';
       }
     if(islower(sym[0]) != 0) {
       sym[0] = toupper(sym[0]);
       }
     if(isupper(sym[1]) != 0) {
       sym[1] = tolower(sym[1]);
       }
     
     x[n] = atof(fields[1]);
     y[n] = atof(fields[2]);
     z[n] = atof(fields[3]);
     if((ord = find_type(sym)) < 0) {
       fprintf (stderr,
"Atom symbol %s is not defined in this perl script. Add this symbol to\n",
       sym);
       fprintf(stderr,
       "the table at the top of the program, recompile,  and rerun.\n");
       exit(2);
       }
     at_ord[n] = ord;
     if(n_frames == 0) {
       saved = 0;
       for(i = 0; i <= n_spec; i++) {
         if(spec_ord[i] == ord) {
           saved = 1;
           break;
           }
         }
       if(saved == 0) {
         n_spec++;
         spec_ord[n_spec] = ord;
         }
       }
     }
   }

 if(n != (n_frames+1)*(n_atoms + 2) - 1) {
   fprintf(stderr, 
   "Number of atoms and number of coordinates do not much in last frame\n");
   exit(2);
   }
    
/* find bonds which are present in the 1st frame. */

 k = 0;
 n_bonds = -1;
 for (i = 2; i < n_atoms + 1; i++) {      /* scan all atom pairs */
   for(j = i+1; j < n_atoms + 2; j++) {
 
     /*  calculate distance between atoms; */
     dx = x[i] - x[j];
     dy = y[i] - y[j];
     dz = z[i] - z[j];
     d = sqrt(dx*dx + dy*dy + dz*dz);

     /* get atom pointers in the table */
     ord1 = at_ord[i];
     ord2 = at_ord[j];

     /* calculate sum of covalent radii for the atoms */
     if((atom_data[ord1].rcov > 0.001) &&
        (atom_data[ord2].rcov > 0.001)) {
        dcov = atom_data[ord1].rcov + atom_data[ord2].rcov;
        }
     else {
        dcov = 0.0;
        }

     if(d > 1.5*dcov) { /* skip nonbonded atom pairs */
       continue;
       }

     /* skip bonds already saved */
     saved = 0;
     for(l = 0; l <= n_bonds; l++) {
       if(((b_at1[l] == ord1) && (b_at2[l] == ord2)) ||
          ((b_at1[l] == ord2) && (b_at2[l] == ord1))) {
         saved = 1;
         continue;
         }
       }

     if(saved == 1) {  /* do not save is alrderady there */
       continue;
       }

     /* save unique bond */
     n_bonds++;
     b_at1[n_bonds] = ord1;
     b_at2[n_bonds] = ord2;
     b_min[n_bonds] = BONDMINLENGTH * dcov;
     b_max[n_bonds] = BONDMAXLENGTH * dcov;
     b_rad[n_bonds] = BONDSCALE * dcov;
     b_col[n_bonds] = 0.5;
     }
   }
  
 /* Now print the stuff */

 if((outbs = fopen(outname, "w")) == NULL) {
   fprintf(stderr, "Cannot create %s\n", outname);
   exit(2);
   }

 fprintf(outbs, "* %s\n",title[0]);

 /* print coordinates for the 1st frame */
 for(i = 1; i <= n_atoms; i++) {
   fprintf(outbs, "atom   %s   %11.6f %11.6f %11.6f\n", atom_data[at_ord[i+1]].symb,
        x[i+1], y[i+1], z[i+1]);
   }
 fprintf(outbs, "\n");

 /*  print specs for atoms */
 for (i = 0; i <= n_spec; i++) {
   ord = spec_ord[i];
   fprintf(outbs, "spec   %s  %6.3f  %5.2f %5.2f %5.2f\n", atom_data[ord].symb,
        ATOMSCALE*atom_data[ord].rvdw,  atom_data[ord].r, atom_data[ord].g,
        atom_data[ord].b);
   }
 fprintf(outbs, "\n");

 /* print bond specs */
 for(i = 0; i <= n_bonds; i++) {
   fprintf(outbs, "bonds  %s   %s  %6.3f  %6.3f  %5.2f  %5.2f\n",
         atom_data[b_at1[i]].symb, atom_data[b_at2[i]].symb, b_min[i],
         b_max[i], b_rad[i], b_col[i]);
    }

 fclose(outbs);

 /* check if additional frames are present in XYZ file, and print them */
 if(n_frames > 0) {
   if((outmv = fopen(outmove, "w")) == NULL) {
     fprintf(stderr, "Cannot create %s\n", outmove);
     exit(2);
     }
   for(i = 1; i <= n_frames; i++) {
     fprintf(outmv, "frame    %s\n", title[i]);
     for(j = i * (n_atoms + 2) + 2;  j <= (i+1) * (n_atoms + 2)-1; j++) {
       fprintf(outmv, "%9.4f  %9.4f  %9.4f\n", x[j], y[j], z[j]);
       }
     }
   fclose(outmv);
   }
 }
/* 
# COMMENTS...
# The Xmol is a program originally conceived in the Minnesota Supercomputer
# center and available from anon.ftp ftp.msc.edu (pub/xmol).
# The xbs is the program written by Michael Methfessel, and retrieved from
# http://ihp02.ihp-ffo.de/~msm/.
# the xbs is an X-windows program written in C language which displays
# molecules. The xbs uses its own format for representing molecules.
# For this reason this script is used to convert from the popular XYZ format
# (which is more populer) to the format required by xbs.
# 
# Short example of XYZ format:
#  6
#  This is methanol with normal modes
#  C      0.050065    0.665047    0.000000    0.200000   -0.020000    0.000000
#  O      0.050065   -0.767876    0.000000   -0.140000    0.100000    0.000000
#  H     -0.912101   -1.005927    0.000000    0.340000   -1.740000    0.000000
#  H      1.090132    0.995415    0.000000   -0.040000    0.800000    0.000000
#  H     -0.439468    1.081620    0.886605   -0.180000   -0.080000   -0.180000
#  H     -0.439468    1.081620   -0.886605   -0.180000   -0.080000    0.180000
#
#   1st line -- number of atoms
#   2nd line -- some title (or empty line)
#   3rd line to the end -- atom symbol and XYZ cartesian coordinates in the
#            first columns.  If there are more than 4 columns, the columns
#            additional columns represent:
#                if 5 columns, last column is atomic charges
#                if 7 columns, last column is normal modes vectors
# The XYZ file may contain more then one frame for animations. In this case
# the subsequent frames follow exactly the same format as above.
# 
# 
# The xbs can use one or 2 files which have extensions .bs and .mv.
# The .bs represents the information about the 1st frame and this file
# has to be present. It has the following format
# Short example of xbs format: for a first frame
#  * methanol
# atom   C      0.050065    0.665047    0.000000
# atom   O      0.050065   -0.767876    0.000000
# atom   H     -0.912101   -1.005927    0.000000
# atom   H      1.090132    0.995415    0.000000
# atom   H     -0.439468    1.081620    0.886605
# atom   H     -0.439468    1.081620   -0.886605
# 
# spec   C   0.495   0.70 
# spec   O   0.405   0.40
# spec   H   0.375   1.00
# 
# bonds  C   O   0.150   2.250   0.15   0.70
# bonds  C   H   0.109   1.635   0.11   0.70
# bonds  O   H   0.105   1.575   0.11   0.70
#
# Only lines which start with the words "atom", "spec", and "bonds".
# 
# It is assumed that length is given in Angstroms.
# are used by the program. The other lines are skipped over and not used.
# What the tagged lines mean
#   atom symbol  xcoor ycoor zcoor
#   spec symbol  radius color
#   bonds symbol1 symbol2 min-length max-length radius color.
# The color can be either one number or 3 numbers. One number is the
# degree of gray between 0.0 -- 1.0 (0.0 black, 1.0 white).
# Gray is nice to print to the b/w laser printer. You can also use the
# RGB values, i.e., 3 values between 0.0 and 1.0 which will give the
# saturation with Red, Green, and Blue. E.g. using 1.0 0.0 0.0 will give
# you red color, 1.0 1.0 0.0 bright yellow, 1.0 0.65 0.0 orange,
# and 0.65 0.17 0.17 brown. You can also use a gualified (i.e., official)
# Xwindows color name which is defined in the file /usr/lib/X11/rgb.txt, 
# e.g., spec 0.5 blue
#
*/
Modified: Thu Oct 16 16:00:00 1997 GMT
Page accessed 4776 times since Sat Apr 17 22:05:29 1999 GMT