/* Written by Jan Labanowski (jkl@ccl.net) in 1997 for the common good. * converts files in the Xmol (XYZ) format to the format required by xbs * just compile by giving the following command: * cc -o xyz2bs xyz2bs.c -lm * and then use as * xyz2bs myfile.xyz * which will create myfile.bs and eventually myfile.mv (if more than one frame */ #include #include #include #include #include #define BONDSCALE 0.1 /* bond radius is BONDSCALE * sum of rvdw for atoms */ #define ATOMSCALE 0.3 /* atom radius = ATOMSCALE * rvdw */ #define BONDMINLENGTH 0.1 /* bond not showed if shorter than BONDMINLENGTH times sum of covalent radii */ #define BONDMAXLENGTH 1.5 /* bond not showed if longer than bondmaxlenghth times sum of covalent radii */ typedef struct ATOM_DAT { /* structure to hold data about atom types */ char symb[4]; /* chemical symbol */ float rvdw; /* van der Waals radius */ float rcov; /* covalent radius */ float r; /* Red color intensity in RGB values (0 to 1) */ float g; /* Green color intensity in RGB values (0 to 1) */ float b; /* Blue color intensity in RGB values (0 to 1) */ } ATOM_DAT; static int n_at_types = 0; /*symb rvdw rcov R G B */ static ATOM_DAT atom_data[] = { "C", 1.65, 0.77, 0.184, 0.310, 0.310, "H", 1.25, 0.32, 0.973, 0.973, 1.000, "Li", 1.86, 1.23, 0.698, 0.133, 0.133, "B", 0.00, 0.82, 0.000, 0.392, 0.000, "N", 1.50, 0.75, 0.000, 0.749, 1.000, "O", 1.35, 0.73, 0.804, 0.361, 0.361, "F", 1.40, 0.72, 0.933, 0.910, 0.667, "Na", 1.12, 1.54, 0.941, 0.973, 1.000, "Mg", 0.87, 1.36, 0.133, 0.545, 0.133, "Al", 1.70, 1.18, 0.184, 0.310, 0.310, "Si", 1.93, 1.16, 0.933, 0.910, 0.667, "P", 1.75, 1.06, 1.000, 0.647, 0.000, "S", 1.85, 1.02, 0.678, 1.000, 0.184, "Cl", 1.80, 0.99, 0.000, 0.392, 0.000, "K", 1.44, 2.03, 1.000, 0.078, 0.576, "Ca", 1.18, 1.74, 0.184, 0.310, 0.310, "Sc", 1.37, 1.44, 0.184, 0.310, 0.310, "Ti", 1.37, 1.32, 0.184, 0.310, 0.310, "V", 1.37, 1.22, 0.184, 0.310, 0.310, "Cr", 1.37, 1.18, 0.184, 0.310, 0.310, "Mn", 1.37, 1.17, 0.184, 0.310, 0.310, "Fe", 0.90, 1.17, 1.000, 0.647, 0.000, "Co", 0.88, 1.16, 0.737, 0.561, 0.561, "Ni", 0.69, 1.15, 0.737, 0.561, 0.561, "Cu", 0.72, 1.17, 0.737, 0.561, 0.561, "Zn", 0.74, 1.25, 0.737, 0.561, 0.561, "Ga", 1.37, 1.26, 0.737, 0.561, 0.561, "Br", 1.95, 1.14, 0.737, 0.561, 0.561, "Rb", 1.58, 2.16, 0.737, 0.561, 0.561, "Ag", 1.27, 1.34, 0.184, 0.310, 0.310, "I", 2.15, 1.33, 0.627, 0.125, 0.941, "Cs", 1.84, 2.35, 0.737, 0.561, 0.561, "Au", 1.37, 1.34, 0.933, 0.910, 0.667, "Bq", 1.00, 0.00, 1.000, 0.078, 0.576, "X", 1.00, 0.00, 1.000, 0.078, 0.576, "Xx", 1.00, 0.00, 1.000, 0.078, 0.576, "LST",0.00, 0.00, 0.000, 0.000, 0.000}; /* do not delete this line ever !!! */ /* ====================================================================== */ /* given the chemical symbol, returns the element of atom_data table, * or -1 if match was not found */ int find_type(symbol) char *symbol; { int i, type; for(i = 0; i < n_at_types; i++) { if(strcmp(symbol, atom_data[i].symb) == 0) { return(i); } } return(-1); } /* ====================================================================== */ /* skips blanks at the beginning of the string and moves chars to the left, * Returns number of blanks found */ int skip_front_blanks(str) char *str; { int i, k; k = 0; while((isspace(str[k]) != 0) && (str[k] != '\0')) k++; if(k != 0) { i = 0; while(str[i] = str[i+k]) i++; } return(k); } /* ====================================================================== */ /* skips blanks at the end of the string and returns number of blanks found */ int skip_end_blanks(str) char *str; { int i, k, l; l = strlen(str); k = l-1; for(k = l-1; k >= 0; k--) { if(isspace(str[k]) == 0) { break; } } str[++k] = '\0'; return(l-k); } /* ====================================================================== */ /* extract a word (i.e., sequence of nonblank characters) from the string, * starting at character pointer by start. start is then automatically * advanced to point at the next character after the word. Returns length * of extracted word, or zero if nothing extracted */ int extract_word(str, k, word) char *str, *word; int *k; { int i, j; char ch; i = *k; word[0] = '\0'; j = 0; while((ch = str[i++]) != '\0') { if(isspace(ch) == 0) { word[j++] = ch; word[j] = '\0'; if(isspace(str[i]) != 0) { *k = i; return(j); } } } *k = i-1; return(j); } /* ====================================================================== */ main(argc, argv) int argc; char **argv; { char inpname[200], outname[200], outmove[200]; FILE *inpf, *outbs, *outmv; int i, j, k, l, n, n_spec, n_frames, n_atoms, n_bonds, ord, ord1, ord2, saved; int at_ord[10000]; int spec_ord[100]; float d, dx, dy, dz, dcov; float b_min[100], b_max[100], b_rad[100], b_col[100]; float x[10000], y[10000], z[10000]; int b_at1[10000], b_at2[10000]; unsigned ip; char *pch; char line[500]; char sym[3]; char *title[1000]; char fields[20][20]; /* find how many atom types are available */ n_at_types = 0; while(strcmp(atom_data[n_at_types++].symb, "LST") != 0); if(argc != 2) { fprintf(stderr, "Usage: xyz2bs filename.[xyz]\n"); exit (2); } strcpy(inpname, argv[1]); /* copy file name to inpfilename */ skip_front_blanks(inpname); skip_end_blanks(inpname); strcpy(outname, inpname); /* copy file name to inpfilename */ strcpy(outmove, inpname); /* copy file name to inpfilename */ pch = strchr(inpname, '.'); /* find position of a period */ if(pch == NULL) { pch = strchr(inpname, '\0'); } ip = pch - inpname; /* point at period */ strcpy(inpname + ip, ".xyz"); /* input file */ strcpy(outname + ip, ".bs"); /* output file for 1st frame */ strcpy(outmove + ip, ".mv"); /* output file for frames 2 to last */ if(((inpf = fopen(inpname, "r")) == NULL) && ((inpf = fopen(argv[1], "r")) == NULL)) { fprintf(stderr,"Could open neither %s nor %s\n", inpname, argv[1]); exit(2); } n = -1; n_spec = -1; n_atoms = 1000; n_frames = -1; while (fgets(line, 490, inpf) != NULL) { n++; skip_front_blanks(line); skip_end_blanks(line); fprintf(stderr,"%d: |%s|\n", n, line); if((n == 0) || (n == (n_frames+1)*(n_atoms + 2))) { if(line[0] == '\0') { /* terminate, empty line where atom count */ n--; break; } sscanf(line, "%d", &k); /* get number of atoms */ fprintf(stderr, "Nat=%d\n", k); if((n_frames > 0) && (k != n_atoms)) { fprintf(stderr, "Number of atoms in frame %d does not match previous frame\n", n_frames + 1); exit(2); } n_atoms = k; n_frames++; } else if((n == 1) || (n == n_frames*(n_atoms + 2) + 1)) { /* get titles */ l = strlen(line) + 1; title[n_frames] = malloc(sizeof(char)*l); strcpy(title[n_frames], line); /* save title */ fprintf(stderr,"title[%d]=|%s|\n", n_frames, line); } else { k = n % (n_atoms + 2) - 1; /* atom number */ l = 0; j = 0; while(extract_word(line, &j, fields[l]) != 0) { fprintf(stderr,"Word[%d]=|%s|\n", l, fields[l]); l++; /* extract columns */ } if(l < 4) { /* if line has less columns than symbol and coordinates */ fprintf(stderr, "Wrong line in XYZ file for atom %d in frame %d\n", k, n_frames + 1); exit(2); } sym[0] = fields[0][0]; /* get atom symbol */; sym[1] = fields[0][1]; sym[2] = '\0'; if(isalpha(sym[0]) == 0) { fprintf(stderr, "Wrong atom symbol at line %d\n", n); exit(2); } if(isalpha(sym[1]) == 0) { sym[1] = '\0'; } if(islower(sym[0]) != 0) { sym[0] = toupper(sym[0]); } if(isupper(sym[1]) != 0) { sym[1] = tolower(sym[1]); } x[n] = atof(fields[1]); y[n] = atof(fields[2]); z[n] = atof(fields[3]); if((ord = find_type(sym)) < 0) { fprintf (stderr, "Atom symbol %s is not defined in this perl script. Add this symbol to\n", sym); fprintf(stderr, "the table at the top of the program, recompile, and rerun.\n"); exit(2); } at_ord[n] = ord; if(n_frames == 0) { saved = 0; for(i = 0; i <= n_spec; i++) { if(spec_ord[i] == ord) { saved = 1; break; } } if(saved == 0) { n_spec++; spec_ord[n_spec] = ord; } } } } if(n != (n_frames+1)*(n_atoms + 2) - 1) { fprintf(stderr, "Number of atoms and number of coordinates do not much in last frame\n"); exit(2); } /* find bonds which are present in the 1st frame. */ k = 0; n_bonds = -1; for (i = 2; i < n_atoms + 1; i++) { /* scan all atom pairs */ for(j = i+1; j < n_atoms + 2; j++) { /* calculate distance between atoms; */ dx = x[i] - x[j]; dy = y[i] - y[j]; dz = z[i] - z[j]; d = sqrt(dx*dx + dy*dy + dz*dz); /* get atom pointers in the table */ ord1 = at_ord[i]; ord2 = at_ord[j]; /* calculate sum of covalent radii for the atoms */ if((atom_data[ord1].rcov > 0.001) && (atom_data[ord2].rcov > 0.001)) { dcov = atom_data[ord1].rcov + atom_data[ord2].rcov; } else { dcov = 0.0; } if(d > 1.5*dcov) { /* skip nonbonded atom pairs */ continue; } /* skip bonds already saved */ saved = 0; for(l = 0; l <= n_bonds; l++) { if(((b_at1[l] == ord1) && (b_at2[l] == ord2)) || ((b_at1[l] == ord2) && (b_at2[l] == ord1))) { saved = 1; continue; } } if(saved == 1) { /* do not save is alrderady there */ continue; } /* save unique bond */ n_bonds++; b_at1[n_bonds] = ord1; b_at2[n_bonds] = ord2; b_min[n_bonds] = BONDMINLENGTH * dcov; b_max[n_bonds] = BONDMAXLENGTH * dcov; b_rad[n_bonds] = BONDSCALE * dcov; b_col[n_bonds] = 0.5; } } /* Now print the stuff */ if((outbs = fopen(outname, "w")) == NULL) { fprintf(stderr, "Cannot create %s\n", outname); exit(2); } fprintf(outbs, "* %s\n",title[0]); /* print coordinates for the 1st frame */ for(i = 1; i <= n_atoms; i++) { fprintf(outbs, "atom %s %11.6f %11.6f %11.6f\n", atom_data[at_ord[i+1]].symb, x[i+1], y[i+1], z[i+1]); } fprintf(outbs, "\n"); /* print specs for atoms */ for (i = 0; i <= n_spec; i++) { ord = spec_ord[i]; fprintf(outbs, "spec %s %6.3f %5.2f %5.2f %5.2f\n", atom_data[ord].symb, ATOMSCALE*atom_data[ord].rvdw, atom_data[ord].r, atom_data[ord].g, atom_data[ord].b); } fprintf(outbs, "\n"); /* print bond specs */ for(i = 0; i <= n_bonds; i++) { fprintf(outbs, "bonds %s %s %6.3f %6.3f %5.2f %5.2f\n", atom_data[b_at1[i]].symb, atom_data[b_at2[i]].symb, b_min[i], b_max[i], b_rad[i], b_col[i]); } fclose(outbs); /* check if additional frames are present in XYZ file, and print them */ if(n_frames > 0) { if((outmv = fopen(outmove, "w")) == NULL) { fprintf(stderr, "Cannot create %s\n", outmove); exit(2); } for(i = 1; i <= n_frames; i++) { fprintf(outmv, "frame %s\n", title[i]); for(j = i * (n_atoms + 2) + 2; j <= (i+1) * (n_atoms + 2)-1; j++) { fprintf(outmv, "%9.4f %9.4f %9.4f\n", x[j], y[j], z[j]); } } fclose(outmv); } } /* # COMMENTS... # The Xmol is a program originally conceived in the Minnesota Supercomputer # center and available from anon.ftp ftp.msc.edu (pub/xmol). # The xbs is the program written by Michael Methfessel, and retrieved from # http://ihp02.ihp-ffo.de/~msm/. # the xbs is an X-windows program written in C language which displays # molecules. The xbs uses its own format for representing molecules. # For this reason this script is used to convert from the popular XYZ format # (which is more populer) to the format required by xbs. # # Short example of XYZ format: # 6 # This is methanol with normal modes # C 0.050065 0.665047 0.000000 0.200000 -0.020000 0.000000 # O 0.050065 -0.767876 0.000000 -0.140000 0.100000 0.000000 # H -0.912101 -1.005927 0.000000 0.340000 -1.740000 0.000000 # H 1.090132 0.995415 0.000000 -0.040000 0.800000 0.000000 # H -0.439468 1.081620 0.886605 -0.180000 -0.080000 -0.180000 # H -0.439468 1.081620 -0.886605 -0.180000 -0.080000 0.180000 # # 1st line -- number of atoms # 2nd line -- some title (or empty line) # 3rd line to the end -- atom symbol and XYZ cartesian coordinates in the # first columns. If there are more than 4 columns, the columns # additional columns represent: # if 5 columns, last column is atomic charges # if 7 columns, last column is normal modes vectors # The XYZ file may contain more then one frame for animations. In this case # the subsequent frames follow exactly the same format as above. # # # The xbs can use one or 2 files which have extensions .bs and .mv. # The .bs represents the information about the 1st frame and this file # has to be present. It has the following format # Short example of xbs format: for a first frame # * methanol # atom C 0.050065 0.665047 0.000000 # atom O 0.050065 -0.767876 0.000000 # atom H -0.912101 -1.005927 0.000000 # atom H 1.090132 0.995415 0.000000 # atom H -0.439468 1.081620 0.886605 # atom H -0.439468 1.081620 -0.886605 # # spec C 0.495 0.70 # spec O 0.405 0.40 # spec H 0.375 1.00 # # bonds C O 0.150 2.250 0.15 0.70 # bonds C H 0.109 1.635 0.11 0.70 # bonds O H 0.105 1.575 0.11 0.70 # # Only lines which start with the words "atom", "spec", and "bonds". # # It is assumed that length is given in Angstroms. # are used by the program. The other lines are skipped over and not used. # What the tagged lines mean # atom symbol xcoor ycoor zcoor # spec symbol radius color # bonds symbol1 symbol2 min-length max-length radius color. # The color can be either one number or 3 numbers. One number is the # degree of gray between 0.0 -- 1.0 (0.0 black, 1.0 white). # Gray is nice to print to the b/w laser printer. You can also use the # RGB values, i.e., 3 values between 0.0 and 1.0 which will give the # saturation with Red, Green, and Blue. E.g. using 1.0 0.0 0.0 will give # you red color, 1.0 1.0 0.0 bright yellow, 1.0 0.65 0.0 orange, # and 0.65 0.17 0.17 brown. You can also use a gualified (i.e., official) # Xwindows color name which is defined in the file /usr/lib/X11/rgb.txt, # e.g., spec 0.5 blue # */