Molecular Mechanics-Based Measures of Steric Effects: Customized Code to
Compute Ligand Repulsive Energies
Robert J. Bubel
a, Warthen Douglass, and David P. White*
bDepartment of Chemistry, University of North Carolina at Wilmington, 601 South
College Road, Wilmington, NC 28403-3297
a
Undergraduate student from the State University of New York at New Paltz
Supplementary Material:
ERCODE.CPP
#include<ctype.h> #include<stdio.h> #include<stdlib.h> #include<string.h> #include<math.h> #define TRUE 1 #define FALSE 0
/**********************GLOBAL VARIABLES*******************************/ struct nodetag
{ // each atom or LP in the molecule has one of int number; // these structures dedicated to them
float x, y, z; // coordinates char type[10]; // forcefield type
float r0, d0; // constants from parameter file
int side; // parameter indicating side ligand side = 0 // fragment side = 1 struct nodetag *next;
};
typedef nodetag *nodeptr;
nodeptr bgf_list; // global pointer to beginning of data
char rlist[230][100]; // 2d char array of parameter file
int cons[100][25]; // replica of connectivity table in interger form
int m[100][100]; // this is the binary array, may want to make it // bigger in case of molecules with more than
// 99 atoms
float unitx, unity, unitz; // unit vector of re bond
float re; // length of re bond
/***********************END GLOBAL VARIABLES*******************************/
void init_ref(void)
// opens the parameter file and stores the constants in the rlist {
char buf[45]="Forcefields (R0 / D0)\n"; FILE *stream;
stream=fopen("param_r.set","rt"); //open parameter file if (!stream)
{printf("File not opened! \n");
exit(1); //program will not work }
do
{ fgets(rlist[i], 100, stream); // useless info(top of file) i++;
}
while(strcmp(buf, rlist[i-1])); // until we get to first line do
{ fgets(rlist[j], 100, stream); // save these data to rlist //
j++;
}while(rlist[j-1][0] != 'E'); // ends at END }/*end init_ref*/
/*****************************************************************/ void menu(void)
{
printf( "==========================================================\n" "Available commands are:\n"
" <1> calculate for *.bgf(start or continue)\n" " <0> quit \n"
"=========================================================\n"); }/*end menu*/
/**********************************************************************/ void getcomm(int &command)
{
printf("Enter command: ");
scanf("%d",&command); // get the command }/*end getcomm
/******************************************************************/ void connections(char conect[], int cons[])
//receives one line of chars from connectivity table translates it to ints {
int i, j , k, l; l=0;
k=9;
do{ j=0;
do{ cx[j]=conect[k]; k++; j++;
}while (j<5); // cover five places for each number cons[l] = atoi(cx); // convert it to an interger
l++; k++;
}while(k<i+1); // is string is at the end?
cons[l]=-1; // indicates the end of connnections }/*end connections*/
/********************************************************************/ void find_constant(nodeptr p, char type[])
//uses rlist to find r0 and d0 based on forcefield type {
char rx[7], dx[11]; // char r0 and d0 to become floats int ptr=1; // flag for when the proper forcefield
// was found in the parameter file int i;
int j=0, d=0;
while (rlist[d][0] != 'E' && ptr !=0) // until we reach a match or the end { ptr = strncmp(rlist[d],type,5);
d++; }
if (ptr != 0) // if none is found { printf("Constant not available for ");
for(i=0;i<6;i++) printf("%c", type[i]); printf("\n");
printf("please enter proper values \nr0: ");
scanf("%f", &p->r0); // prompt user for own values printf("d0: ");
scanf("%f", &p->d0);
}else // type found {
for (i=27; i<35; i++)
{ rx[j]=rlist[d-1][i]; // save constants as chars j++;
} j=0;
for (i=35; i<47; i++) { dx[j]=rlist[d-1][i];
}
p->r0 = atof(rx); // convert them to floats p->d0 = atof(dx);
}
}/*end find_constant*/
/************************************************************************/ void fill_side_array(int side_array[], int current_line[], int i)
//recursive traverses through atoms on frag side indicates each atom with a 1 {
if(side_array[current_line[0] - 1] != -1)// we only replace -1's with 1's ; //stops from placing a 1 over a 0 else
{ side_array[current_line[0] - 1] = 1; // put a 1 at current position i=1;
while(current_line[i] != -1) //go through all the numbers that //are attached to current line
{ fill_side_array(side_array, cons[current_line[i] - 1], i); i++;
} }
}/*end fill_side_array*/
/***********************************************************************/ float dist_form(float x1,float y1,float z1,float x2,float y2, float z2) // given the coordinates of two atoms, find the distance between
{
double half; float dist;
half = (x1-x2)*(x1-x2)+(y1-y2)*(y1-y2)+(z1-z2)*(z1-z2); dist = sqrt(half); // find distance return dist;
}/*end dist_form*/
/**********************************************************************/ void get_unit(int lig, int frag)
// given two atom #'s lookup the info and find unit vector between {
float distance;
nodeptr t1 = bgf_list, t2 = bgf_list;
while(t1->number != lig) // lookup both atoms based on atom # t1 = t1->next;
t2 = t2->next;
distance = dist_form(t1->x, t1->y, t1->z, t2->x, t2->y, t2->z); unitx = ((t1->x - t2->x)/distance);
unity = ((t1->y - t2->y)/distance); unitz = ((t1->z - t2->z)/distance);
re = distance; // equilibrium re bond length }/*end get_unit*/
/**********************************************************************/ int connected(int ligptr, int frag)
// returns a 1 if the two atom #'s are connected {
int j=0; int i=0; do
{ if (cons[ligptr][i] == frag) // check for connectivity in cons[] j = 1;
i++;
}while(cons[ligptr][i] != -1); return j;
}/*end connected()*/
/***********************************************************************/ int find_side(int numatoms)
// uses the connections table>which atoms make up ligand side? fragment side? // returns a 1 on success, returns 0 if numbers given by user are bad
{
int side_array[100]; int lig, frag, i; nodeptr t;
int number;
number = bgf_list->number; // number of atoms on list printf("Which atom number is on"
" the ligand side of the re bond (donor atom): ");
scanf("%d", &lig); // prompt user for atom #'s
printf("Which atom number is on the fragment side of the re bond: "); scanf("%d", &frag);
if (lig>numatoms || frag>numatoms)
{ printf("You have entered one of the numbers incorrectly! \n"); return 0; // too high a number
} // prevents program from crashing else
return 0;
} else
{ for(i=0; i<99; i++)
side_array[i] = -1; // initialize side_array to all -1's i=0;
while(cons[lig-1][i] != -1) // atoms connected directly to it { if(cons[lig-1][i] == frag) // except the frag atom
; else
side_array[cons[lig-1][i]-1] = 0; // identifies this side w/ a 0 i++;
} i=0;
fill_side_array(side_array, cons[frag-1], i); // go through all atoms i=0; // on fragment side while(i<99)
{ if(side_array[i] == -1)
side_array[i] = 0; // anything left must be on ligand side i++;
}
i=number-1; t = bgf_list; while(t != NULL)
{ t->side = side_array[i]; // puts proper side value into i--; // corresponding node
t = t->next; }
get_unit(lig, frag); // unit vector between the atoms for } // when we change re bond length return 1; // 1 indicates a success
}/*end find_side*/
/********************************************************************/ void format_m(int amount)
// stops program from doing calculations twice if[x,y]==1 -> [y,x] gets 0 { int i=0;
int j=0;
while(i < amount) {
{ if (m[i][j] == 1)
{ if (m[j][i] == 0) printf("fatal error"); //should never happen if (m[j][i] == 1)
m[j][i] = 0; // switches opposite // member on matrix to 0 }
j++; } i++; j=0; }
}/*end format_m*/
/*********************************************************************/ void find_connections(FILE *stream)
// function used to create m[][]-- binary array // binary array is a square matrix [#atoms x #atoms] { int k; int amount=0;
int w, i;
char conect[100];
fgets(conect,100,stream); do{
if (conect[0] == 'O') // does not look at the ORDER lines /*do nothing*/;
else
{ connections(conect, cons[amount]); // translate line to ints
amount++; }
fgets(conect,100,stream);
}while(conect[0] != 'E'); // ends when the END string is read /*======================================*/
for(w=0; w< amount; w++) //a binary array m[#][] for each atom { k=0;
do{ // 0 -> bonded relationship(1-2),(1-3) m[w][k]=1; // 1 -> non-bonded
k++;
}while(k < amount); //initializes binary array to all 1's m[w][k]=-1; //defines last member of array
{ do
{ m[w][cons[k][i]-1]=0; //which atoms have a 1-1 1-2 or 1-3 i++;
}while(cons[k][i] != -1); // 1-1 or 1-2? d++;
k=(cons[w][d])-1; i=0;
}while(k != -2); // 1-3 relationship? }
format_m(amount); // fix matrix prevent calculation 2x }/*end find_connections*/
/************************************************************************/ int get_data(nodeptr &p, char input[])
// get individual values from 1 line of input { int i;
int j=0;
char number[5], x[11], y[11], z[11], type[10];
if(!strncmp("HETATM",input,6)) //test if information is valid { for(i=8; i<13; i++){number[j] = input[i]; j++;} j=0; // all of these for(i=31; i<41; i++){x[j] = input[i]; j++;} j=0; // just put the for(i=41; i<51; i++){y[j] = input[i]; j++;} j=0; // important data for(i=51; i<61; i++){z[j] = input[i]; j++;} j=0; // in small for(i=60; i<67; i++){type[j] = input[i]; j++;} // character strings p = (nodeptr)malloc(sizeof(nodetag));
if(p==NULL) // malloc didn't give us the mem { printf("ran out of memory \n");
printf("please exit program, all previous data is still valid"); return 0; //allows user to exit program smoothly }
p->number = atoi(number); p->x = atof(x);
p->y = atof(y); p->z = atof(z);
strcpy(p->type, type);
find_constant(p, p->type); // determine R0 and D0 return 1;
}
else return 0; // if the information is invalid }/*end get_data*/
// appropriate distance, calculate and return Evdw,R
{ float coord1[3]; // temp coordinates so we don't change float coord2[3]; // original value on list
float dis, vander_out; if(t1->side == 1)
{ coord1[0] = (factor*unitx) + t1->x; //if the atom is on the fragment coord1[1] = (factor*unity) + t1->y; //side->move it by move it along coord1[2] = (factor*unitz) + t1->z; //unit vector a distace of factor }
else
{ coord1[0] = t1->x; // then it is on the ligand side so coord1[1] = t1->y; // we use the same coordinates
coord1[2] = t1->z; }
if(t2->side == 1)
{ coord2[0] = (factor*unitx) + t2->x; coord2[1] = (factor*unity) + t2->y; coord2[2] = (factor*unitz) + t2->z; }
else
{ coord2[0] = t2->x; coord2[1] = t2->y; coord2[2] = t2->z; }
dis=dist_form(coord1[0], coord1[1], coord1[2],
coord2[0], coord2[1], coord2[2]); // get distance
vander_out = (d0 * exp(12.5*(1 - (dis/r0)))); // EvdW,R formula return vander_out;
}/*end vander*/
/*************************************************************************/ void vdw_formula(nodeptr p, int atom1, int atom2, float vdw[])
// receive two atom #'s, lookup info on them calculate 7 different Evdw,R // for the pair, add it to the total
{ float d0, r0; nodeptr t1=p, t2=p;
while(t1->number != atom1) // look up info on each atom t1 = t1->next;
while(t2->number != atom2) t2 = t2->next;
vdw[0] = vdw[0] + vander(t1, t2, 0, r0, d0); // EvdW,R with no change // in re bond length
vdw[1] = vdw[1] + vander(t1, t2, .01, r0, d0); // EvdW,R with bond lenth // increased by .01 angstroms vdw[2] = vdw[2] + vander(t1, t2, -.01, r0, d0); // EvdW,R with bond lenth
// decreased by .01 angstroms vdw[3] = vdw[3] + vander(t1, t2, .02, r0, d0); // increased .02 vdw[4] = vdw[4] + vander(t1, t2, -.02, r0, d0); // decreased .02 vdw[5] = vdw[5] + vander(t1, t2, .03, r0, d0);
vdw[6] = vdw[6] + vander(t1, t2, -.03, r0, d0);
}/*end vdw_formula*/
/******************************************************************/ void vanderwahls(nodeptr p, float vdw[])
// examines binary array searching for 1's, sends two atom #'s // for which seven Evdw,R values are going to be calculated { int number, i, j;
number = p->number; // last atom number i=0; j=0;
while(i<number) {
while(j<number) // every member of binary array { if(m[i][j] == 1)
{ vdw_formula(p, i+1, j+1, vdw);// if a 1, perform calc on it j++;
} else
j++; // next element on current row }
j=0;
i++; // next row }
}/*end vanderwahls*/
/************************************************************/ void bond_shrinkage(void)
// this function searches for H__C bonds and shrinks bond length by 0.915 { nodeptr t1, t2;
float distance, ux, uy, uz, n_distance; int atom1, atom2;
while(t1 != NULL)
{ if(!strncmp(" H__C ",t1->type,7)) // look for H__C forcefield types { atom1 = t1->number; // if it finds one identify the H
atom2 = cons[atom1-1][1]; // and the C while(t2->number != atom2)
t2 = t2->next; //lookup info about C atom distance = dist_form(t1->x, t1->y, t1->z, t2->x, t2->y, t2->z); ux = ((t1->x - t2->x)/distance);
uy = ((t1->y - t2->y)/distance); // find unit vector from C to H uz = ((t1->z - t2->z)/distance);
n_distance = distance * .915; // decrease distance by 0.915 t1->x = n_distance*ux + t2->x;
t1->y = n_distance*uy + t2->y; // add new vector to C coordinates t1->z = n_distance*uz + t2->z;
}
t1 = t1->next; // go on looking for more t2 = bgf_list;
}
}/*end bond_shrinkage*/
/*************************************************************************/ void find_er(float vdw[], char filename[], FILE *out)
//calculates Ligand Repulsive Energy { float sxx, sxy, slope, er, avg;
avg = (vdw[0] + vdw[1] + vdw[2] + vdw[3] + vdw[4] + vdw[5] + vdw[6])/7; sxx = .0028;
sxy = (.01*(vdw[1]-avg))+(-.01*(vdw[2]-avg))+(.02*(vdw[3]-avg))+ (-.02*(vdw[4]-avg))+(.03*(vdw[5]-avg))+(-.03*(vdw[6]-avg)); slope = sxy/sxx; // average slope formula er = re*slope;
fprintf(out,"%s ", filename); // sends filename to outfile fprintf(out,"%f \n", er); // sends Er to outfile }/*end find_er*/
/*********************************************************************/ void free_mem(void)
// traverses through list deleting all nodes to get mem back { nodeptr t1,t2;
t1=bgf_list; t2=bgf_list->next; while(t2->next != NULL) { free (t1);
t2 = t2->next; }
}/*end free_mem*/
/********************************************************************/ void adjust_filename(char filename[])
// receives user input file name, checks and adds *.bgf extension { int len;
len = strlen(filename);
if (toupper(filename[len-1]) == 'F' && //if user entered file extension toupper(filename[len-2]) == 'G' &&
toupper(filename[len-3]) == 'B' && filename[len-4] == '.')
; // do nothing else
{
char end[4] = ".bgf";
strncat(filename, end, 4); // attach .bgf to end if filename }
}/*end adjust_filename*/
/********************************************************************/ void bfg(FILE *out, int on_off)
// essentially bgf file driver { int done=0;
while(!done) // this while loop allows user to calc // several files without having to go
// back to menu {
char filename[50];
char buf[30]="FORMAT CONECT (a6,12i6)\n";// when to stop reading char input[100]; // holds data temporarily int numatoms;
int i;
float vdw[7]; // to find Er we need seven different // values for Evdw
for(i=0; i<7; i++) vdw[i] = 0; // Evdw,R is a summation of all // individual Evdw's
// so we start each value at zero nodeptr p;
bgf_list=NULL;
printf("Enter the filename: "); scanf("%s",filename);
// file so user doesn't need enter it FILE *stream;
stream=fopen(filename,"rt"); if (!stream)
{printf("File not opened! \n");
done = 1; // go back to the menu
} else
{ do{
fgets(input, 101, stream);
if(get_data(p, input)) // check if information is useable // if it is, puts information in p {p->next = bgf_list;
bgf_list = p; // bgf_list is the real list we use }
}while(strcmp(buf, input)); numatoms = bgf_list->number;
find_connections(stream); //function uses connectivity table to // create binary array
if(find_side(numatoms)) // this function is used to tell which // side of the re each atom is on
{
if (on_off == 1)
bond_shrinkage();//allows for C__H bond shrinkage
vanderwahls(bgf_list, vdw); // simultaneously solves // for seven values of repulsive // vanderwahls energy by
// adjusting xyz coordinates of // individual atoms
find_er(vdw, filename, out);// calculates Er and sends it to ouput // file along with file name
}
free_mem(); //clears up memory so we can do it again fclose(stream); //close the current input file stream }
}
void driver()
// gets the ouput filename, options...
{ int done; //finish flag int command;
int on_off = -1; char shrink[1];
char outfile[20]; // file for data output printf("Please enter a filename for the data: ");
scanf("%s",outfile); FILE *out;
if ((out = fopen(outfile, "wt")) == NULL)
{
fprintf(stderr, "Cannot open output file.\n"); exit(1);
}
done = FALSE; do {
printf("Do you want SHRINK_CH_BONDS on? (y/n) \n"); scanf("%s",&shrink);
if(toupper(shrink[0]) == 'Y') on_off = 1;
else
if(toupper(shrink[0]) == 'N') on_off =0;
}while (on_off == -1); menu();
while (!done)
{ getcomm (command); if (command == 1)
{ bfg(out, on_off); } else
if (command == 0) done = TRUE;
menu(); }
fclose(out); }/*end driver*/
/*********************************************************************/
{ printf("ERCODE. \n"
"When using this work, please reference \n" "Robert J. Bubel \n"
"T. Warthen Douglass and \n" "David P. White \n"
"Journal of Computational Chemistry \n" "submitted for publication, 1999 \n");
init_ref();//this fills up char rlist[155][100] with information in //parameter file
driver(); //this is the main part of the program
printf("Bye ! \n");