// Copyright 1992, U.C.S.F. Computer Graphics Laboratory
// $Id: PDBio.cc,v 1.20 1994/10/04 22:00:21 gregc Exp $

extern "C" {
#include <ctype.h>
#include <sys/types.h>
#include <time.h>
}
#include <fstream.h>

#include "int.VoidP.CHMap.h"
#include "VoidP.CHSet.h"
#include "Symbol.VoidP.VHMap.h"
#include <tmpl/template.h>

#ifndef _toupper
#define	_toupper	toupper
#endif

// Residue sequence numbers: (chain, number, insertion code) triples
// Make unique by (1) nothing special if unused, (2) append asterisks
// if previously used.

// Create a bond between the atoms registered under the serial numbers
// `from` and `to` in atomSerialNums, unless they are already connected.
//
// Nothing is done when `to` is negative or smaller than `from`.
// NOTE(review): the `to < from` skip presumably relies on the matching
// CONECT record of the lower-numbered atom to create the bond -- confirm.
//
// A serial number that is missing from the map is reported on cerr and
// the request is ignored.  A record that names the atom itself is ignored
// as well, so no atom is ever bonded to itself.
template <class Bond, class Atom, class Residue, class Coord, class CoordSet, class Molecule>
void PDBio<Bond,Atom,Residue,Coord,CoordSet,Molecule>::
addBond(intVoidPMap *atomSerialNums, int from, int to)
{
	if (to < 0 || to < from)
		return;
	// check both ends before indexing the map with them
	if (!atomSerialNums->contains(from)) {
		cerr << "CONECT record for nonexistant atom: " << from << endl;
		return;
	}
	if (!atomSerialNums->contains(to)) {
		cerr << "CONECT record to nonexistant atom: (" << from << ", "
							<< to << ")" << endl;
		return;
	}
	Atom *a = (Atom *) (*atomSerialNums)[from];
	Atom *b = (Atom *) (*atomSerialNums)[to];
	if (a == NULL || b == NULL || a == b)
		return;
	if (!a->isConnectedTo(b))
		(void) new Bond(a, b);
}

// Bond atom `a` to the atom called `name` in residue `r`.  Does nothing
// when the residue has no such atom or the two are already connected.
template <class Bond, class Atom, class Residue, class Coord, class CoordSet, class Molecule>
void PDBio<Bond,Atom,Residue,Coord,CoordSet,Molecule>::
addBondToNamed(Atom *a, Residue *r, const Symbol &name)
{
	Atom *partner = r->lookupAtom(name);
	if (partner != NULL && !a->isConnectedTo(partner))
		(void) new Bond(a, partner);
}

// Bond `a` and `b` when their separation is within PDBioTolerance of the
// bond length AtomicSymbol::bondLength() reports for their two elements.
//
// Returns true when the atoms are connected on exit (either they already
// were, or a bond was just created); false when no bond length is known
// for the pair (bondLength() returned 0) or the distance is out of range.
template <class Bond, class Atom, class Residue, class Coord, class CoordSet, class Molecule>
bool PDBio<Bond,Atom,Residue,Coord,CoordSet,Molecule>::
addBondIfClose(Atom *a, Atom *b)
{
	if (a->isConnectedTo(b))
		return true;

	float ideal = AtomicSymbol::bondLength(a->atomicSymbol(),
							b->atomicSymbol());
	if (ideal == 0.0)
		return false;

	// compare squared distances so no square root is needed
	float lower_sq = ideal - PDBioTolerance;
	lower_sq *= lower_sq;
	float upper_sq = ideal + PDBioTolerance;
	upper_sq *= upper_sq;

	Coord *pa = a->getCoord();
	Coord *pb = b->getCoord();
	float sep_sq = (pa->xyz[0] - pb->xyz[0]) * (pa->xyz[0] - pb->xyz[0])
			+ (pa->xyz[1] - pb->xyz[1]) * (pa->xyz[1] - pb->xyz[1])
			+ (pa->xyz[2] - pb->xyz[2]) * (pa->xyz[2] - pb->xyz[2]);

	if (!(sep_sq < lower_sq || sep_sq > upper_sq)) {
		(void) new Bond(a, b);
		return true;
	}
	return false;
}

// Return the atom of residue `r` that is nearest to atom `a`, never `a`
// itself, or NULL when `a` is NULL or `r` holds no other atom.
//
// When ret_dist_sq is non-NULL it always receives the squared distance to
// the returned atom, or 0 when NULL is returned.
template <class Bond, class Atom, class Residue, class Coord, class CoordSet, class Molecule>
Atom * PDBio<Bond,Atom,Residue,Coord,CoordSet,Molecule>::
findClosest(Atom *a, Residue *r, float *ret_dist_sq)
{
	Atom *closest = NULL;
	float dist_sq = 0;

	if (a != NULL) {
		Coord *c = a->getCoord();
		// Start at the iterator's initial position: stepping before the
		// first test would leave the first atom of the residue out.
		for (CIter<Atom> ai = r->citerAtom(); ai.ok(); ai.next()) {
			if (ai == a)
				continue;	// an atom is not its own neighbour
			Coord *c1 = ai->getCoord();
			float new_dist_sq
				= (c->xyz[0] - c1->xyz[0]) * (c->xyz[0] - c1->xyz[0])
				+ (c->xyz[1] - c1->xyz[1]) * (c->xyz[1] - c1->xyz[1])
				+ (c->xyz[2] - c1->xyz[2]) * (c->xyz[2] - c1->xyz[2]);
			if (closest != NULL && new_dist_sq >= dist_sq)
				continue;
			dist_sq = new_dist_sq;
			closest = ai;
		}
	}
	if (ret_dist_sq)
		*ret_dist_sq = dist_sq;
	return closest;
}

// Bond the closest pair of atoms between residues `from` and `to`: for
// every atom of `from` the nearest atom of `to` is found with
// findClosest(), and the pair with the smallest squared distance is
// bonded unless it is already connected.
//
// Nothing is done when no pair exists (either residue has no atoms).
template <class Bond, class Atom, class Residue, class Coord, class CoordSet, class Molecule>
void PDBio<Bond,Atom,Residue,Coord,CoordSet,Molecule>::
addBondNearestPair(Residue *from, Residue *to)
{
	Atom	*fsave = NULL, *tsave = NULL;
	float	dist_sq = 0;

	for (CIter<Atom> ai = from->citerAtom(); ai.ok(); ai.next()) {
		float	new_dist_sq = 0;

		Atom *b = findClosest(ai, to, &new_dist_sq);
		if (b == NULL)
			continue;	// nothing in `to` to pair this atom with
		if (fsave == NULL
		|| new_dist_sq < dist_sq) {
			fsave = ai;
			tsave = b;
			dist_sq = new_dist_sq;
		}
	}
	if (fsave == NULL || tsave == NULL)
		return;
	if (!fsave->isConnectedTo(tsave))
		(void) new Bond(fsave, tsave);
}

// Read PDB records from `input` into molecule `m`.
//
// Reading stops at end of input, at an END record, at an ENDMDL record
// when MULTIPLE_MODELS is set in `what`, or at the first fatal error.
//
//	input		stream the records are extracted from
//	m		molecule that receives residues, atoms and bonds
//	filename	name used in diagnostics only
//	line_num	in/out record counter, incremented once per record
//	start, end	VoidPCHSet pointers (passed as void *) that collect
//			the first and last residue of each connected run
//
// Every record is first offered to rFunc when its type is selected by
// rMask; a non-zero result from rFunc means the record was consumed.
//
// Returns true only when reading stopped at an ENDMDL with
// MULTIPLE_MODELS set, i.e. the caller should read another molecule.
// Fatal problems are described in ioErr and yield false; recoverable
// ones are reported on cerr and reading continues.
template <class Bond, class Atom, class Residue, class Coord, class CoordSet, class Molecule>
bool PDBio<Bond,Atom,Residue,Coord,CoordSet,Molecule>::
readOneMolecule(istream *input, Molecule *m, const char *filename,
					int *line_num, void *start, void *end)
{
	VoidPCHSet	*startResidues = (VoidPCHSet *) start;
	VoidPCHSet	*endResidues = (VoidPCHSet *) end;
	int		start_connect = 1;	// next new residue starts a run
	int		het_bonus = 0;	// # of *'s to append to chain id
	String		cid, aname, rname;
	Symbol		sulfur_gamma("SG");	// should be global
	Symbol		model("model");		// ditto
	int		in_model = 0;	// number of MODEL records seen
	Residue		*curResidue = NULL;
	MolCoordSetId	csid;
	bool		more = false;
	intVoidPCHMap	asn((void *) 0, 256);	// Atom Serial Numbers -> Atom*
	PDB		record;
	
	while (*input >> record) {
		int	i;

		*line_num += 1;
		if (rMask.test(record.type()) && rFunc != 0
		&& rFunc(&record, m, &asn))
			continue;

		switch (record.type()) {

		default:	// ignore other record types
			break;

		case PDB::UNKNOWN:
			if (!isascii(record.unknown.junk[0])) {
				// `dec` is the manipulator; inserting the
				// ios::dec flag would print its numeric value
				ioErr << "Non-ASCII character on line "
					<< dec << *line_num << " of "
					<< filename;
				return false;
			}
			cerr << "Ignored bad PDB record found on line "
							<< *line_num << endl;
			break;

	// TODO: save as comments
		case PDB::HEADER:
		case PDB::SOURCE:
		case PDB::COMPND:
		case PDB::AUTHOR:
		case PDB::JRNL:
			// nothing is stored yet, whether or not COMMENTS
			// was requested
			break;

		case PDB::MODEL:
			in_model += 1;
			het_bonus = 0;		// restart residue naming
			curResidue = NULL;
			// set coordinate set name to model#
			csid = MolCoordSetId(model, record.model.num);
			m->activeCoordSet(new CoordSet(m, csid));
			break;

		case PDB::ENDMDL:
			if (what.test(MULTIPLE_MODELS)) {
				more = true;
				goto finished;
			}
			break;

		case PDB::END:
			goto finished;

		case PDB::TER:
			start_connect = 1;
			break;

		case PDB::ATOM:
		case PDB::HETATM:
			if (!what.test(ATOMS))
				break;

			// build the residue id: upper-cased chain id plus one
			// '*' for each time the numbering has run backwards
			if (record.atom.residue.chainId != ' ')
				cid = upcase(record.atom.residue.chainId);
			else
				cid = "";
			for (i = 0; i < het_bonus; i += 1)
				cid += '*';
			if (islower(record.atom.residue.insertCode))
				record.atom.residue.insertCode
					= _toupper(record.atom.residue.insertCode);
			MolResId rid(cid, record.atom.residue.seqNum,
				record.atom.residue.insertCode);
			rname = record.atom.residue.name;
			PDBioCanonicalizeResidueName(&rname);
			if (in_model > 1) {
				// later models only supply coordinates: the
				// residue must exist from the first model
				if (curResidue == NULL)
					curResidue = m->lookupResidue(rid);
				else if (curResidue->id() != rid
				|| curResidue->type() != rname) {
					if (curResidue->id().chainId() ==
								rid.chainId() 
					&& curResidue->id().position()
							> rid.position()) {
						het_bonus += 1;
						cid += '*';
						rid = MolResId(cid,
						record.atom.residue.seqNum,
						record.atom.residue.insertCode);
					}
					curResidue = m->lookupResidue(rid);
				}
				if (curResidue == NULL) {
					ioErr << "Residue " << rid
						<< " not in first model"
						<< " on line " << *line_num
						<< " of " << filename;
					goto finished;
				}
			} else if (curResidue == NULL
			|| curResidue->id() != rid
			|| curResidue->type() != rname) {
				// a new residue begins
				if (curResidue != NULL
				&& curResidue->id().chainId() == rid.chainId() 
				&& curResidue->id().position()
							> rid.position()) {
					// numbering went backwards in the same
					// chain: make the id unique
					het_bonus += 1;
					cid += '*';
					rid = MolResId(cid,
						record.atom.residue.seqNum,
						record.atom.residue.insertCode);
				}
				if (record.type() == PDB::HETATM)
					start_connect = 1;
				else if (curResidue != NULL
				&& curResidue->id().chainId() != rid.chainId())
					start_connect = 1;
				if (start_connect && curResidue != NULL)
					(void) endResidues->add((void *) curResidue);
				curResidue = new Residue(m, rname, rid);
				if (start_connect)
					(void) startResidues->add((void *) curResidue);
				start_connect = 0;
			}
			aname = record.atom.name;
			if (record.atom.altLoc)
				aname += record.atom.altLoc;
			PDBioCanonicalizeAtomName(&aname);
			Atom *a = curResidue->lookupAtom(aname);
			if (in_model > 1) {
				if (a == NULL) {
					ioErr << "Atom " << aname
						<< " not in first model"
						<< " on line " << *line_num
						<< " of " << filename;
					goto finished;
				}
			} else if (a != NULL) {
				cerr << curResidue->type() << ' '
					<< curResidue->id()
					<< " already has atom named " << aname
					<< " on line " << *line_num << " of "
					<< filename << endl;
				break;
			} else
				a = new Atom(m, curResidue, aname,
						AtomicSymbol(record.atom.name));
			Coord *p = a->getCoord();
			p->xyz[0] = record.atom.xyz[0];
			p->xyz[1] = record.atom.xyz[1];
			p->xyz[2] = record.atom.xyz[2];
			// A later model reusing the serial number of the very
			// same atom is expected; warn only when the number
			// already belongs to a different atom.
			if (asn.contains(record.atom.serialNum)
			&& asn[record.atom.serialNum] != (void *) a)
				cerr
			<< "warning: duplicate atom serial number found: "
					<< record.atom.serialNum << endl;
			asn[record.atom.serialNum] = (void *) a;
			break;

		case PDB::CONECT:
			if (!what.test(ATOMS))
				break;

			if (!asn.contains(record.conect.serialNum)){
				cerr
				<< "CONECT record for nonexistant atom: "
					<< record.conect.serialNum << endl;
				break;
			}
			if (what.test(COVALENT_BONDS)) {
				for (i = 0; i < 4; i += 1)
					addBond(&asn, record.conect.serialNum,
						record.conect.covalent[i]);
			}
			if (what.test(HYDROGEN_BONDS)) {
				addBond(&asn, record.conect.serialNum,
					record.conect.bonds[0].hydrogen[0]);
				addBond(&asn, record.conect.serialNum,
					record.conect.bonds[0].hydrogen[1]);
				addBond(&asn, record.conect.serialNum,
					record.conect.bonds[1].hydrogen[0]);
				addBond(&asn, record.conect.serialNum,
					record.conect.bonds[1].hydrogen[1]);
			}
			if (what.test(SALT_BRIDGES)) {
				addBond(&asn, record.conect.serialNum,
					record.conect.bonds[0].salt);
				addBond(&asn, record.conect.serialNum,
					record.conect.bonds[1].salt);
			}
			break;

		case PDB::SSBOND:
			// process SSBOND records as CONECT because midas
			// used to use them that way
			if (!what.test(COVALENT_BONDS))
				break;
			MolResId ss(record.ssbond.residues[0].chainId,
				record.ssbond.residues[0].seqNum,
				record.ssbond.residues[0].insertCode);
			Residue *ssres;
			// one lookup per residue/atom, so that a missing SG
			// atom reaches its diagnostic below
			ssres = m->lookupResidue(ss);
			if (ssres == NULL)
				break;
			if (ssres->type() != record.ssbond.residues[0].name) {
				cerr <<  "can't touch this 0" << endl;
				break;
			}
			Atom *ap0;
			ap0 = ssres->lookupAtom(sulfur_gamma);
			if (ap0 == NULL) {
				cerr << "Atom SG not found in " << ssres->type()
						<< ' ' << ssres->id() << endl;
				break;
			}

			ss = MolResId(record.ssbond.residues[1].chainId,
				record.ssbond.residues[1].seqNum,
				record.ssbond.residues[1].insertCode);
			ssres = m->lookupResidue(ss);
			if (ssres == NULL)
				break;
			if (ssres->type() != record.ssbond.residues[1].name) {
				cerr <<  "can't touch this 1" << endl;
				break;
			}
			Atom *ap1;
			ap1 = ssres->lookupAtom(sulfur_gamma);
			if (ap1 == NULL) {
				cerr << "Atom SG not found in " << ssres->type()
						<< ' ' << ssres->id() << endl;
				break;
			}
			if (!ap0->isConnectedTo(ap1))
				(void) new Bond(ap0, ap1);
			break;
		}
	}

finished:
	// make the last residue an end residue
	if (curResidue != NULL) {
		(void) endResidues->add((void *) curResidue);
		curResidue = NULL;
	}
	return more;
}

// Create the covalent bonds of molecule `m`.
//
// `start` and `end` are VoidPCHSet pointers (passed as void *) holding
// the residues that begin and finish a connected run.  Each residue is
// first bonded internally -- from its template when tmplFindResidue()
// knows the residue type, otherwise by interatomic distance -- and is
// then joined to the residue before it unless that one finished a run.
template <class Bond, class Atom, class Residue, class Coord, class CoordSet, class Molecule>
void PDBio<Bond,Atom,Residue,Coord,CoordSet,Molecule>::
connectMolecule(Molecule *m, void *start, void *end)
{
	VoidPCHSet	*runStarts = (VoidPCHSet *) start;
	VoidPCHSet	*runEnds = (VoidPCHSet *) end;
	Residue	*lastRes = NULL;	// previous residue of the current run
	Atom	*lastLink = NULL;	// its template "link" atom, if any

	for (CIter<Residue> ri = m->citerResidue(); ri.ok(); ri.next()) {
		int isStart = runStarts->contains((void *) (Residue *) ri);
		int isEnd = runEnds->contains((void *) (Residue *) ri);
		const TmplResidue *tr = tmplFindResidue(ri->type(), isStart, isEnd);

		if (tr == NULL) {
			// no template: bond atoms of the residue by distance
			for (CIter<Atom> ai = ri->citerAtom(); ai.ok();
								ai.next()) {
				bool bonded = false;
				CIter<Atom> aai = ai;
				aai.next();
				while (aai.ok()) {
					if (addBondIfClose(ai, aai))
						bonded = true;
					aai.next();
				}
				if (bonded)
					continue;
				CIter<Bond> bi = ai->citerBond();
				if (bi.ok())
					continue;
				// still unbonded: attach to the nearest atom
				Atom *nearest = findClosest(ai, ri, NULL);
				if (nearest != NULL)
					(void) new Bond(ai, nearest);
			}
		} else {
			// copy the template's bonds onto the atoms that exist
			for (CIter<TmplAtom> ai = tr->citerAtom(); ai.ok();
								ai.next()) {
				Atom *a = ri->lookupAtom(ai->name());
				if (a != NULL) {
					a->type(ai->type());
#ifdef __GNUG__
	{ // workaround g++ bug
#endif
					for (CIter<TmplBond> bi = ai->citerBond();
							bi.ok(); bi.next()) {
						Atom *b = ri->lookupAtom(bi->otherAtom(ai)->name());
						if (b != NULL && !a->isConnectedTo(b))
							(void) new Bond(a, b);
					}
#ifdef __GNUG__
	}
#endif
				}
			}
		}

		// join this residue to the previous one
		if (lastLink != NULL) {
			if (tr != NULL && tr->chief() != NULL) {
				addBondToNamed(lastLink, ri,
						tr->chief()->name());
			} else {
				Atom *nearest = findClosest(lastLink, ri, NULL);
				if (nearest != NULL
				&& !lastLink->isConnectedTo(nearest))
					(void) new Bond(lastLink, nearest);
			}
		} else if (lastRes != NULL) {
			if (tr != NULL && tr->chief() != NULL) {
				Atom *chief = ri->lookupAtom(tr->chief()->name());
				Atom *nearest = findClosest(chief, lastRes, NULL);
				if (chief != NULL && nearest != NULL
				&& !chief->isConnectedTo(nearest))
					(void) new Bond(chief, nearest);
			} else {
				addBondNearestPair(lastRes, ri);
			}
		}

		// remember what the next residue should attach to
		if (isEnd) {
			lastRes = NULL;
			lastLink = NULL;
		} else {
			lastRes = ri;
			if (tr != NULL && tr->link() != NULL)
				lastLink = ri->lookupAtom(tr->link()->name());
			else
				lastLink = NULL;
		}
	}
}

// Read every molecule available on `input` and append each one to ml.
//
//	input		stream of PDB records
//	filename	name used in diagnostics only
//	lineNum		number of records already consumed from the stream
//
// readOneMolecule() is called repeatedly for as long as it reports that
// more models follow and the stream is still usable.  Molecules are
// connected with connectMolecule() when COVALENT_BONDS is requested.
// Reading stops at the first molecule after which ok() is false; that
// molecule is deleted instead of being appended.
//
// Returns the updated record count.
template <class Bond, class Atom, class Residue, class Coord, class CoordSet, class Molecule>
int PDBio<Bond,Atom,Residue,Coord,CoordSet,Molecule>::
readPDBstream(istream *input, const char *filename, int lineNum)
{
	bool	more;

	more = false;
	do {
		VoidPCHSet	startResidues, endResidues;

		Molecule *m = new Molecule;
		more = readOneMolecule(input, m, filename, &lineNum,
				(void *) &startResidues, (void *) &endResidues);
		if (!ok()) {
			delete m;
			return lineNum;
		}
		ml.append(m);
		if (what.test(COVALENT_BONDS))
			connectMolecule(m, (void *) &startResidues,
							(void *) &endResidues);
		// test the stream state, not the (never null) pointer
	} while (more && *input);
	return lineNum;
}

// Read the PDB file `filename`, or standard input when filename is NULL,
// via readPDBstream().
//
// The ioErr put position is moved back to the beginning first.  When the
// file cannot be opened a message is written to ioErr and nothing is
// read.
template <class Bond, class Atom, class Residue, class Coord, class CoordSet, class Molecule>
void PDBio<Bond,Atom,Residue,Coord,CoordSet,Molecule>::
readPDBfile(const char *filename)
{
	istream	*is;

	ioErr.seekp(0, ios::beg);
	if (filename == NULL) {
		is = &cin;
		filename = "<stdin>";
	} else {
		is = new ifstream(filename);
		if (!*is) {
			ioErr << "Unable to open " << filename
							<< " for reading.";
			delete is;	// do not leak the unopened stream
			return;
		}
	}
	readPDBstream(is, filename, 0);
	if (is != &cin)
		delete is;
}

#include "VoidP.int.VHMap.h"

// Write all molecules to the PDB file `filename`, or to standard output
// when filename is NULL, via writePDBstream().
//
// The ioErr put position is moved back to the beginning first.  Failure
// to open the file, or a bad stream after writing to a file, is
// reported in ioErr.
template <class Bond, class Atom, class Residue, class Coord, class CoordSet, class Molecule>
void PDBio<Bond,Atom,Residue,Coord,CoordSet,Molecule>::
writePDBfile(const char *filename)
{
	ostream	*os;

	ioErr.seekp(0, ios::beg);
	if (filename == NULL) {
		os = &cout;
		filename = "<stdout>";
	} else {
		os = new ofstream(filename);
		if (!*os) {
			ioErr << "Unable to open " << filename
							<< " for writing.";
			delete os;	// do not leak the unopened stream
			return;
		}
	}
	writePDBstream(os, filename);
	if (os != &cout) {
		// push buffered output out first so that a failed write
		// shows up in the stream state that is checked next
		os->flush();
		if (os->bad())
			ioErr << "Problem writing " << filename << ".";
		delete os;
	}
}

// Write every molecule in ml to `os` as PDB records.
//
//	os		destination stream
//	filename	not used by this function
//
// With more than one molecule, each is wrapped in MODEL / ENDMDL records.
// ATOM records are written when ATOMS is requested, CONECT records when
// both ATOMS and COVALENT_BONDS are requested.  Atom serial numbers keep
// counting across molecules, and rev_asn maps each Atom* to the serial
// number it was written with.
//
// Each record is offered to wFunc first when its type is selected by
// wMask; it is written here only when wFunc is absent, not selected, or
// returns zero.
template <class Bond, class Atom, class Residue, class Coord, class CoordSet, class Molecule>
void PDBio<Bond,Atom,Residue,Coord,CoordSet,Molecule>::
writePDBstream(ostream *os, const char *filename)
{
	int model = 0;
	int serial_num = 0;
	int multi_model = ml.length() > 1;
	VoidPintMap *rev_asn = NULL;
	for (Pix i = ml.first(); i != 0; ml.next(i)) {
		PDB		p;

		model += 1;
		if (multi_model) {
			p.type(PDB::MODEL);
			p.model.num = model;
			if (!wMask.test(PDB::MODEL) || wFunc == 0
			|| !wFunc(os, &p, ml(i), NULL, rev_asn))
				*os << p << endl;
		}

		// TODO: multiple coordinate sets
		if (what.test(ATOMS)) {
			p.type(PDB::ATOM);
			for (CIter<Atom> ai = ml(i)->citerAtom(); ai.ok();
								ai.next()) {
				// created lazily, on the first atom written
				if (rev_asn == NULL)
					rev_asn = new VoidPintVHMap(0,
							ai.numRemaining());
				p.atom.serialNum = ++serial_num;
				(*rev_asn)[(void *) (Atom *) ai]
							= p.atom.serialNum;
				strcpy(p.atom.name, ai->name().chars());
				p.atom.altLoc = 0;
				strcpy(p.atom.residue.name,
						ai->residue()->type().chars());
				p.atom.residue.chainId
					= ai->residue()->id().chainId().chars()[0];
				p.atom.residue.seqNum
					= ai->residue()->id().position();
				p.atom.residue.insertCode
					= ai->residue()->id().insertionCode();
				Coord	*c = ai->getCoord();
				p.atom.xyz[0] = c->xyz[0];
				p.atom.xyz[1] = c->xyz[1];
				p.atom.xyz[2] = c->xyz[2];
				p.atom.occupancy = 1.0;
				p.atom.tempFactor = 0.0;
				p.atom.ftnoteNum = 0;

				if (!wMask.test(PDB::ATOM) || wFunc == 0
				|| !wFunc(os, &p, ml(i), ai, rev_asn))
					*os << p << endl;
			}
		}

		if (what.test(ATOMS) && what.test(COVALENT_BONDS)) {
			for (CIter<Atom> ai = ml(i)->citerAtom(); ai.ok();
								ai.next()) {
				int count = 0;
				p.type(PDB::CONECT);
				// Use the serial number the atom was written
				// with: a counter restarted for each molecule
				// would disagree with the ATOM records from the
				// second molecule on.
				p.conect.serialNum
					= (*rev_asn)[(void *) (Atom *) ai];
				for (CIter<Bond> bi = ai->citerBond(); bi.ok();
								bi.next()) {
					Atom *a = bi->otherAtom(ai);
					if (a->atomicSymbol()
							== AtomicSymbol::H)
						continue;	// TODO: H bonds
					p.conect.covalent[count++]
						= (*rev_asn)[(void *) a];
					if (count == 4)
						break;	// only four slots are filled
				}
				if (count == 0)
					continue;

				if (!wMask.test(PDB::CONECT) || wFunc == 0
				|| !wFunc(os, &p, ml(i), ai, rev_asn))
					*os << p << endl;
			}
		}

		if (multi_model) {
			p.type(PDB::ENDMDL);
			if (!wMask.test(PDB::ENDMDL) || wFunc == 0
			|| !wFunc(os, &p, ml(i), NULL, rev_asn))
				*os << p << endl;
		}
	}
	if (rev_asn != NULL) {
		delete rev_asn;
		rev_asn = NULL;
	}
}
