WHIM記述子 (2)

前回のつづきです。CDKによるコードは以下のとおり。
桁落ちでNaNが出力されないように、ところどころ補正しています。


/* whim_unity.java */

import java.io.*;
import org.openscience.cdk.interfaces.*;
import org.openscience.cdk.io.iterator.IteratingMDLReader;
import org.openscience.cdk.DefaultChemObjectBuilder;
import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;

import Jama.EigenvalueDecomposition;
import Jama.Matrix;
import org.openscience.cdk.Molecule;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.geometry.GeometryTools;

import javax.vecmath.Point3d;
import java.util.HashMap;
import java.util.Map;

import org.openscience.cdk.qsar.descriptors.molecular.WHIMDescriptor;

class whim_unity {

public static void main(String args[]){

if(args.length!=1){
System.err.println("whim <sd-file>");
System.exit(1);
}
FileInputStream fis = null;
IteratingMDLReader isr = null;

try{
fis = new FileInputStream(new File(args[0]) );
isr = new IteratingMDLReader(fis, DefaultChemObjectBuilder.getInstance() );
} catch (Exception e) {
e.printStackTrace();
}

String type = "";
Map<String,Double> hashatwt, hashvdw, hasheneg, hashpol;

while( isr.hasNext() ){
IMolecule imol = (IMolecule)isr.next();
String id = (String)imol.getProperty("cdk:Title");

double sum = 0.0;
Molecule ac = (Molecule) imol;

// get the coordinate matrix
double[][] cmat = new double[ac.getAtomCount()][3];
for (int i = 0; i < ac.getAtomCount(); i++) {
Point3d coords = ac.getAtom(i).getPoint3d();
cmat[i][0] = coords.x;
cmat[i][1] = coords.y;
cmat[i][2] = coords.z;
}

// set up the weight vector
Map<String,Double> hash = null;
double[] wt = new double[ac.getAtomCount()];

// Only "unity" weighting is supported here.
for (int i = 0; i < ac.getAtomCount(); i++) wt[i] = 1.0;

PCA pcaobject = null;
try {
pcaobject = new PCA(cmat, wt);
} catch (Exception e1) {
e1.printStackTrace();
}

// directional WHIM's
double[] lambda = pcaobject.getEigenvalues();
double[] gamma = new double[3];
double[] nu = new double[3];
double[] eta = new double[3];

for (int i = 0; i < 3; i++) sum += lambda[i];
for (int i = 0; i < 3; i++) nu[i] = lambda[i] / sum;

double[][] scores = pcaobject.getScores();
for (int i = 0; i < 3; i++) {
sum = 0.0;
for (int j = 0; j < ac.getAtomCount(); j++)
sum += scores[j][i] * scores[j][i] * scores[j][i] * scores[j][i];
if( sum == 0.0 || lambda[i] == 0.0 ){
eta[i] = 0.0;
} else {
sum = sum / (lambda[i] * lambda[i] * ac.getAtomCount());
eta[i] = 1.0 / sum;
}
}

// look for symmetric & asymmetric atoms for the gamma descriptor
for (int i = 0; i < 3; i++) {
double ns = 0.0;
double na = 0.0;
for (int j = 0; j < ac.getAtomCount(); j++) {
boolean foundmatch = false;
for (int k = 0; k < ac.getAtomCount(); k++) {
if (k == j) continue;
// if ( scores[j][i] == -1 * scores[k][i] ) {
if ( scores[j][i] + scores[k][i] < 0.001 ) {
ns++;
foundmatch = true;
break;
}
}
if (!foundmatch) na++;
}
double n = (double) ac.getAtomCount();
if( ns == 0.0 ){
gamma[i] = -1.0 * (na / n) * Math.log(1.0 / n) / Math.log(2.0);
} else {
gamma[i] = -1.0 * ((ns / n) * Math.log(ns / n) / Math.log(2.0)
+ (na / n) * Math.log(1.0 / n) / Math.log(2.0));
}
gamma[i] = 1.0 / (1.0 + gamma[i]);
}

// non directional WHIMS's
double t = lambda[0] + lambda[1] + lambda[2];
double a = lambda[0] * lambda[1]
+ lambda[0] * lambda[2] + lambda[1] * lambda[2];
double v = t + a + lambda[0] * lambda[1] * lambda[2];

double k = 0.0;
sum = 0.0;
for (int i = 0; i < 3; i++) sum += lambda[i];
for (int i = 0; i < 3; i++) k = (lambda[i] / sum) - (1.0 / 3.0);
k = k / (4.0 / 3.0);

double g = Math.pow(gamma[0] * gamma[1] * gamma[2], 1.0 / 3.0);
double d = eta[0] + eta[1] + eta[2];

System.out.printf("%s\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f",
id, lambda[0], lambda[1], lambda[2], nu[0], nu[1]);
System.out.printf("\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f",
gamma[0], gamma[1], gamma[2], eta[0], eta[1], eta[2]);
System.out.printf("\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\n",
t, a, v, k, g, d);
}
}

static private class PCA {

Matrix evec;
Matrix t;
double[] eval;
public PCA(double[][] cmat, double[] wt) throws CDKException {
int ncol = 3;
int nrow = wt.length;
if (cmat.length != wt.length) {
throw new CDKException("WHIMD: no.weights should be equal no.atoms\n");
}

// make a copy of the coordinate matrix
double[][] d = new double[nrow][ncol];
for (int i = 0; i < nrow; i++) {
for (int j = 0; j < ncol; j++)
d[i][j] = cmat[i][j];
}

// do mean centering
for (int i = 0; i < ncol; i++) {
double mean = 0.0;
for (int j = 0; j < nrow; j++)
mean += d[j][i];
mean = mean / (double) nrow;
for (int j = 0; j < nrow; j++)
d[j][i] = (d[j][i] - mean);
}

// get the covariance matrix
double[][] covmat = new double[ncol][ncol];
double sumwt = 0.;
for (int i = 0; i < nrow; i++) sumwt += wt[i];
for (int i = 0; i < ncol; i++) {
double meanx = 0;
for (int k = 0; k < nrow; k++) meanx += d[k][i];
meanx = meanx / (double) nrow;
for (int j = 0; j < ncol; j++) {
double meany = 0.0;
for (int k = 0; k < nrow; k++) meany += d[k][j];
meany = meany / (double) nrow;
double sum = 0.;
for (int k = 0; k < nrow; k++) {
sum += wt[k] * (d[k][i] - meanx) * (d[k][j] - meany);
}
covmat[i][j] = sum / sumwt;
}
}

// get the loadings (ie eigenvector matrix)
Matrix m = new Matrix(covmat);
EigenvalueDecomposition ed = m.eig();
eval = ed.getRealEigenvalues();
evec = ed.getV();
Matrix x = new Matrix(d);
t = x.times(evec);
}
double[] getEigenvalues() { return (eval); }
double[][] getScores() { return (t.getArray()); }
}
}

今回は特に原子に重み付けしていませんが、
WHIMDescriptorクラスを使うと、重み付けのパラメータを選択できます。