% Conference paper: Basak, Mills, Hawkins, Kraker (2007), ICCMSE 2007, AIP Conference Proceedings.
% Authors are stored uniformly as "Last, First" with unbraced given names so styles can abbreviate them.
% Acronyms are brace-protected so sentence-casing styles keep their capitals.
% NOTE(review): number and edition both hold "2"; this looks like a volume/part designation
% duplicated by the export. Confirm against the publisher record before relying on either field.
@inproceedings{2a9f2d7342744f8b8bda8cd531012005,
  author    = {Basak, Subhash C. and Mills, Denise and Hawkins, Douglas M. and Kraker, Jessica J.},
  title     = {Proper statistical modeling and validation in {QSAR}: A case study in the prediction of rat fat-air partitioning},
  booktitle = {Computation in Modern Science and Engineering - Proceedings of the International Conference on Computational Methods in Science and Engineering 2007 ({ICCMSE} 2007)},
  series    = {{AIP} Conference Proceedings},
  number    = {2},
  edition   = {2},
  pages     = {548--551},
  year      = {2007},
  month     = dec,
  day       = {1},
  doi       = {10.1063/1.2836137},
  isbn      = {9780735404786},
  language  = {English (US)},
  keywords  = {Descriptor thinning, Gram-Schmidt, Mathematical descriptors, Overfitting, Ridge regression, Stepwise regression},
  abstract  = {A number of multivariate regression methods commonly used to develop predictive models, along with model validation techniques, are contrary to the current opinion of experts in the field of statistics. Such methods result in overly optimistic models that cannot be relied upon to produce meaningful predictions for new compounds. Ridge regression is one appropriate methodology when the number of independent variables exceeds the number of observations. Although variable reduction is not a necessary component of a ridge regression analysis, descriptor thinning may be applied to eliminate variables that have no relationship to the property or activity of interest in an effort to increase model interpretability; although it is critical that this process be carried out correctly. In this paper, we have developed a predictive model for rat fat:air partition coefficient using proper statistical techniques. For comparative purposes, we have also used stepwise ordinary least squares regression, commonly used in QSAR studies but which often results in an inflated ``na{\"i}ve'' q2. It is important to note that all descriptors used in this analysis are computed strictly from chemical structure without the need for any additional experimental input and, therefore, can be applied to any chemical, real or hypothetical, in order to assess the pharmacokinetics and toxic potential.},
  note      = {International Conference on Computational Methods in Science and Engineering 2007, ICCMSE 2007 ; Conference date: 25-09-2007 Through 30-09-2007},
}