RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
ShapeInput.h
Go to the documentation of this file.
1//
2// Copyright (C) 2026 David Cosgrove and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10// Original author: David Cosgrove (CozChemIx Limited)
11//
12
13#ifndef RDKIT_SHAPEINPUT_GUARD
14#define RDKIT_SHAPEINPUT_GUARD
15
16#include <array>
17#include <vector>
18
19#include <RDGeneral/export.h>
21
23#include <boost/dynamic_bitset.hpp>
24#ifdef RDK_USE_BOOST_SERIALIZATION
25#include <boost/archive/text_oarchive.hpp>
26#include <boost/archive/text_iarchive.hpp>
27#include <boost/serialization/vector.hpp>
28#include <boost/serialization/array.hpp>
29#include <boost/serialization/unique_ptr.hpp>
30#endif
32
34
35// The code below was provided by Claude (Sonnet 4.6).
36// If first tried to get me to use boost/serialization/dynamic_bitset.hpp
37// and then admitted that it had made that up.
38namespace boost {
39namespace serialization {
40
41template <class Archive, typename Block, typename Allocator>
42void serialize(Archive &ar, boost::dynamic_bitset<Block, Allocator> &bs,
43 const unsigned int /*version*/) {
44 size_t num_bits = bs.size();
45 ar & num_bits;
46
47 std::vector<Block> blocks;
48
49 if (Archive::is_saving::value) {
50 to_block_range(bs, std::back_inserter(blocks));
51 }
52
53 ar & blocks;
54
55 if (Archive::is_loading::value) {
56 bs.resize(num_bits);
57 from_block_range(blocks.begin(), blocks.end(), bs);
58 bs.resize(num_bits); // trim any excess bits
59 }
60}
61
62} // namespace serialization
63} // namespace boost
64
65namespace RDKit {
66class ROMol;
67class RWMol;
68namespace GaussianShape {
69
70// From Grant et al.
71constexpr double P = 2.7;
72constexpr double KAPPA = 2.41798793102;
74 std::vector<std::tuple<unsigned int, RDGeom::Point3D, double>>;
75
77 ShapeInputOptions() = default;
82
83 ~ShapeInputOptions() = default;
84
85 // By default, it will create features using the RDKit pharmacophore
86 // definitions.
88 true}; //! Whether to build the color features. By default, it will
89 //! create features using the RDKit pharmacophore definitions.
90
91 CustomFeatures customFeatures; //! Custom color features used verbatim. A
92 //! vector of tuples of integer type, Point3D
93 //! coords, double radius.
94 std::vector<unsigned int>
95 atomSubset; //! If not empty, use just these atoms in the molecule to
96 //! form the ShapeInput object.
97 std::vector<std::pair<unsigned int, double>>
98 atomRadii; //! Use these non-standard radii for these atoms. The int is
99 //! for the atom index in the molecule, not the atomic number.
100 //! Not all atoms need be specified, just some radii can be
101 //! over-ridden, with the rest left as standard.
103 true}; //! Whether to use carbon radii for all atoms (which is quicker
104 //! but less accurate) or vdw radii appropriate for the elements.
105};
106
107// Data for shape alignment code
109 public:
110 //! Create the ShapeInput object.
111 //! @param mol: The molecule of interest
112 //! @param confId: The conformer to use
113 //! @param opts: Options for setting up the shape
114 ShapeInput(const ROMol &mol, int confId = -1,
115 const ShapeInputOptions &opts = ShapeInputOptions(),
116 const ShapeOverlayOptions &overlayOpts = ShapeOverlayOptions());
117 ShapeInput(const std::string &str) {
118#ifndef RDK_USE_BOOST_SERIALIZATION
119 PRECONDITION(0, "Boost SERIALIZATION is not enabled")
120#else
121 std::stringstream ss(str);
122 boost::archive::text_iarchive ia(ss);
123 ia &*this;
124#endif
125 }
126 ShapeInput(const ShapeInput &other);
127 ShapeInput(ShapeInput &&other) = default;
129 ShapeInput &operator=(ShapeInput &&other) = default;
130 virtual ~ShapeInput() = default;
131
132 std::string toString() const {
133#ifndef RDK_USE_BOOST_SERIALIZATION
134 PRECONDITION(0, "Boost SERIALIZATION is not enabled")
135#else
136 std::stringstream ss;
137 boost::archive::text_oarchive oa(ss);
138 oa &*this;
139 return ss.str();
140#endif
141 }
142
143 // Note that the coords returned is a vector size 4*getNumAtoms()
144 // with the 4th value per atom being the alpha paramter.
145 const std::vector<double> &getCoords() const { return d_coords; }
146 //! Fetch the coordinates of the atoms and optionally features.
147 std::vector<RDGeom::Point3D> getAtomPoints(bool includeColors = false) const;
148 bool getNormalized() const { return d_normalized; }
149 const std::vector<int> &getTypes() const { return d_types; }
150 unsigned int getNumAtoms() const { return d_numAtoms; }
151 unsigned int getNumFeatures() const { return d_numFeats; }
152 double getShapeVolume() const { return d_selfOverlapVol; }
153 double getColorVolume() const { return d_selfOverlapColor; }
154 const boost::dynamic_bitset<> *getCarbonRadii() const {
155 return d_carbonRadii.get();
156 }
157 // These functions use cached values if available.
158 const std::array<double, 9> &calcCanonicalRotation();
159 const std::array<double, 3> &calcCanonicalTranslation();
160 const std::array<double, 3> &calcEigenValues();
161 const std::array<size_t, 6> &calcExtremes();
162 // Return the principal moments of inertia, if Eigen3 is available, and the
163 // eigenvalues of the canonical transformation if not.
164 std::array<double, 3> calcMomentsOfInertia(bool includeColors = false) const;
165
166 // Align the principal axes to the cartesian axes and centre on the origin.
167 // Doesn't require that the shape was created from a molecule. Creates
168 // the necessary transformation if not already done.
170
172
173 // Mock a molecule up from the shape for visual inspection and sometimes
174 // calculation of the normalization matrices. No bonds.
175 // Atoms are C, features are N.
176 virtual std::unique_ptr<RWMol> shapeToMol(bool includeColors = true) const;
177
178#ifdef RDK_USE_BOOST_SERIALIZATION
179 template <class Archive>
180 void serialize(Archive &ar, const unsigned int) {
181 ar & d_coords;
182 ar & d_types;
183 ar & d_numAtoms;
184 ar & d_numFeats;
185 ar & d_selfOverlapVol;
186 ar & d_selfOverlapColor;
187 ar & d_extremePoints;
188 ar & d_carbonRadii;
189 ar & d_normalized;
190 ar & d_normalizationOK;
191 ar & d_canonRot;
192 ar & d_canonTrans;
193 ar & d_eigenValues;
194 }
195#endif
196
197 private:
198 void extractAtoms(const ROMol &mol, int confId,
199 const ShapeInputOptions &opts);
200 // Extract the features for the color scores, using RDKit pphore features
201 // for now. Other options to be added later.
202 void extractFeatures(const ROMol &mol, int confId,
203 const ShapeInputOptions &shapeOpts);
204 // Calculate the rotation and translation that will align the principal axes
205 // to the cartesian axes and centre on the origin.
206 void calcNormalization();
207
208 void calculateExtremes();
209
210 std::vector<double> d_coords; // The coordinates and alpha parameter for the
211 // atoms and features, packed as 4 floats per
212 // item - x, y, z and alpha. alpha is KAPPA / (r * r) where r is the radius
213 // of the atom. This is not used if using all_atoms_carbon mode.
214 std::vector<int> d_types; // The feature types. The size is the same
215 // as the number of coordinates, padded with 0
216 // for the atoms.
217 unsigned int d_numAtoms; // The number of atoms
218 unsigned int d_numFeats; // The number of features
219 double d_selfOverlapVol{0.0}; // Shape volume
220 double d_selfOverlapColor{0.0}; // Color volume
221 // These are the points at the extremes of the x, y and z axes.
222 // they are min_x, min_y, min_z and max_x, max_y, max_z.
223 std::array<size_t, 6> d_extremePoints;
224 std::unique_ptr<boost::dynamic_bitset<>>
225 d_carbonRadii; // Flags those atoms with a carbon radius, for faster
226 // calculation later.
227
228 // This is the rotation and translation to align the principal axes of the
229 // shape with cartesian axes. If d_normalized is true, it has been applied
230 // to the coordinates.
231 bool d_normalized{false};
232 // If the shape is moved, the normalization matrices are no longer valid.
233 // This flags that so it is re-computed as required.
234 bool d_normalizationOK{false};
235
236 std::array<double, 9> d_canonRot;
237 std::array<double, 3> d_canonTrans;
238 // The sorted eigenvalues of the principal axes.
239 std::array<double, 3> d_eigenValues;
240};
241
242// Calculate the mean position of the given atoms.
244 const ROMol &mol, int confId, const std::vector<unsigned int> &ats);
245
247 const double *quat, const double *trans);
248
249// Apply the transformation to the coordinates assumed to be in
250// ShapeInput.d_coords form.
252 std::vector<double> &shape, RDGeom::Transform3D &xform);
254 const double *inShape, double *outShape, size_t numPoints,
255 RDGeom::Transform3D &xform);
257 std::vector<double> &shape, const RDGeom::Point3D &translation);
259 const double *inShape, double *outShape, size_t numPoints,
260 const RDGeom::Point3D &translation);
261
262} // namespace GaussianShape
263} // namespace RDKit
264
265#endif // RDKIT_SHAPEINPUT_GUARD
#define PRECONDITION(expr, mess)
Definition Invariant.h:108
ShapeInput(const std::string &str)
Definition ShapeInput.h:117
ShapeInput(const ROMol &mol, int confId=-1, const ShapeInputOptions &opts=ShapeInputOptions(), const ShapeOverlayOptions &overlayOpts=ShapeOverlayOptions())
const std::array< size_t, 6 > & calcExtremes()
const std::array< double, 3 > & calcEigenValues()
std::vector< RDGeom::Point3D > getAtomPoints(bool includeColors=false) const
Fetch the coordinates of the atoms and optionally features.
ShapeInput & operator=(const ShapeInput &other)
unsigned int getNumAtoms() const
Definition ShapeInput.h:150
ShapeInput(const ShapeInput &other)
std::string toString() const
Definition ShapeInput.h:132
ShapeInput & operator=(ShapeInput &&other)=default
ShapeInput(ShapeInput &&other)=default
const std::vector< int > & getTypes() const
Definition ShapeInput.h:149
unsigned int getNumFeatures() const
Definition ShapeInput.h:151
const std::array< double, 3 > & calcCanonicalTranslation()
const std::vector< double > & getCoords() const
Definition ShapeInput.h:145
void transformCoords(RDGeom::Transform3D &xform)
const std::array< double, 9 > & calcCanonicalRotation()
virtual std::unique_ptr< RWMol > shapeToMol(bool includeColors=true) const
const boost::dynamic_bitset * getCarbonRadii() const
Definition ShapeInput.h:154
std::array< double, 3 > calcMomentsOfInertia(bool includeColors=false) const
RWMol is a molecule class that is intended to be edited.
Definition RWMol.h:32
#define RDKIT_GAUSSIANSHAPE_EXPORT
Definition export.h:233
constexpr double P
Definition ShapeInput.h:71
std::vector< std::tuple< unsigned int, RDGeom::Point3D, double > > CustomFeatures
Definition ShapeInput.h:73
RDKIT_GAUSSIANSHAPE_EXPORT void translateShape(std::vector< double > &shape, const RDGeom::Point3D &translation)
RDKIT_GAUSSIANSHAPE_EXPORT RDGeom::Transform3D quatTransToTransform(const double *quat, const double *trans)
constexpr double KAPPA
Definition ShapeInput.h:72
RDKIT_GAUSSIANSHAPE_EXPORT void applyTransformToShape(std::vector< double > &shape, RDGeom::Transform3D &xform)
RDKIT_GAUSSIANSHAPE_EXPORT RDGeom::Point3D computeFeaturePos(const ROMol &mol, int confId, const std::vector< unsigned int > &ats)
Std stuff.
void serialize(Archive &ar, boost::dynamic_bitset< Block, Allocator > &bs, const unsigned int)
Definition ShapeInput.h:42
ShapeInputOptions & operator=(const ShapeInputOptions &)=default
std::vector< unsigned int > atomSubset
Definition ShapeInput.h:95
std::vector< std::pair< unsigned int, double > > atomRadii
Definition ShapeInput.h:98
ShapeInputOptions(ShapeInputOptions &&)=default
ShapeInputOptions & operator=(ShapeInputOptions &&)=default
ShapeInputOptions(const ShapeInputOptions &)=default