Research Group of Prof. Dr. J. Garcke
Institute for Numerical Simulation

  author = {J. Garcke and M. Griebel},
  title = {Data mining with sparse grids using simplicial basis
  booktitle = {Proceedings of the Seventh ACM SIGKDD International
		  Conference on Knowledge Discovery and Data Mining, San
		  Francisco, USA},
  pages = {87--96},
  editor = {F. Provost and R. Srikant},
  optnote = {also as SFB 256 Preprint 713, Universit\"at Bonn, 2001},
  http = {},
  doi = {doi:10.1145/502512.502528},
  ps = { 1},
  pdf = { 1},
  abstract = {Recently we presented a new approach to the classification
		  problem arising in data mining. It is based on the
		  regularization network approach but, in contrast to other
		  methods which employ ansatz functions associated to data
		  points, we use a grid in the usually high-dimensional
		  feature space for the minimization process. To cope with
		  the curse of dimensionality, we employ sparse grids. Thus,
		  only $O(h_n^{-1} n^{d-1})$ instead of $O(h_n^{-d})$ grid
		  points and unknowns are involved. Here $d$ denotes the
		  dimension of the feature space and $h_n = 2^{-n}$ gives the
		  mesh size. We use the sparse grid combination technique
		  where the classification problem is discretized and solved
		  on a sequence of conventional grids with uniform mesh sizes
		  in each dimension. The sparse grid solution is then
		  obtained by linear combination. In contrast to our former
		  work, where $d$-linear functions were used, we apply now
		  linear basis functions based on a simplicial
		  discretization. These allow to handle more dimensions and
		  the algorithms needs less operations per data point.
		  We describe the sparse grid combination technique for the
		  classification problem, give implementational details and
		  discuss the complexity of the algorithm. It turns out that
		  the method scales linear with the number of given data
		  points. Finally we report on the quality of the classifier
		  built by our new method on data sets in up to 10
		  dimensions. It turns out that our new method achieves
		  correctness rates which are competitive to that of the best
		  existing methods. },
  year = {2001},
  annote = {proc_ref}