Research Group of Prof. Dr. J. Garcke
Institute for Numerical Simulation
maximize


@inproceedings{Garcke.Griebel:2001*1,
  author = {J. Garcke and M. Griebel},
  title = {On the parallelization of the sparse grid approach for
		  data mining},
  booktitle = {Large-Scale Scientific Computations, Third International
		  Conference, LSSC 2001, Sozopol, Bulgaria},
  series = {Lecture Notes in Computer Science},
  volume = 2179,
  pages = {22-32},
  publisher = {Springer},
  editor = {S. Margenov and J. Wasniewski and P. Yalamov},
  ps = {http://wissrech.ins.uni-bonn.de/research/pub/garcke/psm.ps.gz 1},
  pdf = {http://wissrech.ins.uni-bonn.de/research/pub/garcke/psm.pdf 1},
  abstract = {Recently we presented a new approach to the classification
		  problem arising in data mining. It is based on the
		  regularization network approach, but in contrast to other
		  methods which employ ansatz functions associated to data
		  points, we use basis functions coming from a grid in the
		  usually high-dimensional feature space for the minimization
		  process. To cope with the curse of dimensionality, we
		  employ sparse grids. To be precise we use the sparse grid
		  combination technique where the classification problem is
		  discretized and solved on a sequence of conventional grids
		  with uniform mesh sizes in each dimension. The sparse grid
		  solution is then obtained by linear combination. The method
		  scales linearly with the number of data points and is well
		  suited for data mining applications where the amount of
		  data is very large, but where the dimension of the feature
		  space is moderately high.
		  
		  The computation on each grid of the sequence of grids is
		  independent of each other and therefore can be done in
		  parallel already on a coarse grain level. A second level of
		  parallelization on a fine grain level can be introduced on
		  each grid through the use of threading on shared-memory
		  multi-processor computers.
		  
		  We describe the sparse grid combination technique for the
		  classification problem, the two ways of parallelisation,
		  and report on the results on a 10 dimensional data set. },
  year = {2001},
  annote = {series,other}
}