ABSTRACT

Listing B.1: configure.ac. 1 AC INIT ( [ ClusLib ] , [ 3 . 1 4 1 ] , [BUG−REPORT−ADDRESS ] ) 2 AC CONFIG SRCDIR ( [ c l / e r r o r s . hpp ] ) 3 AC CONFIG AUX DIR ( [ c on f i g ] ) 4 AC CONFIG HEADERS( [ c l / c on f i g . hpp ] ) 5 AC CONFIG MACRO DIR( [m4] ) 6

7 AM INIT AUTOMAKE([−Wall −Werror f o r e i g n ] ) 8

9 AC ARG WITH( [ boost−i n c l ude ] , 10 AC HELP STRING([−−with−boost−i n c l ude=INCLUDE PATH] , 11 [ Supply the l o c a t i o n o f Boost header f i l e s ] ) , 12 [ b oo s t i n c l ude path=”‘cd ${withva l} 2>/dev/ nu l l && pwd ‘ ” ] , 13 [ b oo s t i n c l ude path =””]) 14 i f t e s t [ −n ” $boos t i n c l ude path ” ] ; then 15 AC SUBST( [BOOST INCLUDE] , [”− I$ { boos t i n c l ude path }” ] ) 16 AC SUBST( [CPPFLAGS] , [ ” ${CPPFLAGS} −I$ { boos t i n c l ude path }” ] ) 17 f i 18

19 AC ARG WITH( [ boost−l i b ] , 20 AC HELP STRING([−−with−boost−l i b=LIB PATH ] , 21 [ Supply the l o c a t i o n o f Boost l i b r a r i e s ] ) , 22 [ b o o s t l i b pa t h=”‘cd ${withva l} 2>/dev/ nu l l && pwd ‘ ” ] , 23 [ b o o s t l i b pa t h =””]) 24 i f t e s t [ −n ” $b oo s t l i b p a t h ” ] ; then 25 AC SUBST( [ BOOST LIB] , [”−L${ boo s t l i b p a th }” ] ) 26 AC SUBST( [LDFLAGS] , [ ” ${LDFLAGS} −L${ boo s t l i b pa t h }” ] ) 27 f i 28

29 LT INIT 30 AC PROG CC 31 AC PROG CXX 32 AC PROG CPP 33 AC LANG(C++) 34

37 AC CONFIG FILES ( [ Make f i l e 38 c l /Make f i l e 39 c l / a lgor i thms /Make f i l e 40 c l / c l u s t e r s /Make f i l e 41 c l / da ta s e t s /Make f i l e 42 c l / d i s t anc e s /Make f i l e

46 examples/Make f i l e 47 examples/ agg lomerat ive /Make f i l e 48 examples/cmean/Make f i l e 49 examples/diana/Make f i l e 50 examples/ f s c /Make f i l e 51 examples/gkmode/Make f i l e 52 examples/gmc/Make f i l e 53 examples/kmean/Make f i l e 54 examples/ kprototype /Make f i l e 55 m4/Make f i l e 56 t e s t−s u i t e /Make f i l e ] ) 57 AC OUTPUT

Listing B.2: acinclude.m4. 1 # CHECK BOOST DEVEL 2 # −−−−−−−−−−−−−−−−−−−− 3 # Check whether the Boost headers are a v a i l a b l e 4 AC DEFUN( [CHECK BOOST DEVEL] , 5 [AC MSG CHECKING( [ f o r Boost development f i l e s ] ) 6 AC TRY COMPILE( 7 [@%:@include <boost / ve r s i on . hpp> 8 @%:@include <boost / sha r ed p t r . hpp> 9 @%:@include <boost / a s s e r t . hpp>

10 @%:@include <boost / c u r r e n t f u n c t i o n . hpp> ] , 11 [ ] , 12 [AC MSG RESULT( [ yes ] ) ] , 13 [AC MSG RESULT( [ no ] ) 14 AC MSG ERROR( [ Boost development f i l e s not found ] ) 15 ] ) 16 ] ) 17

18 # CHECK BOOST VERSION 19 # −−−−−−−−−−−−−−−−−−−−−− 20 # Check whether the Boost i n s t a l l a t i o n i s up to date 21 AC DEFUN( [CHECK BOOST VERSION] , 22 [AC MSG CHECKING( [ Boost ve r s i on ] ) 23 AC REQUIRE( [CHECK BOOST DEVEL] ) 24 AC TRY COMPILE( 25 [@%:@include <boost / ve r s i on . hpp> ] , 26 [@%:@if BOOST VERSION < 103100 27 @%:@error too o ld 28 @%:@endif ] , 29 [AC MSG RESULT( [ yes ] ) ] , 30 [AC MSG RESULT( [ no ] ) 31 AC MSG ERROR( [ outdated Boost i n s t a l l a t i o n ] ) 32 ] ) 33 ] ) 34

35 # CHECK BOOST PROGRAM OPTIONS 36 # −−−−−−−−−−−−−−−−−−−−−−−− 37 # Check whether the Boost program opt ions i s a v a i l a b l e 38 AC DEFUN( [CHECK BOOST PROGRAM OPTIONS] , 39 [AC MSG CHECKING( [ f o r Boost program opt ions ] ) 40 AC REQUIRE( [AC PROG CC] ) 41 o r i g i na l L IBS=$LIBS 42 original CXXFLAGS=$CXXFLAGS 43 CC BASENAME=‘basename $CC‘ 44 CC VERSION=‘echo ” GNUC GNUC MINOR ” | $CC −E −x c − | \

VERSION \ 48 boost program opt ions−$CC BASENAME \ 49 boost program opt ions \ 50 boost program opt ions−mt−$CC BASENAME$CC VERSION \ 51 boost program opt ions−$CC BASENAME$CC VERSION−mt \ 52 boost program opt ions−x$CC BASENAME$CC VERSION−mt \ 53 boost program opt ions−mt−$CC BASENAME \ 54 boost program opt ions−$CC BASENAME−mt \ 55 boost program opt ions−mt ; do 56 LIBS=”$or i g i na l L IBS − l $ b o o s t l i b ” 57 CXXFLAGS=”$original CXXFLAGS” 58 boost po found=no 59 AC LINK IFELSE( 60 [@%:@include <boost / program options . hpp> 61 us ing namespace boost : : program options ; 62 i n t main ( i n t argc , char∗∗ argv ) 63 { 64 op t i o n s d e s c r i p t i o n desc (” Allowed opt ions ” ) ; 65 desc . add opt ions ( ) 66 (” help ” , ” help msg”)(” p” ,”p ” ) ; 67 re turn 0 ; 68 } 69 ] , 70 [ boost po found=$boo s t l i b 71 break ] , 72 [ ] ) 73 done 74 LIBS=”$or i g i na l L IBS ” 75 CXXFLAGS=”$original CXXFLAGS” 76 i f t e s t ” $boost po found ” = no ; then 77 AC MSG RESULT( [ no ] ) 78 AC SUBST( [BOOST PROGRAM OPTIONS LIB] , [ ” ” ] ) 79 AC MSGWARN( [ Boost program opt ions not found . ] ) 80 e l s e 81 AC MSG RESULT( [ yes ] ) 82 AC SUBST( [BOOST PROGRAM OPTIONS LIB ] , [ $ b o o s t l i b ] ) 83 f i 84 ] ) 85

86 # CHECK BOOST 87 # −−−−−−−−−−−−−−−−−−−−−−−− 88 # Boost−r e l a t e d t e s t s 89 AC DEFUN( [CHECK BOOST] , 90 [AC REQUIRE( [CHECK BOOST DEVEL] ) 91 AC REQUIRE( [CHECK BOOST VERSION] ) 92 AC REQUIRE( [CHECK BOOST PROGRAM OPTIONS ] ) 93 ] )

Listing B.3: Makefile.am in ClusLib. 1 SUBDIRS = c l c on f i g examples m4 te s t−s u i t e 2

# Extra flags handed to aclocal when the build system regenerates aclocal.m4;
# "-I m4" adds the m4/ directory to aclocal's macro search path.
# The variable must be spelled ACLOCAL_AMFLAGS for automake to honour it.
ACLOCAL_AMFLAGS = -I m4

Listing B.4: Makefile.am in cl. 1 SUBDIRS = algor i thms c l u s t e r s da ta se t s d i s t anc e s pa t t e rn s \ 2 u t i l i t i e s 3

4 AM CPPFLAGS = −I${ t o p s r c d i r } −I$ { t op bu i l dd i r } 5

6 t h i s i n c l u d e d i r = ${ i n c l u de d i r }/${ subd i r } 7 this include HEADERS = \ 8 c l d e f i n e s . hpp \ 9 c l u s l i b . hpp \

10 c on f i g . hpp \ 11 e r r o r s . hpp \ 12 types . hpp 13

14 libClusLib la SOURCES = \ 15 e r r o r s . cpp 16

18 lib LTLIBRARIES = l ibClu sL ib . l a 19 l ibClusLib la LIBADD = \ 20 a lgor i thms / l i bA lgo r i thms . l a \ 21 c l u s t e r s / l i bC l u s t e r s . l a \ 22 data se t s / l i bData se t s . l a \ 23 d i s t anc e s / l i bD i s t an c e s . l a \ 24 pa t t e rn s/ l i bPa t t e r n s . l a \ 25 u t i l i t i e s / l i b U t i l i t i e s . l a 26

27 c l u s l i b . hpp : Make f i l e .am 28 −−−−−−−→echo ”// This f i l e i s generated . P lease do not e d i t ! ” > $@ 29 −−−−−−−→echo >> $@ 30 −−−−−−−→echo ”#inc l ude <c l / c l d e f i n e s . hpp>” >> $@ 31 −−−−−−−→echo >> $@ 32 −−−−−−−→ f o r i in $ ( f i l t e r −out c on f i g . hpp c l u s l i b . hpp \ 33 −−−−−−−→ c l d e f i n e s . hpp , $ ( this include HEADERS ) ) ; do \ 34 −−−−−−−→echo ”#inc l ude <${ subd i r }/ $$i>” >> $@; \ 35 −−−−−−−→done 36 −−−−−−−→echo >> $@ 37 −−−−−−−→ subd i r s=’$ (SUBDIRS ) ’ ; f o r i in $$subd i r s ; do \ 38 −−−−−−−→echo ”#inc l ude <${ subd i r }/ $$ i / a l l . hpp>” >> $@ ; \ 39 −−−−−−−→done

Listing B.5: Macros. 1 // c l / c l d e f i n e s . hpp 2 #ifndef CLUSLIB CLDEFINES HPP 3 #define CLUSLIB CLDEFINES HPP 4

5 #include<boost / c on f i g . hpp> 6 #include<boost / ve r s i on . hpp> 7 #include<l im i t s > 8

13 #define CL VERSION ” 1 . 0 . 0 ” 14 #define CL LIB VERSION ” 1 . 0 . 0 ” 15

16 #i f de f i n ed (HAVE CONFIG H) 17 #inc lude<c l / c on f i g . hpp> 18 #endif 19

20 #define INTEGER int 21 #define BIGINTEGER long 22 #define REAL double 23

24 #define MIN INTEGER std : : numer i c l im i t s<INTEGER> : :min ( ) 25 #define MAX INTEGER std : : numer i c l im i t s<INTEGER> : :max( ) 26 #define MIN REAL −std : : numer i c l im i t s<REAL> : :max( ) 27 #define MAXREAL std : : numer i c l im i t s<REAL> : :max( ) 28 #define MIN POSITIVE REAL std : : numer i c l im i t s<REAL> : :min ( ) 29 #define EPSILON std : : nume r i c l im i t s<REAL> : : e p s i l o n ( ) 30 #define NULL INTEGER std : : numer i c l im i t s<INTEGER> : :max( ) 31 #define NULL SIZE std : : numer i c l im i t s<unsigned INTEGER> : :max( ) 32 #define NULL REAL std : : numer i c l im i t s<REAL> : :max( ) 33

Listing B.6: Types. 1 // c l / types . hpp 2 #ifndef CLUSLIB TYPES HPP 3 #define CLUSLIB TYPES HPP 4

5 #include<c l / c l d e f i n e s . hpp> 6 #include<cstdde f> 7

8 namespace ClusLib { 9

10 typedef INTEGER In tege r ; 11 typedef BIGINTEGER Big In t e ge r ; 12 typedef unsigned INTEGER Natural ; 13 typedef unsigned BIGINTEGER BigNatural ; 14 typedef REAL Real ; 15 typedef std : : s i z e t S i z e ; 16

Listing B.7: The header file of class Error. 1 // c l / errors . hpp 2 #ifndef CLUSLIB ERRORS HPP 3 #define CLUSLIB ERRORS HPP 4

5 #include<boost / a s s e r t . hpp> 6 #include<boost / c u r r e n t f u n c t i o n . hpp> 7 #include<boost / sha r ed p t r . hpp> 8 #include<except ion> 9 #include<sstream>

13 private : 14 boost : : shared ptr<std : : s t r i ng> msg ; 15 public : 16 Error ( const std : : s t r i n g& f i l e , 17 long l i n e , 18 const std : : s t r i n g& funct ion , 19 const std : : s t r i n g& msg = ”” ) ; 20 ˜Error ( ) throw ( ) {} 21 const char∗ what ( ) const throw ( ) ; 22 } ; 23

// FAIL(msg): unconditionally throws ClusLib::Error.
//   msg - anything that can follow `<<` on a std::ostringstream, so call
//         sites may chain pieces: FAIL(name << " not found");
// The error records __FILE__, __LINE__ and BOOST_CURRENT_FUNCTION of the
// call site together with the streamed message.
// The body is wrapped in do { ... } while (false) so that the macro expands
// to exactly one statement: it is safe under an unbraced if/else, can be
// used several times in one scope, and its temporary stream cannot collide
// with a caller variable. Call sites must end the invocation with ';'.
#define FAIL(msg) \
    do { \
        std::ostringstream cl_fail_stream_; \
        cl_fail_stream_ << msg; \
        throw ClusLib::Error(__FILE__, __LINE__, \
            BOOST_CURRENT_FUNCTION, cl_fail_stream_.str()); \
    } while (false)


// ASSERT(condition, msg): throws ClusLib::Error when `condition` is false.
//   condition - any expression usable in a boolean context
//   msg       - anything that can follow `<<` on a std::ostringstream
// The message is only formatted when the check fails. The error records
// __FILE__, __LINE__ and BOOST_CURRENT_FUNCTION of the call site.
// The do { ... } while (false) wrapper makes the expansion a single
// statement, so an `else` written after ASSERT(...) can never bind to the
// macro's internal `if`. Call sites must end the invocation with ';'.
#define ASSERT(condition, msg) \
    do { \
        if (!(condition)) { \
            std::ostringstream cl_assert_stream_; \
            cl_assert_stream_ << msg; \
            throw ClusLib::Error(__FILE__, __LINE__, \
                BOOST_CURRENT_FUNCTION, cl_assert_stream_.str()); \
        } \
    } while (false)


Listing B.8: The source file of class Error. 1 // c l / errors . cpp 2 #include<c l / e r r o r s . hpp> 3 #include<stdexcept> 4

5 namespace { 6 std : : s t r i n g format ( const std : : s t r i n g& f i l e , 7 long l i n e , 8 const std : : s t r i n g& funct ion , 9 const std : : s t r i n g& msg) {

10 std : : o s t r i ng s t r e am ss ; 11 ss<<funct ion<<” : ” ; 12 ss<<”\n ”<< f i l e <<” ( ”<<l i n e <<” ) : \n”<<msg ; 13 return s s . s t r ( ) ; 14 } 15

18 namespace boost { 19 void a s s e r t i o n f a i l e d ( char const ∗ expr , 20 char const ∗ funct ion , 21 char const ∗ f i l e , 22 long l i n e ){ 23 throw std : : run t ime e r r o r ( format ( f i l e , l i n e , funct ion , 24 ”Boost a s s e r t i o n f a i l e d : ” + std : : s t r i n g ( expr ) ) ) ; 25 } 26 } 27

29 namespace ClusLib { 30 Error : : Error ( const std : : s t r i ng& f i l e , 31 long l i n e , 32 const std : : s t r i n g& funct ion , 33 const std : : s t r i n g& msg){

g ( , l i ion msg ) ) ) ; 36 } 37

38 const char∗ Error : : what ( ) const throw ( ) { 39 return msg−>c s t r ( ) ; 40 } 41 }

Listing B.9: Makefile.am in cl/algorithms. 1 noinst LTLIBRARIES = l ibA lgo r i thms . l a 2

3 AM CPPFLAGS = −I${ t o p s r c d i r } −I$ { t op bu i l dd i r } 4

5 t h i s i n c l u d e d i r=${ i n c l ud ed i r }/${ subd i r } 6 this include HEADERS = \ 7 a l l . hpp \ 8 a lgor i thm . hpp \ 9 average . hpp \

10 c en t ro i d . hpp \ 11 cmean . hpp \ 12 complete . hpp \ 13 diana . hpp \ 14 f s c . hpp \ 15 gkmode . hpp \ 16 gmc . hpp \ 17 kmean . hpp \ 18 kprototype . hpp \ 19 lw . hpp \ 20 median . hpp \ 21 s i n g l e . hpp \ 22 ward . hpp \ 23 weighted . hpp 24

25 l ibAlgorithms la SOURCES = \ 26 a lgor i thm . cpp \ 27 average . cpp \ 28 c en t ro i d . cpp \ 29 cmean . cpp \ 30 complete . cpp \ 31 diana . cpp \ 32 f s c . cpp \ 33 gkmode . cpp \ 34 gmc . cpp \ 35 kmean . cpp \ 36 kprototype . cpp \ 37 lw . cpp \ 38 median . cpp \ 39 s i n g l e . cpp \ 40 ward . cpp \ 41 weighted . cpp 42

P do not e d i t ! ” > $@ 46 echo >> $@ 47 f o r i in $ ( f i l t e r −out a l l . hpp , $ ( this include HEADERS ) ) ; \ 48 do \ 49 echo ”#inc l ude <${ subd i r }/ $$i>” >> $@; \ 50 done 51 echo >> $@ 52 subd i r s=’$ (SUBDIRS ) ’ ; f o r i in $$subd i r s ; do \ 53 echo ”#inc l ude <${ subd i r }/ $$ i / a l l . hpp>” >> $@ ; \ 54 done

Listing B.10: The header file of class Algorithm. 1 // c l / a lgor i thms/algorithm . hpp 2 #ifndef CLUSLIB ALGORITHM HPP 3 #define CLUSLIB ALGORITHM HPP 4

5 #include<c l / da ta s e t s / datase t . hpp> 6 #include<c l / d i s t anc e s / d i s t anc e . hpp> 7 #include<map> 8 #include<boost /any . hpp> 9

10 namespace ClusLib { 11

12 class Addit iona l { 13 public : 14 const boost : : any& get ( const std : : s t r i n g &name) const ; 15 void i n s e r t ( const std : : s t r i n g &name , 16 const boost : : any &val ) ; 17

18 std : : map<std : : s t r i ng , boost : : any> add i t i o n a l ; 19

20 protected : 21 Addit iona l ( ) {} 22 } ; 23

24 class Arguments : public Addit iona l { 25 public : 26 boost : : shared ptr<Dataset> ds ; 27 boost : : shared ptr<Distance> d i s t anc e ; 28 } ; 29

30 class Resu l t s : public Addit iona l { 31 public : 32 void r e s e t ( ) ; 33

34 std : : vector<Size> CM; 35 } ; 36

37 class Algorithm { 38 public : 39 virtual ˜Algorithm ( ) {} 40 virtual Arguments& getArguments ( ) ; 41 virtual const Resu l t s& ge tRe su l t s ( ) const ; 42 virtual void r e s e t ( ) const ; 43 virtual void c l u s t e r i z e ( ) ; 44

45 protected : 46 virtual void setupArguments ( ) ; 47 virtual void per formCluste r ing ( ) const = 0; =

51 mutable Resu l t s r e s u l t s ; 52 Arguments arguments ; 53 } ; 54 } 55

Listing B.11: The source file of class Algorithm. 1 // c l / a lgor i thms/algorithm . cpp 2 #include<c l / a lgor i thms / a lgor i thm . hpp> 3

4 namespace ClusLib { 5

6 const boost : : any& Addit iona l : : ge t ( const std : : s t r i n g &name) 7 const { 8 std : : map<std : : s t r i ng , boost : : any> : : c o n s t i t e r a t o r i t ; 9 i t = add i t i o n a l . f i nd (name ) ;

10 i f ( i t == add i t i o n a l . end ( ) ) { 11 FAIL(name << ” not found” ) ; 12 } 13

14 return i t−>second ; 15 } 16

17 void Addit iona l : : i n s e r t ( const std : : s t r i n g &name , 18 const boost : : any &val ) { 19 add i t i o n a l . i n s e r t ( std : : pair<std : : s t r i ng , 20 boost : : any>(name , va l ) ) ; 21 } 22

23 void Resu l t s : : r e s e t ( ) { 24 CM. c l e a r ( ) ; 25 add i t i o n a l . c l e a r ( ) ; 26 } 27

28 Arguments& Algorithm : : getArguments ( ) { 29 return arguments ; 30 } 31

32 const Resu l t s& Algorithm : : g e tRe su l t s ( ) const { 33 return r e s u l t s ; 34 } 35

36 void Algorithm : : r e s e t ( ) const { 37 r e s u l t s . r e s e t ( ) ; 38 } 39

40 void Algorithm : : c l u s t e r i z e ( ) { 41 setupArguments ( ) ; 42 per formCluste r ing ( ) ; 43 r e s e t ( ) ; 44 f e t chRe su l t s ( ) ; 45 } 46

47 void Algorithm : : setupArguments ( ) { 48 ds = arguments . ds ; 49 ASSERT( ds , ” datase t i s n u l l ” ) ; 50 } 51

Listing B.12: The header file of class Average. 1 // c l / a lgor i thms/average . hpp 2 #ifndef CLUSLIB AVERAGE HPP 3 #define CLUSLIB AVERAGE HPP 4

5 #include<c l / a lgor i thms / lw . hpp> 6

7 namespace ClusLib { 8

9 class Average : public LW { 10 private : 11 void update dm ( S i z e p , S i z e q , S i z e r ) const ; 12 } ; 13

Listing B.13: The source file of class Average. 1 // c l / a lgor i thms/average . cpp 2 #include<c l / a lgor i thms / average . hpp> 3

4 namespace ClusLib { 5

6 void Average : : update dm ( S i z e p , S i z e q , S i z e r ) 7 const { 8 Real d i s t ; 9 std : : set<Size > : : i t e r a t o r i t ;

10 for ( i t = unmergedClusters . begin ( ) ; 11 i t != unmergedClusters . end ( ) ; ++i t ) { 12 i f (∗ i t == r ) { 13 continue ; 14 } 15

16 d i s t = ( c l u s t e r S i z e [ p ]∗ dm(p ,∗ i t ) + 17 c l u s t e r S i z e [ q ]∗ dm(q ,∗ i t ) ) / c l u s t e r S i z e [ r ] ; 18 dm . add item ( r ,∗ i t , d i s t ) ; 19 } 20 } 21

Listing B.14: The header file of class Centroid. 1 // c l / a lgor i thms/ centroid . hpp 2 #ifndef CLUSLIB CENTROID HPP 3 #define CLUSLIB CENTROID HPP 4

5 #include<c l / a lgor i thms / lw . hpp> 6

7 namespace ClusLib { 8

9 class Centroid : public LW { 10 private : 11 void setupArguments ( ) ;

Listing B.15: The source file of class Centroid. 1 // c l / a lgor i thms/ centroid . cpp 2 #include<c l / a lgor i thms / c en t ro i d . hpp> 3 #include<c l / d i s t anc e s / euc l i d eand i s t anc e . hpp> 4 #include<cmath> 5

6 namespace ClusLib { 7

8 void Centroid : : setupArguments ( ) { 9 Algorithm : : setupArguments ( ) ;

11 d i s t anc e = boost : : shared ptr<Distance >(new 12 Euc l ideanDistance ( ) ) ; 13 } 14

15 void Centroid : : update dm ( S i z e p , S i z e q , S i z e r ) 16 const { 17 Real d i s t ; 18 std : : set<Size > : : i t e r a t o r i t ; 19 Real sp = c l u s t e r S i z e [ p ] ; 20 Real sq = c l u s t e r S i z e [ q ] ; 21 for ( i t = unmergedClusters . begin ( ) ; 22 i t != unmergedClusters . end ( ) ; ++i t ) { 23 i f (∗ i t == r ) { 24 continue ; 25 } 26

27 d i s t = std : : pow( dm(p ,∗ i t ) , 2 . 0 )∗ sp /( sp+sq ) + 28 std : : pow( dm(q ,∗ i t ) , 2 . 0 )∗ sq /( sp+sq ) − 29 std : : pow( dm(p , q ) , 2 . 0 )∗ sp∗ sq / ( ( sp+sq )∗ ( sp+sq ) ) ; 30 dm . add item ( r ,∗ i t , s td : : sq r t ( d i s t ) ) ; 31 } 32 } 33

Listing B.16: The header file of class Cmean. 1 // c l / a lgor i thms/cmean. hpp 2 #ifndef CLUSLIB CMEAN HPP 3 #define CLUSLIB CMEAN HPP 4

5 #include<c l / a lgor i thms / a lgor i thm . hpp> 6 #include<c l / a lgor i thms /kmean . hpp> 7 #include<c l / types . hpp> 8 #include<c l / da ta s e t s / datase t . hpp> 9 #include<c l / c l u s t e r s / c e n t e r c l u s t e r . hpp>

10 #include<c l / c l u s t e r s / p c l u s t e r i ng . hpp> 11 #include<c l / d i s t anc e s / d i s t anc e . hpp> 12 #include<c l / u t i l i t i e s /matrix . hpp> 13

14 namespace ClusLib {

17 private : 18 void setupArguments ( ) ; 19 void per formCluste r ing ( ) const ; 20 void f e t chRe su l t s ( ) const ; 21 void i n i t i a l i z a t i o n ( ) const ; 22 void i t e r a t i o n ( ) const ; 23 void updateCenter ( ) const ; 24 void updateFCM() const ; 25 void ca l cu l a t eOb j ( ) const ; 26

27 mutable std : : vector<boost : : shared ptr<CenterCluster> > 28 c l u s t e r s ; 29 mutable std : : vector<Size> CM; 30 mutable ublas : : matrix<Real> FCM; 31 mutable S i z e numiter ; 32 mutable Real dObj ; 33

34 Real th r e sho ld ; 35 Real a lpha ; 36 Real e p s i l o n ; 37 S i z e numclust ; 38 S i z e maxi te r ; 39 S i z e s e ed ; 40 boost : : shared ptr<Distance> d i s t anc e ; 41 } ; 42

Listing B.17: The source file of class Cmean. 1 // c l / a lgor i thms/cmean. cpp 2 #include<c l / a lgor i thms /cmean . hpp> 3 #include<c l / e r r o r s . hpp> 4 #include<c l / d i s t anc e s / euc l i d eand i s t anc e . hpp> 5 #include<iostream> 6 #include<cmath> 7 #include<boost /random . hpp> 8

9 namespace ClusLib { 10

11 void Cmean : : pe r formCluste r ing ( ) const { 12 i n i t i a l i z a t i o n ( ) ; 13 i t e r a t i o n ( ) ; 14 } 15

16 void Cmean : : setupArguments ( ) { 17 Algorithm : : setupArguments ( ) ; 18 ASSERT( ds−>i s numer i c ( ) , ”not a numeric datase t ” ) ; 19

20 e p s i l o n = boost : : any cast<Real>( 21 arguments . ge t ( ” e p s i l o n ” ) ) ; 22 ASSERT( ep s i l on >0, ” e p s i l o n must be p o s i t i v e ” ) ; 23

24 th r e sho ld = boost : : any cast<Real>( 25 arguments . ge t ( ” th r e sho ld” ) ) ; 26 ASSERT( th r e sho ld>EPSILON, ” i n v a l i d th r e sho ld” ) ; 27

28 a lpha = boost : : any cast<Real>( arguments . ge t ( ” alpha” ) ) ; 29 ASSERT( alpha >1, ” i n v a l i d alpha” ) ; 30

31 numclust = boost : : any cast<Size >( 32 arguments . ge t ( ”numclust” ) ) ; 33 ASSERT( numclust>=2 && numclust<= ds−>s i z e ( ) ,

36 maxite r = boost : : any cast<Size >( 37 arguments . ge t ( ”maxiter ” ) ) ; 38 ASSERT( maxiter >0, ” i n v a l i d e maxiter” ) ; 39

40 s e ed = boost : : any cast<Size >( 41 arguments . ge t ( ” seed ” ) ) ; 42 ASSERT( seed >0, ” i n v a l i d e seed ” ) ; 43

44 d i s t anc e = boost : : shared ptr<Distance >(new 45 Euc l ideanDistance ( ) ) ; 46 } 47

48 void Cmean : : f e t chRe su l t s ( ) const { 49 S i z e s ; 50 for ( S i z e i =0; i< ds−>s i z e ();++ i ) { 51 Real dMax = MIN REAL; 52 for ( S i z e k=0;k< numclust;++k) { 53 i f (dMax < FCM( i , k ) ) { 54 dMax = FCM( i , k ) ; 55 s = k ; 56 } 57 } 58 CM[ i ] = s ; 59 c l u s t e r s [ s]−>add ((∗ ds ) [ i ] ) ; 60 } 61

62 PCluste r ing pc ; 63 for ( S i z e i =0; i< c l u s t e r s . s i z e ();++ i ){ 64 pc . add ( c l u s t e r s [ i ] ) ; 65 } 66 r e s u l t s .CM = CM; 67 r e s u l t s . i n s e r t ( ”pc” , pc ) ; 68 r e s u l t s . i n s e r t ( ”fcm” , FCM) ; 69 r e s u l t s . i n s e r t ( ”numiter” , numiter ) ; 70 r e s u l t s . i n s e r t ( ”dObj” , dObj ) ; 71 } 72

73 void Cmean : : i t e r a t i o n ( ) const { 74 numiter = 0 ; 75 Real dPrevObj ; 76 while ( true ) { 77 updateCenter ( ) ; 78 updateFCM( ) ; 79

80 dPrevObj = dObj ; 81 ca l cu l a t eOb j ( ) ; 82

83 ++ numiter ; 84

85 i f ( std : : f abs ( dObj − dPrevObj ) < th r e sho ld ){ 86 break ; 87 } 88

89 i f ( numiter >= maxite r ){ 90 break ; 91 } 92 } 93 } 94

95 void Cmean : : updateCenter ( ) const { 96 Real dSum1 , dSum2 , dTemp; 97 boost : : shared ptr<Schema> schema = ds−>schema ( ) ; 98 for ( S i z e k=0;k< numclust;++k){ 99 for ( S i z e j =0; j<schema−>s i z e ();++ j ){

100 dSum1 = 0 . 0 ;

i ){ 103 dTemp = std : : pow( FCM( i , k ) , a lpha ) ; 104 dSum1 += dTemp ∗ 105 (∗ schema ) [ j ]−>g e t c v a l ( (∗ ds ) ( i , j ) ) ; 106 dSum2 += dTemp; 107 } 108 (∗ schema ) [ j ]−> s e t c v a l ( 109 (∗ c l u s t e r s [ k]−>c en t e r ( ) ) [ j ] , dSum1/dSum2 ) ; 110 } 111 } 112 } 113

114 void Cmean : : updateFCM() const { 115 Real dSum, dTemp; 116 std : : vector<Real> dvTemp( numclust ) ; 117 boost : : shared ptr<Schema> schema = ds−>schema ( ) ; 118 for ( S i z e i =0; i< ds−>s i z e ();++ i ){ 119 dSum = 0 . 0 ; 120 for ( S i z e k=0;k< numclust;++k){ 121 dTemp = (∗ d i s t anc e ) ( (∗ ds ) [ i ] , 122 c l u s t e r s [ k]−>c en t e r ( ) ) + ep s i l o n ; 123 dvTemp[ k ] = std : : pow(dTemp, 2/( alpha −1)) ; 124 dSum += 1 / dvTemp[ k ] ; 125 } 126 for ( S i z e k=0;k< numclust;++k){ 127 FCM( i , k ) = 1 . 0 / (dvTemp[ k ] ∗ dSum) ; 128 } 129 } 130 } 131

132 void Cmean : : i n i t i a l i z a t i o n ( ) const { 133 S i z e numRecords = ds−>s i z e ( ) ; 134 FCM. r e s i z e ( numRecords , numclust ) ; 135 CM. r e s i z e ( numRecords , Null<Size > ( ) ) ; 136

137 std : : vector<Size> index ( numRecords , 0 ) ; 138 for ( S i z e i =0; i<index . s i z e ();++ i ){ 139 index [ i ] = i ; 140 } 141

142 boost : : minstd rand generator ( s e ed ) ; 143 for ( S i z e i =0; i< numclust;++i ){ 144 boost : : un i form int<> u n i d i s t (0 , numRecords−i −1); 145 boost : : va r i a t e g ene ra to r <boost : : minstd rand&, 146 boost : : un i form int<> > uni ( generator , u n i d i s t ) ; 147 S i z e r = uni ( ) ; 148 boost : : shared ptr<Record> c r = 149 boost : : shared ptr<Record>(new Record (∗ (∗ ds ) [ r ] ) ) ; 150 boost : : shared ptr<CenterCluster> c = 151 boost : : shared ptr<CenterCluster >( 152 new CenterCluste r ( c r ) ) ; 153 c−>s e t i d ( i ) ; 154 c l u s t e r s . push back ( c ) ; 155 index . e r a s e ( index . begin ()+ r ) ; 156 } 157

158 updateFCM( ) ; 159 } 160

161 void Cmean : : c a l cu l a t eOb j ( ) const { 162 Real dSum = 0 . 0 ; 163 for ( S i z e i =0; i< ds−>s i z e ( ) ; ++i ) { 164 for ( S i z e j =0; j< numclust ; ++j ) { 165 Real dTemp = (∗ d i s t an c e ) ( (∗ ds ) [ i ] , 166 c l u s t e r s [ j ]−>c en t e r ( ) ) ; 167 dSum += std : : pow( FCM( i , j ) , a lpha ) ∗

170 } 171 dObj = dSum; 172 } 173 }

Listing B.18: The header file of class Complete. 1 // c l / a lgor i thms/complete . hpp 2 #ifndef CLUSLIB COMPLETE HPP 3 #define CLUSLIB COMPLETE HPP 4

5 #include<c l / a lgor i thms / lw . hpp> 6

7 namespace ClusLib { 8

9 class Complete : public LW { 10 private : 11 void update dm ( S i z e p , S i z e q , S i z e r ) const ; 12 } ; 13

Listing B.19: The source file of class Complete. 1 // c l / a lgor i thms/complete . cpp 2 #include<c l / a lgor i thms / complete . hpp> 3

4 namespace ClusLib { 5

6 void Complete : : update dm ( S i z e p , S i z e q , S i z e r ) 7 const { 8 Real d i s t ; 9 std : : set<Size > : : i t e r a t o r i t ;

10 for ( i t = unmergedClusters . begin ( ) ; 11 i t != unmergedClusters . end ( ) ; ++i t ) { 12 i f (∗ i t == r ) { 13 continue ; 14 } 15

16 d i s t = std : : max( dm(p ,∗ i t ) , dm(q ,∗ i t ) ) ; 17 dm . add item ( r ,∗ i t , d i s t ) ; 18 } 19 } 20

Listing B.20: The header file of class Diana. 1 // c l / a lgor i thms/diana . hpp 2 #ifndef CLUSLIB DIANA HPP

. 6 #include<c l / c l u s t e r s / h c l u s t e r i ng . hpp> 7 #include<c l / u t i l i t i e s /nnmap . hpp> 8 #include<c l / da ta s e t s / datase t . hpp> 9 #include<c l / d i s t anc e s / d i s t anc e . hpp>

10 #include<c l / pa t t e rn s/ n odev i s i t o r . hpp> 11 #include<c l / types . hpp> 12 #include<set> 13

14 namespace ClusLib { 15

16 class Diana : public Algorithm { 17 protected : 18 void setupArguments ( ) ; 19 void per formCluste r ing ( ) const ; 20 void f e t chRe su l t s ( ) const ; 21 virtual void create dm () const ; 22 virtual void i n i t i a l i z a t i o n ( ) const ; 23 virtual void d i v i s i o n ( ) const ; 24 virtual void d o s p l i t ( S i z e ind ) const ; 25 virtual void c r e a t e c l u s t e r ( const std : : set<Size> e l e , 26 S i z e ind ) const ; 27

28 mutable iirMapA dm ; 29 mutable std : : set<Size> un sp l i tC l u s t e r s ; 30 mutable std : : map<Size , Real> c l u s t e rD iame t e r ; 31 mutable std : : map<Size , boost : : shared ptr<LeafNode> > l e a f ; 32 mutable std : : map<Size , boost : : shared ptr<InternalNode> > 33 i n t e r n a l ; 34 mutable std : : set<Size> c l u s t e r ID ; 35 boost : : shared ptr<Distance> d i s t anc e ; 36 } ; 37 } 38

Listing B.21: The source file of class Diana. 1 // c l / a lgor i thms/diana . cpp 2 #include<c l / a lgor i thms / diana . hpp> 3 #include<c l / pa t t e rn s/ p c v i s i t o r . hpp> 4 #include<c l / types . hpp> 5 #include<iostream> 6 #include<boost / p o i n t e r c a s t . hpp> 7

8 namespace ClusLib { 9

10 void Diana : : setupArguments ( ) { 11 Algorithm : : setupArguments ( ) ; 12

13 d i s t anc e = arguments . d i s t anc e ; 14 ASSERT( d i s t anc e , ” d i s t anc e i s n u l l ” ) ; 15 } 16

17 void Diana : : create dm () const { 18 S i z e n = ds−>s i z e ( ) ; 19 for ( S i z e i =0; i<n−1;++i ){ 20 for ( S i z e j=i +1; j<n;++j ){ 21 dm . add item ( i , j , 22 (∗ d i s t an c e ) ( (∗ ds ) [ i ] , (∗ ds ) [ j ] ) ) ; 23 } 24 } 25 } 26

27 void Diana : : pe r formCluste r ing ( ) const {

30 d i v i s i o n ( ) ; 31 } 32

33 void Diana : : i n i t i a l i z a t i o n ( ) const { 34 S i z e n = ds−>s i z e ( ) ; 35 S i z e id = 2∗n−2; 36 boost : : shared ptr<InternalNode> pin (new Inte rna lNode( id ) ) ; 37 for ( S i z e s=0; s<n;++s ){ 38 boost : : shared ptr<LeafNode> pln (new 39 LeafNode ((∗ ds ) [ s ] , s ) ) ; 40 pln−>s e t l e v e l ( 0 ) ; 41 pin−>add ( pln ) ; 42 l e a f . i n s e r t ( std : : pair<Size , 43 boost : : shared ptr<LeafNode> >(s , p ln ) ) ; 44 } 45 i n t e r n a l . i n s e r t ( std : : pair<Size , 46 boost : : shared ptr<InternalNode> >(id , pin ) ) ; 47 un sp l i tC l u s t e r s . i n s e r t ( id ) ; 48

49 Real dMax = MIN REAL; 50 for ( S i z e i =0; i<n−1;++i ){ 51 for ( S i z e j=i +1; j<n;++j ){ 52 i f (dMax < dm( i , j ) ) { 53 dMax = dm( i , j ) ; 54 } 55 } 56 } 57 c l u s t e rD iame t e r . i n s e r t ( std : : pair<Size , Real>(id , dMax ) ) ; 58

59 for ( S i z e s=2∗n−3; s>n−1; −−s ) { 60 c l u s t e r ID . i n s e r t ( s ) ; 61 } 62 } 63

64 void Diana : : d i v i s i o n ( ) const { 65 S i z e n = ds−>s i z e ( ) ; 66 std : : set<Size > : : i t e r a t o r i t ; 67 Real dMax; 68 S i z e ind ; 69 for ( S i z e s=2∗n−2; s>n−1; −−s ) { 70 dMax= MIN REAL; 71 std : : vector<Size> nvTemp( un s p l i tC l u s t e r s . begin ( ) , 72 un s p l i tC l u s t e r s . end ( ) ) ; 73 for ( S i z e i =0; i<nvTemp. s i z e ( ) ; ++i ) { 74 i f (dMax < c l u s t e rD iame t e r [ nvTemp[ i ] ] ) { 75 dMax = c lu s t e rD iame t e r [ nvTemp[ i ] ] ; 76 ind = nvTemp[ i ] ; 77 } 78 } 79

80 i n t e r n a l [ ind]−> s e t l e v e l ( s−n+1); 81 i n t e r n a l [ ind]−> s e t i d ( s ) ; 82 i n t e r n a l [ ind]−> s e t j o i nVa l u e (dMax ) ; 83 do s p l i t ( ind ) ; 84 } 85 } 86

87 void Diana : : d o s p l i t ( S i z e ind ) const { 88 std : : vector<boost : : shared ptr<Node> > data = 89 i n t e r n a l [ ind]−>data ( ) ; 90 S i z e n = data . s i z e ( ) ; 91

92 S i z e ra ; 93 std : : set<Size> s p l i n t e r ; 94 std : : set<Size> remaining ;

e 97 remaining . i n s e r t ( id ) ; 98 } 99

100 std : : set<Size > : : i t e r a t o r i t , i t 1 ; 101 Real dMax = MIN REAL; 102 for ( i t = remaining . begin ( ) ; 103 i t != remaining . end ( ) ; ++i t ) { 104 Real dSum = 0 . 0 ; 105 for ( i t 1 = remaining . begin ( ) ; 106 i t 1 != remaining . end ( ) ; ++i t 1 ) { 107 i f (∗ i t == ∗ i t 1 ) { 108 continue ; 109 } 110 dSum += dm(∗ i t , ∗ i t 1 ) ; 111 } 112 i f (dMax < dSum){ 113 dMax = dSum; 114 ra = ∗ i t ; 115 } 116 } 117 s p l i n t e r . i n s e r t ( ra ) ; 118 remaining . e r a s e ( ra ) ; 119

120 bool bChanged = true ; 121 while ( bChanged ) { 122 bChanged = fa l se ; 123 for ( i t = remaining . begin ( ) ; 124 i t != remaining . end ( ) ; ++i t ) { 125 Real d1 = 0 . 0 ; 126 for ( i t 1 = s p l i n t e r . begin ( ) ; 127 i t 1 != s p l i n t e r . end ( ) ; ++i t 1 ) { 128 d1 += dm(∗ i t , ∗ i t 1 ) ; 129 } 130 d1 /= sp l i n t e r . s i z e ( ) ; 131

132 Real d2 = 0 . 0 ; 133 for ( i t 1 = remaining . begin ( ) ; 134 i t 1 != remaining . end ( ) ; ++i t 1 ) { 135 i f (∗ i t == ∗ i t 1 ) { 136 continue ; 137 } 138 d2 += dm(∗ i t , ∗ i t 1 ) ; 139 } 140 i f ( remaining . s i z e ( ) > 1) { 141 d2 /= ( remaining . s i z e ( ) −1 .0) ; 142 } 143

144 i f ( d1 < d2 ) { 145 bChanged = true ; 146 s p l i n t e r . i n s e r t (∗ i t ) ; 147 remaining . e r a s e ( i t ) ; 148 break ; 149 } 150 } 151 } 152

153 un sp l i tC l u s t e r s . e r a s e ( ind ) ; 154 i n t e r n a l [ ind]−> c l e a r ( ) ; 155 c r e a t e c l u s t e r ( s p l i n t e r , ind ) ; 156 c r e a t e c l u s t e r ( remaining , ind ) ; 157 } 158

159 void Diana : : c r e a t e c l u s t e r ( const std : : set<Size> e l e , 160 S i z e ind ) const { 161 std : : set<Size > : : i t e r a t o r i t ;

164 boost : : shared ptr<InternalNode> pin (new 165 Inte rna lNode (0 , i n t e r n a l [ ind ] ) ) ; 166 i n t e r n a l [ ind]−>add ( pin ) ; 167 for ( i t = e l e . begin ( ) ; i t != e l e . end ( ) ; ++i t ) { 168 pin−>add ( l e a f [∗ i t ] ) ; 169 } 170

171 i t = c l u s t e r ID . end ( ) ; 172 −− i t ; 173 S i z e id = ∗ i t ; 174 c l u s t e r ID . e ra s e ( i t ) ; 175

176 i n t e r n a l . i n s e r t ( std : : pair<Size , 177 boost : : shared ptr<InternalNode> >(id , pin ) ) ; 178 un s p l i tC l u s t e r s . i n s e r t ( id ) ; 179

180 dMax = MIN REAL; 181 std : : vector<Size> nvTemp( e l e . begin ( ) , e l e . end ( ) ) ; 182 for ( S i z e i =0; i<nvTemp. s i z e ( ) ; ++i ) { 183 for ( S i z e j=i +1; j<nvTemp. s i z e ( ) ; ++j ) { 184 i f (dMax < dm(nvTemp[ i ] , nvTemp[ j ] ) ) { 185 dMax = dm(nvTemp[ i ] , nvTemp[ j ] ) ; 186 } 187 } 188 } 189 c l u s t e rD iame t e r . i n s e r t ( 190 std : : pair<Size , Real>(id , dMax ) ) ; 191 } else { 192 i t = e l e . begin ( ) ; 193 i n t e r n a l [ ind]−>add ( l e a f [∗ i t ] ) ; 194 } 195 } 196

197 void Diana : : f e t chRe su l t s ( ) const { 198 S i z e n = ds−>s i z e ( ) ; 199 HCluster ing hc ( i n t e r n a l [2∗n−2 ] ) ; 200 r e s u l t s . i n s e r t ( ”hc” , hc ) ; 201 } 202 }

Listing B.22: The header file of class FSC. 1 // c l / a lgor i thms/ f sc . hpp 2 #ifndef CLUSLIB FSC HPP 3 #define CLUSLIB FSC HPP 4

5 #include<c l / a lgor i thms / a lgor i thm . hpp> 6 #include<c l / da ta s e t s / datase t . hpp> 7 #include<c l / c l u s t e r s / sub spac e c l u s t e r . hpp> 8

9 namespace ClusLib { 10

11 class FSC: public Algorithm { 12 protected : 13 void setupArguments ( ) ; 14 void per formCluste r ing ( ) const ; 15 void f e t chRe su l t s ( ) const ; 16 virtual void i n i t i a l i z a t i o n ( ) const ; 17 virtual void i t e r a t i o n ( ) const ; ;

j ( ; 21 Real d i s t ( const boost : : shared ptr<Record> &x , 22 const boost : : shared ptr<SubspaceCluster> &c 23 ) const ; 24

25 mutable std : : vector<boost : : shared ptr<SubspaceCluster> > 26 c l u s t e r s ; 27 mutable std : : vector<Size> CM; 28 mutable Real dObj ; 29 mutable S i z e numiter ; 30

31 S i z e s e ed ; 32 Real a lpha ; 33 Real e p s i l o n ; 34 Real th r e sho ld ; 35 S i z e numclust ; 36 S i z e maxi te r ; 37 } ; 38 } 39

Listing B.23: The source file of class FSC. 1 // c l / a lgor i thms/ f sc . cpp 2 #include<c l / a lgor i thms / f s c . hpp> 3 #include<c l / c l u s t e r s / p c l u s t e r i ng . hpp> 4 #include<c l / e r r o r s . hpp> 5 #include<iostream> 6 #include<boost /random . hpp> 7

8 namespace ClusLib { 9

10 void FSC : : pe r formCluste r ing ( ) const { 11 i n i t i a l i z a t i o n ( ) ; 12 i t e r a t i o n ( ) ; 13 } 14

15 void FSC : : setupArguments ( ) { 16 Algorithm : : setupArguments ( ) ; 17 ASSERT( ds−>i s numer i c ( ) , ” datase t i s not numeric” ) ; 18

19 e p s i l o n = boost : : any cast<Real>( 20 arguments . ge t ( ” e p s i l o n ” ) ) ; 21 ASSERT( ep s i l on >=0, ” i n v a l i d ep s i l on ” ) ; 22

23 th r e sho ld = boost : : any cast<Real>( 24 arguments . ge t ( ” th r e sho ld” ) ) ; 25 ASSERT( th r e sho ld>EPSILON, ” i n v a l i d th r e sho ld” ) ; 26

27 a lpha = boost : : any cast<Real>( arguments . ge t ( ” alpha” ) ) ; 28 ASSERT( alpha >1, ” i n v a l i d alpha” ) ; 29

30 numclust = boost : : any cast<Size >( 31 arguments . ge t ( ”numclust” ) ) ; 32 ASSERT( numclust>=2 && numclust<= ds−>s i z e ( ) , 33 ” i nv a l i d numclust” ) ; 34

35 maxite r = boost : : any cast<Size >( 36 arguments . ge t ( ”maxiter ” ) ) ; 37 ASSERT( maxiter >0, ” i n v a l i d e maxiter” ) ; 38

39 s e ed = boost : : any cast<Size >( 40 arguments . ge t ( ” seed ” ) ) ; 41 ASSERT( seed >0, ” i n v a l i d e seed ” ) ;

44 void FSC : : f e t chRe su l t s ( ) const { 45 PCluste r ing pc ; 46 for ( S i z e i =0; i< c l u s t e r s . s i z e ();++ i ){ 47 pc . add ( c l u s t e r s [ i ] ) ; 48 } 49 r e s u l t s .CM = CM; 50 r e s u l t s . i n s e r t ( ”pc” , boost : : any ( pc ) ) ; 51 r e s u l t s . i n s e r t ( ”numiter” , boost : : any ( numiter ) ) ; 52 r e s u l t s . i n s e r t ( ”dObj” , boost : : any ( dObj ) ) ; 53 } 54

55 void FSC : : i t e r a t i o n ( ) const { 56 Real dObjPre ; 57

58 updateWeight ( ) ; 59 updateCenter ( ) ; 60 numiter = 1 ; 61 while ( true ) { 62 In t e ge r s ; 63 Real dMin , dDist ; 64 for ( S i z e i =0; i< ds−>s i z e ();++ i ) { 65 dMin = MAXREAL; 66 for ( S i z e k=0;k< c l u s t e r s . s i z e ();++k) { 67 dDist = d i s t ( (∗ ds ) [ i ] , c l u s t e r s [ k ] ) ; 68 i f (dMin > dDist ) { 69 dMin = dDist ; 70 s = k ; 71 } 72 } 73

74 i f ( CM[ i ] != s ){ 75 c l u s t e r s [ CM[ i ]]−> e ra s e ( (∗ ds ) [ i ] ) ; 76 c l u s t e r s [ s]−>add ((∗ ds ) [ i ] ) ; 77 CM[ i ] = s ; 78 } 79 } 80

81 updateWeight ( ) ; 82 updateCenter ( ) ; 83

84 dObjPre = dObj ; 85 ca l cu l a t eOb j ( ) ; 86 i f ( std : : f abs ( dObjPre − dObj ) < th r e sho ld ){ 87 break ; 88 } 89

90 ++ numiter ; 91 i f ( numiter > maxite r ){ 92 break ; 93 } 94 } 95 } 96

97 void FSC : : updateCenter ( ) const { 98 Real dTemp; 99 boost : : shared ptr<Schema> schema = ds−>schema ( ) ;

100 for ( S i z e k=0;k< c l u s t e r s . s i z e ();++k){ 101 for ( S i z e j =0; j<schema−>s i z e ();++ j ){ 102 dTemp = 0 . 0 ; 103 for ( S i z e i =0; i< c l u s t e r s [ k]−> s i z e ();++ i ){ 104 boost : : shared ptr<Record> r e c = 105 (∗ c l u s t e r s [ k ] ) [ i ] ; 106 dTemp += (∗ schema ) [ j ]−> g e t c v a l ( (∗ r e c ) [ j ] ) ; 107 } 108 (∗ schema ) [ j ]−> s e t c v a l (

e r i z e ) ; 111 } 112 } 113 } 114

115 void FSC : : updateWeight ( ) const { 116 Real dVar , dSum; 117 boost : : shared ptr<Schema> schema = ds−>schema ( ) ; 118 boost : : shared ptr<Record> c , r ; 119 std : : vector<Real> w( schema−>s i z e ( ) , 0 . 0 ) ; 120 for ( S i z e k=0;k< c l u s t e r s . s i z e ();++k){ 121 c = c l u s t e r s [ k]−>c en t e r ( ) ; 122 dSum = 0 . 0 ; 123 for ( S i z e j =0; j<schema−>s i z e ();++ j ){ 124 dVar = 0 . 0 ; 125 for ( S i z e i =0; i< c l u s t e r s [ k]−> s i z e ();++ i ){ 126 r = (∗ c l u s t e r s [ k ] ) [ i ] ; 127 dVar += std : : pow((∗ schema ) [ j ]−>d i s t anc e ( 128 (∗ r ) [ j ] , (∗ c ) [ j ] ) , 2 . 0 ) ; 129 } 130 w[ j ] = std : : pow(1/( dVar + ep s i l o n ) , 1/ ( alpha −1)) ; 131 dSum += w[ j ] ; 132 } 133 for ( S i z e j =0; j<schema−>s i z e ();++ j ) { 134 c l u s t e r s [ k]−>w( j ) = w[ j ] / dSum; 135 } 136 } 137 } 138

139 void FSC : : i n i t i a l i z a t i o n ( ) const { 140 S i z e numRecords = ds−>s i z e ( ) ; 141 std : : vector<Intege r> index ( numRecords , 0 ) ; 142 CM. r e s i z e ( numRecords ) ; 143 for ( S i z e i =0; i<index . s i z e ();++ i ){ 144 index [ i ] = i ; 145 } 146

147 boost : : minstd rand generator ( s e ed ) ; 148 for ( S i z e i =0; i< numclust;++i ){ 149 boost : : un i form int<> u n i d i s t (0 , numRecords−i −1); 150 boost : : va r i a t e g ene ra to r <boost : : minstd rand&, 151 boost : : un i form int<> > uni ( generator , u n i d i s t ) ; 152 In t e ge r r = uni ( ) ; 153 boost : : shared ptr<Record> c r = 154 boost : : shared ptr<Record>(new Record (∗ (∗ ds ) [ r ] ) ) ; 155 boost : : shared ptr<SubspaceCluster> c = 156 boost : : shared ptr<SubspaceCluster >( 157 new SubspaceCluster ( c r ) ) ; 158 c−>s e t i d ( i ) ; 159 c l u s t e r s . push back ( c ) ; 160 index . e r a s e ( index . begin ()+ r ) ; 161 } 162

163 In t e ge r s ; 164 Real dMin , dDist ; 165 for ( S i z e i =0; i<numRecords;++i ){ 166 dMin = MAXREAL; 167 for ( S i z e j =0; j< numclust;++j ){ 168 dDist = d i s t ( (∗ ds ) [ i ] , c l u s t e r s [ j ] ) ; 169 i f ( dDist<dMin){ 170 s = j ; 171 dMin = dDist ; 172 } 173 } 174 c l u s t e r s [ s]−>add ((∗ ds ) [ i ] ) ; 175 CM[ i ] = s ;

179 Real FSC : : d i s t ( const boost : : shared ptr<Record> &x , 180 const boost : : shared ptr<SubspaceCluster> &c 181 ) const { 182 Real dTemp = 0 . 0 ; 183 boost : : shared ptr<Schema> schema = ds−>schema ( ) ; 184 for ( S i z e j =0; j<schema−>s i z e ();++ j ){ 185 dTemp += std : : pow( c−>w( j ) , a lpha ) ∗ 186 std : : pow((∗ schema ) [ j ]−>d i s t anc e ((∗ x ) [ j ] , 187 (∗ c−>c en t e r ( ) ) [ j ] ) , 2 . 0 ) ; 188 } 189

193 void FSC : : c a l cu l a t eOb j ( ) const { 194 Real dTemp = 0 . 0 ; 195 for ( S i z e i =0; i< ds−>s i z e ( ) ; ++i ){ 196 dTemp += d i s t ( (∗ ds ) [ i ] , c l u s t e r s [ CM[ i ] ] ) ; 197 } 198

199 dObj = dTemp; 200 } 201 }

Listing B.24: The header file of class GKmode. 1 // c l / a lgor i thms/gkmode. hpp 2 #ifndef CLUSLIB GKMODE HPP 3 #define CLUSLIB GKMODE HPP 4

5 #include<c l / a lgor i thms / a lgor i thm . hpp> 6 #include<c l / c l u s t e r s / c e n t e r c l u s t e r . hpp> 7 #include<c l / c l u s t e r s / p c l u s t e r i ng . hpp> 8 #include<c l / da ta s e t s / datase t . hpp> 9 #include<c l / d i s t anc e s / d i s t anc e . hpp>

10 #include<c l / u t i l i t i e s /matrix . hpp> 11

12 namespace ClusLib { 13

14 class GKmode : public Algorithm { 15 private : 16 void setupArguments ( ) ; 17 void per formCluste r ing ( ) const ; 18 void f e t chRe su l t s ( ) const ; 19 void i n i t i a l i z a t i o n ( ) const ; 20 void i t e r a t i o n ( ) const ; 21 void s e l e c t i o n ( S i z e g ) const ; 22 void mutation ( S i z e g ) const ; 23 void kmode( S i z e ind ) const ; 24 void c a l c u l a t e Lo s s ( S i z e ind ) const ; 25 bool get mode ( S i z e &mode , Real &var , 26 S i z e ind , S i z e k , S i z e j ) const ; 27 Real c a l cu l a t eRat i o ( S i z e ind ) const ; 28 void generateRN ( std : : vector<Size>& nv , S i z e s ) const ; 29

31 mutable std : : vector<boost : : shared ptr<CenterCluster> > 32 c l u s t e r s ;

37 S i z e numclust ; 38 S i z e numpop ; 39 S i z e maxgen ; 40 Real c ; 41 Real cm ; 42 Real Pm; 43 } ; 44

Listing B.25: The source file of class GKmode. 1 // c l / a lgor i thms/gkmode. cpp 2 #include<c l / a lgor i thms /gkmode . hpp> 3 #include<boost /random . hpp> 4 #include<c l / types . hpp> 5 #include<set> 6 #include<map> 7

8 namespace ClusLib { 9

10 void GKmode : : setupArguments ( ) { 11 Algorithm : : setupArguments ( ) ; 12 ASSERT( ds−> i s c a t e g o r i c a l ( ) , 13 ” datase t i s not c a t e g o r i c a l ” ) ; 14

15 numclust = boost : : any cast<Size >( 16 arguments . ge t ( ”numclust” ) ) ; 17 ASSERT( numclust>=2 && numclust<= ds−>s i z e ( ) , 18 ” i n v a l i d numclust” ) ; 19

20 numpop = boost : : any cast<Size >( 21 arguments . ge t ( ”numpop” ) ) ; 22 ASSERT( numpop>0, ” i nv a l i d e numpop” ) ; 23

24 maxgen = boost : : any cast<Size >( 25 arguments . ge t ( ”maxgen” ) ) ; 26 ASSERT( maxgen>0, ” i nv a l i d e maxgen” ) ; 27

28 c = boost : : any cast<Real>( arguments . ge t ( ”c” ) ) ; 29 ASSERT( c>0 && c <3, ” c must be in range (0 , 3) ” ) ; 30

31 cm = boost : : any cast<Real>( arguments . ge t ( ”cm” ) ) ; 32 ASSERT( cm>0, ”cm must be p o s i t i v e ” ) ; 33

34 Pm = boost : : any cast<Real>( arguments . ge t ( ”pm” ) ) ; 35 ASSERT( Pm>0, ”pm must be p o s i t i v e ” ) ; 36 } 37

38 void GKmode : : per formCluste r ing ( ) const { 39 i n i t i a l i z a t i o n ( ) ; 40 i t e r a t i o n ( ) ; 41 } 42

43 void GKmode : : f e t chRe su l t s ( ) const { 44 Real dMin = MAXREAL; 45 S i z e s ; 46 for ( S i z e i =0; i< numpop ; ++i ) { 47 i f (dMin > dvFit [ i ] ) { 48 dMin = dvFit [ i ] ; 49 s = i ;

53 std : : vector<Size> CM( ds−>s i z e ( ) ) ; 54 for ( S i z e i =0; i< ds−>s i z e ( ) ; ++i ){ 55 CM[ i ] = mP( s , i ) ; 56 } 57 r e s u l t s .CM = CM; 58

59 Real dVar ; 60 S i z e mode ; 61 PCluste r ing pc ; 62 boost : : shared ptr<Schema> schema = ds−>schema ( ) ; 63 for ( S i z e k=0; k< numclust ; ++k) { 64 boost : : shared ptr<Record> r (new Record ( schema ) ) ; 65 for ( S i z e j =0; j< ds−>num attr ( ) ; ++j ) { 66 i f ( get mode (mode , dVar , s , k , j ) ) { 67 (∗ schema ) [ j ]−> s e t d v a l ( (∗ r ) [ j ] , mode ) ; 68 } 69 } 70 boost : : shared ptr<CenterCluster> c (new 71 CenterCluste r ( r ) ) ; 72 pc . add ( c ) ; 73 } 74

75 for ( S i z e i =0; i< ds−>s i z e ( ) ; ++i ){ 76 pc [CM[ i ]]−>add ( (∗ ds ) [ i ] ) ; 77 } 78

79 r e s u l t s . i n s e r t ( ”pc” , boost : : any ( pc ) ) ; 80 } 81

82 void GKmode : : i n i t i a l i z a t i o n ( ) const { 83 mP. r e s i z e ( numpop , ds−>s i z e ( ) ) ; 84 for ( S i z e i =0; i< numpop ; ++i ) { 85 for ( S i z e j =0; j< ds−>s i z e ( ) ; ++j ) { 86 mP( i , j ) = Null<Size >() ; 87 } 88 } 89

90 std : : vector<Size> nvTemp( numclust , 0 ) ; 91 boost : : minstd rand generator (42u ) ; 92 boost : : un i form int<> un i d i s t (0 , numclust −1); 93 boost : : va r i a t e g ene ra to r <boost : : minstd rand&, 94 boost : : un i form int<> > uni ( generator , u n i d i s t ) ; 95 for ( S i z e i =0; i< numpop;++i ){ 96 generateRN (nvTemp, i ) ; 97 for ( S i z e j =0; j< numclust ; ++j ) { 98 mP( i , nvTemp[ j ] ) = j ; 99 }

100 for ( S i z e j =0; j< ds−>s i z e ();++ j ) { 101 i f ( mP( i , j ) == Null<Size >()) { 102 mP( i , j ) = uni ( ) ; 103 } 104 } 105 } 106

107 dvFit . r e s i z e ( numpop , 0 . 0 ) ; 108 bvLegal . r e s i z e ( numpop , true ) ; 109 } 110

111 void GKmode : : i t e r a t i o n ( ) const { 112 S i z e g = 0 ; 113 while ( g < maxgen ) { 114 ++g ; 115 Real Lmax = MIN REAL; 116 for ( S i z e i =0; i< numpop ; ++i ) {

[ i { 119 Lmax = dvFit [ i ] ; 120 } 121 } 122

123 Real dTemp = 0 . 0 ; 124 for ( S i z e i =0; i< numpop ; ++i ) { 125 i f ( bvLegal [ i ] ) { 126 dvFit [ i ] = c ∗Lmax − dvFit [ i ] ; 127 } else { 128 dvFit [ i ] = ca l cu l a t eRat i o ( i )∗ ( c−1)∗Lmax ; 129 } 130 dTemp += dvFit [ i ] ; 131 } 132

133 for ( S i z e i =0; i< numpop ; ++i ) { 134 dvFi t [ i ] /= dTemp; 135 } 136

137 s e l e c t i o n ( g ) ; 138 mutation ( g ) ; 139

140 for ( S i z e i =0; i< numpop ; ++i ) { 141 kmode( i ) ; 142 } 143 } 144

145 for ( S i z e i =0; i< numpop ; ++i ) { 146 c a l c u l a t e Los s ( i ) ; 147 } 148 } 149

150 void GKmode : : s e l e c t i o n ( S i z e g ) const { 151 boost : : minstd rand generator ( 152 static cast<unsigned int >(g+1)); 153 boost : : un i f o rm rea l<> un i d i s t (0 , 1 ) ; 154 boost : : va r i a t e g ene ra to r <boost : : minstd rand&, 155 boost : : un i f o rm rea l<> > uni ( generator , u n i d i s t ) ; 156 Real dRand , dTemp; 157 S i z e s ; 158 ublas : : matrix<Size> mP = mP; 159 for ( S i z e i =0; i< numpop;++i ){ 160 dTemp = dvFit [ 0 ] ; 161 dRand = uni ( ) ; 162 s = 0 ; 163 while (dTemp < dRand) { 164 dTemp += dvFit[++s ] ; 165 } 166 for ( S i z e j =0; j< ds−>s i z e ( ) ; ++j ) { 167 mP( i , j ) = mP( s , j ) ; 168 } 169 } 170 } 171

172 void GKmode : : mutation ( S i z e g ) const { 173 boost : : minstd rand generator ( 174 static cast<unsigned int >(g+1)); 175 boost : : un i f o rm rea l<> un i d i s t (0 , 1 ) ; 176 boost : : va r i a t e g ene ra to r <boost : : minstd rand&, 177 boost : : un i f o rm rea l<> > uni ( generator , u n i d i s t ) ; 178 Real dRand , dMax, dVar , dTemp; 179 S i z e mode ; 180 bool i s L e g a l ; 181 std : : map<Size , Size> mTemp; 182 std : : vector<Real> dvProb( numclust , 0 . 0 ) ; 183 for ( S i z e i =0; i< numpop ; ++i ) {

) { 186 dRand = uni ( ) ; 187 i f (dRand >= Pm ) { 188 continue ; 189 } 190

191 dMax = MIN REAL; 192 for ( S i z e k=0; k< numclust ; ++k) { 193 dvProb [ k ] = 0 . 0 ; 194 for ( S i z e d=0; d< ds−>num attr ( ) ; ++d) { 195 i s L e g a l = get mode (mode , dVar , i , k , d ) ; 196 i f ( ! i s L e g a l ) { 197 break ; 198 } 199 dvProb [ k ] += dVar ; 200 } 201

202 i f ( dvProb [ k ] >dMax) { 203 dMax = dvProb [ k ] ; 204 } 205 } 206

207 dTemp = 0 . 0 ; 208 for ( S i z e k=0; k< numclust ; ++k) { 209 dvProb [ k ] = cm∗dMax − dvProb [ k ] ; 210 dTemp += dvProb [ k ] ; 211 } 212 for ( S i z e k=0; k< numclust ; ++k) { 213 dvProb [ k ] /= dTemp; 214 } 215

216 dRand = uni ( ) ; 217 dTemp = dvProb [ 0 ] ; 218 S i z e k = 0 ; 219 while ( dTemp < dRand) { 220 dTemp += dvProb[++k ] ; 221 } 222 mTemp. i n s e r t ( std : : pair<Size , Size >(j , k ) ) ; 223 } 224

225 std : : map<Size , Size > : : i t e r a t o r i t ; 226 for ( i t = mTemp. begin ( ) ; i t !=mTemp. end ( ) ; ++i t ) { 227 mP( i , i t−> f i r s t ) = i t−>second ; 228 } 229 } 230

233 void GKmode : : kmode( S i z e ind ) const { 234 ublas : : matrix<Size> 235 nmMode( numclust , ds−>num attr ( ) ) ; 236

237 bool i s L e g a l ; 238 Real dVar ; 239 for ( S i z e k=0; k< numclust ; ++k) { 240 for ( S i z e j =0; j< ds−>num attr ( ) ; ++j ) { 241 i s L e g a l = get mode (nmMode(k , j ) , dVar , ind , k , j ) ; 242 i f ( ! i s L e g a l ) { 243 return ; 244 } 245 } 246 } 247

248 Real dDist ; 249 Real dMin ; 250 S i z e h ;

e ( { 253 dMin = MAXREAL; 254 for ( S i z e k=0; k< numclust ; ++k) { 255 dDist = 0 . 0 ; 256 for ( S i z e j =0; j< ds−>num attr ( ) ; ++j ) { 257 i f (nmMode(k , j ) != (∗ schema ) [ j ]−>g e t d va l ( 258 (∗ (∗ ds ) [ i ] ) [ j ] ) ) { 259 dDist += 1 ; 260 } 261 } 262 i f (dMin > dDist ) { 263 dMin = dDist ; 264 h = k ; 265 } 266 } 267 mP( ind , i ) = h ; 268 } 269 } 270

271 void GKmode : : c a l c u l a t e Lo s s ( S i z e ind ) const { 272 Real dLoss = 0 . 0 ; 273 S i z e mode ; 274 Real dVar ; 275 bool i s L e g a l ; 276 for ( S i z e k=0; k< numclust ; ++k){ 277 for ( S i z e j =0; j< ds−>num attr ( ) ; ++j ) { 278 i s L e g a l = get mode (mode , dVar , ind , k , j ) ; 279 dLoss += dVar ; 280 i f ( ! i s L e g a l ) { 281 break ; 282 } 283 } 284 } 285

286 bvLegal [ ind ] = i sL e g a l ; 287 i f ( i s L e g a l ) { 288 dvFit [ ind ] = dLoss ; 289 } else { 290 dvFit [ ind ] = MAXREAL; 291 } 292 } 293

294 bool GKmode : : get mode ( S i z e &mode , Real &var , 295 S i z e ind , S i z e k , S i z e j ) const { 296 boost : : shared ptr<Schema> schema = ds−>schema ( ) ; 297 S i z e nValues = (∗ schema ) [ j ]−> c a s t t o d ( ) . num values ( ) ; 298 std : : vector<Size> nvFreq ( nValues , 0 ) ; 299

300 S i z e va l ; 301 S i z e nCount = 0 ; 302 for ( S i z e i =0; i< ds−>s i z e ( ) ; ++i ) { 303 i f ( mP( ind , i ) == k) { 304 va l = (∗ schema ) [ j ]−>g e t d va l ( (∗ ds ) ( i , j ) ) ; 305 ++nvFreq [ va l ] ; 306 ++nCount ; 307 } 308 } 309 va l = 0 ; 310 for ( S i z e i =0; i<nValues ; ++i ){ 311 i f ( va l < nvFreq [ i ] ) { 312 va l = nvFreq [ i ] ; 313 mode = i ; 314 } 315 } 316

317 var = nCount − nvFreq [mode ] ;

320 return fa l se ; 321 } else { 322 return true ; 323 } 324 } 325

326 Real GKmode : : c a l cu l a t eRat i o ( S i z e ind ) const { 327 std : : set<Size> va l s ; 328 for ( S i z e i =0; i< ds−>s i z e ( ) ; ++i ) { 329 va l s . i n s e r t ( mP( ind , i ) ) ; 330 } 331

332 return ( Real ) va l s . s i z e ( ) / numclust ; 333 } 334

335 void GKmode : : generateRN ( std : : vector<Size>& nv , 336 S i z e s ) const { 337 boost : : minstd rand generator ( 338 static cast<unsigned int >(s +1)); 339 for ( S i z e i =0; i< numclust;++i ){ 340 boost : : un i form int<> u n i d i s t (0 , ds−>s i z e ()− i −1); 341 boost : : va r i a t e g ene ra to r <boost : : minstd rand&, 342 boost : : un i form int<> > uni ( generator , u n i d i s t ) ; 343 nv [ i ] = uni ( ) ; 344 } 345 } 346 }

Listing B.26: The header file of class GMC. 1 // c l / a lgor i thms/gmc. hpp 2 #ifndef CLUSLIB GMC HPP 3 #define CLUSLIB GMC HPP 4

5 #include<c l / a lgor i thms / a lgor i thm . hpp> 6 #include<c l / c l u s t e r s / c e n t e r c l u s t e r . hpp> 7 #include<c l / da ta s e t s / datase t . hpp> 8 #include<c l / d i s t anc e s / d i s t anc e . hpp> 9 #include<c l / u t i l i t i e s /matrix . hpp>

11 namespace ClusLib { 12

13 class GMC: public Algorithm { 14 protected : 15 void setupArguments ( ) ; 16 void per formCluste r ing ( ) const ; 17 void f e t chRe su l t s ( ) const ; 18 virtual void i n i t i a l i z a t i o n ( ) const ; 19 virtual void i t e r a t i o n ( ) const ; 20 virtual void e step ( ) const ; 21 virtual void mstep ( ) const ; 22

23 mutable ublas : : matrix<Real> mu ; 24 mutable std : : vector<ublas : : symmetric matrix<Real> > 25 s igma ; 26 mutable std : : vector<Real> p ; 27 mutable ublas : : matrix<Real> data ; 28 mutable Real l l ; 29 mutable ublas : : matrix<Real> pos t ; >

33 mutable bool converged ; 34 mutable S i z e numiter ; 35

36 S i z e s e ed ; 37 S i z e numclust ; 38 S i z e maxi te r ; 39 Real th r e sho ld ; 40 Real e p s i l o n ; 41 } ; 42 } 43

Listing B.27: The source file of class GMC. 1 // c l / a lgor i thms/gmc. cpp 2 #include<c l / a lgor i thms /gmc . hpp> 3 #include<c l / c l u s t e r s / p c l u s t e r i ng . hpp> 4 #include<c l / d i s t anc e s /mahalanob i sd i s tance . hpp> 5 #include<boost /random . hpp> 6 #include<cmath> 7

8 namespace ClusLib { 9

10 void GMC: : setupArguments ( ) { 11 Algorithm : : setupArguments ( ) ; 12 ASSERT( ds−>i s numer i c ( ) , ” datase t i s not numeric” ) ; 13

14 s e ed = boost : : any cast<Size >( 15 arguments . ge t ( ” seed ” ) ) ; 16 ASSERT( seed >0, ” seed must be a p o s i t i v e number” ) ; 17

18 numclust = boost : : any cast<Size >( 19 arguments . ge t ( ”numclust” ) ) ; 20 ASSERT( numclust>=2 && numclust<= ds−>s i z e ( ) , 21 ” i nv a l i d numclust” ) ; 22

23 maxite r = boost : : any cast<Size >( 24 arguments . ge t ( ”maxiter ” ) ) ; 25 ASSERT( maxiter >0, ” i n v a l i d e maxiter” ) ; 26

27 th r e sho ld = boost : : any cast<Real>( 28 arguments . ge t ( ” th r e sho ld” ) ) ; 29 ASSERT( th r e sho ld >0, ” th r e sho ld must be p o s i t i v e ” ) ; 30

31 e p s i l o n = boost : : any cast<Real>( 32 arguments . ge t ( ” e p s i l o n ” ) ) ; 33 ASSERT( ep s i l on >=0, ” ep s i l o n must be nonnegative ” ) ; 34 } 35

36 void GMC: : per formCluste r ing ( ) const { 37 i n i t i a l i z a t i o n ( ) ; 38 i t e r a t i o n ( ) ; 39 } 40

41 void GMC: : f e t chRe su l t s ( ) const { 42 for ( S i z e i =0; i< ds−>s i z e ( ) ; ++i ) { 43 Real dMax = MIN REAL; 44 In t e ge r k ; 45 for ( S i z e j =0; j< numclust ; ++j ) { 46 i f (dMax < pos t ( i , j ) ){ 47 dMax = pos t ( i , j ) ; 48 k = j ; 49 }

52 c l u s t e r s [ k]−>add ((∗ ds ) [ i ] ) ; 53 } 54

55 PCluste r ing pc ; 56 for ( S i z e i =0; i< c l u s t e r s . s i z e ();++ i ){ 57 pc . add ( c l u s t e r s [ i ] ) ; 58 } 59 r e s u l t s . i n s e r t ( ”pc” , boost : : any ( pc ) ) ; 60 r e s u l t s .CM = CM; 61 r e s u l t s . i n s e r t ( ” converged” , boost : : any ( converged ) ) ; 62 r e s u l t s . i n s e r t ( ”p” , boost : : any ( p ) ) ; 63 r e s u l t s . i n s e r t ( ”numiter” , boost : : any ( numiter ) ) ; 64 r e s u l t s . i n s e r t ( ” l l ” , boost : : any ( l l ) ) ; 65 r e s u l t s . i n s e r t ( ”mu” , boost : : any ( mu ) ) ; 66 } 67

68 void GMC: : i t e r a t i o n ( ) const { 69 Real p r e l l = MIN REAL; 70 converged = fa l se ; 71 numiter = 0 ; 72 for ( S i z e i t e r =0; i t e r < maxite r ; ++i t e r ) { 73 e step ( ) ; 74

75 Real dTemp = l l − p r e l l ; 76 i f ( dTemp >=0 && dTemp < th r e sho ld ) { 77 converged = true ; 78 break ; 79 } 80 p r e l l = l l ; 81

82 mstep ( ) ; 83 ++ numiter ; 84 } 85 } 86

87 void GMC: : e step ( ) const { 88 S i z e numRecords = ds−>s i z e ( ) ; 89 S i z e numAttr = ds−>num attr ( ) ; 90

91 ublas : : matrix<Real> l o g l l ( numRecords , numclust ) ; 92 ub las : : t r i angu l a r mat r i x<Real> L(numAttr , numAttr ) ; 93

94 for ( S i z e j =0; j< numclust ; ++j ) { 95 S i z e k = chol ( s igma [ j ] , L ) ; 96 ASSERT(k==0, ” i n v a l i d covar iance matrix ” << j ) ; 97

98 Real logDet = 0 . 0 ; 99 for ( S i z e i =0; i<numAttr ; ++i ) {

100 logDet += 2 .0 ∗ std : : l og (L( i , i ) ) ; 101 } 102

103 MahalanobisDistance md( sigma [ j ] ) ; 104 for ( S i z e i =0; i<numRecords ; ++i ) { 105 Real d i s t = md((∗ ds ) [ i ] , c l u s t e r s [ j ]−>c en t e r ( ) ) ; 106 l o g l l ( i , j ) = 107 − 0 . 5 ∗ d i s t ∗ d i s t 108 − 0 . 5 ∗ logDet + std : : l og ( p [ j ] ) 109 − 0 . 5 ∗ numAttr ∗ std : : l og (2∗3 .141592653589) ; 110 } 111 } 112

113 ublas : : vector<Real> mlog l l ( numRecords ) ; 114 ublas : : vector<Real> ones ( numclust ) ; 115 for ( S i z e i =0; i< numclust ; ++i ) { 116 ones ( i ) = 1 . 0 / numclust ;

119 ublas : : axpy prod ( l o g l l , ones , m log l l , true ) ; 120 Real temp = 0 . 0 ; 121 Real d en s i t y ; 122 l l = 0 . 0 ; 123 for ( S i z e i =0; i<numRecords ; ++i ) { 124 den s i t y = 0 . 0 ; 125 for ( S i z e j =0; j< numclust ; ++j ) { 126 pos t ( i , j ) = std : : exp ( l o g l l ( i , j )−mlog l l ( i ) ) ; 127 den s i t y += pos t ( i , j ) ; 128 } 129 for ( S i z e j =0; j< numclust ; ++j ) { 130 pos t ( i , j ) /= den s i t y ; 131 } 132 l l += std : : l og ( d en s i t y ) + m l o g l l ( i ) ; 133 } 134 } 135

136 void GMC: : mstep ( ) const { 137 boost : : shared ptr<Schema> schema = ds−>schema ( ) ; 138 S i z e numRecords = ds−>s i z e ( ) ; 139 S i z e numAttr = ds−>num attr ( ) ; 140 Real psum = 0 . 0 ; 141 for ( S i z e j =0; j< numclust ; ++j ) { 142 p [ j ] = ublas : : sum( ub las : : column ( post , j ) ) ; 143 psum += p [ j ] ; 144 } 145

146 ublas : : matrix<Real> cente red ( numRecords , numAttr ) ; 147 for ( S i z e k=0; k< numclust ; ++k) { 148 for ( S i z e j =0; j<numAttr ; ++j ) { 149 mu(k , j ) = ublas : : inner prod ( 150 ublas : : column ( post , k ) , 151 ublas : : column ( data , j ) ) / p [ k ] ; 152 (∗ schema ) [ j ]−> s e t c v a l ( 153 (∗ c l u s t e r s [ k]−>c en t e r ( ) ) [ j ] , mu(k , j ) ) ; 154 } 155

156 for ( S i z e i =0; i<numRecords ; ++i ) { 157 for ( S i z e j =0; j<numAttr ; ++j ) { 158 cente red ( i , j ) = std : : sq r t ( po s t ( i , k ) ) ∗ 159 ( data ( i , j ) − mu(k , j ) ) ; 160 } 161 } 162 for ( S i z e i =0; i<numAttr ; ++i ) { 163 for ( S i z e j =0; j<=i ; ++j ) { 164 s igma [ k ] ( i , j ) = 165 ublas : : inner prod ( 166 ub las : : column ( centered , i ) , 167 ub las : : column ( centered , j ) ) / p [ k ] ; 168 } 169 s igma [ k ] ( i , i ) += ep s i l o n ; 170 } 171 } 172

173 for ( S i z e j =0; j< numclust ; ++j ) { 174 p [ j ] /= psum ; 175 } 176 } 177

178 void GMC: : i n i t i a l i z a t i o n ( ) const { 179 S i z e numRecords = ds−>s i z e ( ) ; 180 S i z e numAttr = ds−>num attr ( ) ; 181

182 mu . r e s i z e ( numclust , numAttr ) ; 183 s igma . r e s i z e ( numclust ) ;

; 186 sigma [ i ] . r e s i z e ( numAttr ) ; 187 p [ i ] = 1 . 0 / numclust ; 188 } 189 data . r e s i z e ( numRecords , numAttr ) ; 190 pos t . r e s i z e ( numRecords , numclust ) ; 191

192 std : : vector<Real> mean(numAttr , 0 . 0 ) ; 193 boost : : shared ptr<Schema> schema = ds−>schema ( ) ; 194 for ( S i z e i =0; i<numRecords ; ++i ) { 195 for ( S i z e j =0; j<numAttr ; ++j ) { 196 Real va l = (∗ schema ) [ j ]−>g e t c v a l ( (∗ ds ) ( i , j ) ) ; 197 data ( i , j ) = val ; 198 mean [ j ] += val ; 199 } 200 } 201

202 for ( S i z e j =0; j<numAttr ; ++j ) { 203 mean [ j ] /= numRecords ; 204 } 205

206 for ( S i z e i =0; i<numAttr ; ++i ) { 207 for ( S i z e j =0; j<i ; ++j ) { 208 sigma [ 0 ] ( i , j ) = 0 . 0 ; 209 } 210 sigma [ 0 ] ( i , i ) = ( ub las : : inner prod ( 211 ublas : : column ( data , i ) , 212 ublas : : column ( data , i ) ) − 213 numRecords∗mean [ i ]∗mean [ i ] ) 214 / ( numRecords − 1 . 0 ) + ep s i l o n ; 215 } 216

217 for ( S i z e i =1; i< numclust ; ++i ) { 218 s igma [ i ] = sigma [ 0 ] ; 219 } 220

221 CM. r e s i z e ( numRecords ) ; 222 std : : vector<Intege r> index ( numRecords , 0 ) ; 223 for ( S i z e i =0; i<index . s i z e ();++ i ){ 224 index [ i ] = i ; 225 } 226

227 boost : : minstd rand generator ( 228 static cast<unsigned int >( s e ed ) ) ; 229 for ( S i z e i =0; i< numclust;++i ){ 230 boost : : un i form int<> u n i d i s t (0 , numRecords−i −1); 231 boost : : va r i a t e g ene ra to r <boost : : minstd rand&, 232 boost : : un i form int<> > uni ( generator , u n i d i s t ) ; 233 In t e ge r r = uni ( ) ; 234 for ( S i z e j =0; j<numAttr ; ++j ) { 235 mu( i , j ) = data ( r , j ) ; 236 } 237

238 boost : : shared ptr<Record> r e c = (∗ ds ) [ r ] ; 239 boost : : shared ptr<Record> c r = boost : : sha r ed p t r 240 <Record>(new Record (∗ r e c ) ) ; 241 boost : : shared ptr<CenterCluster> c = boost : : sha r ed p t r 242 <CenterCluster >(new Cente rCluste r ( c r ) ) ; 243 c−>s e t i d ( i ) ; 244 c l u s t e r s . push back ( c ) ; 245 index . e r a s e ( index . begin ()+ r ) ; 246 } 247 } 248 }

Listing B.28: The header file of class Kmean. 1 // c l / a lgor i thms/kmean. hpp 2 #ifndef CLUSLIB KMEAN HPP 3 #define CLUSLIB KMEAN HPP 4

5 #include<c l / a lgor i thms / a lgor i thm . hpp> 6 #include<c l / c l u s t e r s / c e n t e r c l u s t e r . hpp> 7 #include<c l / c l u s t e r s / p c l u s t e r i ng . hpp> 8 #include<c l / da ta s e t s / datase t . hpp> 9 #include<c l / d i s t anc e s / d i s t anc e . hpp>

10 #include<c l / types . hpp> 11

12 namespace ClusLib { 13

14 class Kmean: public Algorithm { 15 protected : 16 void setupArguments ( ) ; 17 void per formCluste r ing ( ) const ; 18 void f e t chRe su l t s ( ) const ; 19 virtual void i n i t i a l i z a t i o n ( ) const ; 20 virtual void i t e r a t i o n ( ) const ; 21 virtual void updateCenter ( ) const ; 22

23 mutable std : : vector<boost : : shared ptr<CenterCluster> > 24 c l u s t e r s ; 25 mutable std : : vector<Size> CM; 26 mutable Real e r r o r ; 27 mutable S i z e numiter ; 28

29 S i z e numclust ; 30 S i z e maxi te r ; 31 S i z e s e ed ; 32 boost : : shared ptr<Distance> d i s t anc e ; 33 } ; 34

Listing B.29: The source file of class Kmean. 1 // c l / a lgor i thms/kmean. cpp 2 #include<c l / a lgor i thms /kmean . hpp> 3 #include<c l / e r r o r s . hpp> 4 #include<iostream> 5 #include<boost /random . hpp> 6

7 namespace ClusLib { 8

9 void Kmean : : pe r formCluste r ing ( ) const { 10 i n i t i a l i z a t i o n ( ) ; 11 i t e r a t i o n ( ) ; 12 } 13

14 void Kmean : : setupArguments ( ) { 15 Algorithm : : setupArguments ( ) ; 16 ASSERT( ds−>i s numer i c ( ) , ” datase t i s not numeric” ) ; 17

18 d i s t anc e = arguments . d i s t anc e ; 19 ASSERT( d i s t anc e , ” d i s t anc e i s n u l l ” ) ; 20

23 ASSERT( numclust>=2 && numclust<= ds−>s i z e ( ) , 24 ” i nv a l i d numclust” ) ; 25

26 maxite r = boost : : any cast<Size >( 27 arguments . ge t ( ”maxiter ” ) ) ; 28 ASSERT( maxiter >0, ” i n v a l i d e maxiter” ) ; 29

30 s e ed = boost : : any cast<Size >( 31 arguments . ge t ( ” seed ” ) ) ; 32 ASSERT( seed >0, ” i n v a l i d e seed ” ) ; 33 } 34

35 void Kmean : : f e t chRe su l t s ( ) const { 36 PCluste r ing pc ; 37 for ( S i z e i =0; i< c l u s t e r s . s i z e ();++ i ){ 38 pc . add ( c l u s t e r s [ i ] ) ; 39 } 40 r e s u l t s .CM = CM; 41 r e s u l t s . i n s e r t ( ”pc” , boost : : any ( pc ) ) ; 42

43 e r r o r = 0 . 0 ; 44 for ( S i z e i =0; i< ds−>s i z e ();++ i ) { 45 e r r o r += (∗ d i s t an c e ) ( (∗ ds ) [ i ] , 46 c l u s t e r s [ CM[ i ]]−> c en t e r ( ) ) ; 47 } 48 r e s u l t s . i n s e r t ( ” e r r o r ” , boost : : any ( e r r o r ) ) ; 49 r e s u l t s . i n s e r t ( ”numiter” , boost : : any ( numiter ) ) ; 50 } 51

52 void Kmean : : i t e r a t i o n ( ) const { 53 bool bChanged = true ; 54

55 updateCenter ( ) ; 56 numiter = 1 ; 57 while ( bChanged ) { 58 bChanged = fa l se ; 59 S i z e s ; 60 Real dMin , dDist ; 61 for ( S i z e i =0; i< ds−>s i z e ();++ i ) { 62 dMin = MAXREAL; 63 for ( S i z e k=0;k< c l u s t e r s . s i z e ();++k) { 64 dDist = (∗ d i s t an c e ) ( (∗ ds ) [ i ] , 65 c l u s t e r s [ k]−>c en t e r ( ) ) ; 66 i f (dMin > dDist ) { 67 dMin = dDist ; 68 s = k ; 69 } 70 } 71

72 i f ( CM[ i ] != s ){ 73 c l u s t e r s [ CM[ i ]]−> e ra s e ( (∗ ds ) [ i ] ) ; 74 c l u s t e r s [ s]−>add ((∗ ds ) [ i ] ) ; 75 CM[ i ] = s ; 76 bChanged = true ; 77 } 78 } 79

80 updateCenter ( ) ; 81 ++ numiter ; 82 i f ( numiter > maxite r ){ 83 break ; 84 } 85 } 86 } 87

90 boost : : shared ptr<Schema> schema = ds−>schema ( ) ; 91 for ( S i z e k=0;k< c l u s t e r s . s i z e ();++k){ 92 for ( S i z e j =0; j<schema−>s i z e ();++ j ){ 93 dTemp = 0 . 0 ; 94 for ( S i z e i =0; i< c l u s t e r s [ k]−> s i z e ();++ i ){ 95 boost : : shared ptr<Record> r e c = 96 (∗ c l u s t e r s [ k ] ) [ i ] ; 97 dTemp += (∗ schema ) [ j ]−> g e t c v a l ( (∗ r e c ) [ j ] ) ; 98 } 99 (∗ schema ) [ j ]−> s e t c v a l (

100 (∗ c l u s t e r s [ k]−>c en t e r ( ) ) [ j ] , 101 dTemp/ c l u s t e r s [ k]−> s i z e ( ) ) ; 102 } 103 } 104 } 105

106 void Kmean : : i n i t i a l i z a t i o n ( ) const { 107 S i z e numRecords = ds−>s i z e ( ) ; 108 std : : vector<Size> index ( numRecords , 0 ) ; 109 CM. r e s i z e ( numRecords ) ; 110 for ( S i z e i =0; i<index . s i z e ();++ i ){ 111 index [ i ] = i ; 112 } 113

114 boost : : minstd rand generator ( s e ed ) ; 115 for ( S i z e i =0; i< numclust;++i ){ 116 boost : : un i form int<> u n i d i s t (0 , numRecords−i −1); 117 boost : : va r i a t e g ene ra to r <boost : : minstd rand&, 118 boost : : un i form int<> > uni ( generator , u n i d i s t ) ; 119 S i z e r = uni ( ) ; 120 boost : : shared ptr<Record> c r = boost : : sha r ed p t r 121 <Record>(new Record (∗ (∗ ds ) [ r ] ) ) ; 122 boost : : shared ptr<CenterCluster> c = boost : : sha r ed p t r 123 <CenterCluster >(new Cente rCluste r ( c r ) ) ; 124 c−>s e t i d ( i ) ; 125 c l u s t e r s . push back ( c ) ; 126 index . e r a s e ( index . begin ()+ r ) ; 127 } 128

129 S i z e s ; 130 Real dMin , dDist ; 131 for ( S i z e i =0; i<numRecords;++i ){ 132 dMin = MAXREAL; 133 for ( S i z e j =0; j< numclust;++j ){ 134 dDist = (∗ d i s t anc e ) ( (∗ ds ) [ i ] , 135 c l u s t e r s [ j ]−>c en t e r ( ) ) ; 136 i f ( dDist<dMin){ 137 s = j ; 138 dMin = dDist ; 139 } 140 } 141 c l u s t e r s [ s]−>add ((∗ ds ) [ i ] ) ; 142 CM[ i ] = s ; 143 } 144 } 145 }

Listing B.30: The header file of class Kprototype. 1 // c l / a lgor i thms/kprototype . hpp 2 #ifndef CLUSLIB KPROTOTYPE HPP 3 #define CLUSLIB KPROTOTYPE HPP 4

5 #include<c l / a lgor i thms /kmean . hpp> 6

7 namespace ClusLib { 8

9 class Kprototype : public Kmean { 10 private : 11 void setupArguments ( ) ; 12 void updateCenter ( ) const ; 13 } ; 14

Listing B.31: The source file of class Kprototype. 1 // c l / a lgor i thms/kprototype . cpp 2 #include<c l / a lgor i thms / kprototype . hpp> 3 #include<c l / e r r o r s . hpp> 4 #include<iostream> 5 #include<map> 6

7 namespace ClusLib { 8

9 void Kprototype : : setupArguments ( ) { 10 Algorithm : : setupArguments ( ) ; 11

12 d i s t anc e = arguments . d i s t anc e ; 13 ASSERT( d i s t anc e , ” d i s t anc e i s n u l l ” ) ; 14

15 numclust = boost : : any cast<Size >( 16 arguments . ge t ( ”numclust” ) ) ; 17 ASSERT( numclust>=2 && numclust<= ds−>s i z e ( ) , 18 ” i nv a l i d numclust” ) ; 19

20 maxite r = boost : : any cast<Size >( 21 arguments . ge t ( ”maxiter ” ) ) ; 22 ASSERT( maxiter >0, ” i n v a l i d e maxiter” ) ; 23

24 s e ed = boost : : any cast<Size >( 25 arguments . ge t ( ” seed ” ) ) ; 26 ASSERT( seed >0, ” i n v a l i d e seed ” ) ; 27 } 28

29 void Kprototype : : updateCenter ( ) const { 30 Real dTemp; 31 boost : : shared ptr<Schema> schema = ds−>schema ( ) ; 32 for ( S i z e k=0;k< c l u s t e r s . s i z e ();++k){ 33 for ( S i z e j =0; j<schema−>s i z e ();++ j ){ 34 i f ( (∗ schema ) [ j ]−> c an c a s t t o c ( ) ) { 35 dTemp = 0 . 0 ; 36 for ( S i z e i =0; i< c l u s t e r s [ k]−> s i z e ();++ i ){ 37 boost : : shared ptr<Record> r e c = 38 (∗ c l u s t e r s [ k ] ) [ i ] ; 39 dTemp+=(∗schema ) [ j ]−>g e t c v a l ( (∗ r e c ) [ j ] ) ; 40 }

e r en t ( ) ) [ j ] , 43 dTemp/ c l u s t e r s [ k]−> s i z e ( ) ) ; 44 } else { 45 DAttrInfo da = (∗ schema ) [ j ]−>c a s t t o d ( ) ; 46 std : : map<Size , Size> f r e q ; 47 for ( S i z e i =0; i<da . num values ( ) ; ++i ){ 48 f r e q . i n s e r t ( 49 std : : pair<Size , Size >(i , 0 ) ) ; 50 } 51

52 for ( S i z e i =0; i< c l u s t e r s [ k]−> s i z e ();++ i ){ 53 boost : : shared ptr<Record> r e c = 54 (∗ c l u s t e r s [ k ] ) [ i ] ; 55 f r e q [ (∗ schema ) [ j ]−>g e t d va l ( (∗ r e c ) [ j ] ) ] 56 += 1; 57 } 58

59 S i z e nMax = 0 ; 60 S i z e s = 0 ; 61 for ( S i z e i =0; i<da . num values ( ) ; ++i ){ 62 i f (nMax < f r e q [ i ] ) { 63 nMax = f r e q [ i ] ; 64 s = i ; 65 } 66 } 67 da . s e t d v a l ( (∗ c l u s t e r s [ k]−>c en t e r ( ) ) [ j ] , s ) ; 68 } 69 } 70 } 71 } 72 }

Listing B.32: The header file of class LW. 1 // c l / a lgor i thms/lw . hpp 2 #ifndef CLUSLIB LW HPP 3 #define CLUSLIB LW HPP 4

5 #include<c l / a lgor i thms / a lgor i thm . hpp> 6 #include<c l / c l u s t e r s / h c l u s t e r i ng . hpp> 7 #include<c l / u t i l i t i e s /nnmap . hpp> 8 #include<c l / da ta s e t s / datase t . hpp> 9 #include<c l / d i s t anc e s / d i s t anc e . hpp>

10 #include<c l / types . hpp> 11 #include<set> 12

13 namespace ClusLib { 14

15 class LW: public Algorithm { 16 protected : 17 typedef std : : map<Size , boost : : shared ptr<HCluster ing> > 18 Forest ; 19 typedef std : : map<Size , Size> SizeMap ; 20

21 void setupArguments ( ) ; 22 void per formCluste r ing ( ) const ; 23 void f e t chRe su l t s ( ) const ; 24 virtual void create dm () const ; 25 virtual void i n i t f o r e s t ( ) const ; 26 virtual void l i nkage ( ) const ; i z e q , S i z e r )

30 mutable iirMapA dm ; 31 mutable std : : set<Size> unmergedClusters ; 32 mutable Forest f o r e s t ; 33 mutable SizeMap c l u s t e r S i z e ; 34 boost : : shared ptr<Distance> d i s t anc e ; 35 } ; 36 } 37

Listing B.33: The source file of class LW. 1 // c l / a lgor i thms/lw . cpp 2 #include<c l / a lgor i thms / lw . hpp> 3 #include<c l / types . hpp> 4 #include<iostream> 5

6 namespace ClusLib { 7

8 void LW: : setupArguments ( ) { 9 Algorithm : : setupArguments ( ) ;

11 d i s t anc e = arguments . d i s t anc e ; 12 ASSERT( d i s t anc e , ” d i s t anc e i s n u l l ” ) ; 13 } 14

15 void LW: : create dm () const { 16 S i z e n = ds−>s i z e ( ) ; 17 for ( S i z e i =0; i<n−1;++i ){ 18 for ( S i z e j=i +1; j<n;++j ){ 19 dm . add item ( i , j , 20 (∗ d i s t an c e ) ( (∗ ds ) [ i ] , (∗ ds ) [ j ] ) ) ; 21 } 22 } 23 } 24

25 void LW: : per formCluste r ing ( ) const { 26 create dm ( ) ; 27 i n i t f o r e s t ( ) ; 28 l i nkage ( ) ; 29 } 30

31 void LW: : i n i t f o r e s t ( ) const { 32 S i z e n = ds−>s i z e ( ) ; 33 for ( S i z e s=0; s<n;++s ){ 34 boost : : shared ptr<Node> pln (new 35 LeafNode ((∗ ds ) [ s ] , s ) ) ; 36 pln−>s e t l e v e l ( 0 ) ; 37 boost : : shared ptr<HCluster ing> phc (new 38 HCluster ing ( pln ) ) ; 39 f o r e s t . i n s e r t ( Fore st : : va lue type ( s , phc ) ) ; 40 c l u s t e r S i z e . i n s e r t ( SizeMap : : va lue type ( s , 1 ) ) ; 41 unmergedClusters . i n s e r t ( s ) ; 42 } 43 } 44

45 void LW: : l i nkage ( ) const { 46 S i z e n = ds−>s i z e ( ) ; 47 std : : set<Size > : : i t e r a t o r i t ; 48 Real dMin , dTemp; 49 S i z e m, s1 , s2 ; 50 for ( S i z e s=0; s<n−1;++s ){ 51 dMin = MAXREAL; 52 std : : vector<Intege r> nvTemp( unmergedClusters . begin ( ) ,

55 for ( S i z e i =0; i<m;++i ) { 56 for ( S i z e j=i +1; j<m;++j ){ 57 dTemp = dm(nvTemp[ i ] , nvTemp[ j ] ) ; 58 i f (dTemp < dMin) { 59 dMin = dTemp; 60 s1 = nvTemp[ i ] ; 61 s2 = nvTemp[ j ] ; 62 } 63 } 64 } 65 boost : : shared ptr<Node> node = 66 f o r e s t [ s1]−>jo inWith (∗ f o r e s t [ s2 ] , dMin ) ; 67 node−>s e t i d (n+s ) ; 68 node−>s e t l e v e l ( s +1); 69 boost : : shared ptr<HCluster ing> phc = 70 boost : : shared ptr<HCluster ing>(new 71 HCluster ing ( node ) ) ; 72 f o r e s t . i n s e r t ( Fore st : : va lue type (n+s , phc ) ) ; 73 c l u s t e r S i z e . i n s e r t ( SizeMap : : va lue type (n+s , 74 c l u s t e r S i z e [ s1 ]+ c l u s t e r S i z e [ s2 ] ) ) ; 75 unmergedClusters . e r a s e ( s1 ) ; 76 unmergedClusters . e r a s e ( s2 ) ; 77 unmergedClusters . i n s e r t (n+s ) ; 78 update dm ( s1 , s2 , n+s ) ; 79 } 80 } 81

82 void LW: : f e t chRe su l t s ( ) const { 83 S i z e n = ds−>s i z e ( ) ; 84 r e s u l t s . i n s e r t ( ”hc” , HCluster ing ( f o r e s t [2∗n−2]−>root ( ) ) ) ; 85 } 86 }

Listing B.34: The header file of class Median. 1 // c l / a lgor i thms/median . hpp 2 #ifndef CLUSLIB MEDIAN HPP 3 #define CLUSLIB MEDIAN HPP 4

5 #include<c l / a lgor i thms / lw . hpp> 6

7 namespace ClusLib { 8

9 class Median : public LW { 10 private : 11 void setupArguments ( ) ; 12 void update dm ( S i z e p , S i z e q , S i z e r ) const ; 13 } ; 14

Listing B.35: The source file of class Median. 1 // c l / a lgor i thms/median . cpp 2 #include<c l / a lgor i thms /median . hpp> 3 #include<c l / d i s t anc e s / euc l i d eand i s t anc e . hpp>

6 namespace ClusLib { 7

8 void Median : : setupArguments ( ) { 9 Algorithm : : setupArguments ( ) ;

11 d i s t anc e = boost : : shared ptr<Distance >(new 12 Euc l ideanDistance ( ) ) ; 13 } 14

15 void Median : : update dm ( S i z e p , S i z e q , S i z e r ) const { 16 Real d i s t ; 17 std : : set<Size > : : i t e r a t o r i t ; 18 for ( i t = unmergedClusters . begin ( ) ; 19 i t != unmergedClusters . end ( ) ; ++i t ) { 20 i f (∗ i t == r ) { 21 continue ; 22 } 23

24 d i s t = 0.5∗ std : : pow( dm(p ,∗ i t ) , 2.0)+ 25 0 .5∗ std : : pow( dm(q ,∗ i t ) , 2.0)− 26 0 .25∗ std : : pow( dm(p , q ) , 2 . 0 ) ; 27 dm . add item ( r ,∗ i t , s td : : sq r t ( d i s t ) ) ; 28 } 29 } 30

Listing B.36: The header file of class Single. 1 // c l / a lgor i thms/ s ing l e . hpp 2 #ifndef CLUSLIB SINGLE HPP 3 #define CLUSLIB SINGLE HPP 4

5 #include<c l / a lgor i thms / lw . hpp> 6

7 namespace ClusLib { 8

9 class S ing l e : public LW { 10 private : 11 void update dm ( S i z e p , S i z e q , S i z e r ) const ; 12 } ; 13

Listing B.37: The source file of class Single. 1 // c l / a lgor i thms/ s ing l e . cpp 2 #include<c l / a lgor i thms / s i n g l e . hpp> 3

4 namespace ClusLib { 5

6 void S ing l e : : update dm ( S i z e p , S i z e q , S i z e r ) const { 7 Real d i s t ; 8 std : : set<Size > : : i t e r a t o r i t ; 9 for ( i t = unmergedClusters . begin ( ) ;

10 i t != unmergedClusters . end ( ) ; ++i t ) {

15 d i s t = std : : min ( dm(p ,∗ i t ) , dm(q ,∗ i t ) ) ; 16 dm . add item ( r ,∗ i t , d i s t ) ; 17 } 18 } 19

Listing B.38: The header file of class Ward. 1 // c l / a lgor i thms/ward . hpp 2 #ifndef CLUSLIB WARD HPP 3 #define CLUSLIB WARD HPP 4

5 #include<c l / a lgor i thms / lw . hpp> 6

7 namespace ClusLib { 8

9 class Ward : public LW { 10 private : 11 void setupArguments ( ) ; 12 void update dm ( S i z e p , S i z e q , S i z e r ) const ; 13 } ; 14

Listing B.39: The source file of class Ward. 1 // c l / a lgor i thms/ward . cpp 2 #include<c l / a lgor i thms /ward . hpp> 3 #include<c l / d i s t anc e s / euc l i d eand i s t anc e . hpp> 4 #include<cmath> 5

6 namespace ClusLib { 7

8 void Ward : : setupArguments ( ) { 9 Algorithm : : setupArguments ( ) ;

11 d i s t anc e = boost : : shared ptr<Distance >(new 12 Euc l ideanDistance ( ) ) ; 13 } 14

15 void Ward : : update dm( S i z e p , S i z e q , S i z e r ) const { 16 Real d i s t ; 17 std : : set<Size > : : i t e r a t o r i t ; 18 Real sp = c l u s t e r S i z e [ p ] ; 19 Real sq = c l u s t e r S i z e [ q ] ; 20 for ( i t = unmergedClusters . begin ( ) ; 21 i t != unmergedClusters . end ( ) ; ++i t ) { 22 i f (∗ i t == r ) { 23 continue ; 24 } 25

26 Real sk = c l u s t e r S i z e [∗ i t ] ; 27 Real s t = sp+sq+sk ;

. 0 ) sk+sq )/ s t − 30 std : : pow( dm(p , q ) , 2 . 0 )∗ sk / s t ; 31 dm . add item ( r ,∗ i t , s td : : sq r t ( d i s t ) ) ; 32 } 33 } 34

Listing B.40: The header file of class Weighted. 1 // c l / a lgor i thms/weighted . hpp 2 #ifndef CLUSLIB WEIGHTED HPP 3 #define CLUSLIB WEIGHTED HPP 4

5 #include<c l / a lgor i thms / lw . hpp> 6

7 namespace ClusLib { 8

9 class Weighted : public LW { 10 private : 11 void update dm ( S i z e p , S i z e q , S i z e r ) const ; 12 } ; 13

Listing B.41: The source file of class Weighted. 1 // c l / a lgor i thms/weighted . cpp 2 #include<c l / a lgor i thms /weighted . hpp> 3

4 namespace ClusLib { 5

6 void Weighted : : update dm ( S i z e p , S i z e q , S i z e r ) 7 const { 8 Real d i s t ; 9 std : : set<Size > : : i t e r a t o r i t ;

10 for ( i t = unmergedClusters . begin ( ) ; 11 i t != unmergedClusters . end ( ) ; ++i t ) { 12 i f (∗ i t == r ) { 13 continue ; 14 } 15

16 d i s t = ( dm(p ,∗ i t ) + dm(q ,∗ i t ) ) / 2 ; 17 dm . add item ( r ,∗ i t , d i s t ) ; 18 } 19 } 20

Listing B.42: Makefile.am in cl/clusters. 1 noinst LTLIBRARIES = l i bC l u s t e r s . l a 2

3 AM CPPFLAGS = −I${ t o p s r c d i r } −I$ { t op bu i l dd i r } 4

5 t h i s i n c l u d e d i r=${ i n c l ud ed i r }/${ subd i r } 6 this include HEADERS = \ 7 a l l . hpp \ 8 c e n t e r c l u s t e r . hpp \ 9 c l u s t e r . hpp \

10 h c l u s t e r i ng . hpp \ 11 p c l u s t e r i ng . hpp \ 12 sub spac e c l u s t e r . hpp 13

14 l ibClusters la SOURCES = \ 15 c e n t e r c l u s t e r . cpp \ 16 h c l u s t e r i ng . cpp \ 17 p c l u s t e r i ng . cpp \ 18 sub spac e c l u s t e r . cpp 19

21 a l l . hpp : Make f i l e . am 22 echo ”// This f i l e i s generated . P lease do not e d i t ! ” > $@ 23 echo >> $@ 24 f o r i in $ ( f i l t e r −out a l l . hpp , $ ( this include HEADERS ) ) ; \ 25 do \ 26 echo ”#inc l ude <${ subd i r }/ $$i>” >> $@; \ 27 done 28 echo >> $@ 29 subd i r s=’$ (SUBDIRS ) ’ ; f o r i in $$subd i r s ; do \ 30 echo ”#inc l ude <${ subd i r }/ $$ i / a l l . hpp>” >> $@ ; \ 31 done

Listing B.43: The header file of class CenterCluster. 1 // c l / c l u s t e r s / cente rc lu s t e r . hpp 2 #ifndef CLUSLIB CENTERCLUSTER HPP 3 #define CLUSLIB CENTERCLUSTER HPP 4

5 #include<c l / c l u s t e r s / c l u s t e r . hpp> 6 #include<c l / da ta s e t s / record . hpp> 7

8 namespace ClusLib { 9

10 class CenterCluste r : public Cluste r { 11 public : 12 CenterCluste r ( ) {} 13 CenterCluste r ( const boost : : shared ptr<Record>& cen te r ) ; 14 const boost : : shared ptr<Record>& cen te r ( ) const ; 15 boost : : shared ptr<Record>& cen te r ( ) ; 16

Listing B.44: The source file of class CenterCluster. 1 // c l / c l u s t e r s / cente rc lu s t e r . cpp 2 #include<c l / c l u s t e r s / c e n t e r c l u s t e r . hpp> 3

4 namespace ClusLib { 5

6 CenterCluste r : : Cente rCluste r ( 7 const boost : : shared ptr<Record>& cen te r ) 8 : c e n t e r ( c en t e r ) { 9 }

11 const boost : : shared ptr<Record>& CenterCluste r : : c en t e r ( ) 12 const { 13 return c e n t e r ; 14 } 15

16 boost : : shared ptr<Record>& CenterCluste r : : c en t e r ( ) { 17 return c e n t e r ; 18 } 19 }

Listing B.45: The header file of class Cluster. 1 // c l / c l u s t e r s / c l u s t e r . hpp 2 #ifndef CLUSLIB CLUSTER HPP 3 #define CLUSLIB CLUSTER HPP 4

5 #include<vector> 6 #include<c l / da ta s e t s / record . hpp> 7 #include<c l / u t i l i t i e s / c on ta in e r . hpp> 8

9 namespace ClusLib { 10

11 class Cluste r : public Container<boost : : shared ptr<Record> > { 12 public : 13 virtual ˜Cluste r ( ) {} 14

15 void s e t i d ( S i z e id ) ; 16 S i z e g e t i d ( ) const ; 17

18 protected : 19 S i z e i d ; 20 } ; 21

22 in l ine void Cluste r : : s e t i d ( S i z e id ) { 23 i d = id ; 24 } 25

26 in l ine S i z e Cluste r : : g e t i d ( ) const { 27 return i d ; 28 }

Listing B.46: The header file of class HClustering. 1 // c l / c l u s t e r s / hc lu s t e r ing . hpp 2 #ifndef CLUSLIB HCLUSTERING HPP 3 #define CLUSLIB HCLUSTERING HPP 4

5 #include<c l / pa t t e rn s/ in t e rna lnode . hpp> 6 #include<c l / pa t t e rn s/ l e a f node . hpp> 7 #include<c l / pa t t e rn s/node . hpp> 8 #include<c l / u t i l i t i e s /dendrogram . hpp> 9 #include<c l / c l u s t e r s / p c l u s t e r i ng . hpp>

11 namespace ClusLib { 12

13 class HCluster ing { 14 public : 15 HCluster ing ( ) {} 16 HCluster ing ( const boost : : shared ptr<Node>& root ) ; 17

18 boost : : shared ptr<Node> jo inWith ( HCluster ing& hc , 19 Real jo inValue ) ; 20 const boost : : shared ptr<Node>& root ( ) const ; 21 boost : : shared ptr<Node>& root ( ) ; 22 PCluste r ing ge t p c ( S i z e maxclust ) const ; 23 void save ( const std : : s t r i n g &f i l ename , S i z e p=100) const ; 24

25 private : 26 boost : : shared ptr<Node> r oo t ; 27 } ; 28

29 in l ine const boost : : shared ptr<Node>& HCluster ing : : root ( ) 30 const { 31 return r oo t ; 32 } 33

34 in l ine boost : : shared ptr<Node>& HCluster ing : : root ( ) { 35 return r oo t ; 36 } 37 } 38

Listing B.47: The source file of class HClustering. 1 // c l / c l u s t e r s / hc lu s t e r ing . cpp 2 #include<c l / c l u s t e r s / h c l u s t e r i ng . hpp> 3 #include<c l / e r r o r s . hpp> 4 #include<c l / pa t t e rn s/ p c v i s i t o r . hpp> 5 #include<c l / pa t t e rn s/ j o i n v a l u e v i s i t o r . hpp> 6 #include<c l / pa t t e rn s/ dendrogramvis i tor . hpp> 7 #include<algorithm> 8

9 namespace ClusLib { 10

11 HCluster ing : : HCluster ing ( const boost : : shared ptr<Node>& root ) 12 : r oo t ( root ) {

15 boost : : shared ptr<Node> HCluster ing : : jo inWith ( 16 HCluster ing& hc , Real jo inValue ) { 17 Inte rna lNode∗ p = new Inte rna lNode( jo inValue ) ; 18 boost : : shared ptr<Node> node (p ) ; 19

20 boost : : shared ptr<Node>& cn1 = roo t ; 21 const boost : : shared ptr<Node>& cn2 = hc . root ( ) ; 22

23 cn1−>s e t pa r en t ( node ) ; 24 cn2−>s e t pa r en t ( node ) ; 25 p−>add ( cn1 ) ; 26 p−>add ( cn2 ) ; 27

28 return node ; 29

32 PCluste r ing HCluster ing : : g e t p c ( S i z e maxclust ) const { 33 ASSERT(maxclust>0, ” i n v a l i d e maxclust ” ) ; 34 S i z e c u t l e v e l = root−>g e t l e v e l ( ) − maxclust + 2 ; 35 PCluste r ing pc ; 36 PCVisitor pcv ( pc , c u t l e v e l ) ; 37 root−>accept ( pcv ) ; 38

39 return pc ; 40 } 41

42 void HCluster ing : : save ( const std : : s t r i n g &f i l ename , 43 S i z e p) const { 44 Jo inVa lueV i s i t o r jvv ; 45 root−>accept ( jvv ) ; 46 std : : set<iirMapA : : va lue type , compare i i r> j o i nVa lu e s 47 = jvv . ge t j o i nVa lu e s ( ) ; 48 std : : set<iirMapA : : va lue type , compare i i r > : : i t e r a t o r i t ; 49 Real l j v , hjv ; 50 S i z e l l e v e l , h l e v e l ; 51 i t = j o inVa lu e s . end ( ) ; 52 −− i t ; 53 hjv = i t−>second ; 54 h l ev e l = root−>g e t l e v e l ( ) ; 55 i f (p == 0) { 56 i t = j o inVa lu e s . begin ( ) ; 57 l j v = i t−>second ; 58 l l e v e l = 0 ; 59 } else { 60 i t = j o inVa lu e s . begin ( ) ; 61 for ( S i z e i =0; i<j o i nVa lu e s . s i z e ( ) − p + 1 ; ++i ) { 62 ++i t ; 63 } 64 l j v = i t−>second ; 65 l l e v e l = root−>g e t l e v e l ( ) − p + 1 ; 66 } 67 DendrogramVisitor dgv ( hjv , l l e v e l , h l e v e l ) ; 68 root−>accept ( dgv ) ; 69 dgv . save ( f i l ename ) ; 70 } 71 }

Listing B.48: The header file of class PClustering. 1 // c l / c l u s t e r s / pc lu s t e r ing . hpp 2 #ifndef CLUSLIB PCLUSTERING HPP 3 #define CLUSLIB PCLUSTERING HPP 4

5 #include<c l / c l u s t e r s / c l u s t e r . hpp> 6 #include<c l / u t i l i t i e s / c on ta in e r . hpp> 7 #include<c l / u t i l i t i e s /nnmap . hpp> 8

9 namespace ClusLib { 10

11 class PCluste r ing : 12 public Container<boost : : shared ptr<Cluster> > { 13 public : 14 friend std : : ostream& operator<<(std : : ostream& os , 15 PCluste r ing& pc ) ; 16

17 PCluste r ing ( ) ; 18 void removeEmptyClusters ( ) ; 19 void c r e a t eC lu s t e r ID ( ) ; 20 void save ( const std : : s t r i n g& f i l ename ) ; 21

22 private : 23 void pr in t ( std : : ostream& os ) ; 24 void c a l c u l a t e ( ) ; 25 void c ro s s t ab ( ) ; 26

27 bool bCalcu lated ; 28 S i z e numclust ; 29 S i z e numclustGiven ; 30 std : : vector<Size> c l u s t s i z e ; 31 std : : vector<std : : s t r i ng> c l u s tLabe l ; 32 std : : vector<Size> CM; 33 std : : vector<Size> CMGiven ; 34 i i iMapB c r o s s t ab ; 35 } ; 36

Listing B.49: The source file of class PClustering. 1 // c l / c l u s t e r s / pc lu s t e r ing . cpp 2 #include<c l / c l u s t e r s / p c l u s t e r i ng . hpp> 3 #include<c l / e r r o r s . hpp> 4 #include<algorithm> 5 #include<f stream> 6 #include<iomanip> 7

8 namespace ClusLib { 9

10 PCluste r ing : : PCluste r ing ( ) : bCalcu lated ( fa l se ) , 11 numclustGiven ( Null<Size >()) { 12 } 13

14 std : : ostream& operator<<(std : : ostream& os , 15 PCluste r ing& pc ) { 16 pc . p r i n t ( os ) ; 17 return os ; 18 } 19

20 void PCluste r ing : : removeEmptyClusters ( ) {

( ) ) ; 23 data . c l e a r ( ) ; 24 for ( i t e r a t o r i t=temp . begin ( ) ; i t !=temp . end();++ i t ){ 25 i f ( (∗ i t )−> s i z e ( ) == 0) { 26 continue ; 27 } 28 data . push back (∗ i t ) ; 29 } 30 } 31

32 void PCluste r ing : : c r e a t eC lu s t e r ID ( ) { 33 removeEmptyClusters ( ) ; 34 for ( S i z e i =0; i< data . s i z e ();++ i ){ 35 data [ i ]−> s e t i d ( i ) ; 36 } 37 } 38

39 void PCluste r ing : : p r i n t ( std : : ostream& os ) { 40 c a l cu l a t e ( ) ; 41

42 os<<” Clu s t e r i ng Summary :\n” ; 43 os<<”Number o f c l u s t e r s : ”<< numclust<< ’ \n ’ ; 44 for ( S i z e i =0; i< numclust;++i ){ 45 os<<” S i z e o f Cluste r ”<<i<<” : ”<< c l u s t s i z e [ i ]<< ’ \n ’ ; 46 } 47 os<< ’ \n ’ ; 48 i f ( numclustGiven != Null<Size >()){ 49 os<<”Number o f g iven c l u s t e r s : ” 50 << numclustGiven<< ’\n ’ ; 51 os<<”Cross Tabulat ion :\n” ; 52 std : : vector<Size> w; 53 w. push back ( 1 3 ) ; 54 os<<std : : setw (w[0])<< std : : l e f t <<”Cluste r ID” ; 55 for ( S i z e j =0; j< numclustGiven ;++j ) { 56 w. push back ( c l u s tLab e l [ j ] . s i z e ( )+3) ; 57 os<<std : : setw (w[ j+1])<<std : : l e f t << c l u s tLabe l [ j ] ; 58 } 59 os<< ’\n ’ ; 60 for ( S i z e i =0; i< numclust;++i ){ 61 os<<std : : setw (w[0])<< std : : l e f t <<i ; 62 for ( S i z e j =0; j< numclustGiven ;++j ) { 63 i f ( c r o s s t ab . conta in key ( i , j ) ){ 64 os<<std : : setw (w[ j+1])<< std : : l e f t 65 << c r o s s t ab ( i , j ) ; 66 } else { 67 os<<std : : setw (w[ j+1])<< std : : l e f t <<0; 68 } 69 } 70 os<< ’\n ’ ; 71 } 72 } 73 } 74

75 void PCluste r ing : : save ( const std : : s t r i n g& f i l ename ) { 76 std : : o f s t ream o f ; 77 o f . open ( f i l ename . c s t r ( ) ) ; 78 pr in t ( o f ) ; 79

80 of<<”\nCluste r Membership\n” ; 81 of<<”Record ID , Cluste r Index , Cluste r Index Given\n” ; 82 for ( S i z e i =0; i<CM. s i z e ();++ i ) { 83 of<<i+1<<” , ”<< CM[ i ] ; 84 i f ( numclustGiven == Null<Size >()){ 85 of<<” , NA\n” ; 86 continue ; 87 }

90 o f . c l o s e ( ) ; 91 } 92

93 void PCluste r ing : : c r o s s t ab ( ) { 94 S i z e c1 , c2 ; 95 for ( S i z e i =0; i<CM. s i z e ();++ i ) { 96 c1 = CM[ i ] ; 97 c2 = CMGiven [ i ] ; 98 i f ( c r o s s t ab . conta in key ( c1 , c2 ) ) { 99 c r o s s t ab ( c1 , c2 ) += 1 ;

100 } else { 101 c r o s s t ab . add item ( c1 , c2 , 1 ) ; 102 } 103 } 104 } 105

106 void PCluste r ing : : c a l cu l a t e ( ) { 107 i f ( bCalcu lated ) { 108 return ; 109 } 110

111 c r e a t eC lu s t e r ID ( ) ; 112 numclust = data . s i z e ( ) ; 113 boost : : shared ptr<Cluster> c ; 114 boost : : shared ptr<Record> r ; 115

116 CM. r e s i z e ( 117 (∗ data [0]) [0]−> schema()−> i d I n f o ()−>num values ( ) ) ; 118 for ( S i z e i =0; i< numclust;++i ){ 119 c = data [ i ] ; 120 c l u s t s i z e . push back ( c−>s i z e ( ) ) ; 121 for ( S i z e j =0; j<c−>s i z e ();++ j ){ 122 r = (∗ c ) [ j ] ; 123 CM[ r−>g e t i d ( ) ] = c−>g e t i d ( ) ; 124 } 125 } 126

127 boost : : shared ptr<DAttrInfo> i n f o = 128 (∗ data [0]) [0]−> schema()−> l a b e l I n f o ( ) ; 129 i f ( ! i n f o ) { 130 bCalcu lated = true ; 131 return ; 132 } 133

134 numclustGiven = in fo−>num values ( ) ; 135 for ( S i z e i =0; i< numclustGiven ;++i ){ 136 c l u s tLabe l . push back ( in fo−> i n t t o s t r ( i ) ) ; 137 } 138

139 CMGiven . r e s i z e ( CM. s i z e ( ) ) ; 140 for ( S i z e i =0; i< numclust;++i ){ 141 c = data [ i ] ; 142 for ( S i z e j =0; j<c−>s i z e ();++ j ){ 143 r = (∗ c ) [ j ] ; 144 CMGiven [ r−>g e t i d ( ) ] = r−>g e t l a b e l ( ) ; 145 } 146 } 147

148 c ro s s t ab ( ) ; 149 bCalcu lated = true ; 150 } 151 }

Listing B.50: The header file of class SubspaceCluster. 1 // c l / c l u s t e r s / sub spacec lu s t e r . hpp 2 #ifndef CLUSLIB SUBSPACECLUSTERHPP 3 #define CLUSLIB SUBSPACECLUSTERHPP 4

5 #include<c l / c l u s t e r s / c e n t e r c l u s t e r . hpp> 6 #include<vector> 7

8 namespace ClusLib { 9

10 class SubspaceCluster : public CenterCluste r { 11 public : 12 SubspaceCluster ( const boost : : shared ptr<Record>& cen te r ) ; 13 std : : vector<Real>& w( ) ; 14 const std : : vector<Real>& w() const ; 15 Real& w( S i z e i ) ; 16 const Real& w( S i z e i ) const ; 17

18 protected : 19 std : : vector<Real> w ; 20 } ; 21 } 22

Listing B.51: The source file of class SubspaceCluster. 1 // c l / c l u s t e r s / sub spacec lu s t e r . cpp 2 #include<c l / c l u s t e r s / sub spac e c l u s t e r . hpp> 3

4 namespace ClusLib { 5

6 SubspaceCluster : : SubspaceCluster ( const 7 boost : : shared ptr<Record>& cen te r ) 8 : Cente rCluste r ( c en t e r ) { 9 w . r e s i z e ( c en t e r−>s i z e ( ) , 1 . 0 / c en t e r−>s i z e ( ) ) ;

12 std : : vector<Real>& SubspaceCluster : :w( ) { 13 return w ; 14 } 15

16 const std : : vector<Real>& SubspaceCluster : :w( ) const { 17 return w ; 18 } 19

20 Real& SubspaceCluster : :w( S i z e i ) { 21 ASSERT( i>=0 && i< w . s i z e ( ) , ” index out o f range” ) ; 22 return w [ i ] ; 23 } 24

25 const Real& SubspaceCluster : :w( S i z e i ) const { 26 ASSERT( i>=0 && i< w . s i z e ( ) , ” index out o f range” ) ; 27 return w [ i ] ; 28 } 29 }

Listing B.52: Makefile.am in cl/datasets. 1 noinst LTLIBRARIES = l ibData se t s . l a 2

3 AM CPPFLAGS = −I${ t o p s r c d i r } −I$ { t op bu i l dd i r } 4

5 t h i s i n c l u d e d i r=${ i n c l ud ed i r }/${ subd i r } 6 this include HEADERS = \ 7 a l l . hpp \ 8 a t t r i n f o . hpp \ 9 a t t rva lu e . hpp \

10 c a t t r i n f o . hpp \ 11 datase t . hpp \ 12 da t t r i n f o . hpp \ 13 record . hpp \ 14 schema . hpp 15

16 l ibDatasets la SOURCES = \ 17 a t t r i n f o . cpp \ 18 c a t t r i n f o . cpp \ 19 datase t . cpp \ 20 da t t r i n f o . cpp \ 21 record . cpp \ 22 schema . cpp 23

25 a l l . hpp : Make f i l e . am 26 echo ”// This f i l e i s generated . P lease do not e d i t ! ” > $@ 27 echo >> $@ 28 f o r i in $ ( f i l t e r −out a l l . hpp , $ ( this include HEADERS ) ) ; \ 29 do \ 30 echo ”#inc l ude <${ subd i r }/ $$i>” >> $@; \ 31 done 32 echo >> $@ 33 subd i r s=’$ (SUBDIRS ) ’ ; f o r i in $$subd i r s ; do \ 34 echo ”#inc l ude <${ subd i r }/ $$ i / a l l . hpp>” >> $@ ; \ 35 done

Listing B.53: The header file of class AttrValue. 1 // c l / da tase t s / a t t r va lue . hpp 2 #ifndef CLUSLIB ATTRVALUE HPP 3 #define CLUSLIB ATTRVALUE HPP 4

5 #include<boost / va r i an t . hpp> 6 #include<c l / types . hpp> 7 #include<c l / u t i l i t i e s / nu l l . hpp> 8

9 namespace ClusLib { 10

11 class AttrValue { 12 public :

16 typedef boost : : var iant<Real , S ize> va lue type ; 17 AttrValue ( ) ; 18

19 private : 20 va lue type va lu e ; 21 } ; 22

23 in l ine AttrValue : : AttrValue ( ) : va lu e ( Null<Size >()) { 24 } 25

Listing B.54: The header file of class AttrInfo. 1 // c l / da tase t s / a t t r i n f o . hpp 2 #ifndef CLUSLIB ATTRINFO HPP 3 #define CLUSLIB ATTRINFO HPP 4

5 #include<c l / da ta se t s / a t t rva lue . hpp> 6 #include<s t r i ng> 7

8 namespace ClusLib { 9

/// Kind of an attribute.
enum AttrType {
    Unknow,      // kind not determined
    Continuous,  // real-valued attribute
    Discrete     // attribute taking one of a set of values
};


16 class DAttrInfo ; 17 class CAttrInfo ; 18

19 class Att r In f o { 20 public : 21 Att r In f o ( const std : : s t r i ng &name , AttrType type ) ; 22 virtual ˜At t r In f o ( ) {} 23

24 const std : : s t r i n g& name( ) const ; 25 std : : s t r i n g& name ( ) ; 26 AttrType type ( ) const ; 27

28 virtual bool operator==(const Att r In f o& in f o ) const ; 29 virtual bool operator !=( const Att r In f o& in f o ) const ; 30 virtual Att r In f o∗ c lone ( ) const = 0; 31 virtual Real d i s t anc e ( const AttrValue&, 32 const AttrValue&) const = 0; 33 virtual void s e t d v a l ( AttrValue&, S i z e ) const ; 34 virtual S i z e g e t d va l ( const AttrValue&) const ; 35 virtual void s e t c v a l ( AttrValue&, Real ) const ; 36 virtual Real g e t c v a l ( const AttrValue&) const ; 37 virtual void set unknown ( AttrValue&) const = 0; 38 virtual bool is unknown ( const AttrValue&) const = 0; 39 virtual DAttrInfo& ca s t t o d ( ) ; 40 virtual const DAttrInfo& ca s t t o d ( ) const ; 41 virtual CAttrInfo& c a s t t o c ( ) ; 42 virtual const CAttrInfo& c a s t t o c ( ) const ; 43 virtual bool c an c a s t t o d ( ) const ; ;

47 bool equa l sha l l ow ( const Att r In f o&) const ; 48

49 private : 50 std : : s t r i n g name ; 51 AttrType type ; 52 } ; 53

54 in l ine const std : : s t r i n g& Att r In f o : : name ( ) const { 55 return name ; 56 } 57

58 in l ine std : : s t r i n g& Att r In f o : : name ( ) { 59 return name ; 60 } 61

62 in l ine AttrType At t r In f o : : type ( ) const { 63 return type ; 64 } 65

66 in l ine bool Att r In f o : : operator==(const Att r In f o& i n f o ) const { 67 return equa l sha l l ow ( i n f o ) ; 68 } 69

70 in l ine bool Att r In f o : : operator !=(const Att r In f o& i n f o ) const { 71 return ! e qua l sha l l ow ( i n f o ) ; 72 } 73

74 in l ine bool Att r In f o : : c a n c a s t t o d ( ) const { 75 return fa l se ; 76 } 77

78 in l ine bool Att r In f o : : c a n c a s t t o c ( ) const { 79 return fa l se ; 80 } 81

Listing B.55: The source file of class AttrInfo. 1 // c l / da tase t s / a t t r i n f o . cpp 2 #include<c l / da ta s e t s / a t t r i n f o . hpp> 3 #include<c l / e r r o r s . hpp> 4

5 namespace ClusLib { 6

7 Att r In f o : : At t r In f o ( const std : : s t r i n g &name , AttrType type ) 8 : name (name ) , type ( type ) { 9 }

11 bool Att r In f o : : e qua l sha l l ow ( const Att r In f o &i n f o ) const { 12 i f ( name != in f o . name ( ) ){ 13 return fa l se ; 14 } 15

16 i f ( type != in f o . type ( ) ){ 17 return fa l se ; 18 } 19

20 return true ; 21 } 22

23 void Att r In f o : : s e t d v a l ( AttrValue&, S i z e ) const {

27 S i z e At t r In f o : : g e t d v a l ( const AttrValue&) const { 28 FAIL( ”can not be ca l l e d ” ) ; 29 return 0 ; 30 } 31

32 void Att r In f o : : s e t c v a l ( AttrValue&, Real ) const { 33 FAIL( ”can not be ca l l e d ” ) ; 34 } 35

36 Real At t r In f o : : g e t c v a l ( const AttrValue&) const { 37 FAIL( ”can not be ca l l e d ” ) ; 38 return 0 . 0 ; 39 } 40

41 DAttrInfo& Att r In f o : : c a s t t o d ( ) { 42 FAIL( ”can not c a s t an At t r In f o to DAttrInfo” ) ; 43 return ∗( DAttrInfo∗)NULL; 44 } 45

46 const DAttrInfo& Att r In f o : : c a s t t o d ( ) const { 47 FAIL( ”can not c a s t an At t r In f o to DAttrInfo” ) ; 48 return ∗( DAttrInfo∗)NULL; 49 } 50

51 CAttrInfo& Att r In f o : : c a s t t o c ( ) { 52 FAIL( ”can not c a s t an At t r In f o to CAttrInfo” ) ; 53 return ∗( CAttrInfo ∗)NULL; 54 } 55

56 const CAttrInfo& Att r In f o : : c a s t t o c ( ) const { 57 FAIL( ”can not c a s t an At t r In f o to CAttrInfo” ) ; 58 return ∗( CAttrInfo ∗)NULL; 59 } 60

Listing B.56: The header file of class CAttrInfo. 1 // c l / da tase t s / c a t t r i n f o . hpp 2 #ifndef CLUSLIB CATTRINFO HPP 3 #define CLUSLIB CATTRINFO HPP 4

5 #include<c l / da ta s e t s / a t t r i n f o . hpp> 6

7 namespace ClusLib { 8

9 class CAttrInfo : public Att r In f o { 10 public : 11 CAttrInfo ( const std : : s t r i n g& name ) ; 12

13 CAttrInfo& c a s t t o c ( ) ; 14 const CAttrInfo& c a s t t o c ( ) const ; 15 bool c an ca s t t o c ( ) const ; 16 CAttrInfo∗ c lone ( ) const ; 17 Real d i s t anc e ( const AttrValue&, const AttrValue&) const ; 18 void s e t c v a l ( AttrValue&, Real ) const ; 19 Real g e t c v a l ( const AttrValue&) const ; 20 void set unknown ( AttrValue&) const ; ;

24 Real get min ( ) const ; 25 Real get max ( ) const ; 26 bool equa l ( const Att r In f o&) const ; 27

28 protected : 29 Real min ; 30 Real max ; 31 } ; 32

33 in l ine Real CAttrInfo : : g e t c v a l ( const AttrValue& av ) 34 const { 35 return boost : : get<Real>(av . va lu e ) ; 36 } 37

38 in l ine CAttrInfo& CAttrInfo : : c a s t t o c ( ) { 39 return ∗ this ; 40 } 41

42 in l ine const CAttrInfo& CAttrInfo : : c a s t t o c ( ) const { 43 return ∗ this ; 44 } 45

46 in l ine bool CAttrInfo : : c a n ca s t t o c ( ) const { 47 return true ; 48 } 49

50 in l ine void CAttrInfo : : se t min ( Real min ) { 51 min = min ; 52 } 53

54 in l ine void CAttrInfo : : set max ( Real max) { 55 max = max ; 56 } 57

58 in l ine Real CAttrInfo : : get min ( ) const { 59 return min ; 60 } 61

62 in l ine Real CAttrInfo : : get max ( ) const { 63 return max ; 64 } 65 } 66

Listing B.57: The source file of class CAttrInfo. 1 // c l / da tase t s / c a t t r i n f o . cpp 2 #include<c l / da ta se t s / c a t t r i n f o . hpp> 3 #include<c l / e r r o r s . hpp> 4 #include<c l / u t i l i t i e s / nu l l . hpp> 5 #include<boost / va r i an t/ get . hpp> 6

7 namespace ClusLib { 8

9 CAttrInfo : : CAttrInfo ( const std : : s t r i n g& name) 10 : A t t r In f o (name , Continuous ) { 11 min = Null<Real >() ; 12 max = Null<Real >() ; 13 } 14

15 bool CAttrInfo : : equa l ( const Att r In f o& a i ) const { 16 i f ( ! e qua l sha l l ow ( a i ) ){ 17 return fa l se ;

20 i f ( ! a i . c a n c a s t t o c ( ) ){ 21 return fa l se ; 22 } 23

24 return true ; 25 } 26

27 CAttrInfo∗ CAttrInfo : : c lone ( ) const { 28 return new CAttrInfo (∗ this ) ; 29 } 30

31 Real CAttrInfo : : d i s t anc e ( const AttrValue& av1 , 32 const AttrValue& av2 ) const { 33 i f ( is unknown ( av1 ) && is unknown ( av2 )){ 34 return 0 . 0 ; 35 } 36

37 i f ( is unknown ( av1 ) ˆ is unknown ( av2 )){ 38 return 1 . 0 ; 39 } 40

41 return boost : : get<Real>(av1 . va lu e ) − 42 boost : : get<Real>(av2 . va lu e ) ; 43 } 44

45 void CAttrInfo : : s e t c v a l ( AttrValue& av , Real va lue ) const { 46 av . va lu e = value ; 47 } 48

49 void CAttrInfo : : set unknown ( AttrValue& av ) const { 50 av . va lu e = Null<Real >() ; 51 } 52

53 bool CAttrInfo : : is unknown ( const AttrValue& av ) const { 54 return ( boost : : get<Real>(av . va lu e ) == Null<Real > ( ) ) ; 55 } 56

Listing B.58: The header file of class DAttrInfo. 1 // c l / da tase t s / da t t r i n f o . hpp 2 #ifndef CLUSLIB DATTRINFO HPP 3 #define CLUSLIB DATTRINFO HPP 4

5 #include<c l / da ta s e t s / a t t r i n f o . hpp> 6 #include<vector> 7

8 namespace ClusLib { 9

10 class DAttrInfo : public Att r In f o { 11 public : 12 DAttrInfo ( const std : : s t r i n g& name ) ; 13

14 S i z e num values ( ) const ; 15 const std : : s t r i n g& i n t t o s t r ( S i z e i ) const ; 16 S i z e s t r t o i n t ( const std : : s t r i ng&) const ; 17 S i z e add value ( const std : : s t r i n g&, 18 bool bAllowDupl icate = true ) ; n g &);

; 22 Real d i s t anc e ( const AttrValue&, const AttrValue&) const ; 23 void s e t d v a l ( AttrValue&, S i z e ) const ; 24 S i z e ge t d v a l ( const AttrValue&) const ; 25 void set unknown ( AttrValue&) const ; 26 bool is unknown ( const AttrValue&) const ; 27 DAttrInfo& ca s t t o d ( ) ; 28 const DAttrInfo& c a s t t o d ( ) const ; 29 bool c an c a s t t o d ( ) const ; 30 bool operator==(const Att r In f o& i n f o ) const ; 31 bool operator !=( const Att r In f o& i n f o ) const ; 32

33 protected : 34 typedef std : : vector<std : : s t r i ng > : : i t e r a t o r i t e r a t o r ; 35 typedef std : : vector<std : : s t r i ng > : : c o n s t i t e r a t o r 36 c o n s t i t e r a t o r ; 37

38 bool equa l ( const Att r In f o&) const ; 39

40 std : : vector<std : : s t r i ng> va lue s ; 41 } ; 42

43 in l ine S i z e DAttrInfo : : g e t d v a l ( 44 const AttrValue& av ) const { 45 return boost : : get<Size >(av . va lu e ) ; 46 } 47

48 in l ine DAttrInfo& DAttrInfo : : c a s t t o d ( ) { 49 return ∗ this ; 50 } 51

52 in l ine const DAttrInfo& 53 DAttrInfo : : c a s t t o d ( ) const { 54 return ∗ this ; 55 } 56

57 in l ine bool DAttrInfo : : c a n c a s t t o d ( ) const { 58 return true ; 59 } 60

61 in l ine bool DAttrInfo : : operator==(const Att r In f o& i n f o ) 62 const { 63 return equa l ( i n f o ) ; 64 } 65

66 in l ine bool DAttrInfo : : operator !=( const Att r In f o& i n f o ) 67 const { 68 return ! equa l ( i n f o ) ; 69 } 70

Listing B.59: The source file of class DAttrInfo. 1 // c l / da tase t s / da t t r i n f o . cpp 2 #include<c l / da ta se t s / da t t r i n f o . hpp> 3 #include<c l / e r r o r s . hpp> 4 #include<boost / va r i an t/ get . hpp> 5 #include<algorithm> 6

7 namespace ClusLib { 8

9 DAttrInfo : : DAttrInfo ( const std : : s t r i n g& name)

13 S i z e DAttrInfo : : num values ( ) const { 14 return va lue s . s i z e ( ) ; 15 } 16

17 const std : : s t r i n g& DAttrInfo : : i n t t o s t r ( S i z e i ) const { 18 ASSERT( i>=0 && i< va lu e s . s i z e ( ) , ” index out o f range” ) ; 19 return va lue s [ i ] ; 20 } 21

22 S i z e DAttrInfo : : s t r t o i n t ( const std : : s t r i ng& s ) const { 23 for ( S i z e i =0; i< va lu e s . s i z e ();++ i ) { 24 i f ( va lu e s [ i ] == s ) 25 return i ; 26 } 27

28 return Null<Size >() ; 29 } 30

31 S i z e DAttrInfo : : add value ( const std : : s t r i n g& s , 32 bool bAllowDupl icate ) { 33 S i z e ind = Null<Size >() ; 34 for ( S i z e i =0; i< va lu e s . s i z e ();++ i ) { 35 i f ( va lu e s [ i ] == s ) { 36 ind = i ; 37 break ; 38 } 39 } 40

41 i f ( ind == Null<Size >()) { 42 va lu e s . push back ( s ) ; 43 return va lue s . s i z e ()−1; 44 } else { 45 i f ( bAl lowDupl icate ) { 46 return ind ; 47 } else { 48 FAIL( ” va lue ”<<s<<” a l ready e x i s t s ” ) ; 49 return Null<Size >() ; 50 } 51 } 52 } 53

54 void DAttrInfo : : remove value ( const std : : s t r i n g& val ) { 55 i t e r a t o r i t = std : : f i nd ( va lu e s . begin ( ) , va lu e s . end ( ) , 56 va l ) ; 57 i f ( i t != va lue s . end ( ) ){ 58 va lue s . e r a s e ( i t ) ; 59 } 60 } 61

62 void DAttrInfo : : remove value ( S i z e i ) { 63 i f ( i>=0 | | i< va lue s . s i z e ( ) ){ 64 va lue s . e r a s e ( va lu e s . begin ( ) + i ) ; 65 } 66 } 67

68 bool DAttrInfo : : equa l ( const Att r In f o& i n f o ) const { 69 i f ( ! e qua l sha l l ow ( i n f o ) ) 70 return fa l se ; 71

72 const DAttrInfo& nai = in f o . c a s t t o d ( ) ; 73 i f ( na i . num values ( ) != va lue s . s i z e ( ) ) 74 return fa l se ; 75

76 for ( S i z e i =0; i< va lu e s . s i z e ();++ i ){

82 return true ; 83 } 84

85 DAttrInfo∗ DAttrInfo : : c lone ( ) const { 86 return new DAttrInfo (∗ this ) ; 87 } 88

89 Real DAttrInfo : : d i s t anc e ( const AttrValue& av1 , 90 const AttrValue& av2 ) const { 91 i f ( is unknown ( av1 ) && is unknown ( av2 ) ) { 92 return 0 . 0 ; 93 } 94

95 i f ( is unknown ( av1 ) ˆ is unknown ( av2 ) ) { 96 return 1 . 0 ; 97 } 98

99 i f ( boost : : get<Size >(av1 . va lu e ) == 100 boost : : get<Size >(av2 . va lu e ) ) { 101 return 0 . 0 ; 102 } else { 103 return 1 . 0 ; 104 } 105 } 106

107 void DAttrInfo : : s e t d v a l ( AttrValue& av , S i z e va lue ) const { 108 ASSERT( value>=0 && value< va lue s . s i z e ( ) , 109 ” i nv a l i d va lue ”<<va lue ) ; 110 av . va lu e = value ; 111 } 112

113 void DAttrInfo : : set unknown ( AttrValue& av ) const { 114 av . va lu e = Null<Size >() ; 115 } 116

117 bool DAttrInfo : : is unknown ( const AttrValue& av ) const { 118 return ( boost : : get<Size >(av . va lu e ) == Null<Size > ( ) ) ; 119 } 120 }

Listing B.60: The header file of class Record. 1 // c l / da tase t s /record . hpp 2 #ifndef CLUSLIB RECORD HPP 3 #define CLUSLIB RECORD HPP 4

5 #include<boost / sha r ed p t r . hpp> 6 #include<c l / types . hpp> 7 #include<c l / e r r o r s . hpp> 8 #include<c l / da ta s e t s /schema . hpp> 9 #include<c l / da ta s e t s / a t t r i n f o . hpp>

10 #include<c l / u t i l i t i e s / c on ta in e r . hpp> 11 #include<vector> 12

13 namespace ClusLib { 14

schema ) ; 18

19 const boost : : shared ptr<Schema>& schema ( ) const ; 20 AttrValue& labe lValue ( ) ; 21 const AttrValue& labe lValue ( ) const ; 22 AttrValue& idValue ( ) ; 23 const AttrValue& idValue ( ) const ; 24 S i z e g e t i d ( ) const ; 25 S i z e g e t l a b e l ( ) const ; 26

27 private : 28 boost : : shared ptr<Schema> schema ; 29 AttrValue l a b e l ; 30 AttrValue i d ; 31 } ; 32

33 in l ine const boost : : shared ptr<Schema>& Record : : schema ( ) 34 const { 35 return schema ; 36 } 37

38 in l ine AttrValue& Record : : l abe lValue ( ) { 39 return l a b e l ; 40 } 41

42 in l ine const AttrValue& Record : : l abe lValue ( ) const { 43 return l a b e l ; 44 } 45

46 in l ine AttrValue& Record : : idValue ( ) { 47 return i d ; 48 } 49

50 in l ine const AttrValue& Record : : idValue ( ) const { 51 return i d ; 52 } 53

54 in l ine S i z e Record : : g e t i d ( ) const { 55 return schema−>i d I n f o ()−> ge t d va l ( i d ) ; 56 } 57

58 in l ine S i z e Record : : g e t l a b e l ( ) const { 59 return schema−>l a b e l I n f o ()−> g e t d va l ( l a b e l ) ; 60 } 61

Listing B.61: The source file of class Record. 1 // c l / da tase t s /record . cpp 2 #include<c l / da ta s e t s / record . hpp> 3 #include<c l / u t i l i t i e s / nu l l . hpp> 4 #include<boost / va r i an t/ get . hpp> 5

6 namespace ClusLib { 7

8 Record : : Record ( const boost : : shared ptr<Schema>& schema ) 9 : schema( schema ) {

10 data . r e s i z e ( schema−>s i z e ( ) ) ; 11 for ( S i z e i =0; i< schema−>s i z e ();++ i ){ 12 (∗ schema ) [ i ]−>set unknown ( data [ i ] ) ; 13 } 14 }

: : r , 17 const std : : s t r i n g& val ) { 18 S i z e l a b e l = l ab e l I n f o−>add value ( va l ) ; 19 l a b e l I n f o−>s e t d v a l ( r−>l abe lValue ( ) , l a b e l ) ; 20 } 21

22 void Schema : : s e t i d ( boost : : shared ptr<Record>& r , 23 const std : : s t r i n g& val ) { 24 S i z e id = id In f o−>add value ( val , fa l se ) ; 25 i d In f o−>s e t d v a l ( r−>idValue ( ) , id ) ; 26 } 27

Listing B.62: The header file of class Schema. 1 // c l / da tase t s /schema . hpp 2 #ifndef CLUSLIB SCHEMA HPP 3 #define CLUSLIB SCHEMA HPP 4

5 #include<c l / types . hpp> 6 #include<c l / da ta s e t s / a t t r i n f o . hpp> 7 #include<c l / da ta se t s / c a t t r i n f o . hpp> 8 #include<c l / da ta se t s / da t t r i n f o . hpp> 9 #include<c l / u t i l i t i e s / c on ta in e r . hpp>

10 #include<boost / sha r ed p t r . hpp> 11 #include<vector> 12

13 namespace ClusLib { 14

15 class Record ; 16

17 class Schema : public Container<boost : : shared ptr<Attr In fo> > { 18 public : 19 virtual ˜Schema( ) {} 20

21 Schema∗ c lone ( ) const ; 22 boost : : shared ptr<DAttrInfo>& l ab e l I n f o ( ) ; 23 const boost : : shared ptr<DAttrInfo>& l a b e l I n f o ( ) const ; 24 boost : : shared ptr<DAttrInfo>& id I n f o ( ) ; 25 const boost : : shared ptr<DAttrInfo>& id In f o ( ) const ; 26 void s e t l a b e l ( boost : : shared ptr<Record>& r , 27 const std : : s t r i n g& val ) ; 28 void s e t i d ( boost : : shared ptr<Record>& r , 29 const std : : s t r i n g& val ) ; 30 bool i s l a b e l l e d ( ) const ; 31

32 virtual bool equa l ( const Schema& o ) const ; 33 virtual bool e qua l no l a be l ( const Schema& o ) const ; 34 virtual bool operator==(const Schema& o ) const ; 35 virtual bool operator !=( const Schema& o ) const ; 36 virtual bool is member ( const Att r In f o& i n f o ) const ; 37

38 protected : 39 boost : : shared ptr<DAttrInfo> l a b e l I n f o ; 40 boost : : shared ptr<DAttrInfo> i d I n f o ; 41 } ; 42

43 in l ine bool Schema : : operator==(const Schema& o ) const { 44 return equa l ( o ) ;

o ) const { 48 return ! equa l ( o ) ; 49 } 50

51 in l ine boost : : shared ptr<DAttrInfo>& Schema : : l a b e l I n f o ( ) { 52 return l a b e l I n f o ; 53 } 54

55 in l ine const boost : : shared ptr<DAttrInfo>& Schema : : l a b e l I n f o ( ) 56 const { 57 return l a b e l I n f o ; 58 } 59

60 in l ine boost : : shared ptr<DAttrInfo>& Schema : : i d I n f o ( ) { 61 return i d I n f o ; 62 } 63

64 in l ine const boost : : shared ptr<DAttrInfo>& Schema : : i d In f o ( ) 65 const { 66 return i d I n f o ; 67 } 68 } 69 #endif

Listing B.63: The source file of class Schema. 1 // c l / da tase t s /schema . cpp 2 #include<c l / da ta s e t s /schema . hpp> 3 #include<c l / e r r o r s . hpp> 4

5 namespace ClusLib { 6

7 Schema∗ Schema : : c lone ( ) const { 8 Schema∗ r e t = new Schema ( ) ; 9 for ( S i z e i =0; i< data . s i z e ();++ i ){

10 re t−>add ( 11 boost : : shared ptr<Attr In fo >( data [ i ]−>c lone ( ) ) ) ; 12 } 13 re t−>l a b e l I n f o ( ) = boost : : shared ptr<DAttrInfo>( 14 l a b e l I n f o−>c lone ( ) ) ; 15 re t−>i d In f o ( ) = boost : : shared ptr<DAttrInfo>( 16 i d In f o−>c lone ( ) ) ; 17 return r e t ; 18 } 19

20 bool Schema : : i s l a b e l l e d ( ) const { 21 i f ( l a b e l I n f o ){ 22 return true ; 23 } else { 24 return fa l se ; 25 } 26 } 27

28 bool Schema : : equa l ( const Schema& o ) const { 29 i f ( i s l a b e l l e d ( ) ˆ o . i s l a b e l l e d ( ) ){ 30 return fa l se ; 31 } 32

33 i f ( i s l a b e l l e d ( ) && ∗ l a b e l I n f o != ∗( o . l a b e l I n f o ( ) ) ){ 34 return fa l se ; 35 } 36

37 return e qu a l n o l a b e l ( o ) ; 38 } 39

e ( 42 return fa l se ; 43 } 44

45 for ( S i z e i =0; i< data . s i z e ();++ i ){ 46 i f (∗ ( data [ i ] ) != ∗( o [ i ] ) ) { 47 return fa l se ; 48 } 49 } 50

51 return true ; 52 } 53

54 bool Schema : : is member ( const Att r In f o& i n f o ) const { 55 for ( S i z e i =0; i< data . s i z e ();++ i ){ 56 i f (∗ ( data [ i ] ) == in f o ){ 57 return true ; 58 } 59 } 60

61 return fa l se ; 62 } 63 }

Listing B.64: The header file of class Dataset. 1 // c l / da tase t s / datase t . hpp 2 #ifndef CLUSLIB DATASET HPP 3 #define CLUSLIB DATASET HPP 4

5 #include<c l / da ta s e t s / record . hpp> 6 #include<c l / da ta s e t s /schema . hpp> 7 #include<c l / u t i l i t i e s / c on ta in e r . hpp> 8 #include<vector> 9 #include<iostream>

11 namespace ClusLib { 12

13 class Dataset : public Container<boost : : shared ptr<Record> > { 14 public : 15 friend std : : ostream& operator<<(std : : ostream& os , 16 const Dataset& ds ) ; 17

18 Dataset ( const boost : : shared ptr<Schema>&); 19 Dataset ( const Dataset&); 20

21 S i z e num attr ( ) const ; 22 const boost : : shared ptr<Schema>& schema ( ) const ; 23 AttrValue& operator ( ) ( S i z e i , S i z e j ) ; 24 const AttrValue& operator ( ) ( S i z e i , S i z e j ) const ; 25 bool i s numer i c ( ) const ; 26 bool i s c a t e g o r i c a l ( ) const ; 27 void save ( const std : : s t r i n g& f i l ename ) const ; 28 std : : vector<Size> get CM () const ; 29 Dataset& operator=(const Dataset&); 30

31 protected : 32 void pr in t ( std : : ostream& os ) const ; 33

34 boost : : shared ptr<Schema> schema ;

( { 38 return schema−>s i z e ( ) ; 39 } 40

41 in l ine const boost : : shared ptr<Schema>& Dataset : : schema ( ) 42 const { 43 return schema ; 44 } 45

46 in l ine AttrValue& Dataset : : operator ( ) ( S i z e i , S i z e j ) { 47 return (∗ data [ i ] ) [ j ] ; 48 } 49

50 in l ine const AttrValue& 51 Dataset : : operator ( ) ( S i z e i , S i z e j ) const { 52 return (∗ data [ i ] ) [ j ] ; 53 } 54

Listing B.65: The source file of class Dataset. 1 // c l / da tase t s / datase t . cpp 2 #include<c l / da ta s e t s / datase t . hpp> 3 #include<boost / l e x i c a l c a s t . hpp> 4 #include<f stream> 5 #include<sstream> 6

7 namespace ClusLib { 8

9 std : : ostream& operator<<(std : : ostream& os , 10 const Dataset& ds ) { 11 ds . p r i n t ( os ) ; 12 return os ; 13 } 14

15 void Dataset : : p r i n t ( std : : ostream& os ) const { 16 os<<”Number o f r e c o rd s : ”<<s i z e ()<< ’ \n ’ ; 17 os<<”Number o f a t t r i b u t e s : ”<<num attr()<< ’ \n ’ ; 18 In t e ge r n = 0 ; 19 for ( S i z e i =0; i<num attr ( ) ; ++i ) { 20 i f ( (∗ schema ) [ i ]−> c an ca s t t o c ( ) ){ 21 ++n ; 22 } 23 } 24 os<<”Number o f numerical a t t r i b u t e s : ”<<n<< ’\n ’ ; 25 os<<”Number o f c a t e g o r i c a l a t t r i b u t e s : ” 26 <<num attr ()−n<< ’ \n ’ ; 27 } 28

29 Dataset : : Dataset ( const boost : : shared ptr<Schema>& schema ) 30 : schema( schema ) { 31 } 32

33 Dataset : : Dataset ( const Dataset& other ) { 34 schema = 35 boost : : shared ptr<Schema>(other . schema()−>c lone ( ) ) ; 36

37 for ( S i z e i =0; i<other . s i z e ();++ i ) { 38 boost : : shared ptr<Record> tmp = 39 boost : : shared ptr<Record>(new Record ( schema ) ) ; 40 for ( S i z e j =0; j< schema−>s i z e ();++ j ){ 41 (∗ tmp ) [ j ] = other ( i , j ) ;

) ; 44 } 45 } 46

47 Dataset& Dataset : : operator=(const Dataset& other ) { 48 i f ( this != &other ) { 49 schema = boost : : shared ptr<Schema>( 50 other . schema()−> c lone ( ) ) ; 51

52 for ( S i z e i =0; i<other . s i z e ();++ i ) { 53 boost : : shared ptr<Record> tmp = 54 boost : : shared ptr<Record>( 55 new Record ( schema ) ) ; 56 for ( S i z e j =0; j< schema−>s i z e ();++ j ){ 57 (∗ tmp ) [ j ] = other ( i , j ) ; 58 } 59 data . push back (tmp ) ; 60 } 61 } 62

63 return ∗ this ; 64 } 65

66 bool Dataset : : i s numer i c ( ) const { 67 bool r e t = true ; 68 for ( S i z e i =0; i< schema−>s i z e ();++ i ){ 69 i f ( ! (∗ schema ) [ i ]−>c an c a s t t o c ( ) ) { 70 r e t = fa l se ; 71 } 72 } 73

74 return r e t ; 75 } 76

77 bool Dataset : : i s c a t e g o r i c a l ( ) const { 78 bool r e t = true ; 79 for ( S i z e i =0; i< schema−>s i z e ();++ i ){ 80 i f ( ! (∗ schema ) [ i ]−>c an c a s t t o d ( ) ) { 81 r e t = fa l se ; 82 } 83 } 84

85 return r e t ; 86 } 87

88 void Dataset : : save ( const std : : s t r i n g& f i l ename ) const { 89 boost : : shared ptr<DAttrInfo> l a b e l = schema−>l a b e l I n f o ( ) ; 90 boost : : shared ptr<DAttrInfo> id = schema−>i d In f o ( ) ; 91 std : : s t r i ng s t r e am ss ; 92 for ( S i z e i =0; i< data . s i z e ( ) ; ++i ) { 93 ss<<id−> i n t t o s t r ( data [ i ]−>g e t i d ( ) ) ; 94 for ( S i z e j =0; j< schema−>s i z e ( ) ; ++j ) { 95 ss<<” , ” ; 96 i f ( (∗ schema ) [ j ]−> c a n c a s t t o c ( ) ) { 97 ss <<(∗ schema ) [ j ]−>g e t c v a l ( (∗ data [ i ] ) [ j ] ) ; 98 } else { 99 S i z e va l =

100 (∗ schema ) [ j ]−>g e t d va l ( (∗ data [ i ] ) [ j ] ) ; 101 ss <<(∗ schema ) [ j ]−>c a s t t o d ( ) . i n t t o s t r ( 102 va l ) ; 103 } 104 } 105 i f ( l a b e l ) { 106 ss<<” , ”<<l ab e l−>i n t t o s t r ( data [ i ]−>g e t l a b e l ( ) ) ; 107 } 108 ss<<std : : endl ;

111 std : : o f s t ream o f ( f i l ename . c s t r ( ) ) ; 112 ASSERT( o f . good ( ) , ” can not open f i l e ” << f i l ename ) ; 113 of<<s s . s t r ( ) ; 114 o f . c l o s e ( ) ; 115

116 s s . s t r ( ”” ) ; 117 ss<<”This i s the schema f i l e f o r datase t ” 118 <<f i l ename<<std : : endl ; 119 ss<<” /// : schema”<<std : : endl ; 120 ss<<” 1 , RecordID”<<std : : endl ; 121 for ( S i z e j =0; j< schema−>s i z e ( ) ; ++j ) { 122 ss<<j+2<<” , ” ; 123 i f ( (∗ schema ) [ j ]−> c a n c a s t t o c ( ) ) { 124 ss<<”Continuous” ; 125 } else { 126 ss<<” Di s c r e t e ” ; 127 } 128 ss<<std : : endl ; 129 } 130 i f ( l a b e l ) { 131 ss<< schema−>s i z e ()+2<<” , Class ”<<std : : endl ; 132 } 133

134 S i z e ind = f i l ename . f i n d l a s t o f ( ’ . ’ ) ; 135 ASSERT( ind != std : : s t r i n g : : npos , 136 f i l ename << ” i s an i n v a l i d f i l e name” ) ; 137 std : : s t r i n g schema f i l e = 138 f i l ename . sub s t r (0 , ind+1) + ”names” ; 139

140 o f . open ( s ch ema f i l e . c s t r ( ) ) ; 141 ASSERT( o f . good ( ) , ” can not open f i l e ” << s ch ema f i l e ) ; 142 of<<s s . s t r ( ) ; 143 o f . c l o s e ( ) ; 144 } 145

146 std : : vector<Size> Dataset : : get CM () const { 147 std : : vector<Size> CM; 148 boost : : shared ptr<DAttrInfo> l a b e l = schema−>l a b e l I n f o ( ) ; 149 i f ( ! l a b e l ) { 150 return CM; 151 } 152

153 for ( S i z e i =0; i< data . s i z e ( ) ; ++i ) { 154 CM. push back ( l ab e l−>g e t d va l ( 155 data [ i ]−> l abe lValue ( ) ) ) ; 156 } 157

Listing B.66: Makefile.am in cl/distances. 1 noinst LTLIBRARIES = l ibD i s t an c e s . l a 2

3 AM CPPFLAGS = −I${ t o p s r c d i r } −I$ { t op bu i l dd i r } 4

5 t h i s i n c l u d e d i r=${ i n c l ud ed i r }/${ subd i r } 6 this include HEADERS = \ 7 a l l . hpp \ 8 d i s t anc e . hpp \ 9 euc l i d e and i s t anc e . hpp \

10 mahalanob i sd i stance . hpp \ 11 minkowskidistance . hpp \ 12 mixedd i stance . hpp \ 13 s implematch ingd i stance . hpp 14

15 l ibDistances la SOURCES = \ 16 euc l i d e and i s t anc e . cpp \ 17 mahalanob i sd i stance . cpp \ 18 minkowskidistance . cpp \ 19 mixedd i stance . cpp \ 20 s implematch ingd i stance . cpp 21

22 a l l . hpp : Make f i l e . am 23 echo ”// This f i l e i s generated . P lease do not e d i t ! ” > $@ 24 echo >> $@ 25 f o r i in $ ( f i l t e r −out a l l . hpp , $ ( this include HEADERS ) ) ; \ 26 do \ 27 echo ”#inc l ude <${ subd i r }/ $$i>” >> $@; \ 28 done 29 echo >> $@ 30 subd i r s=’$ (SUBDIRS ) ’ ; f o r i in $$subd i r s ; do \ 31 echo ”#inc l ude <${ subd i r }/ $$ i / a l l . hpp>” >> $@ ; \ 32 done

Listing B.67: The header file of class Distance. 1 // c l / dis tances /dis tance . hpp 2 #ifndef CLUSLIB DISTANCE HPP 3 #define CLUSLIB DISTANCE HPP 4

5 #include<c l / types . hpp> 6 #include<c l / da ta s e t s / record . hpp> 7 #include<c l / e r r o r s . hpp> 8 #include<f un c t i ona l> 9 #include<boost / sha r ed p t r . hpp>

12 namespace ClusLib { 13

14 class Distance : std : : b i na ry func t i on< 15 boost : : shared ptr<Record >, {

19 Distance ( const std : : s t r i ng &name ) ; 20

21 const std : : s t r i n g& name( ) const ; 22 virtual Real operator ( ) ( const boost : : shared ptr<Record>&, 23 const boost : : shared ptr<Record>& ) const = 0; 24

25 protected : 26 std : : s t r i n g name ; 27 } ; 28

29 in l ine Distance : : Distance ( const std : : s t r i n g& name) 30 : name (name) { 31 } 32

33 in l ine const std : : s t r i n g& Distance : : name ( ) const { 34 return name ; 35 } 36

Listing B.68: The header file of class EuclideanDistance. 1 // c l / dis tances / euc l ideandis tance . hpp 2 #ifndef CLUSLIB EUCLIDEAN DISTANCE 3 #define CLUSLIB EUCLIDEAN DISTANCE 4

5 #include<c l / d i s t anc e s /minkowskidistance . hpp> 6

8 namespace ClusLib { 9

10 class Euc l ideanDistance : public MinkowskiDistance { 11 public : 12 Euc l ideanDistance ( ) ; 13 Real operator ( ) ( const boost : : shared ptr<Record>&, 14 const boost : : shared ptr<Record>& ) const ; 15 } ; 16

Listing B.69: The source file of class EuclideanDistance. 1 // c l / dis tances / euc l ideandis tance . cpp 2 #include<c l / d i s t anc e s / euc l i d eand i s t anc e . hpp> 3

4 namespace ClusLib { 5

6 Euc l ideanDistance : : Euc l ideanDistance ( ) 7 : MinkowskiDistance ( 2 . 0 ) { 8 name = ”Eucl idean d i s t anc e ” ; 9 }

11 Real Euc l ideanDistance : : operator ( ) ( 12 const boost : : shared ptr<Record> &x ,

: : ( ) ( , y ) ; 15 } 16 }

Listing B.70: The header file of class MahalanobisDistance. 1 // c l / dis tances /mahalanobisdistance . hpp 2 #ifndef CLUSLIB MAHALANOBISDISTANCE HPP 3 #define CLUSLIB MAHALANOBISDISTANCE HPP 4

5 #include<c l / d i s t anc e s / d i s t anc e . hpp> 6 #include<c l / e r r o r s . hpp> 7 #include<c l / types . hpp> 8 #include<c l / u t i l i t i e s /matrix . hpp> 9

10 namespace ClusLib { 11

12 class MahalanobisDistance : public Distance { 13 public : 14 MahalanobisDistance ( const 15 ublas : : symmetric matrix<Real> &sigma ) ; 16 Real operator ( ) ( const boost : : shared ptr<Record>&, 17 const boost : : shared ptr<Record>&) const ; 18

19 protected : 20 ub las : : t r i angu l a r mat r i x<Real> A ; 21 } ; 22

23 in l ine MahalanobisDistance : : MahalanobisDistance ( const 24 ublas : : symmetric matrix<Real> &sigma ) 25 : D i stance ( ”Mahalanobis d i s t anc e ” ) { 26 ub las : : t r i angu l a r mat r i x<Real> Lm; 27 Lm. r e s i z e ( sigma . s i z e 1 ( ) , sigma . s i z e 1 ( ) ) ; 28 A . r e s i z e ( sigma . s i z e 1 ( ) , sigma . s i z e 1 ( ) ) ; 29 S i z e k = chol ( sigma , Lm) ; 30 ASSERT(k==0, ” i n v a l i d e covar iance matrix ” ) ; 31 k = t r i a n gu l a r ma t r i x i nv e r s e (Lm, A ) ; 32 ASSERT(k==0, ” i n v e r s i n g t r i a ngu l a r matrix f a i l e d ” ) ; 33 } 34

Listing B.71: The source file of class MahalanobisDistance. 1 // c l / dis tances /mahalanobisdistance . cpp 2 #include<c l / d i s t anc e s /mahalanob i sd i s tance . hpp> 3 #include<c l / da ta s e t s / record . hpp> 4 #include<cmath> 5

6 namespace ClusLib { 7

8 Real MahalanobisDistance : : operator ( ) ( 9 const boost : : shared ptr<Record> &x ,

10 const boost : : shared ptr<Record> &y) const { 11 boost : : shared ptr<Schema> schema = x−>schema ( ) ; 12 ASSERT(∗ schema==∗(y−>schema ( ) ) , ”schema does not match” ) ; 13 ASSERT( schema−>s i z e ( ) == A . s i z e 1 ( ) ,

;

16 ublas : : vector<Real> v( schema−>s i z e ( ) ) ; 17 for ( S i z e i =0; i<schema−>s i z e ( ) ; ++i ) { 18 v ( i ) = (∗ schema ) [ i ]−>d i s t anc e ((∗ x ) [ i ] , ( ∗ y ) [ i ] ) ; 19 } 20

21 ublas : : vector<Real> w; 22 w. r e s i z e (v . s i z e ( ) ) ; 23 ublas : : axpy prod ( A , v , w, true ) ; 24

25 return std : : sq r t ( ub las : : inner prod (w,w) ) ; 26 } 27

Listing B.72: The header file of class MinkowskiDistance. 1 // c l / dis tances /minkowskidistance . hpp 2 #ifndef CLUSLIB MINKOWSKIDISTANCE HPP 3 #define CLUSLIB MINKOWSKIDISTANCE HPP 4

5 #include<c l / d i s t anc e s / d i s t anc e . hpp> 6 #include<c l / e r r o r s . hpp> 7

8 namespace ClusLib { 9

10 class MinkowskiDistance : public Distance { 11 public : 12 MinkowskiDistance ( ) ; 13 MinkowskiDistance ( Real p ) ; 14 Real operator ( ) ( const boost : : shared ptr<Record>&, 15 const boost : : shared ptr<Record>&) const ; 16

17 protected : 18 Real p ; 19 } ; 20

21 in l ine MinkowskiDistance : : MinkowskiDistance ( ) 22 : D i stance ( ”Minkowski d i s t anc e ” ) , p ( 2 . 0 ) { 23 } 24

25 in l ine MinkowskiDistance : : MinkowskiDistance ( Real p) 26 : D i stance ( ”Minkowski d i s t anc e ” ) , p (p) { 27 ASSERT( p>=1, ” i n va l i d e parameter ” ) ; 28 } 29

Listing B.73: The source file of class MinkowskiDistance. 1 // c l / dis tances /minkowskidistance . cpp 2 #include<c l / d i s t anc e s /minkowskidistance . hpp> 3 #include<c l / da ta s e t s / record . hpp> 4 #include<cmath> 5

6 namespace ClusLib { 7

,

10 const boost : : shared ptr<Record> &y) const { 11 boost : : shared ptr<Schema> schema = x−>schema ( ) ; 12 ASSERT(∗ schema==∗(y−>schema ( ) ) , ”schema does not match” ) ; 13

14 Real temp = 0 . 0 ; 15 for ( S i z e i =0; i<schema−>s i z e ();++ i ){ 16 temp += std : : pow( std : : f abs ( 17 (∗ schema ) [ i ]−>d i s t anc e ((∗ x ) [ i ] , ( ∗ y ) [ i ] ) ) , p ) ; 18 } 19

20 return std : : pow( temp ,1/ p ) ; 21 } 22

Listing B.74: The header file of class MixedDistance. 1 // c l / dis tances /mixeddistance . hpp 2 #ifndef CLUSLIB MIXEDDISTANCE HPP 3 #define CLUSLIB MIXEDDISTANCE HPP 4

5 #include<c l / d i s t anc e s / euc l i d eand i s t anc e . hpp> 6 #include<c l / d i s t anc e s / s implematch ingd i stance . hpp> 7 #include<c l / types . hpp> 8

9 namespace ClusLib { 10

11 class MixedDistance : public Distance { 12 public : 13 MixedDistance ( ) ; 14 MixedDistance ( Real beta ) ; 15 Real operator ( ) ( const boost : : shared ptr<Record>&, 16 const boost : : shared ptr<Record>& ) const ; 17

18 protected : 19 Real beta ; 20 } ; 21

22 in l ine MixedDistance : : MixedDistance ( ) 23 : D i stance ( ”Mixed Distance” ) , beta ( 1 . 0 ) { 24 } 25

26 in l ine MixedDistance : : MixedDistance ( Real beta ) 27 : D i stance ( ”Mixed Distance” ) , beta ( beta ) { 28 } 29 } 30

Listing B.75: The source file of class MixedDistance. 1 // c l / dis tances /mixeddistance . cpp 2 #include<c l / d i s t anc e s /mixedd i stance . hpp> 3 #include<cmath> 4 #include<iostream> 5

6 namespace ClusLib { 7

,

10 const boost : : shared ptr<Record> &y) const { 11 boost : : shared ptr<Schema> schema = x−>schema ( ) ; 12 ASSERT(∗ schema==∗(y−>schema ( ) ) , ”schema does not match” ) ; 13

14 Real d1 = 0 . 0 ; 15 Real d2 = 0 . 0 ; 16 for ( S i z e i =0; i<schema−>s i z e ();++ i ){ 17 i f ( (∗ schema ) [ i ]−> c an c a s t t o c ( ) ) { 18 d1 += std : : pow( std : : f abs ( 19 (∗ schema ) [ i ]−>d i s t anc e ((∗ x ) [ i ] , ( ∗ y ) [ i ] ) ) , 2 . 0 ) ; 20 } else { 21 d2 += (∗ schema ) [ i ]−>d i s t anc e ((∗ x ) [ i ] , ( ∗ y ) [ i ] ) ; 22 } 23 } 24

25 return d1 + beta ∗d2 ; 26 } 27 }

Listing B.76: The header file of class SimpleMatchingDistance. 1 // c l / dis tances /simplematchingdistance . hpp 2 #ifndef CLUSLIB SIMPLEMATCHINGDISTANCE HPP 3 #define CLUSLIB SIMPLEMATCHINGDISTANCE HPP 4

5 #include<c l / d i s t anc e s / d i s t anc e . hpp> 6

7 namespace ClusLib { 8

9 class SimpleMatchingDistance : public Distance { 10 public : 11 SimpleMatchingDistance ( ) ; 12 Real operator ( ) ( const boost : : shared ptr<Record>&, 13 const boost : : shared ptr<Record>&) const ; 14 } ; 15

16 in l ine SimpleMatchingDistance : : S impleMatchingDistance ( ) 17 : D i stance ( ”Simple Matching d i s t anc e ” ) { 18 } 19

Listing B.77: The source file of class SimpleMatchingDistance. 1 // c l / dis tances /simplematchingdistance . cpp 2 #include<c l / d i s t anc e s / s implematch ingd i stance . hpp> 3 #include<c l / da ta s e t s / record . hpp> 4 #include<cmath> 5

6 namespace ClusLib { 7

8 Real SimpleMatchingDistance : : operator ( ) ( 9 const boost : : shared ptr<Record> &x ,

10 const boost : : shared ptr<Record> &y) const { 11 boost : : shared ptr<Schema> schema = x−>schema ( ) ; 12 ASSERT(∗ schema==∗(y−>schema ( ) ) , ”schema does not match” ) ;

15 for ( S i z e i =0; i<schema−>s i z e ();++ i ){ 16 temp += (∗ schema ) [ i ]−>d i s t anc e ((∗ x ) [ i ] , ( ∗ y ) [ i ] ) ; 17 } 18

19 return temp ; 20 } 21

Listing B.78: Makefile.am in cl/patterns. 1 noinst LTLIBRARIES = l i bPa t t e r n s . l a 2

3 AM CPPFLAGS = −I${ t o p s r c d i r } −I$ { t op bu i l dd i r } 4

5 t h i s i n c l u d e d i r=${ i n c l ud ed i r }/${ subd i r } 6 this include HEADERS = \ 7 a l l . hpp \ 8 dendrogramvis i tor . hpp \ 9 i n t e rna lnode . hpp \

10 j o i n v a l u e v i s i t o r . hpp \ 11 l e a f node . hpp \ 12 node . hpp \ 13 nod ev i s i t o r . hpp \ 14 p c v i s i t o r . hpp 15

17 l ibPatterns la SOURCES = \ 18 dendrogramvis i tor . cpp \ 19 i n t e rna lnode . cpp \ 20 j o i n v a l u e v i s i t o r . cpp \ 21 l e a f node . cpp \ 22 p c v i s i t o r . cpp 23

24 a l l . hpp : Make f i l e . am 25 echo ”// This f i l e i s generated . P lease do not e d i t ! ” > $@ 26 echo >> $@ 27 f o r i in $ ( f i l t e r −out a l l . hpp , $ ( this include HEADERS ) ) ; \ 28 do \ 29 echo ”#inc l ude <${ subd i r }/ $$i>” >> $@; \ 30 done 31 echo >> $@ 32 subd i r s=’$ (SUBDIRS ) ’ ; f o r i in $$subd i r s ; do \ 33 echo ”#inc l ude <${ subd i r }/ $$ i / a l l . hpp>” >> $@ ; \ 34 done

Listing B.79: The header file of class DendrogramVisitor. 1 // c l / pat terns /dendrogramvisitor . hpp 2 #ifndef CLUSLIB DENDROGRAMVISITORHPP 3 #define CLUSLIB DENDROGRAMVISITORHPP 4

5 #include<c l / pa t t e rn s/ l e a f node . hpp> 6 #include<c l / pa t t e rn s/ in t e rna lnode . hpp> 7 #include<c l / pa t t e rn s/ n odev i s i t o r . hpp> 8 #include<c l / u t i l i t i e s /dendrogram . hpp> 9 #include<iostream>

12 namespace ClusLib { 13

14 class DendrogramVisitor : public NodeVis i tor { 15 public : 16 DendrogramVisitor( Real hjv , 17 S i z e l l e v e l , S i z e h l e v e l ) ; 18 void v i s i t ( LeafNode& node ) ; 19 void v i s i t ( Inte rna lNode& node ) ; 20 void save ( const std : : s t r i n g &f i l ename ) ; 21

22 private : 23 Dendrogram dg ; 24 S i z e c u t l e v e l ; 25 S i z e count ; 26 Real l e f tMarg in ; 27 Real bottomMargin ; 28 Real boxx ; 29 Real boxy ; 30 Real h e i gh t ; 31 Real width ; 32 Real h j v ; 33 Real gap ; 34 bool drawLabel ; 35 std : : map<Size , s td : : pair<Size , Size> > l i n e s ; 36 std : : map<Size , s td : : pair<Real , Real> > po i n t s ; 37

38 Real ge t x ( S i z e id ) ; 39 void drawLink ( S i z e id0 , S i z e id1 ) ; 40 } ; 41

Listing B.80: The source file of class DendrogramVisitor. 1 // c l / pat terns /dendrogramvisitor . cpp 2 #include<c l / pa t t e rn s/ dendrogramvis i tor . hpp> 3 #include<c l / e r r o r s . hpp> 4 #include<sstream> 5

6 namespace ClusLib { 7

8 DendrogramVisitor : : DendrogramVisitor ( Real hjv , 9 S i z e l l e v e l , S i z e h l e v e l ) : c u t l e v e l ( l l e v e l ) , count (0 ) ,

10 l e f tMarg in (30 ) , bottomMargin (20 ) , 11 h jv ( hjv ) , gap (15 ) , drawLabel ( true ) { 12 ASSERT( h l eve l>=l l e v e l , ” h l e v e l must >= l l e v e l ” ) ; 13

14 Real x1 , y1 , x2 , y2 ;

17 width = 390; 18 he i gh t = 540; 19 S i z e numLeaves = h l ev e l − l l e v e l + 1 ; 20 i f ( numLeaves > 60) { 21 drawLabel = fa l se ; 22 } 23 i f ( gap ∗numLeaves > he i gh t − bottomMargin ) { 24 gap = ( he i gh t − bottomMargin ) / numLeaves ; 25 } else { 26 he i gh t = gap ∗numLeaves + bottomMargin ; 27 } 28

29 dg . setbox ( boxx , boxy , boxx+ width , boxy+ he i gh t ) ; 30 } 31

32 void DendrogramVisitor : : v i s i t ( LeafNode& node ) { 33 Real x = boxx + l e f tMarg in ; 34 Real y = bottomMargin + boxy + gap ∗ count ; 35 ++ count ; 36 dg . drawCirc l e (x , y , 1 . 5 ) ; 37 i f ( drawLabel ) { 38 std : : s t r i ng s t r e am ss ; 39 ss<<node . g e t i d ( ) ; 40 dg . drawText (x , y , s s . s t r ( ) ) ; 41 } 42 po i n t s . i n s e r t ( std : : pair<Size , s td : : pair<Real , Real> >( 43 node . g e t i d ( ) , 44 std : : pair<Real , Real>(x , y ) ) ) ; 45 } 46

47 void DendrogramVisitor : : v i s i t ( Inte rna lNode& node ) { 48 i f ( node . num chi ldren ( ) != 2){ 49 FAIL( ”DendrogramVisitor only hand le s ” << 50 ”nodes with 2 ch i l d r en ” ) ; 51 } 52

53 Real x , y ; 54 i f ( node . g e t l e v e l ( ) > c u t l e v e l ) { 55 l i n e s . i n s e r t ( std : : pair<Size , s td : : pair<Size , Size> >( 56 node . g e t i d ( ) , 57 std : : pair<Size , Size >(node[0]−> g e t i d ( ) , 58 node[1]−> g e t i d ( ) ) ) ) ; 59 x = ( node . g e t j o i nVa lue ( ) ) ∗ ( width − l e f tMarg in ) 60 / h jv + l e f tMarg in + boxx ; 61 po i n t s . i n s e r t ( std : : pair<Size , s td : : pair<Real , Real> >( 62 node . g e t i d ( ) , 63 std : : pair<Real , Real>(x , Null<Real > ( ) ) ) ) ; 64 node[0]−> accept (∗ this ) ; 65 node[1]−> accept (∗ this ) ; 66 } else { 67 x = boxx + l e f tMarg in ; 68 y = bottomMargin + boxy + gap ∗ count ; 69 ++ count ; 70 dg . drawDot (x , y ) ; 71 i f ( drawLabel ) { 72 std : : s t r i ng s t r e am ss ; 73 ss<<node . g e t i d ( ) ; 74 dg . drawText (x , y , s s . s t r ( ) ) ; 75 } 76 po i n t s . i n s e r t ( std : : pair<Size , s td : : pair<Real , Real> >( 77 node . g e t i d ( ) , 78 std : : pair<Real , Real>(x , y ) ) ) ; 79 } 80 } 81

) { : : i t e r a t o r i t ; 84 S i z e top id = 0 ; 85 for ( i t = l i n e s . begin ( ) ; i t != l i n e s . end ( ) ; ++i t ) { 86 i f ( i t−> f i r s t > top id ) { 87 top id = i t−> f i r s t ; 88 } 89 } 90 po i n t s [ top id ] . second = ge t x ( top id ) ; 91 for ( i t = l i n e s . begin ( ) ; i t != l i n e s . end ( ) ; ++i t ) { 92 drawLink ( i t−>second . f i r s t , i t−> f i r s t ) ; 93 drawLink ( i t−>second . second , i t−> f i r s t ) ; 94 } 95 dg . save ( f i l ename ) ; 96 } 97

98 Real DendrogramVisitor : : g e t x ( S i z e id ) { 99 S i z e id0 = l i n e s [ id ] . f i r s t ;

100 S i z e id1 = l i n e s [ id ] . second ; 101

102 Real x1 , x2 ; 103 i f ( p o i n t s [ id0 ] . second == Null<Real >()) { 104 x1 = ge t x ( id0 ) ; 105 po i n t s [ id0 ] . second = x1 ; 106 i f ( p o i n t s [ id1 ] . second == Null<Real >()) { 107 x2 = ge t x ( id1 ) ; 108 po i n t s [ id1 ] . second = x2 ; 109 } else { 110 x2 = po i n t s [ id1 ] . second ; 111 } 112 } else { 113 x1 = po i n t s [ id0 ] . second ; 114 i f ( p o i n t s [ id1 ] . second == Null<Real >()) { 115 x2 = ge t x ( id1 ) ; 116 po i n t s [ id1 ] . second = x2 ; 117 } else { 118 x2 = po i n t s [ id1 ] . second ; 119 } 120 } 121

122 return 0 . 5∗ ( x1 + x2 ) ; 123 } 124

125 void DendrogramVisitor : : drawLink ( S i z e id0 , S i z e id1 ) { 126 Real x1 = po i n t s [ id0 ] . f i r s t ; 127 Real y1 = po i n t s [ id0 ] . second ; 128 Real x2 = po i n t s [ id1 ] . f i r s t ; 129 Real y2 = po i n t s [ id1 ] . second ; 130 i f ( x1 == boxx + l e f tMarg in ) { 131 x1 += 1 . 5 ; 132 } 133 dg . drawLine (x1 , y1 , x2 , y1 ) ; 134 dg . drawLine (x2 , y1 , x2 , y2 ) ; 135 } 136 }

Listing B.81: The header file of class InternalNode. 1 // c l / pat terns / interna lnode . hpp 2 #ifndef CLUSLIB INTERNALNODE HPP 3 #define CLUSLIB INTERNALNODE HPP

9 namespace ClusLib { 10

11 class Inte rna lNode : public Node , 12 public Container<boost : : shared ptr<Node> >{ 13 public : 14 Inte rna lNode( S i z e id = 0 , 15 const boost : : shared ptr<Node> p 16 = boost : : shared ptr<Node>() ) ; 17 Inte rna lNode( Real joinValue , 18 S i z e id = 0 , 19 const boost : : shared ptr<Node> p 20 = boost : : shared ptr<Node>() ) ; 21

22 void accept ( NodeVis i tor &v ) ; 23 S i z e num chi ldren ( ) const ; 24 S i z e num records ( ) const ; 25 Real ge t j o i nVa lue ( ) ; 26 void s e t j o i nVa l u e ( Real jo inValue ) ; 27

28 private : 29 Real j o i nVa lue ; 30 } ; 31

32 in l ine S i z e Inte rna lNode : : num chi ldren ( ) const { 33 return data . s i z e ( ) ; 34 } 35

36 in l ine Real Inte rna lNode : : g e t j o i nVa lue ( ) { 37 return j o i nVa lue ; 38 } 39

40 in l ine void Inte rna lNode : : s e t j o i nVa l u e ( Real jo inValue ) { 41 j o i nVa lue = jo inValue ; 42 } 43

Listing B.82: The source file of class InternalNode. 1 // c l / pat terns / interna lnode . cpp 2 #include<c l / pa t t e rn s/ in t e rna lnode . hpp> 3

4 namespace ClusLib { 5

6 Inte rna lNode : : Inte rna lNode( S i z e id , 7 const boost : : shared ptr<Node> p) 8 : Node (p , id ) { 9 }

11 Inte rna lNode : : Inte rna lNode( Real joinValue , 12 S i z e id , 13 const boost : : shared ptr<Node> p) 14 : Node(p , id ) , j o i nVa lue ( jo inValue ) { 15 } 16

17 void Inte rna lNode : : accept ( NodeVis i tor &v) { 18 v . v i s i t (∗ this ) ; 19 } 20

21 S i z e Inte rna lNode : : num records ( ) const {

e 24 nSum += data [ i ]−>num chi ldren ( ) ; 25 } 26

Listing B.83: The header file of class LeafNode. 1 // c l / pat terns / leafnode . hpp 2 #ifndef CLUSLIB LEAFNODE HPP 3 #define CLUSLIB LEAFNODE HPP 4

5 #include<c l / pa t t e rn s/node . hpp> 6 #include<c l / da ta s e t s / record . hpp> 7

8 namespace ClusLib { 9

10 class LeafNode : public Node { 11 public : 12 LeafNode ( const boost : : shared ptr<Record>& r , 13 S i z e id = 0 , 14 const boost : : shared ptr<Node>& p 15 = boost : : shared ptr<Node>() ) ; 16

17 void accept ( NodeVis i tor &v ) ; 18 S i z e num chi ldren ( ) const ; 19 S i z e num records ( ) const ; 20 boost : : shared ptr<Record> ge t da ta ( ) ; 21

22 private : 23 boost : : shared ptr<Record> data ; 24

27 in l ine S i z e LeafNode : : num chi ldren ( ) const { 28 return 0 ; 29 } 30

31 in l ine S i z e LeafNode : : num records ( ) const { 32 return 1 ; 33 } 34

35 in l ine boost : : shared ptr<Record> LeafNode : : g e t da ta ( ) { 36 return data ; 37 } 38 } 39

Listing B.84: The source file of class LeafNode. 1 // c l / pat terns / leafnode . cpp 2 #include<c l / pa t t e rn s/ l e a f node . hpp> 3

4 namespace ClusLib { 5

8 const boost : : shared ptr<Node>& p) 9 : Node (p , id ) , data ( r ) {

12 void LeafNode : : accept ( NodeVis i tor &v) { 13 v . v i s i t (∗ this ) ; 14 } 15 }

Listing B.85: The header file of class Node. 1 // c l / pat terns /node . hpp 2 #ifndef CLUSLIB NODE HPP 3 #define CLUSLIB NODE HPP 4

5 #include<c l / types . hpp> 6 #include<c l / e r r o r s . hpp> 7 #include<c l / pa t t e rn s/ n odev i s i t o r . hpp> 8 #include<boost / sha r ed p t r . hpp> 9

10 namespace ClusLib { 11

12 class Node { 13 public : 14 virtual ˜Node ( ) {} 15

16 S i z e g e t i d ( ) const ; 17 void s e t i d ( S i z e id ) ; 18 S i z e g e t l e v e l ( ) const ; 19 void s e t l e v e l ( S i z e l e v e l ) ; 20 boost : : shared ptr<Node> ge t pa r en t ( ) ; 21 void s e t pa r en t ( const boost : : shared ptr<Node>& p ) ; 22

23 virtual void accept ( NodeVis i tor &v) = 0 ; 24 virtual S i z e num chi ldren ( ) const = 0; 25 virtual S i z e num records ( ) const = 0; 26

27 protected : 28 Node ( boost : : shared ptr<Node> p , S i z e id ) 29 : parent (p ) , i d ( id ) {} 30

31 boost : : shared ptr<Node> parent ; 32 S i z e i d ; 33 S i z e l e v e l ; 34 } ; 35

36 in l ine S i z e Node : : g e t i d ( ) const { 37 return i d ; 38 } 39

40 in l ine void Node : : s e t i d ( S i z e id ) { 41 i d = id ; 42 } 43

44 in l ine S i z e Node : : g e t l e v e l ( ) const { 45 return l e v e l ; 46 } 47

48 in l ine void Node : : s e t l e v e l ( S i z e l e v e l ) {

52 in l ine boost : : shared ptr<Node> Node : : g e t pa r en t ( ) { 53 return parent ; 54 } 55

56 in l ine void Node : : s e t pa r en t ( 57 const boost : : shared ptr<Node>& p) { 58 parent = p ; 59 } 60 } 61

Listing B.86: The header file of class NodeVisitor. 1 // c l / pat terns / nodev i s i t o r . hpp 2 #ifndef CLUSLIB NODEVISITOR HPP 3 #define CLUSLIB NODEVISITOR HPP 4

5 #include<boost / sha r ed p t r . hpp> 6 #include<c l / types . hpp> 7

8 namespace ClusLib { 9

class LeafNode;
class InternalNode;

// Abstract visitor over the two concrete node types of the tree.
class NodeVisitor {
public:
    // Polymorphic base: a virtual destructor makes destroying a concrete
    // visitor through a NodeVisitor pointer well defined.
    virtual ~NodeVisitor() {}

    // Called by LeafNode::accept.
    virtual void visit(LeafNode& node) = 0;
    // Called by InternalNode::accept.
    virtual void visit(InternalNode& node) = 0;
};

Listing B.87: The header file of class JoinValueVisitor. 1 // c l / pat terns / j o in v a l u e v i s i t o r . hpp 2 #ifndef CLUSLIB JOINVALUEVISITOR HPP 3 #define CLUSLIB JOINVALUEVISITOR HPP 4

5 #include<c l / u t i l i t i e s /nnmap . hpp> 6 #include<c l / pa t t e rn s/ l e a f node . hpp> 7 #include<c l / pa t t e rn s/ in t e rna lnode . hpp> 8 #include<c l / pa t t e rn s/ n odev i s i t o r . hpp> 9 #include<iostream>

12 namespace ClusLib { 13

14 class Jo inVa lueV i s i t o r : public NodeVis i tor { 15 public : : : os ,

) 19 void v i s i t ( Inte rna lNode& node ) ; 20 const std : : set<iirMapA : : va lue type , compare i i r>& 21 ge t j o i nVa lu e s ( ) const ; 22

23 private : 24 void pr in t ( std : : ostream& os ) const ; 25

26 std : : set<iirMapA : : va lue type , compare i i r> j o i nVa lu e s ; 27 } ; 28

29 in l ine const std : : set<iirMapA : : va lue type , compare i i r>& 30 Jo inVa lueV i s i t o r : : g e t j o i nVa lu e s ( ) const { 31 return j o i nVa lu e s ; 32 } 33 } 34

Listing B.88: The source file of class JoinValueVisitor. 1 // c l / pat terns / j o in v a l u e v i s i t o r . cpp 2 #include<c l / pa t t e rn s/ j o i n v a l u e v i s i t o r . hpp> 3 #include<c l / e r r o r s . hpp> 4 #include<algorithm> 5 #include<set> 6

7 namespace ClusLib { 8

9 void Jo inVa lueV i s i t o r : : p r i n t ( std : : ostream& os ) const { 10 std : : set<iirMapA : : va lue type , compare i i r > : : c o n s t i t e r a t o r 11 i t ; 12 for ( i t = jo i nVa lu e s . begin ( ) ; i t != j o i nVa lu e s . end();++ i t ){ 13 os<<(i t−> f i r s t ) . f i r s t+1<<” , ”<<( i t−> f i r s t ) . second+1 14 <<” , ”<<i t−>second<< ’ \n ’ ; 15 } 16 } 17

18 std : : ostream& operator<<(std : : ostream& os , 19 const Jo inVa lueV i s i t o r& jv ) { 20 j v . p r i n t ( os ) ; 21 return os ; 22 } 23

24 void Jo inVa lueV i s i t o r : : v i s i t ( LeafNode& node ) { 25 } 26

27 void Jo inVa lueV i s i t o r : : v i s i t ( Inte rna lNode& node ) { 28 i f ( node . num chi ldren ( ) != 2){ 29 FAIL( ” Jo inVa lueV i s i t o r only hand le s ” << 30 ”nodes with 2 ch i l d r en ” ) ; 31 } 32

33 j o i nVa lu e s . i n s e r t ( iirMapA : : va lue type ( 34 nnPair ( node[0]−> g e t i d ( ) , node[1]−> g e t i d ( ) ) , 35 node . g e t j o i nVa lue ( ) ) ) ; 36

37 node[0]−> accept (∗ this ) ; 38 node[1]−> accept (∗ this ) ; 39 } 40 }

Listing B.89: The header file of class PCVisitor. 1 // c l / pat terns / p c v i s i t o r . hpp 2 #ifndef CLUSLIB PCVISITOR HPP 3 #define CLUSLIB PCVISITOR HPP 4

5 #include<c l / pa t t e rn s/ l e a f node . hpp> 6 #include<c l / pa t t e rn s/ in t e rna lnode . hpp> 7 #include<c l / c l u s t e r s / p c l u s t e r i ng . hpp> 8 #include<c l / pa t t e rn s/ n odev i s i t o r . hpp> 9 #include<c l / types . hpp>

11 namespace ClusLib { 12

13 class CVis i tor : public NodeVis i tor { 14 public : 15 CVis i tor ( ) ; 16 void v i s i t ( LeafNode& node ) ; 17 void v i s i t ( Inte rna lNode& node ) ; 18 boost : : shared ptr<Cluster> g e t c l u s t e r ( ) ; 19

20 private : 21 boost : : shared ptr<Cluster> c l u s t e r ; 22 } ; 23

24 class PCVisitor : public NodeVis i tor { 25 public : 26 PCVisitor ( PCluste r ing &pc , S i z e c u t l e v e l ) ; 27 void v i s i t ( LeafNode& node ) ; 28 void v i s i t ( Inte rna lNode& node ) ; 29

30 private : 31 PCluste r ing & pc ; 32 S i z e c u t l e v e l ; 33 } ; 34

35 in l ine boost : : shared ptr<Cluster> CVis i tor : : g e t c l u s t e r ( ) { 36 return c l u s t e r ; 37 } 38

Listing B.90: The source file of class PCVisitor. 1 // c l / pat terns / p c v i s i t o r . cpp 2 #include<c l / pa t t e rn s/ p c v i s i t o r . hpp> 3 #include<c l / pa t t e rn s/ j o i n v a l u e v i s i t o r . hpp> 4 #include<c l / types . hpp> 5 #include<iostream> 6

7 namespace ClusLib { 8

9 CVis i tor : : CVis i tor ( ) { 10 c l u s t e r = boost : : shared ptr<Cluster >(new Cluste r ( ) ) ; 11 } 12

13 void CVis i tor : : v i s i t ( LeafNode& node ) { 14 c l u s t e r−>add ( node . g e t da ta ( ) ) ; 15 } 16

17 void CVis i tor : : v i s i t ( Interna lNode& node ) {

20 node [ i ]−>accept (∗ this ) ; 21 } 22 } 23

24 PCVisitor : : PCVisitor ( PCluste r ing &pc , S i z e c u t l e v e l ) 25 : pc ( pc ) , c u t l e v e l ( c u t l e v e l ) { 26 } 27

28 void PCVisitor : : v i s i t ( LeafNode& node ) { 29 boost : : shared ptr<Cluster> c = 30 boost : : shared ptr<Cluster >(new Cluste r ( ) ) ; 31 c−>add ( node . g e t da ta ( ) ) ; 32 pc . add ( c ) ; 33 } 34

35 void PCVisitor : : v i s i t ( Inte rna lNode& node ) { 36 i f ( node . g e t l e v e l ( ) >= cu t l e v e l ) { 37 for ( S i z e i =0; i<node . num chi ldren ();++ i ){ 38 node [ i ]−>accept (∗ this ) ; 39 } 40 } else { 41 CVis i tor cv ; 42 node . accept ( cv ) ; 43 pc . add ( cv . g e t c l u s t e r ( ) ) ; 44 } 45 } 46 }

Listing B.91: Makefile.am in cl/utilities. 1

2 AM CPPFLAGS = −I${ t o p s r c d i r } −I$ { t op bu i l dd i r } 3

4 t h i s i n c l u d e d i r=${ i n c l ud ed i r }/${ subd i r } 5 this include HEADERS = \ 6 a l l . hpp \ 7 matrix . hpp \ 8 con ta in e r . hpp \ 9 dataadapter . hpp \

10 data se tg ene ra to r . hpp \ 11 data se tno rma l i z e r . hpp \ 12 data se t r e ade r . hpp \ 13 dendrogram . hpp \ 14 nnmap . hpp \ 15 nu l l . hpp 16

17 noinst LTLIBRARIES = l i b U t i l i t i e s . l a 18 l ibUt i l i t i e s la SOURCES = \ 19 matrix . cpp \ 20 data se tg ene ra to r . cpp \ 21 data se tno rma l i z e r . cpp \ 22 data se t r e ade r . cpp \

25 a l l . hpp : Make f i l e . am 26 echo ”// This f i l e i s generated . P lease do not e d i t ! ” > $@ 27 echo >> $@ 28 f o r i in $ ( f i l t e r −out a l l . hpp , $ ( this include HEADERS ) ) ; \ 29 do \ 30 echo ”#inc l ude <${ subd i r }/ $$i>” >> $@; \ 31 done 32 echo >> $@ 33 subd i r s=’$ (SUBDIRS ) ’ ; f o r i in $$subd i r s ; do \ 34 echo ”#inc l ude <${ subd i r }/ $$ i / a l l . hpp>” >> $@ ; \ 35 done

Listing B.92: The header file of class Container. 1 // c l / u t i l i t i e s /container . hpp 2 #ifndef CLUSLIB CONTAINER HPP 3 #define CLUSLIB CONTAINER HPP 4

5 #include<c l / types . hpp> 6 #include<c l / e r r o r s . hpp> 7 #include<vector> 8

9 namespace ClusLib { 10

11 template<typename T> 12 class Container { 13 public : 14 typedef typename std : : vector<T> : : i t e r a t o r i t e r a t o r ; 15 typedef typename std : : vector<T> : : c o n s t i t e r a t o r 16 c o n s t i t e r a t o r ; 17

18 i t e r a t o r begin ( ) ; 19 c o n s t i t e r a t o r begin ( ) const ; 20 i t e r a t o r end ( ) ; 21 c o n s t i t e r a t o r end ( ) const ; 22 S i z e s i z e ( ) const ; 23 bool empty ( ) const ; 24 void c l e a r ( ) ; 25

26 const std : : vector<T>& data ( ) const ; 27 std : : vector<T>& data ( ) ; 28 const T& operator [ ] ( S i z e i ) const ; 29 T& operator [ ] ( S i z e i ) ; 30 void e ra s e ( const T& val ) ; 31 void add ( const T&val ) ; 32

33 protected : 34 ˜Container ( ) {} 35

36 std : : vector<T> data ; 37 } ; 38

39 template<typename T> 40 in l ine typename Container<T> : : i t e r a t o r Container<T> : : begin ( ) { 41 return data . begin ( ) ; 42 } 43

44 template<typename T> 45 in l ine typename Container<T> : : c o n s t i t e r a t o r

50 template<typename T> 51 in l ine typename Container<T> : : i t e r a t o r Container<T> : : end ( ) { 52 return data . end ( ) ; 53 } 54

55 template<typename T> 56 in l ine typename Container<T> : : c o n s t i t e r a t o r 57 Container<T> : : end ( ) const{ 58 return data . end ( ) ; 59 } 60

61 template<typename T> 62 in l ine S i z e Container<T> : : s i z e ( ) const { 63 return data . s i z e ( ) ; 64 } 65

66 template<typename T> 67 in l ine bool Container<T> : : empty ( ) const { 68 return data . s i z e ( ) == 0 ; 69 } 70

71 template<typename T> 72 in l ine void Container<T> : : c l e a r ( ) { 73 data . c l e a r ( ) ; 74 } 75

76 template<typename T> 77 in l ine const std : : vector<T>& Container<T> : : data ( ) const { 78 return data ; 79 } 80

81 template<typename T> 82 in l ine std : : vector<T>& Container<T> : : data ( ) { 83 return data ; 84 } 85

86 template<typename T> 87 in l ine const T& Container<T> : : operator [ ] ( S i z e i ) const { 88 ASSERT( i>=0 && i< data . s i z e ( ) , ” index out o f range” ) ; 89 return data [ i ] ; 90 } 91

92 template<typename T> 93 in l ine T& Container<T> : : operator [ ] ( S i z e i ) { 94 ASSERT( i>=0 && i< data . s i z e ( ) , ” index out o f range” ) ; 95 return data [ i ] ; 96 } 97

98 template<typename T> 99 in l ine void Container<T> : : e r a s e ( const T& val ) {

100 for ( i t e r a t o r i t= data . begin ( ) ; i t != data . end();++ i t ){ 101 i f ( va l == ∗ i t ){ 102 data . e r a s e ( i t ) ; 103 break ; 104 } 105 } 106 } 107

108 template<typename T> 109 in l ine void Container<T> : : add ( const T&val ) { 110 data . push back ( va l ) ; 111 } 112 } 113

Listing B.93: The header file of class DataAdapter. 1 // c l / u t i l i t i e s /dataadapter . hpp 2 #ifndef CLUSLIB DATAADAPTER HPP 3 #define CLUSLIB DATAADAPTER HPP 4

5 #include<c l / da ta s e t s / datase t . hpp> 6

7 namespace ClusLib { 8

9 class DataAdapter { 10 public : 11 virtual ˜DataAdapter ( ) {} 12 virtual void f i l l ( boost : : shared ptr<Dataset> &ds ) = 0 ; 13 } ; 14 } 15

Listing B.94: The header file of class DatasetGenerator. 1 // c l / u t i l i t i e s / datase tgenerator . hpp 2 #ifndef CLUSLIB DATASETGENERATOR HPP 3 #define CLUSLIB DATASETGENERATOR HPP 4

5 #include<c l / u t i l i t i e s /dataadapter . hpp> 6 #include<c l / u t i l i t i e s /matrix . hpp> 7 #include<vector> 8 #include<boost /random . hpp> 9

10 namespace ClusLib { 11

12 class DatasetGenerator : public DataAdapter { 13 public : 14 DatasetGenerator ( ub las : : matrix<Real> mu, 15 std : : vector<ublas : : symmetric matrix<Real> > sigma , 16 std : : vector<Size> records , 17 S i z e seed = 1 ) ; 18 void f i l l ( boost : : shared ptr<Dataset> &ds ) ; 19

20 protected : 21 typedef boost : : va r i a t e g ene ra to r <boost : : minstd rand , 22 boost : : n o rma l d i s t r i bu t i on<> > gen type ; 23

24 void generate ( S i z e ind ) ; 25 ublas : : matrix<Real> data ; 26

27 S i z e s e ed ; 28 std : : vector<Size> r e c o r d s ; 29 ublas : : matrix<Real> mu ; 30 std : : vector<ublas : : symmetric matrix<Real> > s igma ; 31

32 gen type gene ra to r ;

Listing B.95: The source file of class DatasetGenerator. 1 // c l / u t i l i t i e s / datase tgenerator . cpp 2 #include<c l / u t i l i t i e s / da ta s e tgene ra to r . hpp> 3 #include<sstream> 4

5 namespace ClusLib { 6

7 DatasetGenerator : : DatasetGenerator ( ub las : : matrix<Real> mu, 8 std : : vector<ublas : : symmetric matrix<Real> > sigma , 9 std : : vector<Size> records ,

10 S i z e seed ) : mu(mu) , s igma ( sigma ) , r e c o r d s ( r e c o rd s ) , 11 s e ed ( seed ) , g ene ra to r ( boost : : minstd rand ( seed ) , 12 boost : : n o rma l d i s t r i bu t i on <>() ) 13 { 14 ASSERT( mu . s i z e 1 ( ) > 0 && mu . s i z e 2 ()>0 , 15 ”empty input ” ) ; 16 ASSERT( mu . s i z e 1 ()==re co rd s . s i z e ( ) && 17 mu . s i z e 1 ()==sigma . s i z e ( ) , 18 ”number o f c l u s t e r s i s not c on s i s t en t ” ) ; 19 ASSERT( mu . s i z e 2 ()==sigma [ 0 ] . s i z e 1 ( ) , 20 ”number o f a t t r i b u t e s i s not con s i s t e n t ” ) ; 21 ASSERT( seed >0, ” seed must be pos t i v e ” ) ; 22

23 S i z e N = 0 ; 24 for ( S i z e i =0; i< r e c o r d s . s i z e ( ) ; ++i ) { 25 N += re c o r d s [ i ] ; 26 } 27 data . r e s i z e (N, mu . s i z e 2 ( ) ) ; 28 } 29

30 void DatasetGenerator : : generate ( S i z e ind ) { 31 ub las : : t r i angu l a r mat r i x<Real> 32 T( mu . s i z e 2 ( ) , mu . s i z e 2 ( ) ) ; 33 S i z e k = chol ( s igma [ ind ] , T) ; 34 ASSERT(k==0, ”can not decompose sigma ”<<ind ) ; 35

36 S i z e nStart = 0 ; 37 for ( S i z e i =0; i<ind ; ++i ) { 38 nStart += r e c o r d s [ i ] ; 39 } 40

41 ublas : : vector<Real> v( mu . s i z e 2 ( ) ) ; 42 ublas : : vector<Real> w( mu . s i z e 2 ( ) ) ; 43 for ( S i z e i =0; i< r e c o r d s [ ind ] ; ++i ) { 44 for ( S i z e j =0; j<v . s i z e ( ) ; ++j ) { 45 v( j ) = gene ra to r ( ) ; 46 } 47 ublas : : axpy prod (T, v , w) ; 48 for ( S i z e j =0; j<v . s i z e ( ) ; ++j ) { 49 data ( nStart+i , j ) = w( j ) + mu( ind , j ) ; 50 } 51 } 52 } 53

54 void DatasetGenerator : : f i l l ( boost : : shared ptr<Dataset> &ds ) { 55 for ( S i z e i =0; i< r e c o r d s . s i z e ( ) ; ++i ) { 56 generate ( i ) ; 57 } 58

59 boost : : shared ptr<Schema> schema (new Schema ( ) ) ; 60 boost : : shared ptr<DAttrInfo> l a b e l I n f o (

f o ( 63 new DAttrInfo ( ” I d e n t i f i e r ” ) ) ; 64 schema−>l a b e l I n f o ( ) = l a b e l I n f o ; 65 schema−>i d In f o ( ) = id In f o ; 66 std : : s t r i ng s t r e am ss ; 67 for ( S i z e s =0; s< data . s i z e 2 ( ) ; ++s ) { 68 s s . s t r ( ”” ) ; 69 ss<<”Attr ibute ”<<s+1; 70 boost : : shared ptr<CAttrInfo> c a i ( 71 new CAttrInfo ( s s . s t r ( ) ) ) ; 72 schema−>add ( ca i ) ; 73 } 74

75 ds = boost : : shared ptr<Dataset>(new Dataset ( schema ) ) ; 76 S i z e nCount = 0 ; 77 for ( S i z e i =0; i< r e c o r d s . s i z e ( ) ; ++i ) { 78 s s . s t r ( ”” ) ; 79 ss<<i +1; 80 std : : s t r i n g l a b e l = ss . s t r ( ) ; 81 for ( S i z e j =0; j< r e c o r d s [ i ] ; ++j ) { 82 s s . s t r ( ”” ) ; 83 ss<<nCount ; 84 std : : s t r i ng id = ss . s t r ( ) ; 85 boost : : shared ptr<Record> r (new Record ( schema ) ) ; 86 schema−>s e t i d ( r , id ) ; 87 schema−>s e t l a b e l ( r , l a b e l ) ; 88

89 for ( S i z e s=0; s< data . s i z e 2 ( ) ; ++s ) { 90 (∗ schema ) [ s]−> s e t c v a l ( (∗ r ) [ s ] , 91 data ( nCount , s ) ) ; 92 } 93 ds−>add ( r ) ; 94 ++nCount ; 95 } 96 } 97 } 98 }

Listing B.96: The header file of class DatasetNormalizer. 1 // c l / u t i l i t i e s / datase tnormal izer . hpp 2 #ifndef CLUSLIB DATASETNORMALIZERHPP 3 #define CLUSLIB DATASETNORMALIZERHPP 4

5 #include<c l / u t i l i t i e s /dataadapter . hpp> 6 #include<c l / u t i l i t i e s /matrix . hpp> 7 #include<vector> 8 #include<boost /random . hpp> 9

10 namespace ClusLib { 11

12 class DatasetNormalizer : public DataAdapter { 13 public : 14 DatasetNormalizer ( const boost : : shared ptr<Dataset> &ds ) ; 15 void f i l l ( boost : : shared ptr<Dataset> &ds ) ; 16

17 protected : 18 void get minmax ( ) ; 19

20 std : : vector<Real> dvMin ;

Listing B.97: The source file of class DatasetNormalizer. 1 // c l / u t i l i t i e s / datase tnormal izer . cpp 2 #include<c l / u t i l i t i e s / da ta s e tno rma l i z e r . hpp> 3 #include<sstream> 4

5 namespace ClusLib { 6

7 DatasetNormalizer : : DatasetNormalizer ( const 8 boost : : shared ptr<Dataset> &ds ) : ods ( ds ) { 9 ASSERT( ods , ” input datase t i s n u l l ” ) ;

12 void DatasetNormal izer : : f i l l ( boost : : shared ptr<Dataset> &ds ) { 13 get minmax ( ) ; 14

15 boost : : shared ptr<Schema> schema ( ods−>schema()−> c lone ( ) ) ; 16 ds = boost : : shared ptr<Dataset>(new Dataset ( schema ) ) ; 17

18 for ( S i z e i =0; i< ods−>s i z e ( ) ; ++i ) { 19 boost : : shared ptr<Record> r e c (new Record ( schema ) ) ; 20 for ( S i z e h=0; h<schema−>s i z e ( ) ; ++h) { 21 i f ( ! (∗ schema ) [ h]−> c a n c a s t t o c ( ) ) { 22 (∗ schema ) [ h]−> s e t d v a l ( (∗ r e c ) [ h ] , 23 (∗ schema ) [ h]−>g e t d v a l ( (∗ ods ) ( i , h ) ) ) ; 24 } else { 25 Real dTemp = ( 26 (∗ schema ) [ h]−>g e t c v a l ( (∗ ods ) ( i , h ) ) − 27 dvMin [ h ] ) / ( dvMax [ h ] − dvMin [ h ] ) ; 28 (∗ schema ) [ h]−> s e t c v a l ( (∗ r e c ) [ h ] , dTemp) ; 29 } 30 } 31

32 i f ( schema−> i s l a b e l l e d ( ) ) { 33 const boost : : shared ptr<DAttrInfo> &l ab e l = 34 schema−>l a b e l I n f o ( ) ; 35 l ab e l−>s e t d v a l ( rec−>l abe lValue ( ) , 36 (∗ ods ) [ i ]−>g e t l a b e l ( ) ) ; 37 } 38

39 const boost : : shared ptr<DAttrInfo> &id = 40 schema−>i d I n f o ( ) ; 41 id−>s e t d v a l ( rec−>idValue ( ) , (∗ ods ) [ i ]−>g e t i d ( ) ) ; 42

43 ds−>add ( r e c ) ; 44 } 45 } 46

47 void DatasetNormalizer : : get minmax ( ) { 48 boost : : shared ptr<Schema> schema = ods−>schema ( ) ; 49 dvMin . r e s i z e ( schema−>s i z e ( ) ) ; 50 dvMax . r e s i z e ( schema−>s i z e ( ) ) ; 51

52 for ( S i z e h=0; h<schema−>s i z e ( ) ; ++h) { 53 i f ( ! (∗ schema ) [ h]−> c a n c a s t t o c ( ) ) { 54 continue ; 55 } 56

57 Real dMin = MAXREAL; 58 Real dMax = MIN REAL;

++i { 61 dTemp = (∗ schema ) [ h]−>g e t c v a l ( (∗ ods ) ( i , h ) ) ; 62 i f (dMin > dTemp ) { 63 dMin = dTemp; 64 } 65

66 i f (dMax < dTemp ) { 67 dMax = dTemp; 68 } 69 } 70

71 i f ( dMax − dMin < EPSILON) { 72 dMax = dMin + 1 . 0 ; 73 } 74

75 dvMin [ h ] = dMin ; 76 dvMax [ h ] = dMax ; 77 } 78 } 79 }

Listing B.98: The header file of class DatasetReader. 1 // c l / u t i l i t i e s / datase treader . hpp 2 #ifndef CLUSLIB DATASETREADERHPP 3 #define CLUSLIB DATASETREADERHPP 4

5 #include<c l / types . hpp> 6 #include<c l / da ta s e t s / datase t . hpp> 7 #include<c l / u t i l i t i e s /dataadapter . hpp> 8 #include<vector> 9 #include<s t r i ng>

10 #include<boost / sha r ed p t r . hpp> 11

12 namespace ClusLib { 13

14 class DatasetReader : public DataAdapter { 15 public : 16 DatasetReader ( const std : : s t r i n g& fi leName ) ; 17 void f i l l ( boost : : shared ptr<Dataset>& ds ) ; 18

19 private : 20 void createSchema ( ) ; 21 void f i l l Da t a ( ) ; 22 boost : : shared ptr<Record> c reateRecord ( 23 const std : : vector<std : : s t r i ng>& val ) ; 24

25 std : : vector<std : : s t r i ng> s p l i t ( const std : : s t r i n g &); 26

27 std : : s t r i n g f i l eName ; 28 S i z e labelColumn ; 29 S i z e idColumn ; 30 S i z e numColumn; 31

32 boost : : shared ptr<Schema> schema ; 33 boost : : shared ptr<Dataset> ds ; 34 } ; 35

Listing B.99: The source file of class DatasetReader. 1 // cl/utilities/datasetreader.cpp 2 #include<cl/utilities/datasetreader.hpp> 3 #include<boost/tokenizer.hpp> 4 #include<boost/algorithm/string.hpp> 5 #include<boost/lexical_cast.hpp> 6 #include<fstream> 7 #include<iostream> 8

9 namespace ClusLib { 10

11 DatasetReader : : DatasetReader ( const std : : s t r i n g& fi leName ) 12 : f i l eName ( f i leName ) , labelColumn ( Null<Size >()) , 13 idColumn ( Null<Size >()) , numColumn(0 ) { 14 } 15

16 void DatasetReader : : f i l l ( boost : : shared ptr<Dataset>& ds ) { 17 createSchema ( ) ; 18 f i l l D a t a ( ) ; 19 i f ( idColumn == Null<Size >()) { 20 for ( S i z e i =0; i< ds−>s i z e ( ) ; ++i ) { 21 schema−>s e t i d ((∗ ds ) [ i ] , 22 boost : : l e x i c a l c a s t <std : : s t r i ng >( i ) ) ; 23 } 24 } 25 ds = ds ; 26 } 27

28 void DatasetReader : : f i l lD a t a ( ) { 29 std : : i f s t r e am f i l e ; 30 std : : s t r i n g l i n e ; 31 f i l e . open ( f i l eName . c s t r ( ) ) ; 32 ASSERT( f i l e . good ( ) , ”can not open f i l e ” << f i l eName ) ; 33

34 ds = boost : : shared ptr<Dataset >(new Dataset ( schema ) ) ; 35 std : : vector<std : : s t r i ng> temp ; 36 std : : s t r i n g ms , id ; 37 while ( g e t l i n e ( f i l e , l i n e ) ){ 38 boost : : tr im ( l i n e ) ; 39 i f ( l i n e . empty ( ) ){ 40 break ; 41 } 42 temp = s p l i t ( l i n e ) ; 43 boost : : shared ptr<Record> pr = createRecord ( temp ) ; 44 ds−>add ( pr ) ; 45 } 46 } 47

48 boost : : shared ptr<Record> DatasetReader : : c reateRecord ( 49 const std : : vector<std : : s t r i ng>& val ) { 50 boost : : shared ptr<Record> r e c = 51 boost : : shared ptr<Record>(new Record ( schema ) ) ; 52 ASSERT( numColumn == val . s i z e ( ) , ” l ength does not match” ) ; 53 std : : s t r i n g l abe l , id ; 54 S i z e j = 0 ; 55 S i z e s ; 56 for ( S i z e i =0; i<val . s i z e ();++ i ){ 57 i f ( i == labelColumn ) { 58 l a b e l = val [ i ] ; 59 continue ; 60 } 61 i f ( i == idColumn ) { 62 id = val [ i ] ; 63 continue ; 64 } 65 switch ( (∗ schema ) [ j ]−>type ( ) ){

) t c a l ( (∗ r e c ) [ j ] , 0 . 0 ) ; 69 }else { 70 (∗ schema ) [ j ]−> s e t c v a l ( (∗ r e c ) [ j ] , 71 boost : : l e x i c a l c a s t <Real>(va l [ i ] ) ) ; 72 } 73 break ; 74 case Disc r e t e : 75 s = (∗ schema ) [ j ]−>c a s t t o d ( ) . add value ( 76 va l [ i ] ) ; 77 (∗ schema ) [ j ]−> s e t d v a l ( (∗ r e c ) [ j ] , s ) ; 78 break ; 79 } 80 ++j ; 81 } 82

83 i f ( labelColumn != Null<Size >()) { 84 schema−>s e t l a b e l ( rec , l a b e l ) ; 85 } 86

87 i f ( idColumn != Null<Size >()) { 88 schema−>s e t i d ( rec , id ) ; 89 } 90

91 return r e c ; 92 } 93

94 void DatasetReader : : createSchema ( ) { 95 s i z e t ind = f i l eName . f i n d l a s t o f ( ’ . ’ ) ; 96 ASSERT( ind != std : : s t r i n g : : npos , 97 f i l eName << ” i nv a l i d f i l e name” ) ; 98 std : : s t r i n g schemaFile = 99 f i l eName . sub s t r (0 , ind+1) + ”names” ;

101 std : : i f s t r e am f i l e ; 102 std : : s t r i n g l i n e ; 103 f i l e . open ( schemaFile . c s t r ( ) ) ; 104 ASSERT( f i l e . good ( ) , ”can not open f i l e ” << schemaFi le ) ; 105

106 bool bTag = fa l se ; 107 while ( g e t l i n e ( f i l e , l i n e ) ){ 108 ind = l i n e . f i nd ( ” /// : ” ) ; 109 i f ( ind != std : : s t r i n g : : npos ){ 110 bTag = true ; 111 break ; 112 } 113 } 114 ASSERT(bTag , 115 ” Inva l i d names f i l e ( no /// : ) ” << schemaFi le ) ; 116 std : : vector<std : : s t r i ng> temp ; 117 schema = boost : : shared ptr<Schema>(new Schema ( ) ) ; 118 schema−>i d I n f o ( ) = boost : : shared ptr<DAttrInfo>( 119 new DAttrInfo ( ” I d e n t i f i e r ” ) ) ; 120 S i z e nLine = 0 ; 121 bool bClass = fa l se ; 122 bool bId = fa l se ; 123 while ( g e t l i n e ( f i l e , l i n e ) ){ 124 boost : : tr im ( l i n e ) ; 125 i f ( l i n e . empty ( ) ){ 126 break ; 127 } 128 temp = s p l i t ( l i n e ) ; 129 ASSERT(temp . s i z e ()==2,” i n v a l i d schema l i n e ”<< l i n e ) ; 130 i f ( temp[1]==”Class ” ){ 131 i f ( ! bClass ) { 132 schema−>l a b e l I n f o ( ) = 133 boost : : shared ptr<DAttrInfo>(

; 136 labelColumn = nLine ; 137 } else { 138 FAIL( ”schema can not have two c l a s s columns” ) ; 139 } 140 } else i f ( temp [ 1 ] == ”Continuous” ) { 141 schema−>add ( boost : : shared ptr<CAttrInfo >( 142 new CAttrInfo ( temp [ 0 ] ) ) ) ; 143 } else i f ( temp [ 1 ] == ”Di sc r e t e ” ) { 144 schema−>add ( boost : : shared ptr<DAttrInfo>( 145 new DAttrInfo ( temp [ 0 ] ) ) ) ; 146 } else i f ( temp [ 1 ] == ”RecordID” ) { 147 i f ( ! bId ) { 148 bId = true ; 149 idColumn = nLine ; 150 } else { 151 FAIL( ”schema can not have two id columns” ) ; 152 } 153 } else { 154 FAIL( ” i n v a l i d type ” << temp [ 1 ] 155 << ” note that type name i s case s e n s i t i v e ” ) ; 156 } 157

161 numColumn = nLine ; 162 f i l e . c l o s e ( ) ; 163 } 164

165 std : : vector<std : : s t r i ng> DatasetReader : : s p l i t ( 166 const std : : s t r i n g& s t r ) { 167 std : : vector<std : : s t r i ng> r e t ; 168 boost : : c ha r s epa ra to r<char> sep ( ” , ” , ”” , 169 boost : : keep empty tokens ) ; 170 boost : : t oken i z e r<boost : : c ha r s epa ra to r<char> > 171 tokens ( st r , sep ) ; 172 for ( boost : : t oken i z e r<boost : : c ha r s epa ra to r<char> 173 > : : i t e r a t o r i t = tokens . begin ( ) ; 174 i t != tokens . end ( ) ; ++i t ) { 175 std : : s t r i n g temp = ∗ i t ; 176 boost : : tr im ( temp ) ; 177 r e t . push back ( temp ) ; 178 } 179

180 return r e t ; 181 } 182 }

Listing B.100: The header file of class Dendrogram. 1 // c l / u t i l i t i e s /dendrogram . hpp 2 #ifndef CLUSLIB DENDROGRAMHPP 3 #define CLUSLIB DENDROGRAMHPP 4

5 #include<c l / types . hpp> 6 #include<sstream> 7 #include<s t r i ng> 8

9 namespace ClusLib {

13 Dendrogram( ) ; 14 void setbox ( Real x1 , Real y1 , Real x2 , Real y2 ) ; 15 void drawDot ( Real x , Real y ) ; 16 void drawCirc l e ( Real x , Real y , Real r ) ; 17 void drawLine ( Real x1 , Real y1 , Real x2 , Real y2 ) ; 18 void drawText ( Real x , Real y , const std : : s t r i n g&txt ) ; 19 void save ( const std : : s t r i n g &f i l ename ) const ; 20

21 private : 22 std : : s t r i ng s t r e am s s ; 23 Real x1 ; 24 Real y1 ; 25 Real x2 ; 26 Real y2 ; 27 } ; 28

Listing B.101: The source file of class Dendrogram. 1 // c l / u t i l i t i e s /dendrogram . cpp 2 #include<c l / u t i l i t i e s /dendrogram . hpp> 3 #include<f stream> 4 #include<ctime> 5 #include<iomanip> 6

7 namespace ClusLib { 8

9 Dendrogram : : Dendrogram( ) 10 : x1 (0 ) , y1 (0 ) , x2 (100 ) , y2 (100) { 11 } 12

13 void Dendrogram : : setbox ( Real x1 , Real y1 , Real x2 , Real y2 ) { 14 x1 = x1 ; 15 y1 = y1 ; 16 x2 = x2 ; 17 y2 = y2 ; 18 } 19

20 void Dendrogram : : drawDot ( Real x , Real y ) { 21 s s << ”% Dot\n” ; 22 s s << ” 3 slw ” ; 23 s s << ” 1 s l c ” ; 24 s s << ” 0 s l j ” ; 25 s s << ”n ” 26 << x << ” ” << y << ” ” 27 << ”m ” 28 << x << ” ” << y << ” ” 29 << ” l 0 .0000 0 .0000 0 .0000 srgb s t r oke ” 30 << std : : endl ; 31 } 32

33 void Dendrogram : : drawCirc l e ( Real x , Real y , Real r ) { 34 s s << ”% E l l i p s e \n” ; 35 s s << ” 0 . 5 slw ” ; 36 s s << ” 1 s l c ” ; 37 s s << ” 0 s l j ” << std : : endl ; 38 s s << ” gs ” << x << ” ” << y << ” t r ” ; 39 s s << ” n ” << r << ” 0 m 0 0 ” 40 << r << ” 0 . 0 360 .0 arc ” ; 41 s s << ” 0 .0000 0 .0000 0 .0000 srgb ” ; 42 s s << ” s t r oke gr ” << std : : endl ;

45 void Dendrogram : : drawLine ( Real x1 , Real y1 , 46 Real x2 , Real y2 ) { 47 s s << ”% Line\n” ; 48 s s << ” 0 . 5 slw ” ; 49 s s << ” 1 s l c ” ; 50 s s << ” 0 s l j ” ; 51 s s << ”n ” 52 << x1 << ” ” << y1 << ” ” 53 << ”m ” 54 << x2 << ” ” << y2 << ” ” 55 << ” l 0 .0000 0 .0000 0 .0000 srgb s t r oke ” 56 << std : : endl ; 57 } 58

59 void Dendrogram : : drawText ( Real x , Real y , 60 const std : : s t r i n g&txt ) { 61 s s << ”% Text\n” ; 62 s s << ” gs /Times−Roman f f 8 s c f s f ” ; 63 s s << ” ” << x− 7 − txt . s i z e ( )∗3 << ” ” << y−3 << ” m” ; 64 s s << ” ( ” << txt << ” ) ” 65 << ” 0 .0000 0 .0000 0 .0000 srgb ” 66 << ” sh gr” << std : : endl ; 67

70 void Dendrogram : : save ( const std : : s t r i n g &f i l ename ) const { 71 std : : o f s t ream f i l e ( f i l ename . c s t r ( ) ) ; 72

73 f i l e << ”%!PS−Adobe−2.0 EPSF−2.0” << std : : endl ; 74 f i l e << ”%%Ti t l e : ” << f i l ename << std : : endl ; 75 f i l e << ”%%Creator : ClusLib ” << std : : endl ; 76 f i l e << ”%%CreationDate : June 23 , 2010 ” <<std : : endl ; 77 f i l e << ”%%BoundingBox : ” << std : : s e t p r e c i s i o n ( 8 ) 78 << x1 << ” ” << y1 << ” ” 79 << x2 << ” ” << y2 << std : : endl ; 80 f i l e << ”%Magni f i cat ion : 1 .0000 ” << std : : endl ; 81 f i l e << ”%%EndComments” << std : : endl ; 82 f i l e << std : : endl ; 83 f i l e << ”/cp { c l o s epa th} bind de f ” << std : : endl ; 84 f i l e << ”/ e f { e o f i l l } bind de f ” << std : : endl ; 85 f i l e << ”/gr { g r e s t o r e } bind de f ” << std : : endl ; 86 f i l e << ”/gs { gsave } bind de f ” << std : : endl ; 87 f i l e << ”/ sa { save } bind de f ” << std : : endl ; 88 f i l e << ”/ r s { r e s t o r e } bind de f ” << std : : endl ; 89 f i l e << ”/ l { l i n e t o } bind de f ” << std : : endl ; 90 f i l e << ”/m {moveto} bind de f ” << std : : endl ; 91 f i l e << ”/rm { rmoveto} bind de f ” << std : : endl ; 92 f i l e << ”/n {newpath} bind de f ” << std : : endl ; 93 f i l e << ”/ s { s t r oke } bind de f ” << std : : endl ; 94 f i l e << ”/sh {show} bind de f ” << std : : endl ; 95 f i l e << ”/ s l c { s e t l i n e c ap } bind de f ” << std : : endl ; 96 f i l e << ”/ s l j { s e t l i n e j o i n } bind de f ” << std : : endl ; 97 f i l e << ”/ slw { s e t l i n ew i d t h} bind de f ” << std : : endl ; 98 f i l e << ”/ srgb { s e t r gbc o l o r } bind de f ” << std : : endl ; 99 f i l e << ”/ ro t { r o ta t e } bind de f ” << std : : endl ;

100 f i l e << ”/ sc { s c a l e } bind de f ” << std : : endl ; 101 f i l e << ”/sd { se tdash } bind de f ” << std : : endl ; 102 f i l e << ”/ f f { f i n d f on t } bind de f ” << std : : endl ; 103 f i l e << ”/ s f { s e t f o n t } bind de f ” << std : : endl ; 104 f i l e << ”/ s c f { s c a l e f o n t } bind de f ” << std : : endl ; 105 f i l e << ”/sw { s t r i ngwid th} bind de f ” << std : : endl ; 106 f i l e << ”/sd { se tdash } bind de f ” << std : : endl ; 107 f i l e << ”/ t r { t r an s l a t e } bind de f ” << std : : endl ; 108 f i l e << ” 0 . 5 s e t l i n ew i d t h” << std : : endl ; 109 f i l e << s s . s t r ( ) <<std : : endl ;

112 f i l e << ”%EOF” << std : : endl ; 113 f i l e . c l o s e ( ) ; 114 } 115 }

Listing B.102: The header file of class nnMap. 1 // c l / u t i l i t i e s /nnmap . hpp 2 #ifndef CLUSLIB NNMAP HPP 3 #define CLUSLIB NNMAP HPP 4

5 #include<c l / types . hpp> 6 #include<c l / e r r o r s . hpp> 7 #include<map> 8 #include<algorithm> 9

10 namespace ClusLib { 11

12 typedef std : : pair<Size , Size> nnPair ; 13

14 class compare a { 15 public : 16 bool operator ( ) ( const nnPair &a , const nnPair &b) const { 17 S i z e amin = std : : min ( a . f i r s t , a . second ) ; 18 S i z e amax = std : : max( a . f i r s t , a . second ) ; 19 S i z e bmin = std : : min (b . f i r s t , b . second ) ; 20 S i z e bmax = std : : max(b . f i r s t , b . second ) ; 21

22 i f ( amin < bmin ) { 23 return true ; 24 } else i f ( amin == bmin ){ 25 i f (amax < bmax) { 26 return true ; 27 } else { 28 return fa l se ; 29 } 30 } else { 31 return fa l se ; 32 } 33 } 34 } ; 35

36 class compare b { 37 public : 38 bool operator ( ) ( const nnPair &a , const nnPair &b) const { 39 i f ( a . f i r s t < b . f i r s t ) { 40 return true ; 41 } else i f ( a . f i r s t == b . f i r s t ){ 42 i f ( a . second < b . second ) { 43 return true ; 44 } else { 45 return fa l se ; 46 } 47 } else { 48 return fa l se ; 49 } 50 } 51 } ; 52

56 typedef typename std : : map<nnPair , T, C> : : va lue type 57 va lue type ; 58 typedef typename std : : map<nnPair , T, C> : : i t e r a t o r 59 i t e r a t o r ; 60 typedef typename std : : map<nnPair , T, C> : : c o n s t i t e r a t o r 61 c o n s t i t e r a t o r ; 62

63 std : : pair<i t e r a t o r , bool> 64 add item ( S i z e i , S i z e j , T item ) ; 65 bool conta in key ( S i z e i , S i z e j ) const ; 66 T& operator ( ) ( S i z e i , S i z e j ) ; 67 const T& operator ( ) ( S i z e i , S i z e j ) const ; 68 void c l e a r ( ) ; 69

70 i t e r a t o r begin ( ) ; 71 i t e r a t o r end ( ) ; 72 c o n s t i t e r a t o r begin ( ) const ; 73 c o n s t i t e r a t o r end ( ) const ; 74 private : 75 std : : map<nnPair , T, C> map ; 76 } ; 77

78 typedef nnMap<Real , compare a> iirMapA ; 79 typedef nnMap<Size , compare b> i i iMapB ; 80

81 class c ompare i i r { 82 public : 83 bool operator ( ) ( const iirMapA : : va lue type& a , 84 const iirMapA : : va lue type& b) { 85 i f ( a . second < b . second ) { 86 return true ; 87 } 88 return fa l se ; 89 } 90 } ; 91

92 template<typename T, typename C> 93 in l ine std : : pair<typename nnMap<T,C> : : i t e r a t o r , bool> 94 nnMap<T,C> : : add item ( S i z e i , S i z e j , T item ) { 95 std : : pair<i t e r a t o r , bool> r e t = 96 map . i n s e r t ( std : : pair<nnPair , T>(nnPair ( i , j ) , item ) ) ; 97

98 return r e t ; 99 }

101 template<typename T, typename C> 102 in l ine bool nnMap<T,C> : : conta in key ( S i z e i , S i z e j ) 103 const { 104 c o n s t i t e r a t o r i t ; 105 i t = map . f i nd ( nnPair ( i , j ) ) ; 106 i f ( i t != map . end ( ) ){ 107 return true ; 108 } else { 109 return fa l se ; 110 } 111 } 112

113 template<typename T, typename C> 114 in l ine T& nnMap<T,C> : : operator ( ) ( S i z e i , S i z e j ) { 115 i t e r a t o r i t ; 116 i t = map . f i nd ( nnPair ( i , j ) ) ; 117 i f ( i t != map . end ( ) ){ 118 return i t−>second ; 119 } else { 120 FAIL( ”Can not f i nd key ( ”<<i<<” , ”<<j<<” ) in nnMap” ) ;

124 template<typename T, typename C> 125 in l ine const T& nnMap<T,C> : : operator ( ) ( S i z e i , S i z e j ) 126 const { 127 c o n s t i t e r a t o r i t ; 128 i t = map . f i nd ( nnPair ( i , j ) ) ; 129 i f ( i t != map . end ( ) ){ 130 return i t−>second ; 131 } else { 132 FAIL( ”Can not f i nd key ( ”<<i<<” , ”<<j<<” ) in nnMap” ) ; 133 } 134 } 135

136 template<typename T, typename C> 137 in l ine typename nnMap<T,C> : : i t e r a t o r nnMap<T,C> : : begin ( ) { 138 return map . begin ( ) ; 139 } 140

141 template<typename T, typename C> 142 in l ine typename nnMap<T,C> : : i t e r a t o r nnMap<T,C> : : end ( ) { 143 return map . end ( ) ; 144 } 145

146 template<typename T, typename C> 147 in l ine typename nnMap<T,C> : : c o n s t i t e r a t o r nnMap<T,C> : : begin ( ) 148 const { 149 return map . begin ( ) ; 150 } 151

152 template<typename T, typename C> 153 in l ine typename nnMap<T,C> : : c o n s t i t e r a t o r nnMap<T,C> : : end ( ) 154 const { 155 return map . end ( ) ; 156 } 157

158 template<typename T, typename C> 159 in l ine void nnMap<T,C> : : c l e a r ( ) { 160 map . c l e a r ( ) ; 161 } 162 } 163 #endif

Listing B.103: Declarations of matrix functions. 1 // c l / u t i l i t i e s /matrix . hpp 2 #ifndef CLUSLIB CHOLESKY HPP 3 #define CLUSLIB CHOLESKY HPP 4

5 #include<c l / types . hpp> 6 #include<boost /numeric/ ub las / vec tor . hpp> 7 #include<boost /numeric/ ub las / vec tor proxy . hpp> 8 #include<boost /numeric/ ub las /matrix . hpp> 9 #include<boost /numeric/ ub las /matrix proxy . hpp>

10 #include<boost /numeric/ ub las / symmetric . hpp> 11 #include<boost /numeric/ ub las / t r i a ngu l a r . hpp> 12 #include<boost /numeric/ ub las / i o . hpp> 13 #include<boost /numeric/ ub las / operat ion . hpp> 14

15 namespace ClusLib {

19 S i z e cho l ( const ublas : : symmetric matrix<Real>& A, 20 ub las : : t r i angu l a r mat r i x<Real>& L ) ; 21

22 S i z e t r i a n gu l a r ma t r i x i n v e r s e ( 23 const ub las : : t r i angu l a r mat r i x<Real>& L , 24 ub las : : t r i angu l a r mat r i x<Real>& iL ) ; 25 } 26

Listing B.104: Implementation of matrix function. 1 // c l / u t i l i t i e s /matrix . cpp 2 #include<c l / u t i l i t i e s /matrix . hpp> 3 #include<c l / e r r o r s . hpp> 4 #include<c l / types . hpp> 5 #include<cmath> 6

7 namespace ClusLib { 8

9 S i z e cho l ( const ublas : : symmetric matrix<Real>& A, 10 ub las : : t r i angu l a r mat r i x<Real>& L) { 11 using namespace ublas ; 12 ASSERT(A. s i z e 1 ( ) == A. s i z e 2 ( ) , ”matrix A i s not square ” ) ; 13 ASSERT(L . s i z e 1 ( ) == L . s i z e 2 ( ) , ”matrix L i s not square ” ) ; 14 ASSERT(A. s i z e 1 ( ) == L . s i z e 1 ( ) , 15 ”matrix A and matrix L have d i f f e r e n t dimensions ” ) ; 16

17 const S i z e n = A. s i z e 1 ( ) ; 18 for ( S i z e k=0; k < n ; k++) { 19 double qL kk = A(k , k ) − i nner prod ( 20 p r o j e c t ( row (L , k ) , range (0 , k ) ) , 21 p r o j e c t ( row (L , k ) , range (0 , k ) ) ) ; 22

23 i f ( qL kk <= 0) { 24 return 1 + k ; 25 } 26

27 double L kk = sqr t ( qL kk ) ; 28 L(k , k ) = L kk ; 29

30 matrix column<t r i angu l a r mat r i x<Real> > c l k (L , k ) ; 31 p r o j e c t ( c lk , range ( k+1, n) ) = ( 32 p r o j e c t ( column (A, k ) , range (k+1, n) ) − prod ( 33 p r o j e c t (L , range (k+1, n ) , range (0 , k ) ) , 34 p r o j e c t ( row(L , k ) , range (0 , k ) ) ) ) / L kk ; 35 } 36

37 return 0 ; 38 } 39

40 S i z e t r i a n gu l a r ma t r i x i n v e r s e ( 41 const ub las : : t r i angu l a r mat r i x<Real>& L , 42 ub las : : t r i angu l a r mat r i x<Real>& iL ) { 43 using namespace ublas ; 44 ASSERT(L . s i z e 1 ( ) == L . s i z e 2 ( ) , ”matrix L i s not square ” ) ; 45 ASSERT( iL . s i z e 1 ( ) == iL . s i z e 2 ( ) , 46 ”matrix iL i s not square ” ) ; 47 ASSERT(L . s i z e 1 ( ) == iL . s i z e 1 ( ) , 48 ”matrix L and matrix iL have d i f f e r e n t dimensions ” ) ; 49

50 const S i z e n = L . s i z e 1 ( ) ; 51 for ( S i z e k=0; k < n ; k++) { 52 i f ( std : : f abs (L(k , k ) ) < EPSILON) {

57 for ( S i z e k=0; k < n ; k++) { 58 iL (k , k ) = 1 / L(k , k ) ; 59

60 for ( S i z e j=k+1; j<n ; ++j ){ 61 iL ( j , k ) = − i nner prod ( 62 p r o j e c t ( row(L , j ) , range (k , j ) ) , 63 p r o j e c t ( column ( iL , k ) , range (k , j ) ) ) 64 / L( j , j ) ; 65 } 66 } 67

68 return 0 ; 69 } 70

Listing B.105: The header file of null types. 1 // c l / u t i l i t i e s / nu l l . hpp 2 #ifndef CLUSLIB NULL HPP 3 #define CLUSLIB NULL HPP 4

5 #include<c l / types . hpp> 6

7 namespace ClusLib { 8

9 template <class Type> 10 class Null ; 11

12 template <> 13 class Null<Intege r> { 14 public : 15 Null ( ) {} 16 operator In t e ge r ( ) const { 17 return In t e ge r (NULL INTEGER) ; 18 } 19 } ; 20

21 template <> 22 class Null<Size> { 23 public : 24 Null ( ) {} 25 operator S i z e ( ) const { 26 return S i z e (NULL SIZE ) ; 27 } 28 } ; 29

30 template<> 31 class Null<Real> { 32 public : 33 Null ( ) {} 34 operator Real ( ) const { 35 return Real (NULL REAL) ; 36 } 37 } ; 38 } 39

Listing B.106: The Makefile.am file in the directory examples. 1 AM CPPFLAGS = −I${ t o p s r c d i r } −I$ { t op bu i l dd i r } 2

3 SUBDIRS = \ 4 agg lomerat ive \ 5 cmean \ 6 diana \ 7 f s c \ 8 gkmode \ 9 gmc \

13 EXTRA DIST = \ 14 con ta in e r / con ta in e r 1 . cpp \ 15 con ta in e r / con ta in e r 2 . cpp \ 16 datase t / datase t . cpp \ 17 datase t / datase tout . txt \ 18 data s e tgene ra to r / da ta s e tgene ra to r . cpp \ 19 data se tg ene ra to r /9 po in t s . txt \ 20 data se t r e ade r / da ta se t r e ade r . cpp \ 21 data s e t r e ade r / da ta s e t r e ade rou t . txt \ 22 dummy/dummy. cpp \ 23 dummy/out3 . txt \ 24 dummy/out5 . txt \ 25 mpikmean/mpikmean . hpp \ 26 mpikmean/mpikmean . cpp \ 27 mpikmean/mpimain . cpp \ 28 nnmap/nnmap . cpp \ 29 nnmap/nnmapout . txt 30

31 examples : $ (SUBDIRS) 32

33 $ (SUBDIRS ) : 34 $ (MAKE) −C $@ examples

Listing B.107: The Makefile.am file in the directory agglomerative. 1 AM CPPFLAGS = −I${ t o p s r c d i r } −I$ { t op bu i l dd i r } 2

noinst_PROGRAMS = agglomerative

agglomerative_SOURCES = agglomerative.cpp
agglomerative_LDADD = ../../cl/libClusLib.la
agglomerative_LDFLAGS = -l${BOOST_PROGRAM_OPTIONS_LIB}

Listing B.108: Program to illustrate agglomerative hierarchical algorithms. 1 // examples/agg lomerat ive/agg lomerat ive . cpp 2 #include<c l / c l u s l i b . hpp>

6 #include<f stream> 7 #include<iomanip> 8 #include<boost / program options . hpp> 9

10 using namespace ClusLib ; 11 using namespace boost : : program options ; 12 using namespace std ; 13

14 int main ( int ac , char∗ av [ ] ) { 15 try{ 16 o p t i o n s de s c r i p t i o n desc ( ”Allowed opt ions ” ) ; 17 desc . add opt ions ( ) 18 ( ” help ” , ”produce help message” ) 19 ( ”method” , value<s t r i ng >()−>de f au l t v a lu e ( ” s i n g l e ” ) , 20 ”method ( s i ng l e , complete , gaverage , wgaverage , \ 21 centro id , median , ward ) ” ) 22 ( ” d a t a f i l e ” , value<s t r i ng >() , ” the data f i l e ” ) 23 ( ”p” , value<Size >()−>de f au l t v a lu e (50 ) , 24 ”maximum number o f nodes to show in dendrogram” ) 25 ( ”maxclust ” , value<Size >()−>de f au l t v a lu e (3 ) , 26 ”maximum number o f c l u s t e r s ” ) ; 27

28 var iab le s map vm; 29 s t o r e ( parse command l ine ( ac , av , desc ) , vm) ; 30 no t i f y (vm) ; 31

32 i f (vm. count ( ” help ” ) | | ac==1) { 33 cout << desc << ”\n” ; 34 return 1 ; 35 } 36

37 s t r i n g d a t a f i l e ; 38 i f (vm. count ( ” d a t a f i l e ” ) ) { 39 d a t a f i l e = vm[ ” d a t a f i l e ” ] . as<s t r i ng >() ; 40 } else { 41 cout << ” Please prov ide a data f i l e \n” ; 42 return 1 ; 43 } 44

45 s t r i n g method ; 46 i f (vm. count ( ”method” ) ){ 47 method = vm[ ”method” ] . as<s t r i ng >() ; 48 } 49

50 S i z e maxclust ; 51 i f (vm. count ( ”maxclust ” ) ) { 52 maxclust = vm[ ”maxclust ” ] . as<Size >() ; 53 } 54 S i z e p ; 55 i f (vm. count ( ”p” ) ) { 56 p = vm[ ”p” ] . as<Size >() ; 57 } 58

59 DatasetReader reader ( d a t a f i l e ) ; 60 boost : : shared ptr<Dataset> ds ; 61 reader . f i l l ( ds ) ; 62

63 std : : cout<<∗ds<<std : : endl ; 64

65 boost : : shared ptr<Algorithm> ca ; 66 boost : : shared ptr<Distance> d i s t (new Euc l ideanDistance ( ) ) ; 67 i f (method == ” s i n g l e ” ) { 68 ca = boost : : shared ptr<Algorithm>(new S ing l e ( ) ) ; 69 } else i f (method == ” complete” ) { 70 ca = boost : : shared ptr<Algorithm>(new Complete ( ) ) ;

Average ( ) ) ; 73 } else i f (method == ”wgaverage” ) { 74 ca = boost : : shared ptr<Algorithm>(new Weighted ( ) ) ; 75 } else i f (method == ” cen t ro i d ” ) { 76 ca = boost : : shared ptr<Algorithm>(new Centroid ( ) ) ; 77 } else i f (method == ”median” ) { 78 ca = boost : : shared ptr<Algorithm>(new Median ( ) ) ; 79 } else i f (method == ”ward” ) { 80 ca = boost : : shared ptr<Algorithm>(new Ward ( ) ) ; 81 } else { 82 FAIL( ”method ” << method << ” i s not a v a i l a b l e ” ) ; 83 } 84

85 Arguments &Arg = ca−>getArguments ( ) ; 86 Arg . ds = ds ; 87 Arg . d i s t anc e = d i s t ; 88

89 boost : : t imer t ; 90 t . r e s t a r t ( ) ; 91 ca−>c l u s t e r i z e ( ) ; 92 double seconds = t . e lapsed ( ) ; 93 std : : cout<<” completed in ”<<seconds<<” seconds ” 94 <<std : : endl ; 95

96 std : : s t r i n g p r e f i x ; 97 s i z e t ind = d a t a f i l e . f i n d l a s t o f ( ’ . ’ ) ; 98 i f ( ind != std : : s t r i n g : : npos ) { 99 p r e f i x = d a t a f i l e . sub s t r (0 , ind ) ;

100 } else { 101 p r e f i x = d a t a f i l e ; 102 } 103 p r e f i x += ”−” + method + ”−” ; 104

105 const Resu l t s& Res = ca−>ge tRe su l t s ( ) ; 106

107 HCluster ing hc = 108 boost : : any cast<HCluster ing>(Res . ge t ( ”hc” ) ) ; 109 hc . save ( p r e f i x + ”dendrogram . eps” ,p ) ; 110 Jo inVa lueV i s i t o r jv ; 111 hc . root ()−>accept ( jv ) ; 112

113 std : : s t r i n g j v f i l e = p r e f i x + ” j o inVa lu e s . csv ” ; 114 std : : o f s t ream o f ; 115 o f . open ( j v f i l e . c s t r ( ) ) ; 116 of<<j v ; 117 o f . c l o s e ( ) ; 118

119 PCluste r ing pc = hc . g e t p c ( maxclust ) ; 120 std : : cout<<pc<<std : : endl ; 121 pc . save ( p r e f i x + ”pcsummary. txt ” ) ; 122

123 return 0 ; 124 } catch ( std : : except ion& e ) { 125 std : : cout<<e . what()<< std : : endl ; 126 return 1 ; 127 } catch ( . . . ) { 128 std : : cout<<”unknown e r r o r ”<<std : : endl ; 129 return 2 ; 130 } 131 }

Listing B.109: The Makefile.am file in the directory diana. 1 AM CPPFLAGS = −I${ t o p s r c d i r } −I$ { t op bu i l dd i r } 2

noinst_PROGRAMS = diana

diana_SOURCES = diana.cpp
diana_LDADD = ../../cl/libClusLib.la
diana_LDFLAGS = -l${BOOST_PROGRAM_OPTIONS_LIB}

Listing B.110: Program to illustrate the DIANA algorithm. 1 // examples/diana/diana . cpp 2 #include<c l / c l u s l i b . hpp> 3

4 #include<boost / timer . hpp> 5 #include<iostream> 6 #include<f stream> 7 #include<iomanip> 8 #include<boost / program options . hpp> 9

10 using namespace ClusLib ; 11 using namespace boost : : program options ; 12 using namespace std ; 13

14 int main ( int ac , char∗ av [ ] ) { 15 try{ 16 o p t i o n s de s c r i p t i o n desc ( ”Allowed opt ions ” ) ; 17 desc . add opt ions ( ) 18 ( ” help ” , ”produce help message” ) 19 ( ” d a t a f i l e ” , value<s t r i ng >() , ” the data f i l e ” ) 20 ( ”p” , value<Size >()−>de f au l t v a lu e (50 ) , 21 ”maximum number o f nodes to show in dendrogram” ) 22 ( ”maxclust ” , value<Size >()−>de f au l t v a lu e (3 ) , 23 ”maximum number o f c l u s t e r s ” ) ; 24

25 var iab le s map vm; 26 s t o r e ( parse command l ine ( ac , av , desc ) , vm) ; 27 no t i f y (vm) ; 28

29 i f (vm. count ( ” help ” ) | | ac==1) { 30 cout << desc << ”\n” ; 31 return 1 ; 32 } 33

34 s t r i n g d a t a f i l e ; 35 i f (vm. count ( ” d a t a f i l e ” ) ) { 36 d a t a f i l e = vm[ ” d a t a f i l e ” ] . as<s t r i ng >() ; 37 } else { 38 cout << ” Please prov ide a data f i l e \n” ; 39 return 1 ; 40 } 41

42 S i z e maxclust ; 43 i f (vm. count ( ”maxclust ” ) ) { 44 maxclust = vm[ ”maxclust ” ] . as<Size >() ; 45 } 46 S i z e p ; 47 i f (vm. count ( ”p” ) ) { 48 p = vm[ ”p” ] . as<Size >() ; 49 } 50

51 DatasetReader reader ( d a t a f i l e ) ;

55 std : : cout<<∗ds<<std : : endl ; 56

57 boost : : shared ptr<Algorithm> ca (new Diana ( ) ) ; 58 boost : : shared ptr<Distance> d i s t (new Euc l ideanDistance ( ) ) ; 59

60 Arguments &Arg = ca−>getArguments ( ) ; 61 Arg . ds = ds ; 62 Arg . d i s t anc e = d i s t ; 63

64 boost : : t imer t ; 65 t . r e s t a r t ( ) ; 66 ca−>c l u s t e r i z e ( ) ; 67 double seconds = t . e lapsed ( ) ; 68 std : : cout<<” completed in ”<<seconds<<” seconds ” 69 <<std : : endl ; 70

71 std : : s t r i n g p r e f i x ; 72 s i z e t ind = d a t a f i l e . f i n d l a s t o f ( ’ . ’ ) ; 73 i f ( ind != std : : s t r i n g : : npos ) { 74 p r e f i x = d a t a f i l e . sub s t r (0 , ind ) ; 75 } else { 76 p r e f i x = d a t a f i l e ; 77 } 78 p r e f i x += ”−diana−” ; 79

80 const Resu l t s& Res = ca−>ge tRe su l t s ( ) ; 81

82 HCluster ing hc = 83 boost : : any cast<HCluster ing>(Res . ge t ( ”hc” ) ) ; 84 hc . save ( p r e f i x + ”dendrogram . eps” ,p ) ; 85 Jo inVa lueV i s i t o r jv ; 86 hc . root ()−>accept ( jv ) ; 87

88 std : : s t r i n g j v f i l e = p r e f i x + ” j o inVa lu e s . csv ” ; 89 std : : o f s t ream o f ; 90 o f . open ( j v f i l e . c s t r ( ) ) ; 91 of<<j v ; 92 o f . c l o s e ( ) ; 93

94 PCluste r ing pc = hc . g e t p c ( maxclust ) ; 95 std : : cout<<pc<<std : : endl ; 96 pc . save ( p r e f i x + ”pcsummary. txt ” ) ; 97

98 return 0 ; 99 } catch ( std : : except ion& e ) {

100 std : : cout<<e . what()<< std : : endl ; 101 return 1 ; 102 } catch ( . . . ) { 103 std : : cout<<”unknown e r r o r ”<<std : : endl ; 104 return 2 ; 105 } 106 }

Listing B.111: The Makefile.am file in the directory kmean. 1 AM CPPFLAGS = −I${ t o p s r c d i r } −I$ { t op bu i l dd i r } 2

3 noinst PROGRAMS = kmean

. 7 kmean LDFLAGS = −l $ {BOOST PROGRAM OPTIONS LIB}

Listing B.112: Program to illustrate the k-means algorithm. 1 // examples/kmean/kmean . cpp 2 #include<c l / c l u s l i b . hpp> 3

4 #include<boost / timer . hpp> 5 #include<boost / program options . hpp> 6 #include<iostream> 7 #include<sstream> 8 #include<iomanip> 9

10 using namespace ClusLib ; 11 using namespace std ; 12 using namespace boost : : program options ; 13

14 int main ( int ac , char∗ av [ ] ) { 15 try{ 16 o p t i o n s de s c r i p t i o n desc ( ”Allowed opt ions ” ) ; 17 desc . add opt ions ( ) 18 ( ” help ” , ”produce help message” ) 19 ( ” d a t a f i l e ” , value<s t r i ng >() , ” the data f i l e ” ) 20 ( ”k” , value<Size >()−>de f au l t v a lu e (3 ) , 21 ”number o f c l u s t e r s ” ) 22 ( ” seed ” , value<Size >()−>d e f au l t v a lu e (1 ) , 23 ” seed used to choose random i n i t i a l c en t e r s ” ) 24 ( ”maxiter ” , value<Size >()−>d e f au l t v a l u e (100 ) , 25 ”maximum number o f i t e r a t i o n s ” ) 26 ( ”numrun” , value<Size >()−>d e f au l t va l u e (1 ) , 27 ”number o f runs” ) ; 28

29 var iab le s map vm; 30 s t o r e ( parse command l ine ( ac , av , desc ) , vm) ; 31 no t i f y (vm) ; 32

33 i f (vm. count ( ” help ” ) | | ac==1) { 34 cout << desc << ”\n” ; 35 return 1 ; 36 } 37

38 s t r i n g d a t a f i l e ; 39 i f (vm. count ( ” d a t a f i l e ” ) ) { 40 d a t a f i l e = vm[ ” d a t a f i l e ” ] . as<s t r i ng >() ; 41 } else { 42 cout << ” Please prov ide a data f i l e \n” ; 43 return 1 ; 44 } 45

46 S i z e numclust = vm[ ”k” ] . as<Size >() ; 47 S i z e maxiter = vm[ ”maxiter” ] . as<Size >() ; 48 S i z e numrun = vm[ ”numrun” ] . as<Size >() ; 49 S i z e seed = vm[ ” seed ” ] . as<Size >() ; 50

51 DatasetReader reader ( d a t a f i l e ) ; 52 boost : : shared ptr<Dataset> ds ; 53 reader . f i l l ( ds ) ; 54 std : : cout<<∗ds<<std : : endl ; 55

56 boost : : shared ptr<Eucl ideanDistance > 57 ed (new Euc l ideanDistance ( ) ) ; 58

60 boost : : t imer t ;

63 Resu l t s Res ; 64 Real a v g i t e r = 0 . 0 ; 65 Real avgerror = 0 . 0 ; 66 Real dMin = MAXREAL; 67 Real e r r o r ; 68 for ( S i z e i =1; i<=numrun ; ++i ) { 69 Kmean ca ; 70 Arguments &Arg = ca . getArguments ( ) ; 71 Arg . ds = ds ; 72 Arg . d i s t anc e = ed ; 73 Arg . i n s e r t ( ”numclust” , numclust ) ; 74 Arg . i n s e r t ( ”maxiter” , maxiter ) ; 75 Arg . i n s e r t ( ” seed ” , seed ) ; 76 i f (numrun == 1) { 77 Arg . add i t i o n a l [ ” seed ” ] = seed ; 78 } else { 79 Arg . add i t i o n a l [ ” seed ” ] = i ; 80 } 81

82 ca . c l u s t e r i z e ( ) ; 83

84 const Resu l t s &tmp = ca . ge tRe su l t s ( ) ; 85 avg i t e r += boost : : any cast<Size >(tmp . ge t ( ”numiter” ) ) ; 86 e r r o r = boost : : any cast<Real>(tmp . ge t ( ” e r r o r ” ) ) ; 87 avgerror += er r o r ; 88 i f ( e r r o r < dMin) { 89 dMin = e r r o r ; 90 Res = tmp ; 91 } 92 } 93 avg i t e r /= numrun ; 94 avgerror /= numrun ; 95

96 double seconds = t . e lapsed ( ) ; 97 std : : cout<<” completed in ”<<seconds<<” seconds ” 98 <<std : : endl ; 99

100 PCluste r ing pc = 101 boost : : any cast<PCluster ing >(Res . ge t ( ”pc” ) ) ; 102

103 std : : cout<<pc<<std : : endl ; 104 std : : cout<<”Number o f runs : ”<<numrun<<std : : endl ; 105 std : : cout<<”Average number o f i t e r a t i o n s : ” 106 <<avg i t e r<<std : : endl ; 107 std : : cout<<”Average e r r o r : ”<<avgerror<<std : : endl ; 108 std : : cout<<”Best e r r o r : ”<<dMin<<std : : endl ; 109

110 std : : s t r i n g p r e f i x ; 111 s i z e t ind = d a t a f i l e . f i n d l a s t o f ( ’ . ’ ) ; 112 i f ( ind != std : : s t r i n g : : npos ) { 113 p r e f i x = d a t a f i l e . sub s t r (0 , ind ) ; 114 } else { 115 p r e f i x = d a t a f i l e ; 116 } 117 std : : s t r i ng s t r e am ss ; 118 ss<<pr e f i x<<”−kmean−k”<<numclust<<”−s ”<<seed<<” . txt ” ; 119 pc . save ( s s . s t r ( ) ) ; 120

121 return 0 ; 122 } catch ( std : : except ion& e ) { 123 std : : cout<<e . what()<< std : : endl ; 124 return 1 ; 125 } catch ( . . . ) { 126 std : : cout<<”unknown e r r o r ”<<std : : endl ; 127 return 2 ;

Listing B.113: The Makefile.am file in the directory cmean. 1 AM CPPFLAGS = −I${ t o p s r c d i r } −I$ { t op bu i l dd i r } 2

noinst_PROGRAMS = cmean

cmean_SOURCES = cmean.cpp
cmean_LDADD = ../../cl/libClusLib.la
cmean_LDFLAGS = -l${BOOST_PROGRAM_OPTIONS_LIB}

Listing B.114: Program to illustrate the c-means algorithm. 1 // examples/cmean/cmean . cpp 2 #include<c l / c l u s l i b . hpp> 3

4 #include<boost / timer . hpp> 5 #include<boost / program options . hpp> 6 #include<iostream> 7 #include<f stream> 8 #include<iomanip> 9

10 using namespace ClusLib ; 11 using namespace boost : : program options ; 12

13 int main ( int ac , char∗ av [ ] ) { 14 try{ 15 o p t i o n s de s c r i p t i o n desc ( ”Allowed opt ions ” ) ; 16 desc . add opt ions ( ) 17 ( ” help ” , ”produce help message” ) 18 ( ” d a t a f i l e ” , value<std : : s t r i ng >() , ” the data f i l e ” ) 19 ( ”k” , value<Size >()−>de f au l t v a lu e (3 ) , 20 ”number o f c l u s t e r s ” ) 21 ( ” seed ” , value<Size >()−>d e f au l t v a lu e (1 ) , 22 ” seed used to choose random i n i t i a l c en t e r s ” ) 23 ( ”maxiter ” , value<Size >()−>d e f au l t v a l u e (100 ) , 24 ”maximum number o f i t e r a t i o n s ” ) 25 ( ”numrun” , value<Size >()−>d e f au l t va l u e (1 ) , 26 ”number o f runs” ) 27 ( ” e p s i l o n ” , value<Real>()−>d e f au l t va l u e (1 e−6, ”1e−6” ) , 28 ” e p s i l o n ” ) 29 ( ” alpha” , value<Real>()−>d e f au l t v a lu e ( 2 . 1 , ” 2 . 1 ” ) , 30 ”alpha” ) 31 ( ” th r e sho ld” , 32 value<Real>()−>d e f au l t v a l u e (1 e−12,”1e−12” ) , 33 ”Objec t ive f unc t i on t o l e r a n c e” ) ; 34

35 var iab le s map vm; 36 s t o r e ( parse command l ine ( ac , av , desc ) , vm) ; 37 no t i f y (vm) ; 38

39 i f (vm. count ( ” help ” ) | | ac==1) { 40 std : : cout << desc << ”\n” ; 41 return 1 ; 42 } 43

44 std : : s t r i n g d a t a f i l e ; 45 i f (vm. count ( ” d a t a f i l e ” ) ) {

48 std : : cout << ”Please prov ide a data f i l e \n” ; 49 return 1 ; 50 } 51

52 S i z e numclust = vm[ ”k” ] . as<Size >() ; 53 S i z e maxiter = vm[ ”maxiter” ] . as<Size >() ; 54 S i z e numrun = vm[ ”numrun” ] . as<Size >() ; 55 S i z e seed = vm[ ” seed ” ] . as<Size >() ; 56 Real alpha = vm[ ” alpha” ] . as<Real >() ; 57 Real ep s i l o n = vm[ ” ep s i l o n ” ] . as<Real >() ; 58 Real th r e sho ld = vm[ ” th r e sho ld” ] . as<Real >() ; 59

60 DatasetReader reader ( d a t a f i l e ) ; 61 boost : : shared ptr<Dataset> ds ; 62 reader . f i l l ( ds ) ; 63

64 std : : cout<<∗ds<<std : : endl ; 65

66 boost : : t imer t ; 67 t . r e s t a r t ( ) ; 68

69 Resu l t s Res ; 70 Real a v g i t e r = 0 . 0 ; 71 Real avgerror = 0 . 0 ; 72 Real dMin = MAXREAL; 73 Real e r r o r ; 74 for ( S i z e i =1; i<=numrun ; ++i ) { 75 Cmean ca ; 76 Arguments &Arg = ca . getArguments ( ) ; 77 Arg . ds = ds ; 78 Arg . i n s e r t ( ” alpha” , alpha ) ; 79 Arg . i n s e r t ( ” e p s i l o n ” , e p s i l o n ) ; 80 Arg . i n s e r t ( ” th r e sho ld” , th r e sho ld ) ; 81 Arg . i n s e r t ( ”numclust” , numclust ) ; 82 Arg . i n s e r t ( ”maxiter” , maxiter ) ; 83 Arg . i n s e r t ( ” seed ” , seed ) ; 84 i f (numrun == 1) { 85 Arg . add i t i o n a l [ ” seed ” ] = seed ; 86 } else { 87 Arg . add i t i o n a l [ ” seed ” ] = i ; 88 } 89

90 ca . c l u s t e r i z e ( ) ; 91

92 const Resu l t s &tmp = ca . ge tRe su l t s ( ) ; 93 avg i t e r += boost : : any cast<Size >(tmp . ge t ( ”numiter” ) ) ; 94 e r r o r = boost : : any cast<Real>(tmp . ge t ( ”dObj” ) ) ; 95 avgerror += er r o r ; 96 i f ( e r r o r < dMin) { 97 dMin = e r r o r ; 98 Res = tmp ; 99 }

100 } 101 avg i t e r /= numrun ; 102 avgerror /= numrun ; 103

104 double seconds = t . e lapsed ( ) ; 105 std : : cout<<” completed in ”<<seconds<<” seconds ” 106 <<std : : endl ; 107

108 PCluste r ing pc = 109 boost : : any cast<PCluster ing >(Res . ge t ( ”pc” ) ) ; 110 std : : cout<<pc<<std : : endl ; 111 S i z e numiter = boost : : any cast<Size >(Res . ge t ( ”numiter” ) ) ; 112 e r r o r = boost : : any cast<Real>(Res . ge t ( ”dObj” ) ) ;

: : endl ; 115 std : : cout<<”Average number o f i t e r a t i o n s : ” 116 <<avg i t e r<<std : : endl ; 117 std : : cout<<”Average e r r o r : ”<<avgerror<<std : : endl ; 118 std : : cout<<”Number o f i t e r a t i o n s f o r the best case : ” 119 <<numiter<<std : : endl ; 120 std : : cout<<”Best e r r o r : ”<<e rror<<std : : endl ; 121

122 boost : : numeric : : ub las : : matrix<Real> fcm = 123 boost : : any cast<boost : : numeric : : ub las : : matrix<Real> >( 124 Res . ge t ( ”fcm” ) ) ; 125 std : : cout<<”\nFuzzy c l u s t e r memberships o f ” 126 <<” the f i r s t 5 r e c o rd s : ”<<std : : endl ; 127 for ( S i z e i =0; i<fcm . s i z e 1 ();++ i ){ 128 std : : cout<<”Record ”<<i ; 129 for ( S i z e j =0; j<fcm . s i z e 2 ();++ j ){ 130 std : : cout<<” , ”<<fcm( i , j ) ; 131 } 132 std : : cout<<std : : endl ; 133 i f ( i==4 && fcm . s i z e 1 ()>4) { 134 std : : cout<<” . . . ”<<std : : endl ; 135 break ; 136 } 137 } 138

139 return 0 ; 140 } catch ( std : : except ion& e ) { 141 std : : cout<<e . what()<< std : : endl ; 142 return 1 ; 143 } catch ( . . . ) { 144 std : : cout<<”unknown e r r o r ”<<std : : endl ; 145 return 2 ; 146 } 147 }

Listing B.115: The Makefile.am file in the directory kprototype. 1 AM CPPFLAGS = −I${ t o p s r c d i r } −I$ { t op bu i l dd i r } 2

noinst_PROGRAMS = kprototype

kprototype_SOURCES = kprototype.cpp
kprototype_LDADD = ../../cl/libClusLib.la
kprototype_LDFLAGS = -l${BOOST_PROGRAM_OPTIONS_LIB}

Listing B.116: Program to illustrate the k-prototype algorithm. 1 // examples/ kprototype/ kprototype . cpp 2 #include<c l / c l u s l i b . hpp> 3

4 #include<boost / timer . hpp> 5 #include<boost / program options . hpp> 6 #include<iostream> 7 #include<f stream> 8 #include<iomanip> 9

10 using namespace ClusLib ; 11 using namespace std ; 12 using namespace boost : : program options ;

15 try{ 16 o p t i o n s de s c r i p t i o n desc ( ”Allowed opt ions ” ) ; 17 desc . add opt ions ( ) 18 ( ” help ” , ”produce help message” ) 19 ( ” d a t a f i l e ” , value<s t r i ng >() , ” the data f i l e ” ) 20 ( ” normal i ze” , value<s t r i ng >()−>d e f au l t v a lu e ( ”no” ) , 21 ” normal i ze the data or not” ) 22 ( ”k” , value<Size >()−>de f au l t v a lu e (3 ) , 23 ”number o f c l u s t e r s ” ) 24 ( ” beta” , value<Real>()−>d e f au l t v a lu e (1 ) , 25 ” balance weight f o r d i s t anc e ” ) 26 ( ” seed ” , value<Size >()−>d e f au l t v a lu e (1 ) , 27 ” seed used to choose random i n i t i a l c en t e r s ” ) 28 ( ”maxiter ” , value<Size >()−>d e f au l t v a l u e (100 ) , 29 ”maximum number o f i t e r a t i o n s ” ) 30 ( ”numrun” , value<Size >()−>d e f au l t va l u e (1 ) , 31 ”number o f runs” ) ; 32

33 var iab le s map vm; 34 s t o r e ( parse command l ine ( ac , av , desc ) , vm) ; 35 no t i f y (vm) ; 36

37 i f (vm. count ( ” help ” ) | | ac==1) { 38 cout << desc << ”\n” ; 39 return 1 ; 40 } 41

42 s t r i n g d a t a f i l e ; 43 i f (vm. count ( ” d a t a f i l e ” ) ) { 44 d a t a f i l e = vm[ ” d a t a f i l e ” ] . as<s t r i ng >() ; 45 } else { 46 cout << ” Please prov ide a data f i l e \n” ; 47 return 1 ; 48 } 49

50 Real beta = vm[ ”beta” ] . as<Real >() ; 51 S i z e numclust = vm[ ”k” ] . as<Size >() ; 52 S i z e maxiter = vm[ ”maxiter” ] . as<Size >() ; 53 S i z e numrun = vm[ ”numrun” ] . as<Size >() ; 54 S i z e seed = vm[ ” seed ” ] . as<Size >() ; 55 s t r i n g normal i ze = vm[ ” normal i ze” ] . as<s t r i ng >() ; 56

57 DatasetReader reader ( d a t a f i l e ) ; 58 boost : : shared ptr<Dataset> ds ; 59 reader . f i l l ( ds ) ; 60

61 i f ( normal i ze != ”no” ) { 62 boost : : shared ptr<Dataset> ods = ds ; 63 DatasetNormalizer dn( ods ) ; 64 dn . f i l l ( ds ) ; 65 } 66

67 std : : cout<<∗ds<<std : : endl ; 68

69 boost : : shared ptr<Distance> d i s t (new MixedDistance ( beta ) ) ; 70

71 boost : : t imer t ; 72 t . r e s t a r t ( ) ; 73

74 Resu l t s Res ; 75 Real a v g i t e r = 0 . 0 ; 76 Real avgerror = 0 . 0 ; 77 Real dMin = MAXREAL; 78 Real e r r o r ; 79 for ( S i z e i =1; i<=numrun ; ++i ) {

. ; 82 Arg . ds = ds ; 83 Arg . d i s t anc e = d i s t ; 84 Arg . i n s e r t ( ”numclust” , numclust ) ; 85 Arg . i n s e r t ( ”maxiter” , maxiter ) ; 86 Arg . i n s e r t ( ” seed ” , seed ) ; 87 i f (numrun == 1) { 88 Arg . add i t i o n a l [ ” seed ” ] = seed ; 89 } else { 90 Arg . add i t i o n a l [ ” seed ” ] = i ; 91 } 92

93 ca . c l u s t e r i z e ( ) ; 94

95 const Resu l t s &tmp = ca . ge tRe su l t s ( ) ; 96 avg i t e r += boost : : any cast<Size >(tmp . ge t ( ”numiter” ) ) ; 97 e r r o r = boost : : any cast<Real>(tmp . ge t ( ” e r r o r ” ) ) ; 98 avgerror += er r o r ; 99 i f ( e r r o r < dMin) {

100 dMin = e r r o r ; 101 Res = tmp ; 102 } 103 } 104 avg i t e r /= numrun ; 105 avgerror /= numrun ; 106

107 double seconds = t . e lapsed ( ) ; 108 std : : cout<<” completed in ”<<seconds<<” seconds ” 109 <<std : : endl ; 110

111 PCluste r ing pc = 112 boost : : any cast<PCluster ing >(Res . ge t ( ”pc” ) ) ; 113

114 std : : cout<<pc<<std : : endl ; 115 std : : cout<<”Number o f runs : ”<<numrun<<std : : endl ; 116 std : : cout<<”Average number o f i t e r a t i o n s : ” 117 <<avg i t e r<<std : : endl ; 118 std : : cout<<”Average e r r o r : ”<<avgerror<<std : : endl ; 119 std : : cout<<”Best e r r o r : ”<<dMin<<std : : endl ; 120

121 return 0 ; 122 } catch ( std : : except ion& e ) { 123 std : : cout<<e . what()<< std : : endl ; 124 return 1 ; 125 } catch ( . . . ) { 126 std : : cout<<”unknown e r r o r ”<<std : : endl ; 127 return 2 ; 128 } 129 }

Listing B.117: The Makefile.am file in the directory gkmode.

AM_CPPFLAGS = -I${top_srcdir} -I${top_builddir}

noinst_PROGRAMS = gkmode

gkmode_SOURCES = gkmode.cpp
gkmode_LDADD = ../../cl/libClusLib.la
gkmode_LDFLAGS = -l${BOOST_PROGRAM_OPTIONS_LIB}

1 // examples/gkmode/gkmode. cpp 2 #include<c l / c l u s l i b . hpp> 3

4 #include<boost / timer . hpp> 5 #include<boost / program options . hpp> 6 #include<iostream> 7 #include<f stream> 8 #include<iomanip> 9

10 using namespace ClusLib ; 11 using namespace std ; 12 using namespace boost : : program options ; 13

14 int main ( int ac , char∗ av [ ] ) { 15 try{ 16 o p t i o n s de s c r i p t i o n desc ( ”Allowed opt ions ” ) ; 17 desc . add opt ions ( ) 18 ( ” help ” , ”produce help message” ) 19 ( ” d a t a f i l e ” , value<s t r i ng >() , ” the data f i l e ” ) 20 ( ”k” , value<Size >()−>de f au l t v a lu e (3 ) , 21 ”number o f c l u s t e r s ” ) 22 ( ”numpop” , value<Size >()−>d e f au l t va l u e (50 ) , 23 ”number o f chromosomes in the popu lat ion ” ) 24 ( ”maxgen” , value<Size >()−>d e f au l t va l u e (100 ) , 25 ”maximum number o f g ene ra t i on s ” ) 26 ( ” c” , value<Real>()−>de f au l t v a lu e ( 1 . 5 ) , 27 ”parameter c” ) 28 ( ”cm” , value<Real>()−>de f au l t va l u e ( 1 . 5 ) , 29 ”parameter c m” ) 30 ( ”pm” , value<Real>()−>de f au l t va l u e ( 0 . 2 , ” 0 . 2 ” ) , 31 ”mutation p r obab i l i t y ” ) ; 32

33 var iab le s map vm; 34 s t o r e ( parse command l ine ( ac , av , desc ) , vm) ; 35 no t i f y (vm) ; 36

37 i f (vm. count ( ” help ” ) | | ac==1) { 38 cout << desc << ”\n” ; 39 return 1 ; 40 } 41

42 s t r i n g d a t a f i l e ; 43 i f (vm. count ( ” d a t a f i l e ” ) ) { 44 d a t a f i l e = vm[ ” d a t a f i l e ” ] . as<s t r i ng >() ; 45 } else { 46 cout << ” Please prov ide a data f i l e \n” ; 47 return 1 ; 48 } 49

50 S i z e numclust = vm[ ”k” ] . as<Size >() ; 51 S i z e numpop = vm[ ”numpop” ] . as<Size >() ; 52 S i z e maxgen = vm[ ”maxgen” ] . as<Size >() ; 53 Real c = vm[ ”c” ] . as<Real >() ; 54 Real cm = vm[ ”cm” ] . as<Real >() ; 55 Real pm = vm[ ”pm” ] . as<Real >() ; 56

57 DatasetReader reader ( d a t a f i l e ) ; 58 boost : : shared ptr<Dataset> ds ; 59 reader . f i l l ( ds ) ; 60 std : : cout<<∗ds<<std : : endl ; 61

62 GKmode ca ; 63 Arguments &Arg = ca . getArguments ( ) ; 64 Arg . ds = ds ; 65 Arg . i n s e r t ( ”numclust” , numclust ) ;

69 Arg . i n s e r t ( ”cm” , cm) ; 70 Arg . i n s e r t ( ”pm” , pm) ; 71

72 boost : : t imer t ; 73 t . r e s t a r t ( ) ; 74

75 ca . c l u s t e r i z e ( ) ; 76

77 double seconds = t . e lapsed ( ) ; 78 std : : cout<<” completed in ”<<seconds<<” seconds ” 79 <<std : : endl ; 80

81 const Resu l t s& Res = ca . g e tRe su l t s ( ) ; 82

83 PCluste r ing pc = 84 boost : : any cast<PCluster ing >(Res . ge t ( ”pc” ) ) ; 85 std : : cout<<pc<<std : : endl ; 86

87 return 0 ; 88 } catch ( std : : except ion& e ) { 89 std : : cout<<e . what()<< std : : endl ; 90 return 1 ; 91 } catch ( . . . ) { 92 std : : cout<<”unknown e r r o r ”<<std : : endl ; 93 return 2 ; 94 } 95 }

Listing B.119: The Makefile.am file in the directory fsc.

AM_CPPFLAGS = -I${top_srcdir} -I${top_builddir}

noinst_PROGRAMS = fsc

fsc_SOURCES = fsc.cpp
fsc_LDADD = ../../cl/libClusLib.la
fsc_LDFLAGS = -l${BOOST_PROGRAM_OPTIONS_LIB}

Listing B.120: Program to illustrate the FSC algorithm. 1 // examples/ f sc / f sc . cpp 2 #include<c l / c l u s l i b . hpp> 3

4 #include<boost / timer . hpp> 5 #include<boost / program options . hpp> 6 #include<iostream> 7 #include<f stream> 8 #include<iomanip> 9

10 using namespace ClusLib ; 11 using namespace boost : : program options ; 12

13 int main ( int ac , char∗ av [ ] ) { 14 try{ 15 o p t i o n s de s c r i p t i o n desc ( ”Allowed opt ions ” ) ; 16 desc . add opt ions ( ) 17 ( ” help ” , ”produce help message” ) 18 ( ” d a t a f i l e ” , value<std : : s t r i ng >() , ” the data f i l e ” )

” ) 21 ( ” seed ” , value<Size >()−>d e f au l t v a lu e (1 ) , 22 ” seed used to choose random i n i t i a l c en t e r s ” ) 23 ( ”maxiter ” , value<Size >()−>d e f au l t v a l u e (100 ) , 24 ”maximum number o f i t e r a t i o n s ” ) 25 ( ”numrun” , value<Size >()−>d e f au l t va l u e (1 ) , 26 ”number o f runs” ) 27 ( ” e p s i l o n ” , value<Real>()−>d e f au l t va l u e (0 ) , 28 ” e p s i l o n ” ) 29 ( ” alpha” , value<Real>()−>d e f au l t v a lu e ( 2 . 1 , ” 2 . 1 ” ) , 30 ”alpha” ) 31 ( ” th r e sho ld” , 32 value<Real>()−>d e f au l t v a l u e (1 e−12,”1e−12” ) , 33 ”Objec t ive f unc t i on t o l e r a n c e” ) ; 34

35 var iab le s map vm; 36 s t o r e ( parse command l ine ( ac , av , desc ) , vm) ; 37 no t i f y (vm) ; 38

39 i f (vm. count ( ” help ” ) | | ac==1) { 40 std : : cout << desc << ”\n” ; 41 return 1 ; 42 } 43

44 std : : s t r i n g d a t a f i l e ; 45 i f (vm. count ( ” d a t a f i l e ” ) ) { 46 d a t a f i l e = vm[ ” d a t a f i l e ” ] . as<std : : s t r i ng >() ; 47 } else { 48 std : : cout << ”Please prov ide a data f i l e \n” ; 49 return 1 ; 50 } 51

52 S i z e numclust = vm[ ”k” ] . as<Size >() ; 53 S i z e maxiter = vm[ ”maxiter” ] . as<Size >() ; 54 S i z e numrun = vm[ ”numrun” ] . as<Size >() ; 55 S i z e seed = vm[ ” seed ” ] . as<Size >() ; 56 Real alpha = vm[ ” alpha” ] . as<Real >() ; 57 Real ep s i l o n = vm[ ” ep s i l o n ” ] . as<Real >() ; 58 Real th r e sho ld = vm[ ” th r e sho ld” ] . as<Real >() ; 59

60 DatasetReader reader ( d a t a f i l e ) ; 61 boost : : shared ptr<Dataset> ds ; 62 reader . f i l l ( ds ) ; 63

64 std : : cout<<∗ds<<std : : endl ; 65

66 boost : : t imer t ; 67 t . r e s t a r t ( ) ; 68

69 Resu l t s Res ; 70 Real a v g i t e r = 0 . 0 ; 71 Real avgerror = 0 . 0 ; 72 Real dMin = MAXREAL; 73 Real e r r o r ; 74 for ( S i z e i =1; i<=numrun ; ++i ) { 75 FSC ca ; 76 Arguments &Arg = ca . getArguments ( ) ; 77 Arg . ds = ds ; 78 Arg . i n s e r t ( ” alpha” , alpha ) ; 79 Arg . i n s e r t ( ” e p s i l o n ” , e p s i l o n ) ; 80 Arg . i n s e r t ( ” th r e sho ld” , th r e sho ld ) ; 81 Arg . i n s e r t ( ”numclust” , numclust ) ; 82 Arg . i n s e r t ( ”maxiter” , maxiter ) ; 83 Arg . i n s e r t ( ” seed ” , seed ) ; 84 i f (numrun == 1) { 85 Arg . add i t i o n a l [ ” seed ” ] = seed ;

i ; 88 } 89

90 ca . c l u s t e r i z e ( ) ; 91

92 const Resu l t s &tmp = ca . ge tRe su l t s ( ) ; 93 avg i t e r += boost : : any cast<Size >(tmp . ge t ( ”numiter” ) ) ; 94 e r r o r = boost : : any cast<Real>(tmp . ge t ( ”dObj” ) ) ; 95 avgerror += er r o r ; 96 i f ( e r r o r < dMin) { 97 dMin = e r r o r ; 98 Res = tmp ; 99 }

100 } 101 avg i t e r /= numrun ; 102 avgerror /= numrun ; 103

104 double seconds = t . e lapsed ( ) ; 105 std : : cout<<” completed in ”<<seconds<<” seconds ” 106 <<std : : endl ; 107

108 PCluste r ing pc = 109 boost : : any cast<PCluster ing >(Res . ge t ( ”pc” ) ) ; 110 std : : cout<<pc<<std : : endl ; 111 S i z e numiter = 112 boost : : any cast<Size >(Res . ge t ( ”numiter” ) ) ; 113 e r r o r = boost : : any cast<Real>(Res . ge t ( ”dObj” ) ) ; 114 const SubspaceCluster ∗p ; 115 std : : cout<<”Attr ibute Weights : ”<<std : : endl ; 116 for ( S i z e k=0;k<pc . s i z e ();++k) { 117 p = dynamic cast<const SubspaceCluster∗>(pc [ k ] . ge t ( ) ) ; 118 std : : cout<<”Cluste r ”<<k ; 119 for ( S i z e j =0; j<p−>w( ) . s i z e ();++ j ) { 120 std : : cout<<” , ”<<p−>w( j ) ; 121 } 122 std : : cout<<std : : endl ; 123 } 124 std : : cout<<”\nNumber o f run : ”<<numrun<<std : : endl ; 125 std : : cout<<”Average number o f i t e r a t i o n s : ” 126 <<avg i t e r<<std : : endl ; 127 std : : cout<<”Average e r r o r : ”<<avgerror<<std : : endl ; 128 std : : cout<<”Number o f i t e r a t i o n s f o r the best case : ” 129 <<numiter<<std : : endl ; 130 std : : cout<<”Best e r r o r : ”<<e rror<<std : : endl ; 131

132 return 0 ; 133 } catch ( std : : except ion& e ) { 134 std : : cout<<e . what()<< std : : endl ; 135 return 1 ; 136 } catch ( . . . ) { 137 std : : cout<<”unknown e r r o r ”<<std : : endl ; 138 return 2 ; 139 } 140 }

Listing B.121: The Makefile.am file in the directory gmc.

AM_CPPFLAGS = -I${top_srcdir} -I${top_builddir}

noinst_PROGRAMS = gmc

gmc_SOURCES = gmc.cpp
gmc_LDADD = ../../cl/libClusLib.la
gmc_LDFLAGS = -l${BOOST_PROGRAM_OPTIONS_LIB}

Listing B.122: Program to illustrate the Gaussian mixture clustering algorithm. 1 // examples/gmc/gmc. cpp 2 #include<c l / c l u s l i b . hpp> 3

4 #include<boost / timer . hpp> 5 #include<boost / program options . hpp> 6 #include<iostream> 7 #include<f stream> 8 #include<iomanip> 9

10 using namespace ClusLib ; 11 using namespace boost : : program options ; 12

13 int main ( int ac , char∗ av [ ] ) { 14 try{ 15 o p t i o n s de s c r i p t i o n desc ( ”Allowed opt ions ” ) ; 16 desc . add opt ions ( ) 17 ( ” help ” , ”produce help message” ) 18 ( ” d a t a f i l e ” , value<std : : s t r i ng >() , ” the data f i l e ” ) 19 ( ”k” , value<Size >()−>de f au l t v a lu e (3 ) , 20 ”number o f c l u s t e r s ” ) 21 ( ” seed ” , value<Size >()−>d e f au l t v a lu e (1 ) , 22 ” seed used to choose random i n i t i a l c en t e r s ” ) 23 ( ”maxiter ” , value<Size >()−>d e f au l t v a l u e (100 ) , 24 ”maximum number o f i t e r a t i o n s ” ) 25 ( ”numrun” , value<Size >()−>d e f au l t va l u e (1 ) , 26 ”number o f runs” ) 27 ( ” th r e sho ld” , value<Real>()−>de f au l t va l u e (1 e−10) , 28 ” L ik e l i hood t o l e r a n c e” ) 29 ( ” e p s i l o n ” , value<Real>()−>d e f au l t v a l u e ( 0 . 0 ) , 30 ”Regu l a r i z a t i on parameter ” ) ; 31

32 var iab le s map vm; 33 s t o r e ( parse command l ine ( ac , av , desc ) , vm) ; 34 no t i f y (vm) ; 35

36 i f (vm. count ( ” help ” ) | | ac==1) { 37 std : : cout << desc << ”\n” ; 38 return 1 ; 39 } 40

41 std : : s t r i n g d a t a f i l e ; 42 i f (vm. count ( ” d a t a f i l e ” ) ) { 43 d a t a f i l e = vm[ ” d a t a f i l e ” ] . as<std : : s t r i ng >() ; 44 } else { 45 std : : cout << ”Please prov ide a data f i l e \n” ; 46 return 1 ; 47 } 48

49 S i z e numclust = vm[ ”k” ] . as<Size >() ; 50 S i z e maxiter = vm[ ”maxiter” ] . as<Size >() ; 51 S i z e numrun = vm[ ”numrun” ] . as<Size >() ; 52 S i z e seed = vm[ ” seed ” ] . as<Size >() ; 53 Real th r e sho ld = vm[ ” th r e sho ld” ] . as<Real >() ; 54 Real ep s i l o n = vm[ ” ep s i l o n ” ] . as<Real >() ; 55

56 i f (numrun ==0 ){ 57 return 1 ; 58 }

62 reader . f i l l ( ds ) ; 63

64 std : : cout<<∗ds<<std : : endl ; 65

66 boost : : t imer t ; 67 t . r e s t a r t ( ) ; 68

69 Resu l t s Res ; 70 Real a v g i t e r = 0 . 0 ; 71 Real a v g l l = 0 . 0 ; 72 Real maxll = MIN REAL; 73 for ( S i z e i =1; i<=numrun ; ++i ) { 74 GMC ca ; 75 Arguments &Arg = ca . getArguments ( ) ; 76 Arg . ds = ds ; 77 Arg . i n s e r t ( ”numclust” , numclust ) ; 78 Arg . i n s e r t ( ”maxiter” , maxiter ) ; 79 i f (numrun == 1) { 80 Arg . i n s e r t ( ” seed ” , seed ) ; 81 } else { 82 Arg . i n s e r t ( ” seed ” , i ) ; 83 } 84 Arg . i n s e r t ( ” e p s i l o n ” , e p s i l o n ) ; 85 Arg . i n s e r t ( ” th r e sho ld” , th r e sho ld ) ; 86

87 ca . c l u s t e r i z e ( ) ; 88 const Resu l t s &tmp = ca . ge tRe su l t s ( ) ; 89 Real l l = boost : : any cast<Real>(tmp . ge t ( ” l l ” ) ) ; 90 avg l l += l l ; 91 i f ( l l > maxll ) { 92 maxll = l l ; 93 Res = tmp ; 94 } 95 avg i t e r += boost : : any cast<Size >(tmp . ge t ( ”numiter” ) ) ; 96 } 97 avg i t e r /= numrun ; 98 avg l l /= numrun ; 99

100 double seconds = t . e lapsed ( ) ; 101 std : : cout<<” completed in ”<<seconds<<” seconds ” 102 <<std : : endl ; 103

104 PCluste r ing pc = 105 boost : : any cast<PCluster ing >(Res . ge t ( ”pc” ) ) ; 106 std : : cout<<pc<<std : : endl ; 107

108 pc . save ( ” i r i s . txt ” ) ; 109

110 std : : vector<Real> p = boost : : any cast<std : : vector<Real> >( 111 Res . ge t ( ”p” ) ) ; 112 ublas : : matrix<Real> mu = 113 boost : : any cast<ublas : : matrix<Real> >(Res . ge t ( ”mu” ) ) ; 114 std : : cout<<”Component s i z e : ”<<std : : endl ; 115 for ( S i z e i =0; i<p . s i z e ( ) ; ++i ){ 116 std : : cout<<”Cluste r ”<<i<<” : ”<<p [ i ]<<std : : endl ; 117 } 118 std : : cout<<”\nCluste r Center : ”<<std : : endl ; 119 for ( S i z e i =0; i<p . s i z e ( ) ; ++i ){ 120 std : : cout<<”Center ”<<i<<” ” 121 <<ublas : : row (mu, i )<<std : : endl ; 122 } 123 std : : cout<<”\nNumber o f runs : ”<<numrun<<std : : endl ; 124 std : : cout<<”Average number o f i t e r a t i o n s : ” 125 <<avg i t e r<<std : : endl ; 126 std : : cout<<”Average l i k e l i h o o d : ”

o o : : endl ; 129 std : : cout<<”Number o f i t e r a t i o n s f o r the best case : ” 130 <<boost : : any cast<Size >(Res . ge t ( ”numiter” ) ) 131 <<std : : endl ; 132

133 return 0 ; 134 } catch ( std : : except ion& e ) { 135 std : : cout<<e . what()<< std : : endl ; 136 return 1 ; 137 } catch ( . . . ) { 138 std : : cout<<”unknown e r r o r ”<<std : : endl ; 139 return 2 ; 140 } 141 }

Listing B.123: The header file of the parallel k-means algorithm. 1 // examples/mpikmean/mpikmean . hpp 2 #ifndef CLUSLIB MPIKMEAN HPP 3 #define CLUSLIB MPIKMEAN HPP 4

5 #include<c l / c l u s l i b . hpp> 6 #include<boost /mpi . hpp> 7 #include<boost / s e r i a l i z a t i o n / vec tor . hpp> 8 #include<boost / timer . hpp> 9

10 namespace ClusLib { 11

// Function object that adds two vectors element by element.
//
// operator() returns a copy of x in which y[i] has been added to
// element i, for every index i of x. The result always has x.size()
// elements. The caller must make sure y has at least x.size()
// elements; y is indexed without a bounds check.
//
// The call operator is const so that the functor can also be invoked
// through a const object or a const reference.
template<typename T>
struct vplus {
    std::vector<T> operator()(const std::vector<T> &x,
        const std::vector<T> &y) const {
        std::vector<T> result = x;
        for(typename std::vector<T>::size_type i=0; i<x.size(); ++i) {
            result[i] += y[i];
        }
        return result;
    }
};

24 class MPIKmean : public Algorithm { 25 protected : 26 void setupArguments ( ) ; 27 void per formCluste r ing ( ) const ; 28 void f e t chRe su l t s ( ) const ; 29 virtual void i n i t i a l i z a t i o n ( ) const ; 30 virtual void i t e r a t i o n ( ) const ; 31 virtual Real d i s t ( S i z e i , S i z e j ) const ; 32

33 mutable std : : vector<Real> c e n t e r s ; 34 mutable std : : vector<Real> data ; 35 mutable S i z e numObj ; 36 mutable S i z e numAttr ; 37 mutable std : : vector<Size> CM; 38

39 mutable std : : vector<boost : : shared ptr<CenterCluster> > 40 c l u s t e r s ; 41 mutable Real e r r o r ; 42 mutable S i z e numiter ; 43

47 boost : : mpi : : communicator wor ld ; 48 } ; 49

Listing B.124: The source file of the parallel k-means algorithm. 1 // examples/mpikmean/mpikmean . cpp 2 #include<iostream> 3 #include<boost /random . hpp> 4 #include<cmath> 5 #include<mpikmean . hpp> 6 #include<boost / s e r i a l i z a t i o n / vec tor . hpp> 7

8 namespace ClusLib { 9

10 void MPIKmean : : per formCluste r ing ( ) const { 11 i n i t i a l i z a t i o n ( ) ; 12 i t e r a t i o n ( ) ; 13 } 14

15 void MPIKmean : : setupArguments ( ) { 16 numclust = boost : : any cast<Size >( 17 arguments . ge t ( ”numclust” ) ) ; 18

19 maxite r = boost : : any cast<Size >( 20 arguments . ge t ( ”maxiter ” ) ) ; 21 ASSERT( maxiter >0, ” i n v a l i d e maxiter” ) ; 22

23 s e ed = boost : : any cast<Size >( 24 arguments . ge t ( ” seed ” ) ) ; 25 ASSERT( seed >0, ” i n v a l i d e seed ” ) ; 26

27 i f ( wor ld . rank ( ) ==0) { 28 Algorithm : : setupArguments ( ) ; 29 ASSERT( ds−>i s numer i c ( ) , ” datase t i s not numeric” ) ; 30

31 ASSERT( numclust>=2 && numclust<= ds−>s i z e ( ) , 32 ” in v a l i d numclust” ) ; 33 } 34

37 void MPIKmean : : f e t chRe su l t s ( ) const { 38 std : : vector<Real> e r r o r (1 , 0 . 0 ) , t o t a l e r r o r ( 1 ) ; 39 for ( S i z e i =0; i< numObj;++i ) { 40 e r r o r [ 0 ] += d i s t ( i , CM[ i ] ) ; 41 } 42

43 reduce ( world , e r ror , t o t a l e r r o r , vplus<Real >() , 0 ) ; 44

45 i f ( wor ld . rank ( ) == 0) { 46 boost : : shared ptr<Schema> schema = ds−>schema ( ) ; 47 PCluste r ing pc ; 48 for ( S i z e i =0; i< numclust;++i ){ 49 for ( S i z e j =0; j< numAttr ; ++j ) { 50 (∗ schema ) [ j ]−> s e t c v a l ( 51 (∗ c l u s t e r s [ i ]−>c en t e r ( ) ) [ j ] , 52 c e n t e r s [ i ∗ numAttr+j ] ) ; 53 } 54 pc . add ( c l u s t e r s [ i ] ) ;

57 for ( S i z e i =0; i<CM. s i z e ( ) ; ++i ) { 58 c l u s t e r s [ CM[ i ]]−>add ((∗ ds ) [ i ] ) ; 59 } 60

61 r e s u l t s .CM = CM; 62 r e s u l t s . i n s e r t ( ”pc” , boost : : any ( pc ) ) ; 63

64 e r r o r = t o t a l e r r o r [ 0 ] ; 65 r e s u l t s . i n s e r t ( ” e r r o r ” , boost : : any ( e r r o r ) ) ; 66 r e s u l t s . i n s e r t ( ”numiter” , boost : : any ( numiter ) ) ; 67 } 68 } 69

70 void MPIKmean : : i t e r a t i o n ( ) const { 71 std : : vector<Size> nChanged ( 1 , 1 ) ; 72

73 numiter = 1 ; 74 while ( nChanged [ 0 ] > 0) { 75 nChanged [ 0 ] = 0 ; 76 S i z e s ; 77 Real dMin , dDist ; 78 std : : vector<Size> nChangedLocal ( 1 , 0 ) ; 79 std : : vector<Real> newCenters( numclust∗ numAttr , 0 . 0 ) ; 80 std : : vector<Size> newSize ( numclust , 0 ) ; 81

82 for ( S i z e i =0; i< numObj;++i ) { 83 dMin = MAXREAL; 84 for ( S i z e k=0;k< numclust;++k) { 85 dDist = d i s t ( i , k ) ; 86 i f (dMin > dDist ) { 87 dMin = dDist ; 88 s = k ; 89 } 90 } 91

92 for ( S i z e j =0; j< numAttr ; ++j ) { 93 newCenters [ s∗ numAttr+j ] += 94 data [ i ∗ numAttr+j ] ; 95 } 96 newSize [ s ] +=1; 97

98 i f ( CM[ i ] != s ){ 99 CM[ i ] = s ;

104 a l l r e d u c e ( world , nChangedLocal , nChanged , 105 vplus<Size > ( ) ) ; 106 a l l r e d u c e ( world , newCenters , c en t e r s , 107 vplus<Real > ( ) ) ; 108 std : : vector<Size> t o t a l S i z e ( numclust , 0 ) ; 109 a l l r e d u c e ( world , newSize , t o t a l S i z e , vplus<Size > ( ) ) ; 110

111 for ( S i z e k=0; k< numclust ; ++k) { 112 for ( S i z e j =0; j< numAttr ; ++j ) { 113 c e n t e r s [ k∗ numAttr+j ] /= t o t a l S i z e [ k ] ; 114 } 115 } 116

117 ++ numiter ; 118 i f ( numiter > maxite r ){ 119 break ; 120 } 121 }

124 wor ld . send (0 , 0 , CM) ; 125 } else { 126 for ( S i z e p=1; p< wor ld . s i z e ( ) ; ++p) { 127 std : : vector<Size> msg ; 128 wor ld . recv (p , 0 ,msg ) ; 129 for ( S i z e j =0; j<msg . s i z e ( ) ; ++j ) { 130 CM. push back (msg [ j ] ) ; 131 } 132 } 133 } 134 } 135

136 void MPIKmean : : i n i t i a l i z a t i o n ( ) const { 137 S i z e numRecords ; 138 S i z e rank = wor ld . rank ( ) ; 139

140 i f ( rank == 0) { 141 numRecords = ds−>s i z e ( ) ; 142 numAttr = ds−>num attr ( ) ; 143 c en t e r s . r e s i z e ( numclust ∗ numAttr ) ; 144

145 std : : vector<Intege r> index ( numRecords , 0 ) ; 146 for ( S i z e i =0; i<index . s i z e ();++ i ){ 147 index [ i ] = i ; 148 } 149

150 boost : : shared ptr<Schema> schema = ds−>schema ( ) ; 151 boost : : minstd rand generator ( s e ed ) ; 152 for ( S i z e i =0; i< numclust;++i ){ 153 boost : : un i form int<> un i d i s t (0 , numRecords−i −1); 154 boost : : va r i a t e g ene ra to r <boost : : minstd rand&, 155 boost : : un i form int<> > 156 uni ( generator , u n i d i s t ) ; 157 In t e ge r r = uni ( ) ; 158 boost : : shared ptr<Record> c r = boost : : sha r ed p t r 159 <Record>(new Record (∗ (∗ ds ) [ r ] ) ) ; 160 boost : : shared ptr<CenterCluster> c = 161 boost : : shared ptr<CenterCluster >( 162 new CenterCluste r ( c r ) ) ; 163 c−>s e t i d ( i ) ; 164 c l u s t e r s . push back ( c ) ; 165 for ( S i z e j =0; j< numAttr ; ++j ) { 166 c e n t e r s [ i ∗ numAttr + j ] = 167 (∗ schema ) [ j ]−>g e t c v a l ( (∗ ds ) ( r , j ) ) ; 168 } 169 index . e r a s e ( index . begin ()+ r ) ; 170 } 171

174 boost : : mpi : : b roadcast ( world , c en t e r s , 0 ) ; 175 boost : : mpi : : b roadcast ( world , numRecords , 0 ) ; 176 boost : : mpi : : b roadcast ( world , numAttr , 0 ) ; 177

178 S i z e nDiv = numRecords / wor ld . s i z e ( ) ; 179 S i z e nRem = numRecords % wor ld . s i z e ( ) ; 180

181 i f ( rank == 0) { 182 boost : : shared ptr<Schema> schema = ds−>schema ( ) ; 183 numObj = (nRem >0) ? nDiv+1: nDiv ; 184 data . r e s i z e ( numObj ∗ numAttr ) ; 185 CM. r e s i z e ( numObj ) ; 186 for ( S i z e i =0; i< numObj ; ++i ) { 187 for ( S i z e j =0; j< numAttr ; ++j ) { 188 data [ i ∗ numAttr +j ] =

) ; 191 } 192

193 S i z e nCount = numObj ; 194 for ( S i z e p=1; p< wor ld . s i z e ( ) ; ++p) { 195 S i z e s = (p< nRem) ? nDiv +1 : nDiv ; 196 std : : vector<Real> dv( s∗ numAttr ) ; 197 for ( S i z e i =0; i<s ; ++i ) { 198 for ( S i z e j =0; j< numAttr ; ++j ) { 199 dv [ i ∗ numAttr+j ] = 200 (∗ schema ) [ j ]−>g e t c v a l ( 201 (∗ ds ) ( i+nCount , j ) ) ; 202 } 203 } 204 nCount += s ; 205 wor ld . send (p , 0 , dv ) ; 206 } 207 } else { 208 numObj = ( rank < nRem) ? nDiv+1: nDiv ; 209 CM. r e s i z e ( numObj ) ; 210 wor ld . recv (0 , 0 , data ) ; 211 } 212 } 213

214 Real MPIKmean : : d i s t ( S i z e i , S i z e j ) const { 215 Real dDist = 0 . 0 ; 216 for ( S i z e h=0; h< numAttr ; ++h) { 217 dDist += std : : pow( data [ i ∗ numAttr + h ] 218 − c en t e r s [ j ∗ numAttr + h ] , 2 . 0 ) ; 219 } 220 return std : : pow( dDist , 0 . 5 ) ; 221 } 222 }

Listing B.125: Program to illustrate the parallel k-means algorithm. 1 // examples/mpikmean/mpimain . cpp 2 #include<c l / c l u s l i b . hpp> 3 #include<mpikmean . hpp> 4

5 #include<boost / timer . hpp> 6 #include<boost /mpi . hpp> 7 #include<boost / program options . hpp> 8 #include<iostream> 9 #include<sstream>

10 #include<iomanip> 11 #include<f un c t i ona l> 12

13 using namespace ClusLib ; 14 using namespace std ; 15 using namespace boost : : program options ; 16 namespace mpi=boost : : mpi ; 17

18 int main ( int ac , char∗ av [ ] ) { 19 try{ 20 mpi : : environment env ( ac , av ) ; 21 mpi : : communicator world ; 22

23 o p t i o n s de s c r i p t i o n desc ( ”Allowed opt ions ” ) ; 24 desc . add opt ions ( ) 25 ( ” help ” , ”produce help message” ) 26 ( ” d a t a f i l e ” , value<s t r i ng >() , ” the data f i l e ” ) 27 ( ”k” , value<Size >()−>de f au l t v a lu e (3 ) , 28 ”number o f c l u s t e r s ” ) 29 ( ” seed ” , value<Size >()−>d e f au l t v a lu e (1 ) ,

f au v a l u e (100 ) , 32 ”maximum number o f i t e r a t i o n s ” ) 33 ( ”numrun” , value<Size >()−>d e f au l t va l u e (1 ) , 34 ”number o f runs” ) ; 35

36 var iab le s map vm; 37 s t o r e ( parse command l ine ( ac , av , desc ) , vm) ; 38 no t i f y (vm) ; 39

40 i f (vm. count ( ” help ” ) | | ac==1) { 41 cout << desc << ”\n” ; 42 return 1 ; 43 } 44

45 S i z e numclust = vm[ ”k” ] . as<Size >() ; 46 S i z e maxiter = vm[ ”maxiter” ] . as<Size >() ; 47 S i z e numrun = vm[ ”numrun” ] . as<Size >() ; 48 S i z e seed = vm[ ” seed ” ] . as<Size >() ; 49

50 s t r i n g d a t a f i l e ; 51 i f (vm. count ( ” d a t a f i l e ” ) ) { 52 d a t a f i l e = vm[ ” d a t a f i l e ” ] . as<s t r i ng >() ; 53 } else { 54 cout << ” Please prov ide a data f i l e \n” ; 55 return 1 ; 56 } 57

58 boost : : shared ptr<Dataset> ds ; 59

60 i f ( world . rank ( ) ==0) { 61 DatasetReader reader ( d a t a f i l e ) ; 62 reader . f i l l ( ds ) ; 63 std : : cout<<∗ds<<std : : endl ; 64 } 65

66 boost : : t imer t ; 67 t . r e s t a r t ( ) ; 68

69 Resu l t s Res ; 70 Real a v g i t e r = 0 . 0 ; 71 Real avgerror = 0 . 0 ; 72 Real dMin = MAXREAL; 73 Real e r r o r ; 74

75 for ( S i z e i =1; i<=numrun ; ++i ) { 76 MPIKmean ca ; 77 Arguments &Arg = ca . getArguments ( ) ; 78 Arg . ds = ds ; 79 Arg . i n s e r t ( ”numclust” , numclust ) ; 80 Arg . i n s e r t ( ”maxiter” , maxiter ) ; 81 Arg . i n s e r t ( ” seed ” , seed ) ; 82 i f (numrun == 1) { 83 Arg . add i t i o n a l [ ” seed ” ] = seed ; 84 } else { 85 Arg . add i t i o n a l [ ” seed ” ] = i ; 86 } 87

88 ca . c l u s t e r i z e ( ) ; 89

90 i f ( world . rank ( ) == 0) { 91 const Resu l t s &tmp = ca . ge tRe su l t s ( ) ; 92 avg i t e r += 93 boost : : any cast<Size >(tmp . ge t ( ”numiter” ) ) ; 94 e r r o r = boost : : any cast<Real>(tmp . ge t ( ” e r r o r ” ) ) ; 95 avgerror += er r o r ; 96 i f ( e r r o r < dMin) {

103 double seconds = t . e lapsed ( ) ; 104 i f ( world . rank ( ) == 0) { 105 avg i t e r /= numrun ; 106 avgerror /= numrun ; 107

108 std : : cout<<” completed in ”<<seconds 109 <<” seconds ”<<std : : endl ; 110 std : : cout<<”number o f p roc e s s e s : ” 111 <<world . s i z e ()<< std : : endl ; 112

113 PCluste r ing pc = 114 boost : : any cast<PCluster ing >(Res . ge t ( ”pc” ) ) ; 115

116 std : : cout<<pc<<std : : endl ; 117 std : : cout<<”Number o f runs : ”<<numrun<<std : : endl ; 118 std : : cout<<”Average number o f i t e r a t i o n s : ” 119 <<avg i t e r<<std : : endl ; 120 std : : cout<<”Average e r r o r : ”<<avgerror<<std : : endl ; 121 std : : cout<<”Best e r r o r : ”<<dMin<<std : : endl ; 122

123 std : : s t r i n g p r e f i x ; 124 s i z e t ind = da t a f i l e . f i n d l a s t o f ( ’ . ’ ) ; 125 i f ( ind != std : : s t r i n g : : npos ) { 126 p r e f i x = da t a f i l e . sub s t r (0 , ind ) ; 127 } else { 128 p r e f i x = da t a f i l e ; 129 } 130 std : : s t r i ng s t r e am ss ; 131 ss<<pr e f i x<<”−kmean−k”<<numclust<<”−s ”<<seed<<” . txt ” ; 132 pc . save ( s s . s t r ( ) ) ; 133 } 134

135 return 0 ; 136 } catch ( std : : except ion& e ) { 137 std : : cout<<e . what()<< std : : endl ; 138 return 1 ; 139 } catch ( . . . ) { 140 std : : cout<<”unknown e r r o r ”<<std : : endl ; 141 return 2 ; 142 } 143 }

Listing B.126: The Makefile.am file in the directory test-suite.

CL_TESTS = \
    cluslibtestsuite.cpp \
    attrinfo.hpp attrinfo.cpp \
    dataset.hpp dataset.cpp \

    matrix.hpp matrix.cpp \
    schema.hpp schema.cpp

AM_CPPFLAGS = -I${top_srcdir} -I${top_builddir}

bin_PROGRAMS = cluslib-test-suite
cluslib_test_suite_SOURCES = ${CL_TESTS}
cluslib_test_suite_LDADD = ${top_builddir}/cl/libClusLib.la

Listing B.127: The source file of the master test suite. 1 // te s t−s u i t e / c l u s l i b t e s t s u i t e . cpp 2 #include<boost / t e s t / inc luded / un i t t e s t f r amework . hpp> 3

4 #include<iostream> 5 #include ” a t t r i n f o . hpp” 6 #include ”matrix . hpp” 7 #include ” datase t . hpp” 8 #include ”nnmap . hpp” 9 #include ” d i s t anc e . hpp”

10 #include ”schema . hpp” 11

12 using namespace boost : : un i t t e s t f r amework ; 13

14 t e s t s u i t e ∗ i n i t u n i t t e s t s u i t e ( int , char∗ [ ] ) { 15 std : : s t r i n g header = ”Test ing ClusLib” ; 16 std : : s t r i n g r u l e = std : : s t r i ng ( header . l ength ( ) , ’=’ ) ; 17

18 BOOSTMESSAGE( ru l e ) ; 19 BOOSTMESSAGE( header ) ; 20 BOOSTMESSAGE( ru l e ) ; 21

22 t e s t s u i t e ∗ t e s t = BOOST TEST SUITE(”ClusLib t e s t s u i t e ” ) ; 23

24 t e s t−>add ( Attr In foTest : : s u i t e ( ) ) ; 25 t e s t−>add ( DatasetTest : : s u i t e ( ) ) ; 26 t e s t−>add (nnMapTest : : s u i t e ( ) ) ; 27 t e s t−>add ( DistanceTest : : s u i t e ( ) ) ; 28 t e s t−>add (MatrixTest : : s u i t e ( ) ) ; 29 t e s t−>add ( SchemaTest : : s u i t e ( ) ) ; 30

31 return t e s t ; 32 }

Listing B.128: The header file of class AttrInfoTest. 1 // te s t−s u i t e / a t t r i n f o . hpp 2 #ifndef TEST ATTRINFO HPP 3 #define TEST ATTRINFO HPP 4

5 #include<boost / t e s t / un i t t e s t . hpp> 6

7 class Attr In foTest {

11 static boost : : un i t t e s t f r amework : : t e s t s u i t e ∗ s u i t e ( ) ; 12 } ; 13

Listing B.129: The source file of class AttrInfoTest. 1 // te s t−s u i t e / a t t r i n f o . cpp 2 #include ” a t t r i n f o . hpp” 3

4 #include<c l / da ta se t s / c a t t r i n f o . hpp> 5 #include<c l / da ta se t s / da t t r i n f o . hpp> 6 #include<sstream> 7

8 using namespace ClusLib ; 9 using namespace boost : : un i t t e s t f r amework ;

11 void Attr In foTest : : t e stDAttr In fo ( ) { 12 std : : s t r i ng s t r e am ss ; 13

14 DAttrInfo na i ( ”Nominal A” ) ; 15 nai . add value ( ”A” ) ; 16 nai . add value ( ”B” ) ; 17 nai . add value ( ”A” ) ; 18 ss<<nai . name()<<” has ”<<nai . num values ( ) 19 <<” va lues ”<<std : : endl ; 20

21 ss<<” B: ”<<nai . s t r t o i n t ( ”B”)<<std : : endl ; 22 ss<<” A: ”<<nai . s t r t o i n t ( ”A”)<<std : : endl ; 23 ss<<” 0 : ”<<nai . i n t t o s t r (0)<< std : : endl ; 24 ss<<” 1 : ”<<nai . i n t t o s t r (1)<< std : : endl ; 25

26 DAttrInfo naib ( ”Nominal B” ) ; 27 i f ( na i==naib ) 28 ss<<nai . name()<<”=”<<naib . name()<< std : : endl ; 29 i f ( na i !=naib ) 30 ss<<nai . name()<<”!=”<<naib . name()<< std : : endl ; 31 i f ( naib . c an c a s t t o d ( ) ) 32 ss<<naib . name()<<” can ca s t to d i s c r e t e ”<<std : : endl ; 33 else 34 ss<<naib . name()<<” can not c a s t to d i s c r e t e ”<<std : : endl ; 35

36 AttrValue nava , navb ; 37 nai . set unknown ( nava ) ; 38 BOOST CHECK( nai . is unknown ( nava ) ) ; 39

40 nai . s e t d v a l ( nava , 0 ) ; 41 nai . s e t d v a l ( navb , 1 ) ; 42 ss<<”nava navb d i s t anc e : ”<<nai . d i s t anc e ( nava , navb) 43 <<std : : endl ; 44 ss<<”nava has va lue : ”<<nai . g e t d va l ( nava)<<std : : endl ; 45 ss<<”navb has va lue : ”<<nai . g e t d va l ( navb)<<std : : endl ; 46

47 BOOSTMESSAGE( s s . s t r ( ) ) ; 48 } 49

50 void Attr In foTest : : t e s tCAt t r In f o ( ) { 51 std : : s t r i ng s t r e am ss ; 52

53 CAttrInfo r a i ( ”Real A” ) ; 54 CAttrInfo ra i b ( ”Real B” ) ; 55 CAttrInfo r a i c ( ”Real B” ) ; 56

57 i f ( r a i c==ra ib )

60 ss<<r a i . name()<<”!=”<<r a ib . name()<< std : : endl ; 61

62 i f ( r a i . c a n c a s t t o c ( ) ) 63 ss<<r a i . name()<<” can ca s t to cont inuous”<<std : : endl ; 64 else 65 ss<<r a i . name()<<” can not c a s t to cont inuous”<<std : : endl ; 66

67 AttrValue rava , ravb ; 68 r a i . set unknown ( rava ) ; 69 BOOST CHECK( r a i . is unknown ( rava ) ) ; 70

71 r a i . s e t c v a l ( rava , 2 . 0 ) ; 72 r a i . s e t c v a l ( ravb , 4 . 0 ) ; 73

74 ss<<” rava ravb d i s t anc e : ”<<r a i . d i s t anc e ( rava , ravb ) 75 <<std : : endl ; 76 ss<<” rava has va lue : ”<<r a i . g e t c v a l ( rava)<<std : : endl ; 77 ss<<” ravb has va lue : ”<<r a i . g e t c v a l ( ravb)<<std : : endl ; 78

79 BOOSTMESSAGE( s s . s t r ( ) ) ; 80 } 81

82 t e s t s u i t e ∗ Attr In foTest : : s u i t e ( ) { 83 t e s t s u i t e ∗ s u i t e = BOOST TEST SUITE( 84 ”Test ing Attr ibute Infomation ” ) ; 85

86 su i t e−>add (BOOST TEST CASE(&Attr In foTest : : t e stDAttr In fo ) ) ; 87 su i t e−>add (BOOST TEST CASE(&Attr In foTest : : t e s tCAt t r In f o ) ) ; 88

89 return s u i t e ; 90 }

Listing B.130: The header file of class DatasetTest. 1 // te s t−s u i t e / datase t . hpp 2 #ifndef TEST DATASET HPP 3 #define TEST DATASET HPP 4

5 #include<boost / t e s t / un i t t e s t . hpp> 6

7 class DatasetTest { 8 public : 9 static void t e s tData se t ( ) ;

10 static boost : : un i t t e s t f r amework : : t e s t s u i t e ∗ s u i t e ( ) ; 11 } ; 12

Listing B.131: The source file of class DatasetTest. 1 // te s t−s u i t e / datase t . cpp 2 #include ” datase t . hpp” 3 #include<c l / da ta s e t s / datase t . hpp> 4 #include<c l / u t i l i t i e s / da ta s e t r e ade r . hpp> 5 #include<sstream> 6 #include<f stream> 7 #include<iostream> 8

9 using namespace ClusLib; 10 using namespace boost::unit_test_framework; 11

12 void DatasetTest : : t e s tData se t ( ) { 13 BOOSTMESSAGE( ”Test ing Dataset” ) ; 14

15 std : : s t r i n g f i leName ( ” . . / . . / Data/ i r i s . data ” ) ; 16 DatasetReader reader ( f i leName ) ; 17 boost : : shared ptr<Dataset> ds ; 18 reader . f i l l ( ds ) ; 19

20 std : : cout<<”Num re co rd s : ”<<ds−>s i z e ()<< std : : endl 21 <<”Num a t t r i b u t e s : ”<<ds−>s i z e ()<< std : : endl 22 <<”Num ca t e g o r i e s : ” 23 <<ds−>schema()−> l a b e l I n f o ()−>num values()<< std : : endl ; 24

26 Dataset ds2 = ∗ds ; 27 std : : cout<<”Num re co rd s : ”<<ds2 . s i z e ()<< std : : endl 28 <<”Num a t t r i b u t e s : ”<<ds2 . s i z e ()<< std : : endl 29 <<”Num ca t e g o r i e s : ” 30 <<ds2 . schema()−> l a b e l I n f o ()−>num values()<< std : : endl ; 31

34 t e s t s u i t e ∗ DatasetTest : : s u i t e ( ) { 35 t e s t s u i t e ∗ s u i t e = BOOST TEST SUITE(”Test ing Dataset” ) ; 36

37 su i t e−>add (BOOST TEST CASE(&DatasetTest : : t e s tData se t ) ) ; 38

39 return s u i t e ; 40 }

Listing B.132: The header file of class DistanceTest. 1 // te s t−s u i t e / dis tance . hpp 2 #ifndef TEST DISTANCE HPP 3 #define TEST DISTANCE HPP 4

5 #include<boost / t e s t / un i t t e s t . hpp> 6

7 class DistanceTest { 8 public : 9 static void t e s tEuc l i d ean ( ) ;

10 static void te stMahalanobi s ( ) ; 11 static boost : : un i t t e s t f r amework : : t e s t s u i t e ∗ s u i t e ( ) ; 12 } ; 13

Listing B.133: The source file of class DistanceTest. 1 // te s t−s u i t e / dis tance . cpp 2 #include ” d i s t anc e . hpp” 3

4 #include<c l / da ta s e t s / datase t . hpp> 5 #include<c l / d i s t anc e s / euc l i d eand i s t anc e . hpp> 6 #include<c l / d i s t anc e s /mahalanob i sd i s tance . hpp> 7 #include<c l / d i s t anc e s /minkowskidistance . hpp> 8 #include<c l / u t i l i t i e s / da ta s e t r e ade r . hpp>

12 using namespace ClusLib ; 13 using namespace boost : : un i t t e s t f r amework ; 14

15 void DistanceTest : : t e s tEuc l i d e an ( ) { 16 BOOSTMESSAGE( ”Test ing Eucl idean ” ) ; 17

18 std : : s t r i n g f i leName ( ” . . / . . / Data/ i r i s . data ” ) ; 19 DatasetReader reader ( f i leName ) ; 20 boost : : shared ptr<Dataset> ds ; 21 reader . f i l l ( ds ) ; 22

23 Euc l ideanDistance ed ; 24 std : : vector<Real> d i s t ( ds−>s i z e ( ) ) ; 25 for ( S i z e i =0; i<ds−>s i z e ();++ i ){ 26 d i s t [ i ] = ed ((∗ ds ) [ 0 ] , ( ∗ ds ) [ i ] ) ; 27 } 28

29 std : : s t r i ng s t r e am ss ; 30 for ( S i z e i =0; i<d i s t . s i z e ();++ i ){ 31 ss<<” d i s t anc e between r e c o rd s 0 and ”<<(∗ds ) [ i ]−>g e t i d ( ) 32 <<” : ”<<d i s t [ i ]<< ’ \n ’ ; 33 } 34

36 std : : o f s t ream out ; 37 out . open ( ” i r i s d i s t . txt ” ) ; 38 out<<s s . s t r ( ) ; 39 out . c l o s e ( ) ; 40 } 41

42 void DistanceTest : : t e stMahalanobi s ( ) { 43 BOOSTMESSAGE( ”Test ing Mahalanobis” ) ; 44

45 std : : s t r i n g f i leName ( ” . . / . . / Data/ b e z d e k I r i s . data” ) ; 46 DatasetReader reader ( f i leName ) ; 47 boost : : shared ptr<Dataset> ds ; 48 reader . f i l l ( ds ) ; 49

50 boost : : shared ptr<Schema> schema = ds−>schema ( ) ; 51 ublas : : matrix<Real> data ; 52 ublas : : vector<Real> mu; 53

54 data . r e s i z e ( ds−>s i z e ( ) , ds−>num attr ( ) ) ; 55 mu. r e s i z e ( ds−>num attr ( ) ) ; 56 for ( S i z e j =0; j<mu. s i z e ( ) ; ++j ) { 57 mu( j ) = 0 . 0 ; 58 } 59 for ( S i z e i =0; i<ds−>s i z e ( ) ; ++i ) { 60 for ( S i z e j =0; j<ds−>num attr ( ) ; ++j ) { 61 data ( i , j ) = (∗ schema ) [ j ]−>g e t c v a l ( (∗ ds ) ( i , j ) ) ; 62 mu( j ) += data ( i , j ) ; 63 } 64 } 65

66 ublas : : symmetric matrix<Real> cov ; 67 cov . r e s i z e ( ds−>num attr ( ) , ds−>num attr ( ) ) ; 68 for ( S i z e i =0; i<ds−>num attr ( ) ; ++i ) { 69 for ( S i z e j =0; j<=i ; ++j ) { 70 cov ( i , j ) = ( inner prod ( column ( data , i ) , 71 column ( data , j ) ) − mu( i )∗mu( j ) / ds−>s i z e ( ) ) 72 / ( ds−>s i z e ( ) − 1 . 0 ) ; 73 } 74 } 75

79 boost : : shared ptr<Record> r (new Record ( ds−>schema ( ) ) ) ; 80 for ( S i z e i =0; i<r−>s i z e ( ) ; ++i ) { 81 (∗ schema ) [ i ]−> s e t c v a l ( (∗ r ) [ i ] , mu( i )/ ds−>s i z e ( ) ) ; 82 } 83

84 MahalanobisDistance md( cov ) ; 85 std : : vector<Real> d i s t ( ds−>s i z e ( ) ) ; 86 for ( S i z e i =0; i<ds−>s i z e ();++ i ){ 87 d i s t [ i ] = md( r , ( ∗ ds ) [ i ] ) ; 88 } 89

90 std : : s t r i ng s t r e am ss ; 91 for ( S i z e i =0; i<d i s t . s i z e ();++ i ){ 92 ss<<” d i s t anc e between r e c o rd s 0 and ”<<(∗ds ) [ i ]−>g e t i d ( ) 93 <<” : ”<<d i s t [ i ]<< ’ \n ’ ; 94 } 95

97 std : : o f s t ream out ; 98 out . open ( ” i r i s maha l . txt ” ) ; 99 out<<s s . s t r ( ) ;

100 out . c l o s e ( ) ; 101 } 102

103 t e s t s u i t e ∗ DistanceTest : : s u i t e ( ) { 104 t e s t s u i t e ∗ s u i t e = BOOST TEST SUITE(”Test ing Distances ” ) ; 105

106 su i t e−>add (BOOST TEST CASE(&DistanceTest : : t e s tEuc l i d e an ) ) ; 107 su i t e−>add (BOOST TEST CASE(&DistanceTest : : t e stMahalanob is ) ) ; 108

109 return s u i t e ; 110 }

Listing B.134: The header file of class nnMapTest. 1 // te s t−s u i t e /nnmap . hpp 2 #ifndef TEST NNMAP HPP 3 #define TEST NNMAP HPP 4

5 #include<boost / t e s t / un i t t e s t . hpp> 6

7 class nnMapTest { 8 public : 9 static void test i i rMapA ( ) ;

10 static void te st i i iMapB ( ) ; 11 static boost : : un i t t e s t f r amework : : t e s t s u i t e ∗ s u i t e ( ) ; 12 } ; 13

Listing B.135: The source file of class nnMapTest. 1 // te s t−s u i t e /nnmap . cpp 2 #include ”nnmap . hpp” 3 #include<c l / da ta s e t s / datase t . hpp> 4 #include<c l / u t i l i t i e s / da ta s e t r e ade r . hpp> 5 #include<c l / u t i l i t i e s /nnmap . hpp>

6 #include<cl/distances/euclideandistance.hpp> 7 #include<sstream> 8 #include<fstream> 9 #include<iostream>

11 using namespace ClusLib ; 12 using namespace boost : : un i t t e s t f r amework ; 13

14 void nnMapTest : : test i i rMapA () { 15 BOOSTMESSAGE( ”Test ing iirMapA” ) ; 16

17 std : : s t r i n g f i leName ( ” . . / . . / Data/ b e z d e k I r i s . data” ) ; 18 DatasetReader reader ( f i leName ) ; 19 boost : : shared ptr<Dataset> ds ; 20 reader . f i l l ( ds ) ; 21

22 Euc l ideanDistance ed ; 23 iirMapA dm; 24 S i z e n = ds−>s i z e ( ) ; 25 for ( S i z e i =0; i<n;++i ){ 26 for ( S i z e j=i +1; j<n;++j ){ 27 dm. add item ( i , j , ed ((∗ ds ) [ i ] , (∗ ds ) [ j ] ) ) ; 28 } 29 } 30

31 std : : s t r i ng s t r e am ss ; 32 for ( S i z e i =0; i<n;++i ){ 33 for ( S i z e j=i +1; j<n;++j ){ 34 ss<<i+1<<” , ”<<j+1<<” , ”<<dm( j , i )<<std : : endl ; 35 } 36 } 37

38 std : : o f s t ream out ( ” i r i sdm . csv ” ) ; 39 out<<s s . s t r ( ) ; 40 out . c l o s e ( ) ; 41 } 42

43 void nnMapTest : : t e st i i iMapB ( ) { 44 BOOSTMESSAGE( ”Test ing ii iMapB” ) ; 45

46 i i iMapB dm; 47 S i z e n = 10 ; 48 for ( S i z e i =0; i<n;++i ){ 49 for ( S i z e j =0; j<n;++j ){ 50 dm. add item ( i , j , i+j ) ; 51 } 52 } 53

54 std : : s t r i ng s t r e am ss ; 55 for ( S i z e i =0; i<n;++i ){ 56 for ( S i z e j =0; j<n;++j ){ 57 ss<<i+1<<” , ”<<j+1<<” , ”<<dm( j , i )<<std : : endl ; 58 } 59 } 60

61 std : : o f s t ream out ( ” i i rmapb . csv ” ) ; 62 out<<s s . s t r ( ) ; 63 out . c l o s e ( ) ; 64 } 65

66 t e s t s u i t e ∗ nnMapTest : : s u i t e ( ) { 67 t e s t s u i t e ∗ s u i t e = BOOST TEST SUITE(”Test ing nnMap” ) ; 68

69 su i t e−>add (BOOST TEST CASE(&nnMapTest : : test i i rMapA ) ) ; 70 su i t e−>add (BOOST TEST CASE(&nnMapTest : : t e st i i iMapB ) ) ; 71

72 return s u i t e ;

Listing B.136: The header file of class MatrixTest. 1 // te s t−s u i t e /matrix . hpp 2 #ifndef TEST MATRIX HPP 3 #define TEST MATRIX HPP 4

5 #include<boost / t e s t / un i t t e s t . hpp> 6

7 class MatrixTest { 8 public : 9 static void te stChole sky ( ) ;

10 static void t e s tT r i a n gu l a r I nve r s e ( ) ; 11 static boost : : un i t t e s t f r amework : : t e s t s u i t e ∗ s u i t e ( ) ; 12 } ; 13

Listing B.137: The source file of class MatrixTest. 1 // te s t−s u i t e /matrix . cpp 2 #include ”matrix . hpp” 3

4 #include<c l / u t i l i t i e s /matrix . hpp> 5 #include<boost /numeric/ ub las / i o . hpp> 6 #include<sstream> 7 #include<f stream> 8

9 using namespace ClusLib ; 10 using namespace boost : : un i t t e s t f r amework ; 11 using namespace boost : : numeric : : ub las ; 12

13 void MatrixTest : : t e stChole sky ( ) { 14 BOOSTMESSAGE( ”Test ing Cholesky ” ) ; 15

16 symmetric matrix<Real> A(3 , 3 ) ; 17 A(0 , 0 ) = 3 . 1325 ; 18 A(1 , 0 ) = 0 . 9748 ; A(1 , 1 ) = 1 . 4862 ; 19 A(2 , 0 ) = −0.7613; A(2 , 1 ) = −0.8402; A(2 , 2 ) = 0 . 7390 ; 20

21 t r i angu l a r mat r i x<Real> L( 3 , 3 ) ; 22 S i z e k = chol (A,L ) ; 23

26 std : : cout<<”A: \n” ; 27 for ( S i z e i =0; i<A. s i z e 1 ( ) ; ++i ) { 28 std : : cout<<row (A, i )<<std : : endl ; 29 } 30 std : : cout<<”L : \n” ; 31 for ( S i z e i =0; i<L . s i z e 1 ( ) ; ++i ) { 32 std : : cout<<row (L , i )<<std : : endl ; 33 } 34 } 35

36 void MatrixTest : : t e s tT r i a n gu l a r In ve r s e ( ) { 37 BOOSTMESSAGE( ”Test ing Tr iangu lar Inve r s e ” ) ; 38

39 t r i angu l a r mat r i x<Real> L( 4 , 4 ) ;

. 42 L(2 , 0 ) = 1 . 5389 ; L(2 , 1 ) = −0.5794; L(2 , 2 ) = 0 . 6421 ; 43 L(3 , 0 ) = 0 . 6235 ; L(3 , 1 ) = −0.2072; L(3 , 2 ) = 0 . 3365 ; 44 L(3 ,3)=0.1900 ; 45

46 t r i angu l a r mat r i x<Real> iL ( 4 , 4 ) ; 47 S i z e k = t r i a n gu l a r ma t r i x i n v e r s e (L , iL ) ; 48

51 std : : cout<<”L : \n” ; 52 for ( S i z e i =0; i<L . s i z e 1 ( ) ; ++i ) { 53 std : : cout<<row (L , i )<<std : : endl ; 54 } 55 std : : cout<<” iL : \n” ; 56 for ( S i z e i =0; i<iL . s i z e 1 ( ) ; ++i ) { 57 std : : cout<<row ( iL , i )<<std : : endl ; 58 } 59 } 60

61 t e s t s u i t e ∗ MatrixTest : : s u i t e ( ) { 62 t e s t s u i t e ∗ s u i t e = BOOST TEST SUITE(”Test ing Matrix” ) ; 63

64 su i t e−>add (BOOST TEST CASE(&MatrixTest : : t e stChole sky ) ) ; 65 su i t e−>add (BOOST TEST CASE( 66 &MatrixTest : : t e s tT r i a n gu l a r In ve r s e ) ) ; 67

68 return s u i t e ; 69 }

Listing B.138: The header file of class SchemaTest. 1 // te s t−s u i t e /nnmap . hpp 2 #ifndef TEST NNMAP HPP 3 #define TEST NNMAP HPP 4

5 #include<boost / t e s t / un i t t e s t . hpp> 6

7 class nnMapTest { 8 public : 9 static void test i i rMapA ( ) ;

10 static void te st i i iMapB ( ) ; 11 static boost : : un i t t e s t f r amework : : t e s t s u i t e ∗ s u i t e ( ) ; 12 } ; 13

Listing B.139: The source file of class SchemaTest. 1 // te s t−s u i t e /nnmap . cpp 2 #include ”nnmap . hpp” 3 #include<c l / da ta s e t s / datase t . hpp> 4 #include<c l / u t i l i t i e s / da ta s e t r e ade r . hpp> 5 #include<c l / u t i l i t i e s /nnmap . hpp> 6 #include<c l / d i s t anc e s / euc l i d eand i s t anc e . hpp> 7 #include<sstream> 8 #include<f stream> 9 #include<iostream>

11 using namespace ClusLib; 12 using namespace boost::unit_test_framework; 13

14 void nnMapTest : : test i i rMapA () { 15 BOOSTMESSAGE( ”Test ing iirMapA” ) ; 16

17 std : : s t r i n g f i leName ( ” . . / . . / Data/ b e z d e k I r i s . data” ) ; 18 DatasetReader reader ( f i leName ) ; 19 boost : : shared ptr<Dataset> ds ; 20 reader . f i l l ( ds ) ; 21

22 Euc l ideanDistance ed ; 23 iirMapA dm; 24 S i z e n = ds−>s i z e ( ) ; 25 for ( S i z e i =0; i<n;++i ){ 26 for ( S i z e j=i +1; j<n;++j ){ 27 dm. add item ( i , j , ed ((∗ ds ) [ i ] , (∗ ds ) [ j ] ) ) ; 28 } 29 } 30

31 std : : s t r i ng s t r e am ss ; 32 for ( S i z e i =0; i<n;++i ){ 33 for ( S i z e j=i +1; j<n;++j ){ 34 ss<<i+1<<” , ”<<j+1<<” , ”<<dm( j , i )<<std : : endl ; 35 } 36 } 37

38 std : : o f s t ream out ( ” i r i sdm . csv ” ) ; 39 out<<s s . s t r ( ) ; 40 out . c l o s e ( ) ; 41 } 42

43 void nnMapTest : : t e st i i iMapB ( ) { 44 BOOSTMESSAGE( ”Test ing ii iMapB” ) ; 45

46 i i iMapB dm; 47 S i z e n = 10 ; 48 for ( S i z e i =0; i<n;++i ){ 49 for ( S i z e j =0; j<n;++j ){ 50 dm. add item ( i , j , i+j ) ; 51 } 52 } 53

54 std : : s t r i ng s t r e am ss ; 55 for ( S i z e i =0; i<n;++i ){ 56 for ( S i z e j =0; j<n;++j ){ 57 ss<<i+1<<” , ”<<j+1<<” , ”<<dm( j , i )<<std : : endl ; 58 } 59 } 60

61 std : : o f s t ream out ( ” i i rmapb . csv ” ) ; 62 out<<s s . s t r ( ) ; 63 out . c l o s e ( ) ; 64 } 65

66 t e s t s u i t e ∗ nnMapTest : : s u i t e ( ) { 67 t e s t s u i t e ∗ s u i t e = BOOST TEST SUITE(”Test ing nnMap” ) ; 68

69 su i t e−>add (BOOST TEST CASE(&nnMapTest : : test i i rMapA ) ) ; 70 su i t e−>add (BOOST TEST CASE(&nnMapTest : : t e st i i iMapB ) ) ; 71

72 return s u i t e ; 73 }