1 REM Information-based agglomerative clustering of items described by
2 REM non-negative character values. Reads "items.txt", writes "results.dat".
3 REM Input layout: set name; item count l, character count k; then for each
4 REM item an identifier followed by its k values.
5 CLS
6 OPEN "items.txt" FOR INPUT AS #2
7 INPUT #2, setname$
8 PRINT "clustering programme using informaion methods"
10 PRINT "Garry Law's information based clustering"
15 INPUT #2, l, k
20 OPEN "results.dat" FOR OUTPUT AS #1
21 WRITE #1, "clustering programme using informaion methods"
22 WRITE #1, setname$
23 WRITE #1, "Number 0f Items", l
24 WRITE #1, "Number 0f characters", k
25 PRINT setname$
26 PRINT "Number 0f Items", l
27 PRINT "Number 0f characters", k
28 kmax = k
29 IF kmax > 9 THEN 35
30 kmax = 9
34 REM kmax is at least 9 because line 51 always echoes a(1..9, J).
35 DIM a(kmax, l), M(l), E(l)
36 REM a(i, J) = value of character i for item J; M(J) = total of item J;
37 REM E(J) = 1 once item J has been merged into another item.
38 FOR J = 1 TO l
39   INPUT #2, identifier$
40   FOR i = 1 TO k
45     INPUT #2, a(i, J)
50   NEXT i
51   WRITE #1, J, identifier$, a(1, J), a(2, J), a(3, J), a(4, J), a(5, J), a(6, J), a(7, J), a(8, J), a(9, J)
55 NEXT J
56 CLOSE #2
90 REM ---- Totals: n = grand total, o = sum of z*LOG(z) over all cells ----
100 t = 0
110 n = 0
111 o = 0
120 FOR J = 1 TO l
130   FOR i = 1 TO k
131     z = a(i, J)
140     n = n + z
141     IF z = 0 THEN 150
142     o = o + z * LOG(z)
150   NEXT i
155   REM n is cumulative, so the item total is n minus the previous n (held in t).
160   M(J) = n - t
170   t = n
180   E(J) = 0
190 NEXT J
195 REM t = entropy over all cells of the table.
200 t = LOG(n) - o / n
205 REM ---- f = entropy of the item totals (printed under H(ITEMS)) ----
210 f = 0
211 z = 0
220 FOR J = 1 TO l
230   z = z + M(J)
235   REM Skip empty items: LOG(0) is an illegal function call.
236   REM (The original compared against the variable o, not the digit 0.)
240   IF M(J) = 0 THEN 260
250   f = f + M(J) * LOG(M(J))
260 NEXT J
270 f = (LOG(n) * z - f) / n
275 REM ---- g = entropy of the character totals (printed under H(CHAR)) ----
276 REM y accumulates the character totals here; it is reassigned at line 690.
280 y = 0
290 g = 0
300 FOR i = 1 TO k
310   z = 0
320   FOR J = 1 TO l
330     z = z + a(i, J)
340   NEXT J
350   IF z = 0 THEN 380
360   g = g + z * LOG(z)
370   y = y + z
380 NEXT i
390 g = LOG(n) - g / n
394 REM h = f + g - t is the quantity reported in the J column.
395 h = f + g - t
400 CLS
410 PRINT " H(CHAR) H(ITEMS) J Joined DEL-J R"
411 WRITE #1, " H(CHAR) H(ITEMS) J Joined DEL-J R"
420 REM ---- Main loop: find and merge the pair of live items with largest b ----
600 x = -100
610 u = l - 1
620 FOR J = 1 TO u
630   IF E(J) = 1 THEN 730
640   v = J + 1
650   FOR w = v TO l
655     IF E(w) = 1 THEN 720
660     GOSUB 2000
670     IF b < x THEN 720
675     REM Best pair so far: remember its change in h (x), in f (y), in t (r).
680     x = b
690     y = p
695     r = q
700     c = J
710     d = w
720   NEXT w
730 NEXT J
735 REM Report the measures before the merge, then apply the chosen merge.
740 PRINT ; g; f; h
741 WRITE #1, g, f, h
750 PRINT
760 t = t + r
770 f = f + y
775 h = f + g - t
790 r = h / (f + g)
800 PRINT " "; c; d; x; r
801 WRITE #1, " ", c, d; x; r
805 REM Fold item d into item c and retire d.
810 E(d) = 1
820 FOR i = 1 TO k
830   a(i, c) = a(i, c) + a(i, d)
840 NEXT i
850 M(c) = M(c) + M(d)
860 z = 0
870 INPUT "press return to continue"; i
875 REM Count the groups still alive; stop when only two remain.
880 FOR J = 1 TO l
890   IF E(J) = 0 THEN z = z + 1
900 NEXT J
910 IF z > 2 THEN GOTO 600
920 GOTO 3000
1990 REM ---- Subroutine: effect of merging items J and w ----
1991 REM Returns p = change in f, q = change in t, b = p - q = change in h.
1992 REM Zero totals contribute nothing (0*LOG(0) taken as 0), matching the
1993 REM per-cell guards below, so an all-zero item no longer aborts the run.
2000 s = M(J)
2010 z = M(w)
2012 p = 0
2014 IF s <> 0 THEN p = p + s * LOG(s)
2016 IF z <> 0 THEN p = p + z * LOG(z)
2018 IF s + z <> 0 THEN p = p - (s + z) * LOG(s + z)
2020 p = p / n
2030 q = 0
2040 FOR i = 1 TO k
2050   s = a(i, J)
2060   z = a(i, w)
2070   IF s = 0 THEN 2090
2080   q = q + s * LOG(s)
2090   IF z = 0 THEN 2110
2100   q = q + z * LOG(z)
2110   s = s + z
2120   IF s = 0 THEN 2140
2130   q = q - s * LOG(s)
2140 NEXT i
2150 q = q / n
2160 b = p - q
2170 RETURN
2990 REM ---- Final report ----
3000 PRINT g; f; h
3001 PRINT "Only two groups left clustering stops"
3005 PRINT "n = "; n
3006 PRINT "see file results.dat for this information"
3010 WRITE #1, g; f; h
3011 WRITE #1, "Only two groups left clustering stops"
3012 WRITE #1, "n = "; n
3020 CLOSE #1
3030 END