/*---------------------------------------------------------------------------
  %ARI - A SAS macro written by von Borries (2008), PhD thesis, Kansas State
  University, to calculate the Adjusted Rand statistic (Hubert & Arabie, 1985).
  Coding was updated from Fisher and Hoffman (1988) to work in SAS Ver 9x.
  Additional modification by Bowley (2008) for direct listing of the result.

  Two options for handling missing clusters:
    1) Each missing value becomes its own cluster.    MISSING=SEPARATE
    2) M*N table ---> (M+1)*(N+1).                    MISSING=COMBINED

  NOTE: this header is a block comment on purpose. Macro triggers such as
  the macro name above are still scanned by the macro processor inside
  asterisk-style comment statements, but not inside block comments.
---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------
  %TAB - compute the Adjusted Rand index from the data set WORK.TABARI.

  Parameters
    data=     accepted for symmetry with the other macros; the macro body
              always reads WORK.TABARI and never references this value.
    missing=  QUOTED string literal: 'separate' or '0' -> every missing
              label becomes its own singleton cluster; 'combined' or '1' ->
              all missing labels of a variable form one extra cluster.
              The value is pasted verbatim into an IML comparison, so the
              quotes are required.

  Input
    WORK.TABARI: numeric columns read positionally as
    (row label, column label, count). Non-missing labels are used as
    matrix subscripts, so they must be positive integers.

  Output
    Prints the intermediate sums and the index, and writes the index to
    data set WORK.ADJRAND (one row, column ADRAND).
---------------------------------------------------------------------------*/
%macro tab(data=, missing=);
  proc iml;

    /* a2: scatter the (row, col, count) triples held in z into table t. */
    start a2;
      do i = 1 to nrow(z);
        t[z[i,1], z[i,2]] = z[i,3];
      end;
    finish;

    /* separate: expand every triple that has a missing label into cnt
       rows of count 1, each with a fresh label (i for rows, j for
       columns), so that each missing observation is its own cluster.
       Triples without missing labels are remembered in keep. */
    start separate;
      /* Bound the loop by z0: z grows inside the loop. */
      do k = 1 to nrow(z0);
        cnt = z0[k,3];
        if z0[k,1] = . then do;
          /* i:(i+cnt-1) is a ROW vector; transpose it so that it conforms
             with the cnt x 1 columns built by j(). Without the transpose
             any count above 1 is a conformability error. */
          rowid = (i:(i+cnt-1))`;
          if z0[k,2] = . then do;
            colid = (j:(j+cnt-1))`;
            z = z // (rowid || colid || j(cnt,1,1));
            j = j + cnt;
          end;
          else z = z // (rowid || j(cnt,1,z0[k,2]) || j(cnt,1,1));
          i = i + cnt;
          nomiss = 0;
        end;
        else if z0[k,2] = . then do;
          colid = (j:(j+cnt-1))`;
          z = z // (j(cnt,1,z0[k,1]) || colid || j(cnt,1,1));
          j = j + cnt;
          nomiss = 0;
        end;
        else keep = keep || k;
      end;
      /* Drop the original triples that contained a missing label and keep
         the complete ones plus the newly appended singleton rows. */
      if nomiss = 0 then z = z[keep || (nrow(z0)+1:nrow(z)),];
      /* Reorder the rows by ascending row label. */
      z0 = z;
      z[rank(z[,1]),] = z0;
    finish;

    /* combined: map all missing row labels to the single new label i and
       all missing column labels to the single new label j. */
    start combined;
      x0 = loc(z[,1] = .);
      y0 = loc(z[,2] = .);
      if ncol(x0) > 0 then z[x0,1] = i;
      if ncol(y0) > 0 then z[y0,2] = j;
      if ((ncol(x0) > 0) | (ncol(y0) > 0)) then nomiss = 0;
    finish;

    use tabari;
    read all into z0;
    close tabari;

    /* First unused row / column label, handed out to missing values. */
    i = max(z0[,1]) + 1;
    j = max(z0[,2]) + 1;
    z = z0;
    nomiss = 1;   /* stays 1 when no missing label was encountered */

    if ((&missing = 'separate') | (&missing = '0')) then call separate;
    if ((&missing = 'combined') | (&missing = '1')) then call combined;

    /* Contingency table of the two partitions. */
    t = j(max(z[,1]), max(z[,2]), 0);
    call a2;

    cm   = t[+,];   sscm = ssq(cm);   /* column totals, sum of squares */
    rm   = t[,+];   ssrm = ssq(rm);   /* row totals, sum of squares    */
    tot  = rm[+];
    sst  = ssq(t);
    nn   = tot * tot;
    n2   = 0.5 * tot * (tot - 1);
    n0   = -0.5 * (ssrm + sscm) + n2;
    ns   = sst + n0;
    nc   = ssrm * sscm / nn + n0
           + (nn - ssrm) * (nn - sscm) / (nn * (tot - 1));
    adjrand = (ns - nc) / (n2 - nc);

    TotalNumber = tot;
    print nomiss sscm ssrm tot sst;
    print TotalNumber;
    /* Summary row vector; the last element is the index itself. */
    res = nomiss || sscm || ssrm || tot || sst || adjrand;
    print adjrand;

    /* Write the index to WORK.ADJRAND. The data set is created from the
       matrix that holds the value; the output column keeps the historical
       name ADRAND so that existing readers of the data set still work. */
    create adjrand from adjrand[colname={'adrand'}];
    append from adjrand;
    close adjrand;
  quit;
%mend tab;

/*---------------------------------------------------------------------------
  %FREQ - cross-tabulate &x by &y in &data and store the cell counts in
  WORK.TABARI with the two variables renamed to X and Y (PERCENT dropped).
  (The original source carried a commented-out "/ store des='ARI - FREQ'"
  macro option here.)
---------------------------------------------------------------------------*/
%macro freq(data=, x=, y=);
  proc freq data = &data;
    tables &x * &y / out = tabari(rename=(&x = x &y = y) drop=percent) noprint;
  run;
%mend freq;

/*---------------------------------------------------------------------------
  %ARI - main entry point: Adjusted Rand index between the cluster labels
  held in variables &x and &y of data set &data.

  Parameters (positional)
    data  input data set
    x     first cluster-label variable (numeric)
    y     second cluster-label variable (numeric)

  Missing labels are handled with the 'separate' strategy.
  Results: printed output, WORK.TABARI (frequency table) and WORK.ADJRAND.
  The input data set is not modified; work is done on the scratch data set
  WORK._ARI_WORK, which is deleted at the end.
  (The original source carried a commented-out "/ store des='ARI - MAIN'"
  macro option here.)
---------------------------------------------------------------------------*/
%macro ari(data, x, y);
  /* Keep the setting local so a global MISSING macro variable is untouched. */
  %local missing;
  %let missing = 'separate';

  /* Labels are shifted by one before tabulation; presumably so that a
     label of 0 becomes a valid (1-based) matrix subscript in %TAB.
     Missing labels stay missing. */
  data _ari_work(keep=_ari_x _ari_y);
    set &data;
    _ari_x = &x + 1;
    _ari_y = &y + 1;
  run;

  %freq(data=_ari_work, x=_ari_x, y=_ari_y);
  %tab(data=_ari_work, missing=&missing);

  proc datasets library=work nolist;
    delete _ari_work;
  quit;
%mend ari;