## usage: cat corpus.dat | python3 classifier.py f0.mem >corpus.out

import sys
from math import exp

## ----------------------------------
## Global variables

lbd = {}    ## lambda (weight) for each class-feature pair, keyed "<feature>#<class index>"
bias = {}   ## bias for each class (a list of numeric strings once a model is loaded)
nclas = 0   ## number of classes


## ---------------------------------
## Load the model in the given file into the globals 'bias', 'nclas' and 'lbd'

def loadModel(fname):
    """Load the model stored in file `fname` into the module globals.

    The first line holds the class biases: its first token is skipped and
    every remaining token is the bias of one class, in class-index order.
    Each following line is a feature name followed by one weight per class;
    the weight of feature F for class c is stored as ``lbd["F#c"]``.

    Weights left over from a previously loaded model are discarded, so the
    three globals always describe the same model.

    Raises OSError if the file cannot be opened, and ValueError if a weight
    is not a valid float.
    """
    global nclas, bias, lbd

    with open(fname, encoding='utf-8') as model:
        ## first line: class biases (kept as strings, converted when used)
        bias = model.readline().split()[1:]
        nclas = len(bias)

        ## clear in place so that existing references to 'lbd' stay valid
        lbd.clear()

        ## rest of lines: one feature per line, one weight per class
        for lin in model:
            t = lin.split()
            for i in range(1, len(t)):
                lbd[t[0] + "#" + str(i - 1)] = float(t[i])


def _normalize(weights):
    """Divide every weight by the sum of all weights.

    Raises ZeroDivisionError if the weights sum to zero and OverflowError if
    the sum is infinite, so the caller can retry with rescaled weights.
    """
    ## plain left-to-right accumulation: sum() may use compensated summation
    ## and give results that differ in the last digits
    total = 0
    for w in weights:
        total = total + w
    if total == float("inf"):
        raise OverflowError("normalization factor overflowed")
    return [w / total for w in weights]


def _softmax(scores):
    """Return exp(score) / sum(exp(scores)) for every score."""
    try:
        return _normalize([exp(s) for s in scores])
    except (OverflowError, ZeroDivisionError):
        ## Scores too large (or all too small) for exp(): shifting by the
        ## maximum gives the same probabilities mathematically and keeps
        ## every exponent <= 0. Used only as a fallback so that results for
        ## well-behaved inputs are unchanged.
        top = max(scores)
        return _normalize([exp(s - top) for s in scores])


def classify(feat):
    """Compute P(c|example) for every class c of the loaded model.

    `feat` is the token list of one input line: feat[0] is the gold label
    (ignored here), followed by alternating feature names and values.
    Features that do not appear in the model contribute nothing.

    Returns (mx, p): the index of the most probable class (the lowest index
    on ties, 0 when the model has no classes) and the list of class
    probabilities.

    Raises IndexError if a feature name has no value after it, and
    ValueError if a value is not a valid float.
    """
    ## parse the (name, value) pairs once instead of once per class
    pairs = [(feat[i], float(feat[i + 1])) for i in range(1, len(feat), 2)]

    scores = []
    for c in range(nclas):
        suffix = "#" + str(c)
        ## s = sum of lambda_i * f_i over the features of the example
        s = 0
        for name, value in pairs:
            s = s + lbd.get(name + suffix, 0.0) * value
        scores.append(s + float(bias[c]))

    p = _softmax(scores)

    ## strict '>' keeps the first class in case of ties
    mx = 0
    for c in range(len(p)):
        if p[c] > p[mx]:
            mx = c
    return mx, p


def evaluate(lines, out):
    """Classify every example in `lines` and write the predictions to `out`.

    `lines` is any iterable of input lines and `out` any object with a
    write() method. One line is written per example: the chosen class
    followed by the probability of every class (the layout produced by
    "megam -predict"). Blank lines are skipped and not counted.

    Returns (nok, ntot): the number of correctly classified examples and the
    total number of examples.
    """
    nok = 0
    ntot = 0
    for line in lines:
        feat = line.split()
        if not feat:
            continue

        ## keep right answer, for evaluation
        tagOK = int(feat[0])

        mx, p = classify(feat)

        ## output chosen class, then all class probabilities
        out.write(" ".join([str(mx)] + [str(x) for x in p]) + "\n")

        if mx == tagOK:
            nok = nok + 1
        ntot = ntot + 1
    return nok, ntot


## MAIN ---------------------

def main():
    """Load the model named on the command line and classify standard input."""
    if len(sys.argv) < 2:
        sys.stderr.write("usage: cat corpus.dat | python3 classifier.py f0.mem >corpus.out\n")
        sys.exit(1)

    ## -- load model
    loadModel(sys.argv[1])

    ## -- classify each input example
    nok, ntot = evaluate(sys.stdin, sys.stdout)

    ## -- print accuracy results (0.0% when there were no examples)
    acc = 100.0 * nok / ntot if ntot else 0.0
    sys.stderr.write("Accuracy= "+str(nok)+"/"+str(ntot)+" ("+str(acc)+"%)\n")


if __name__ == "__main__":
    main()