## usage:   cat corpus.dat | python3 classifier.py f0.mem >corpus.out

import sys
from math import exp

## ----------------------------------
## Global variables 
nclas = 0   ## number of classes (set by loadModel)
bias = {}   ## per-class bias terms (loadModel rebinds this to a list, one entry per class)
lbd = {}    ## lambda weight of each feature/class pair, keyed "<feature>#<class index>"

## ---------------------------------
## Load the model in the given file into the globals 'bias', 'nclas' and 'lbd'
def loadModel(fname) :
  """Load the model stored in file `fname` into the module globals.

  File layout, as read by this function:
    * first line: whitespace-separated tokens; the first token is dropped
      and every remaining token is the bias of one class, in class order.
    * each following line: a feature name followed by one weight per class.
      Blank lines are ignored.

  Results are stored in globals:
    * `bias`:  list with the bias tokens (kept as strings, as read).
    * `nclas`: number of classes, i.e. len(bias).
    * `lbd`:   weights as floats, keyed "<feature>#<class index>" with
               0-based class indices. Entries are added to (or overwrite
               those already in) the existing dictionary.

  Raises OSError if the file cannot be opened, and ValueError if a weight
  is not a valid number.
  """
  global nclas, bias, lbd

  # 'with' guarantees the file is closed, also when parsing fails midway
  with open(fname, encoding='utf-8') as model:
    # first line holds the class biases; its leading token is not a bias
    bias = model.readline().split()[1:]
    nclas = len(bias)

    # remaining lines: feature name followed by one weight per class
    for lin in model:
      t = lin.split()
      for c, weight in enumerate(t[1:]):
        lbd[t[0] + "#" + str(c)] = float(weight)


## MAIN ---------------------

## -- load the model (class biases and feature weights) from the file
## -- given as first command-line argument
loadModel(sys.argv[1])

ntot = 0   ## number of classified examples
nok = 0    ## number of examples whose predicted class equals the right one

## classify each input example. Each line is the right class (an integer)
## followed by "name value" feature pairs.
for line in sys.stdin :
  feat = line.split()

  ## skip blank lines (e.g. a trailing empty line) instead of failing on feat[0]
  if not feat :
    continue

  ## keep right answer, for evaluation
  tagOK = int(feat[0])

  ## compute the score of each class c: bias_c + sum of lambda_i * f_i
  ## for each example feature i.
  ## The required info is contained in lbd[] and in the line
  scores = []
  for c in range(nclas) :
    s = 0
    for i in range(1, len(feat), 2) :
      ## a feature the model has no weight for contributes nothing,
      ## rather than aborting the whole run with a KeyError
      s = s + lbd.get(feat[i] + "#" + str(c), 0.0) * float(feat[i+1])
    scores.append(s + float(bias[c]))

  ## compute P(c|ex) for each class c. Subtracting the highest score before
  ## exponentiating gives mathematically the same probabilities, but exp()
  ## can no longer overflow and z is always >= 1 (no division by zero).
  top = max(scores, default=0.0)
  p = [exp(s - top) for s in scores]
  z = sum(p)
  p = [v / z for v in p]

  ## most probable class (first one wins on ties; 0 if there are no classes)
  mx = max(range(nclas), key=p.__getitem__, default=0)

  # output chosen class, followed by all probs to emulate "megam -predict"
  sys.stdout.write(" ".join([str(mx)] + [str(v) for v in p]) + "\n")

  ## evaluation
  if mx == tagOK :
    nok = nok + 1
  ntot = ntot + 1


# print accuracy results (reported as 0.0% when no example was read,
# instead of dividing by zero)

acc = 100.0*nok/ntot if ntot > 0 else 0.0
sys.stderr.write("Accuracy= "+str(nok)+"/"+str(ntot)+" ("+str(acc)+"%)\n")