code.Sym
import math


class Sym:
    """Summarize a column of symbolic (categorical) values.

    Keeps one occurrence count per distinct value, from which the mode
    (``mid``) and the entropy in bits (``div``) are derived.
    """

    def __init__(self, c="", s=0):
        """Create an empty summary.

        Args:
            c: Column position; any falsy value (including the default)
                is stored as 0.
            s: Column name; any falsy value (including the default) is
                stored as "".
        """
        self.n = 0  # items seen
        self.at = c if c else 0  # column position
        self.name = s or ""  # column name
        self._has = {}  # kept data: value -> occurrence count

    def add(self, v):
        """Count one occurrence of ``v``.

        Values equal to "?" are ignored (presumably the missing-value
        marker -- confirm against the data loader).
        """
        if v != "?":
            self.n += 1
            self._has[v] = self._has.get(v, 0) + 1

    def mid(self):
        """Return the most frequent value seen (the mode).

        Ties go to the value that was added first. Returns None when no
        value has been added yet, instead of failing on an unbound local.
        """
        # max() keeps the first maximal key in insertion order, which is
        # the same tie-break as a strict ``>`` scan over the counts.
        return max(self._has, key=self._has.get, default=None)

    def div(self):
        """Return the entropy of the seen values, in bits.

        Returns 0 when no value has been added.
        """
        e = 0
        for count in self._has.values():
            if count > 0:
                p = count / self.n
                # log2 is more accurate than log(p, 2) for base-2 logs.
                e -= p * math.log2(p)
        return e
class
Sym:
class Sym:
    """Summarize a column of symbolic (categorical) values.

    Keeps one occurrence count per distinct value, from which the mode
    (``mid``) and the entropy in bits (``div``) are derived.
    """

    def __init__(self, c="", s=0):
        """Create an empty summary.

        Args:
            c: Column position; any falsy value (including the default)
                is stored as 0.
            s: Column name; any falsy value (including the default) is
                stored as "".
        """
        self.n = 0  # items seen
        self.at = c if c else 0  # column position
        self.name = s or ""  # column name
        self._has = {}  # kept data: value -> occurrence count

    def add(self, v):
        """Count one occurrence of ``v``.

        Values equal to "?" are ignored (presumably the missing-value
        marker -- confirm against the data loader).
        """
        if v != "?":
            self.n += 1
            self._has[v] = self._has.get(v, 0) + 1

    def mid(self):
        """Return the most frequent value seen (the mode).

        Ties go to the value that was added first. Returns None when no
        value has been added yet, instead of failing on an unbound local.
        """
        # max() keeps the first maximal key in insertion order, which is
        # the same tie-break as a strict ``>`` scan over the counts.
        return max(self._has, key=self._has.get, default=None)

    def div(self):
        """Return the entropy of the seen values, in bits.

        Returns 0 when no value has been added.
        """
        e = 0
        for count in self._has.values():
            if count > 0:
                p = count / self.n
                # log2 is more accurate than log(p, 2) for base-2 logs.
                e -= p * math.log2(p)
        return e