In [6]:

```
%reset -f
from numpy import genfromtxt, savetxt
import numpy as np
#pollution data
Data = genfromtxt(open('data/pollution.csv','r'),dtype=float, delimiter=',')[1:]
Labels = Data[:,0]
Data = Data[:,1:]
header= genfromtxt(open('data/pollution.csv','r'),delimiter=',',dtype = None)[0]
header = header[1:]
header = header[np.newaxis,:]
print 'size of data set: ', Data.shape
# %reset -f
import sys
import numpy as np
import sompylib.som_structure as SOM
from matplotlib import pyplot as plt
msz0 = 50
msz1 = 50
cd = msz0*msz1*1*1
dlen = 100*1000*1*1*1#+224
dim = 3
```

In [34]:

```
# Pairwise scatter plots (KDE on the diagonal) for a sample of the loaded data.
try:
    # Location of scatter_matrix in current pandas releases.
    from pandas.plotting import scatter_matrix
except ImportError:
    # Fallback for old pandas releases that only ship pandas.tools.plotting.
    from pandas.tools.plotting import scatter_matrix
from pandas import Series, DataFrame
import pandas as pd

# header is kept as a (1, n) row, so header[0] is the 1-D list of column names;
# passing the transposed 2-D array as `columns` is rejected by current pandas.
# NOTE(review): the slice starts at row 1, so the first record is left out of
# the sample -- confirm this is intentional.
df = DataFrame(data=Data[1:1000, :], columns=header[0])

# Despite the name, scatter_matrix returns the grid of axes, not a Figure.
fig = scatter_matrix(df, alpha=0.2, figsize=(10, 10), diagonal='kde')
```

In [7]:

```
# Re-execute the SOM module so the kernel uses its current on-disk source.
reload(sys.modules[SOM.__name__])

# Create an msz0 x msz1 map over Data with 'var' normalisation and 'pca'
# initialisation, then initialise it.
sm = SOM.SOM(
    'sm',
    Data,
    mapsize=[msz0, msz1],
    norm_method='var',
    initmethod='pca',
)
sm.init_map()

# Attach the column names as the map's component names, then show every
# dimension of the freshly initialised (untrained) map.
sm.compname = header
sm.view_map(which_dim='all')
```

In [8]:

```
# Train the map with a single job, shared memory switched off and verbose
# output switched on.
sm.train(
    n_job=1,
    shared_memory='no',
    verbose='on',
)
```

In [9]:

```
# Show every dimension of the map again, this time after training.
sm.view_map(
    which_dim='all',
)
```

In [13]:

```
# Same view of all dimensions, with pack='Yes', a text size of 6 and saving
# turned off (empty save directory).
sm.view_map(
    which_dim='all',
    pack='Yes',
    text_size=6,
    save='No',
    save_dir='',
)
```

In [14]:

```
# Display the hit map of the trained SOM (called with its default arguments).
sm.hit_map(
)
```

In [51]:

```
%reset -f
import numpy as np
import sompylib.som_structure as SOM
from matplotlib import pyplot as plt
import sys
msz0 = 50
msz1 = 50
cd = msz0*msz1*1*1
dlen = 81920*1*1*1*1#+224
# dlen = 200*1000
dim = 256
Data = np.random.randint(0,2,size = (dlen,dim))
```

In [52]:

```
# Re-execute the SOM module so the kernel uses its current on-disk source.
reload(sys.modules[SOM.__name__])

# Build an msz0 x msz1 map over Data with 'var' normalisation and train it
# with the default training arguments.
sm = SOM.SOM(
    'sm',
    Data,
    norm_method='var',
    mapsize=[msz0, msz1],
)
sm.train()
```

In [115]:

```
from pandas.tools.plotting import scatter_matrix
from pandas import Series, DataFrame
import pandas as pd
data = Data[48*1000:58*1000]
Target = 8
print 'Variable to predict: ', header[0][Target]
pred = sm.predict_by(data,Target, K =1)
real = data[:,Target]
accuracy = (1-np.abs((pred-real)/real))*100
print 'median accuracy', np.median(accuracy)
print 'mean accuracy', np.mean(accuracy)
print 'std accuracy', np.std(accuracy)
print 'min accuracy', np.min(accuracy)
print 'max accuracy', np.max(accuracy)
DF = DataFrame({'True Value': real[1:100], 'Predicted Value':pred[1:100]})
fig = plt.figure();
DF.plot(DF.index,DF.columns[:],label=header[0][Target],colormap='jet',x_compat=True,style='.-'); plt.legend(loc='best',bbox_to_anchor = (1.0, 1.0),fontsize = 'medium')
plt.ylabel('values')
font = {'size' : 12}
plt.rc('font', **font)
fig.set_size_inches(10,10)
```

In [ ]:

```
```