"""Using IPFS in ML with ipfspy.ipfshttpapi.

A tutorial using IPFS as the storage layer for datasets and models in an
ML workflow (model built with fastai).
"""
from fastai.tabular.all import *
from ipfspy.ipfshttpapi import IPFSApi
Add a dataset to IPFS using the Infura node
# --- Add a dataset to IPFS using the Infura node ---
api = IPFSApi()  # prints "Changed to local node" (local is the default gateway)
api.change_gateway_type = 'infura'  # prints "Changed to infura node"

# Upload the CSV. `obj` contains chunked upload-progress entries followed by
# the final pinned object; observed output:
# [{'Name': 'adult_data.csv', 'Bytes': 262144},
#  {'Name': 'adult_data.csv', 'Bytes': 524288},
#  ...
#  {'Name': 'adult_data.csv', 'Bytes': 3974475},
#  {'Name': 'adult_data.csv',
#   'Hash': 'QmZnxARhJWsCbTxiAzoRhnxHgMtoEkNJNS8DGLCBEMvm4V',
#   'Size': '3975476'}]
res, obj = api.add_items('output/adult_data.csv')
Retrieving a dataset from IPFS using a public node
# --- Retrieve the dataset from IPFS using a public node ---
api.change_gateway_type = 'public'  # prints "Changed to public node"
res, obj = api.cat_items('QmZnxARhJWsCbTxiAzoRhnxHgMtoEkNJNS8DGLCBEMvm4V')

# Persist the downloaded bytes, then load them with pandas.
with open('output/adult_data_dl.csv', 'wb') as f:
    f.write(res.content)
df = pd.read_csv('output/adult_data_dl.csv')
# Column headers in the adult dataset carry stray whitespace — strip it.
df.columns = [col.strip() for col in df.columns]

# === Building a tabular model ===
# Random train/validation split plus the standard fastai tabular preprocessing.
splits = RandomSplitter()(range_of(df))
cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']
cont_names = ['age', 'fnlwgt', 'education-num']
procs = [Categorify, FillMissing, Normalize]
y_names = 'salary'
y_block = CategoryBlock()

to = TabularPandas(df, procs=procs, cat_names=cat_names, cont_names=cont_names,
                   y_names=y_names, y_block=y_block, splits=splits)
dls = to.dataloaders(bs=64)

# Two hidden layers (200, 100); three epochs with the one-cycle policy.
learn = tabular_learner(dls, [200, 100], metrics=accuracy)
learn.fit_one_cycle(3, 1e-3)
# Observed training log:
# | epoch | train_loss | valid_loss | accuracy | time  |
# |-------|------------|------------|----------|-------|
# |   0   |  0.376169  |  0.351168  | 0.838913 | 00:02 |
# |   1   |  0.368451  |  0.349306  | 0.836456 | 00:02 |
# |   2   |  0.357292  |  0.343430  | 0.840756 | 00:02 |
# --- Export the trained learner and add model+config files to IPFS ---
learn.export('output/testmodel.pkl')

api.change_gateway_type = 'infura'  # prints "Changed to infura node"
# Observed `obj`:
# [{'Name': 'testmodel.pkl', 'Bytes': 242379},
#  {'Name': 'testmodel.pkl',
#   'Hash': 'QmR77qXp7CYEg6kHA3z77mcayTmm9hoXz7YQHFz9WjphiE',
#   'Size': '242393'}]
res, obj = api.add_items('output/testmodel.pkl')
Retrieving the model + config files from IPFS
# --- Retrieve the model from IPFS and run inference ---
api.change_gateway_type = 'public'  # prints "Changed to public node"
res, obj = api.cat_items('QmR77qXp7CYEg6kHA3z77mcayTmm9hoXz7YQHFz9WjphiE')
with open('output/testmodel_dl.pkl', 'wb') as f:
    f.write(res.content)

# NOTE(review): load_learner unpickles this file — unpickling arbitrary data
# can execute code, so only load models whose IPFS hash/source you trust.
learn = load_learner('output/testmodel_dl.pkl')

# Score the first 10 rows of the dataframe with the reloaded model.
dl = learn.dls.test_dl(df.iloc[:10])
learn.get_preds(dl=dl)
# Returns (per-class probabilities, targets); observed output:
# (tensor([[0.9282, 0.0718],
#          [0.3556, 0.6444],
#          [0.9667, 0.0333],
#          [0.8731, 0.1269],
#          [0.4372, 0.5628],
#          [0.1317, 0.8683],
#          [0.9819, 0.0181],
#          [0.5074, 0.4926],
#          [0.8733, 0.1267],
#          [0.1803, 0.8197]]),
#  tensor([[0], [0], [0], [0], [0], [0], [0], [1], [1], [1]],
#         dtype=torch.int8))