In this tutorial, we will see how to use IPFS as the storage layer for datasets and models in an ML workflow.
from fastai.tabular.allimport*from ipfspy.ipfsspec.asyn import AsyncIPFSFileSystemfrom fsspec import register_implementationimport asyncioimport ioimport fsspecimport os
/usr/local/lib/python3.8/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
from .autonotebook import tqdm as notebook_tqdm
Retrieving the dataset from IPFS using a public node
# Download the adult dataset from IPFS into the local `output/` directory.
# Any previous copy is removed first so the Before/After listings show the
# effect of the download.
if fs.file.exists('output/adult_data.csv'):
    fs.file.rm('output/adult_data.csv', recursive=True)

# List the same directory before and after the download so the two listings
# are comparable. os.path.relpath is used rather than str.lstrip because
# lstrip removes any leading characters found in its argument (a character
# set), not a prefix, and would also eat the start of the relative path.
print('Before: ', [os.path.relpath(p, os.getcwd()) for p in fs.file.glob('output/*')])

fs.ipfs.get(
    rpath='QmZnxARhJWsCbTxiAzoRhnxHgMtoEkNJNS8DGLCBEMvm4V',
    lpath='output/adult_data.csv',  # a filename must be given
    recursive=True,
    return_cid=False,
)

print('After: ', [os.path.relpath(p, os.getcwd()) for p in fs.file.glob('output/*')])
# Download the pickled test model from IPFS into the local `output/`
# directory. Any previous copy is removed first so the Before/After listings
# show the effect of the download.
if fs.file.exists('output/testmodel.pkl'):
    fs.file.rm('output/testmodel.pkl', recursive=True)

# os.path.relpath is used rather than str.lstrip because lstrip removes any
# leading characters found in its argument (a character set), not a prefix,
# and would also eat the start of the relative path.
print('Before: ', [os.path.relpath(p, os.getcwd()) for p in fs.file.glob('output/*')])

fs.ipfs.get(
    rpath='QmSo4beNV5LAr166yZRvy7TNRmCtX4HXyiXqECVvDD6bnt',
    lpath='output/testmodel.pkl',  # a filename must be given
    recursive=True,
    return_cid=False,
)

print('After: ', [os.path.relpath(p, os.getcwd()) for p in fs.file.glob('output/*')])