!ls
init.ipynb notebook.ipynb
#Crear la directorio en colab
#Poner el nombr que se desee al directorio
!mkdir dvc5
mkdir: cannot create directory ‘dvc5’: File exists
!pwd
/work
#%rm -Rf dvc5
# Change the working directory to the project folder.
import os

project_dir = "/work/dvc5/dvc5"
os.chdir(project_dir)
!pwd
/work
#Instalar dvc
!pip install dvc
Collecting dvc
Downloading dvc-2.9.2-py3-none-any.whl (389 kB)
|████████████████████████████████| 389 kB 16.1 MB/s
Collecting appdirs>=1.4.3
Downloading appdirs-1.4.4-py2.py3-none-any.whl (9.6 kB)
Collecting fsspec[http]>=2021.10.1
Downloading fsspec-2021.11.1-py3-none-any.whl (132 kB)
|████████████████████████████████| 132 kB 51.7 MB/s
Collecting flufl.lock>=5
Downloading flufl.lock-6.0.tar.gz (30 kB)
Collecting ply>=3.9
Downloading ply-3.11-py2.py3-none-any.whl (49 kB)
|████████████████████████████████| 49 kB 9.0 MB/s
Collecting nanotime>=0.5.2
Downloading nanotime-0.5.2.tar.gz (3.2 kB)
Collecting diskcache>=5.2.1
Downloading diskcache-5.3.0-py3-none-any.whl (44 kB)
|████████████████████████████████| 44 kB 7.2 MB/s
Collecting scmrepo==0.0.4
Downloading scmrepo-0.0.4-py3-none-any.whl (38 kB)
Collecting funcy>=1.14
Downloading funcy-1.16-py2.py3-none-any.whl (32 kB)
Collecting grandalf==0.6
Downloading grandalf-0.6-py3-none-any.whl (31 kB)
Collecting pathspec<0.10.0,>=0.9.0
Downloading pathspec-0.9.0-py2.py3-none-any.whl (31 kB)
Collecting pygtrie>=2.3.2
Downloading pygtrie-2.4.2.tar.gz (35 kB)
Collecting distro>=1.3.0
Downloading distro-1.6.0-py2.py3-none-any.whl (19 kB)
Requirement already satisfied: toml>=0.10.1 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from dvc) (0.10.2)
Requirement already satisfied: requests>=2.22.0 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from dvc) (2.26.0)
Collecting psutil>=5.8.0
Downloading psutil-5.8.0-cp39-cp39-manylinux2010_x86_64.whl (293 kB)
|████████████████████████████████| 293 kB 47.4 MB/s
Collecting python-benedict>=0.24.2
Downloading python_benedict-0.24.3-py3-none-any.whl (41 kB)
|████████████████████████████████| 41 kB 71 kB/s
Collecting configobj>=5.0.6
Downloading configobj-5.0.6.tar.gz (33 kB)
Collecting rich>=10.13.0
Downloading rich-10.15.2-py3-none-any.whl (214 kB)
|████████████████████████████████| 214 kB 48.1 MB/s
Collecting aiohttp-retry>=2.4.5
Downloading aiohttp_retry-2.4.6-py3-none-any.whl (7.7 kB)
Collecting colorama>=0.3.9
Downloading colorama-0.4.4-py2.py3-none-any.whl (16 kB)
Collecting ruamel.yaml>=0.17.11
Downloading ruamel.yaml-0.17.17-py3-none-any.whl (109 kB)
|████████████████████████████████| 109 kB 50.1 MB/s
Requirement already satisfied: tabulate>=0.8.7 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from dvc) (0.8.9)
Collecting flatten-dict<1,>=0.4.1
Downloading flatten_dict-0.4.2-py2.py3-none-any.whl (9.7 kB)
Collecting networkx>=2.5
Downloading networkx-2.6.3-py3-none-any.whl (1.9 MB)
|████████████████████████████████| 1.9 MB 30.2 MB/s
Requirement already satisfied: packaging>=19.0 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from dvc) (21.2)
Collecting zc.lockfile>=1.2.1
Downloading zc.lockfile-2.0-py2.py3-none-any.whl (9.7 kB)
Requirement already satisfied: typing-extensions>=3.7.4 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from dvc) (3.10.0.2)
Collecting dictdiffer>=0.8.1
Downloading dictdiffer-0.9.0-py2.py3-none-any.whl (16 kB)
Collecting voluptuous>=0.11.7
Downloading voluptuous-0.12.2.tar.gz (48 kB)
|████████████████████████████████| 48 kB 11.9 MB/s
Collecting shortuuid>=0.5.0
Downloading shortuuid-1.0.8-py3-none-any.whl (9.5 kB)
Requirement already satisfied: pyparsing>=2.4.7 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from dvc) (2.4.7)
Collecting pydot>=1.2.4
Downloading pydot-1.4.2-py2.py3-none-any.whl (21 kB)
Collecting shtab<2,>=1.3.4
Downloading shtab-1.5.2-py2.py3-none-any.whl (14 kB)
Requirement already satisfied: pyasn1>=0.4.1 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from dvc) (0.4.8)
Collecting dpath<3,>=2.0.2
Downloading dpath-2.0.5-py3-none-any.whl (15 kB)
Requirement already satisfied: tqdm<5,>=4.45.0 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from dvc) (4.62.3)
Requirement already satisfied: future in /shared-libs/python3.9/py/lib/python3.9/site-packages (from grandalf==0.6->dvc) (0.18.2)
Collecting pygit2>=1.5.0
Downloading pygit2-1.7.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.5 MB)
|████████████████████████████████| 4.5 MB 37.7 MB/s
Collecting dulwich>=0.20.23
Downloading dulwich-0.20.26-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (545 kB)
|████████████████████████████████| 545 kB 44.7 MB/s
Collecting gitpython>3
Downloading GitPython-3.1.24-py3-none-any.whl (180 kB)
|████████████████████████████████| 180 kB 53.8 MB/s
Collecting asyncssh<2.9,>=2.7.1
Downloading asyncssh-2.8.1-py3-none-any.whl (287 kB)
|████████████████████████████████| 287 kB 54.9 MB/s
Requirement already satisfied: aiohttp in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from aiohttp-retry>=2.4.5->dvc) (3.8.0)
Requirement already satisfied: cryptography>=2.8 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from asyncssh<2.9,>=2.7.1->scmrepo==0.0.4->dvc) (3.4.8)
Requirement already satisfied: six in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from configobj>=5.0.6->dvc) (1.16.0)
Requirement already satisfied: cffi>=1.12 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from cryptography>=2.8->asyncssh<2.9,>=2.7.1->scmrepo==0.0.4->dvc) (1.15.0)
Requirement already satisfied: pycparser in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from cffi>=1.12->cryptography>=2.8->asyncssh<2.9,>=2.7.1->scmrepo==0.0.4->dvc) (2.21)
Requirement already satisfied: certifi in /shared-libs/python3.9/py/lib/python3.9/site-packages (from dulwich>=0.20.23->scmrepo==0.0.4->dvc) (2021.10.8)
Requirement already satisfied: urllib3>=1.24.1 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from dulwich>=0.20.23->scmrepo==0.0.4->dvc) (1.26.7)
Collecting atpublic
Downloading atpublic-2.3.tar.gz (16 kB)
Collecting gitdb<5,>=4.0.1
Downloading gitdb-4.0.9-py3-none-any.whl (63 kB)
|████████████████████████████████| 63 kB 2.2 MB/s
Collecting smmap<6,>=3.0.1
Downloading smmap-5.0.0-py3-none-any.whl (24 kB)
Requirement already satisfied: python-dateutil in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from python-benedict>=0.24.2->dvc) (2.8.2)
Collecting phonenumbers
Downloading phonenumbers-8.12.39-py2.py3-none-any.whl (2.6 MB)
|████████████████████████████████| 2.6 MB 42.2 MB/s
Requirement already satisfied: pyyaml in /shared-libs/python3.9/py/lib/python3.9/site-packages (from python-benedict>=0.24.2->dvc) (6.0)
Collecting xmltodict
Downloading xmltodict-0.12.0-py2.py3-none-any.whl (9.2 kB)
Collecting ftfy
Downloading ftfy-6.0.3.tar.gz (64 kB)
|████████████████████████████████| 64 kB 6.8 MB/s
Requirement already satisfied: python-slugify in /shared-libs/python3.9/py/lib/python3.9/site-packages (from python-benedict>=0.24.2->dvc) (5.0.2)
Collecting mailchecker
Downloading mailchecker-4.1.6.tar.gz (225 kB)
|████████████████████████████████| 225 kB 44.9 MB/s
Collecting python-fsutil
Downloading python_fsutil-0.5.0-py3-none-any.whl (11 kB)
Requirement already satisfied: idna<4,>=2.5 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from requests>=2.22.0->dvc) (3.3)
Requirement already satisfied: charset-normalizer~=2.0.0 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from requests>=2.22.0->dvc) (2.0.7)
Requirement already satisfied: pygments<3.0.0,>=2.6.0 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from rich>=10.13.0->dvc) (2.10.0)
Collecting commonmark<0.10.0,>=0.9.0
Downloading commonmark-0.9.1-py2.py3-none-any.whl (51 kB)
|████████████████████████████████| 51 kB 12.8 MB/s
Collecting ruamel.yaml.clib>=0.1.2
Downloading ruamel.yaml.clib-0.2.6-cp39-cp39-manylinux1_x86_64.whl (539 kB)
|████████████████████████████████| 539 kB 46.0 MB/s
Requirement already satisfied: setuptools in /root/venv/lib/python3.9/site-packages (from zc.lockfile>=1.2.1->dvc) (57.4.0)
Requirement already satisfied: yarl<2.0,>=1.0 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from aiohttp->aiohttp-retry>=2.4.5->dvc) (1.7.2)
Requirement already satisfied: attrs>=17.3.0 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from aiohttp->aiohttp-retry>=2.4.5->dvc) (21.2.0)
Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from aiohttp->aiohttp-retry>=2.4.5->dvc) (4.0.1)
Requirement already satisfied: frozenlist>=1.1.1 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from aiohttp->aiohttp-retry>=2.4.5->dvc) (1.2.0)
Requirement already satisfied: multidict<7.0,>=4.5 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from aiohttp->aiohttp-retry>=2.4.5->dvc) (5.2.0)
Requirement already satisfied: aiosignal>=1.1.2 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from aiohttp->aiohttp-retry>=2.4.5->dvc) (1.2.0)
Requirement already satisfied: wcwidth in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from ftfy->python-benedict>=0.24.2->dvc) (0.2.5)
Requirement already satisfied: text-unidecode>=1.3 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from python-slugify->python-benedict>=0.24.2->dvc) (1.3)
Building wheels for collected packages: configobj, flufl.lock, nanotime, pygtrie, voluptuous, atpublic, ftfy, mailchecker
Building wheel for configobj (setup.py) ... done
Created wheel for configobj: filename=configobj-5.0.6-py3-none-any.whl size=34546 sha256=2582824a000d4bc03cc60cfdb8ca8879f6ca17801cfe3e2badd533e0193d4e4b
Stored in directory: /root/.cache/pip/wheels/4b/35/53/dfa4d3a4196794cb0a777a97c68dcf02b073d33de9c135d72a
Building wheel for flufl.lock (setup.py) ... done
Created wheel for flufl.lock: filename=flufl.lock-6.0-py3-none-any.whl size=11995 sha256=5f4a2b491d3477d31d890b3e084668f69608f42161037f0abb912f76a4a352fb
Stored in directory: /root/.cache/pip/wheels/ef/eb/67/7701d39d37cdda0e89e1b6d702970fb309dce498d57c474f49
Building wheel for nanotime (setup.py) ... done
Created wheel for nanotime: filename=nanotime-0.5.2-py3-none-any.whl size=2440 sha256=c8658e09b960860c06448207727c05049d012635ea420958d4089e4119787130
Stored in directory: /root/.cache/pip/wheels/ee/1f/7c/610bdb7d5541b98d9743c5953e32681ef35dd54fadddd347e8
Building wheel for pygtrie (setup.py) ... done
Created wheel for pygtrie: filename=pygtrie-2.4.2-py3-none-any.whl size=19062 sha256=2f39b11cf751cf8ff8d3e701e5a5cadd690ff2da252c974e3b3a1b6ba9656b05
Stored in directory: /root/.cache/pip/wheels/2e/5d/81/d6e52fec193180ffffef19b185ecf1d0a85d418808c20c09c9
Building wheel for voluptuous (setup.py) ... done
Created wheel for voluptuous: filename=voluptuous-0.12.2-py3-none-any.whl size=29562 sha256=b7185d80eb0ef4c7fd184fd81dec1441c2018111d08c27eeac5b3ca9cfcf5aa8
Stored in directory: /root/.cache/pip/wheels/50/80/ab/ab37c8824a928c42a261bce71227504b6ef12276ef28f0667b
Building wheel for atpublic (setup.py) ... done
Created wheel for atpublic: filename=atpublic-2.3-py3-none-any.whl size=5033 sha256=d32f72646472210432c143a667f2d56a93dc40efcb9ab71e7c7564129182c7f2
Stored in directory: /root/.cache/pip/wheels/63/25/f3/f34ce655fd46daeae4727b6d2a2c387ac3a8c1d360b596c020
Building wheel for ftfy (setup.py) ... done
Created wheel for ftfy: filename=ftfy-6.0.3-py3-none-any.whl size=41933 sha256=1d67e5de4fb27235444f3d160a4b65ea8a117d2dde27abe3a116d9a75e9f224b
Stored in directory: /root/.cache/pip/wheels/3d/ee/4b/03a4e2e591ea56687aff999edc83827a2ace523baab75b8e41
Building wheel for mailchecker (setup.py) ... done
Created wheel for mailchecker: filename=mailchecker-4.1.6-py3-none-any.whl size=225333 sha256=13dcc3d2d4a205ac13e686366879d8cd9b80b7d72b4526706d62fae267de1f9b
Stored in directory: /root/.cache/pip/wheels/ef/b6/7e/e7b77971454db6f003b94175f0fc46c64fc1e2c471719a450b
Successfully built configobj flufl.lock nanotime pygtrie voluptuous atpublic ftfy mailchecker
Installing collected packages: smmap, gitdb, xmltodict, ruamel.yaml.clib, python-fsutil, pygtrie, pygit2, psutil, phonenumbers, pathspec, mailchecker, gitpython, funcy, ftfy, fsspec, dulwich, commonmark, colorama, atpublic, asyncssh, zc.lockfile, voluptuous, shtab, shortuuid, scmrepo, ruamel.yaml, rich, python-benedict, pydot, ply, networkx, nanotime, grandalf, flufl.lock, flatten-dict, dpath, distro, diskcache, dictdiffer, configobj, appdirs, aiohttp-retry, dvc
Successfully installed aiohttp-retry-2.4.6 appdirs-1.4.4 asyncssh-2.8.1 atpublic-2.3 colorama-0.4.4 commonmark-0.9.1 configobj-5.0.6 dictdiffer-0.9.0 diskcache-5.3.0 distro-1.6.0 dpath-2.0.5 dulwich-0.20.26 dvc-2.9.2 flatten-dict-0.4.2 flufl.lock-6.0 fsspec-2021.11.1 ftfy-6.0.3 funcy-1.16 gitdb-4.0.9 gitpython-3.1.24 grandalf-0.6 mailchecker-4.1.6 nanotime-0.5.2 networkx-2.6.3 pathspec-0.9.0 phonenumbers-8.12.39 ply-3.11 psutil-5.8.0 pydot-1.4.2 pygit2-1.7.2 pygtrie-2.4.2 python-benedict-0.24.3 python-fsutil-0.5.0 rich-10.15.2 ruamel.yaml-0.17.17 ruamel.yaml.clib-0.2.6 scmrepo-0.0.4 shortuuid-1.0.8 shtab-1.5.2 smmap-5.0.0 voluptuous-0.12.2 xmltodict-0.12.0 zc.lockfile-2.0
WARNING: You are using pip version 21.2.3; however, version 21.3.1 is available.
You should consider upgrading via the '/root/venv/bin/python -m pip install --upgrade pip' command.
#Crear un repo en DagsHub (o GitHub) y clonarlo dentro del directorio
!git clone https://dagshub.com/turacam2014/dvc5.git
Cloning into 'dvc5'...
remote: Enumerating objects: 3, done.
remote: Counting objects: 100% (3/3), done.
remote: Total 3 (delta 0), reused 0 (delta 0), pack-reused 0
Unpacking objects: 100% (3/3), done.
#Verificar si git está activo
!git status
On branch master
Your branch is up to date with 'origin/master'.
nothing to commit, working tree clean
#Inicializar dvc
!dvc init -f
Initialized DVC repository.
You can now commit the changes to git.
+---------------------------------------------------------------------+
| |
| DVC has enabled anonymous aggregate usage analytics. |
| Read the analytics documentation (and how to opt-out) here: |
| <https://dvc.org/doc/user-guide/analytics> |
| |
+---------------------------------------------------------------------+
What's next?
------------
- Check out the documentation: <https://dvc.org/doc>
- Get help and share ideas: <https://dvc.org/chat>
- Star us on GitHub: <https://github.com/iterative/dvc>
#Commit de git
!git commit -m "Initialize DVC"
[master a2ecceb] Initialize DVC
Committer: root <root@p-17346dea-180f-4bb1-9e4f-d68c790cbdd1.projects-domain.projects.svc.cluster.local>
Your name and email address were configured automatically based
on your username and hostname. Please check that they are accurate.
You can suppress this message by setting them explicitly. Run the
following command and follow the instructions in your editor to edit
your configuration file:
git config --global --edit
After doing this, you may fix the identity used for this commit with:
git commit --amend --reset-author
9 files changed, 515 insertions(+)
create mode 100644 .dvc/.gitignore
create mode 100644 .dvc/config
create mode 100644 .dvc/plots/confusion.json
create mode 100644 .dvc/plots/confusion_normalized.json
create mode 100644 .dvc/plots/linear.json
create mode 100644 .dvc/plots/scatter.json
create mode 100644 .dvc/plots/simple.json
create mode 100644 .dvc/plots/smooth.json
create mode 100644 .dvcignore
#Descargar los datos
#Obviamente pueden provenir de cualquier fuente
!dvc get https://github.com/iterative/dataset-registry \
get-started/data.xml -o data/data.xml
#Agregar los datos al seguimiento de dvc
!dvc add data/data.xml
⠋ Checking graph
Adding...
!
0%| | 0.00/? [00:00<?, ?B/s]
!
0%| |Querying cache in /work/dvc5/dvc0.00/1.00 [00:00<?, ?file/s]
!
0%| |Transferring 0/1 [00:00<?, ?file/s]
!
0%| |.CDQjNssvM5bmPqqL4yqLNB.tmp 0.00/? [00:00<?, ?B/s]
0%| |.CDQjNssvM5bmPqqL4yqLNB.tmp 0.00/4.00 [00:00<?, ?B/s]
0%| |.CDQjNssvM5bmPqqL4yqLNB.tmp 0.00/4.00 [00:00<?, ?B/s]
!
0%| |04afb96060aad90176268345e10355 0.00/? [00:00<?, ?B/s]
0%| |04afb96060aad90176268345e10355 0.00/36.1M [00:00<?, ?B/s]
0%| |04afb96060aad90176268345e10355 0.00/36.1M [00:00<?, ?B/s]
100%|██████████|04afb96060aad90176268345e136.1M/36.1M [00:00<00:00, 147MB/s]
100% Adding...|████████████████████████████████████████|1/1 [00:00, 1.39file/s]
To track the changes with git, run:
git add data/.gitignore data/data.xml.dvc
To enable auto staging, run:
dvc config core.autostage true
#Configurar la identidad de git (nombre y correo) para poder hacer commits
!git config --global user.name "turacam2014"
!git config --global user.email "turacam2014@gmail.com"
#Agregar los datos al seguimiento de git
!git add data/data.xml.dvc data/.gitignore
#Commit
!git commit -m "Add raw data"
[master 58b2f45] Add raw data
2 files changed, 5 insertions(+)
create mode 100644 data/.gitignore
create mode 100644 data/data.xml.dvc
#Verificar en que rama estamos
!git branch
* master
!git status
On branch master
Your branch is ahead of 'origin/master' by 2 commits.
(use "git push" to publish your local commits)
nothing to commit, working tree clean
!git commit -m"your commit"
On branch master
Your branch is ahead of 'origin/master' by 2 commits.
(use "git push" to publish your local commits)
nothing to commit, working tree clean
#Verificar qué remoto está configurado
!git remote -v
origin https://dagshub.com/turacam2014/dvc5.git (fetch)
origin https://dagshub.com/turacam2014/dvc5.git (push)
# ELIMINA EL REPO LOCAL si la sentencia anterior informa que ya existe
!git remote rm origin
#Agregar el origin de Github con el token
#En Agosto Github hizo cambio de politicas de seguridad
!git remote add origin https://abf7fe7a1819b4dec866ee33ed91dfc0fc34a458@dagshub.com/turacam2014/dvc5.git
#Push al remoto
!git push -u origin master
fatal: not a git repository (or any parent up to mount point /)
Stopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).
!git commit -m "first commit"
On branch master
Your branch is up to date with 'origin/master'.
nothing to commit, working tree clean
#Obtener código
!wget https://code.dvc.org/get-started/code.zip
--2021-12-11 22:23:05-- https://code.dvc.org/get-started/code.zip
Resolving code.dvc.org (code.dvc.org)... 104.21.81.205, 172.67.164.76, 2606:4700:3036::6815:51cd, ...
Connecting to code.dvc.org (code.dvc.org)|104.21.81.205|:443... connected.
HTTP request sent, awaiting response... 303 See Other
Location: https://s3-us-east-2.amazonaws.com/dvc-public/code/get-started/code.zip [following]
--2021-12-11 22:23:05-- https://s3-us-east-2.amazonaws.com/dvc-public/code/get-started/code.zip
Resolving s3-us-east-2.amazonaws.com (s3-us-east-2.amazonaws.com)... 52.219.105.1
Connecting to s3-us-east-2.amazonaws.com (s3-us-east-2.amazonaws.com)|52.219.105.1|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4634 (4.5K) [application/zip]
Saving to: ‘code.zip’
code.zip 100%[===================>] 4.53K --.-KB/s in 0s
2021-12-11 22:23:05 (92.2 MB/s) - ‘code.zip’ saved [4634/4634]
#Descomprimir el código
!unzip code.zip
Archive: code.zip
inflating: params.yaml
inflating: src/evaluate.py
inflating: src/featurization.py
inflating: src/prepare.py
inflating: src/requirements.txt
inflating: src/train.py
creating: .github/workflows/
inflating: .github/workflows/cml.yaml
#Instalar requerimientos
!pip install -r src/requirements.txt
Requirement already satisfied: pandas in /shared-libs/python3.9/py/lib/python3.9/site-packages (from -r src/requirements.txt (line 1)) (1.2.5)
Collecting pyaml
Downloading pyaml-21.10.1-py2.py3-none-any.whl (24 kB)
Requirement already satisfied: scikit-learn in /shared-libs/python3.9/py/lib/python3.9/site-packages (from -r src/requirements.txt (line 3)) (1.0.1)
Requirement already satisfied: scipy in /shared-libs/python3.9/py/lib/python3.9/site-packages (from -r src/requirements.txt (line 4)) (1.7.2)
Requirement already satisfied: python-dateutil>=2.7.3 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from pandas->-r src/requirements.txt (line 1)) (2.8.2)
Requirement already satisfied: pytz>=2017.3 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from pandas->-r src/requirements.txt (line 1)) (2021.3)
Requirement already satisfied: numpy>=1.16.5 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from pandas->-r src/requirements.txt (line 1)) (1.21.4)
Requirement already satisfied: PyYAML in /shared-libs/python3.9/py/lib/python3.9/site-packages (from pyaml->-r src/requirements.txt (line 2)) (6.0)
Requirement already satisfied: threadpoolctl>=2.0.0 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from scikit-learn->-r src/requirements.txt (line 3)) (3.0.0)
Requirement already satisfied: joblib>=0.11 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from scikit-learn->-r src/requirements.txt (line 3)) (1.1.0)
Requirement already satisfied: six>=1.5 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from python-dateutil>=2.7.3->pandas->-r src/requirements.txt (line 1)) (1.16.0)
Installing collected packages: pyaml
Successfully installed pyaml-21.10.1
WARNING: You are using pip version 21.2.3; however, version 21.3.1 is available.
You should consider upgrading via the '/root/venv/bin/python -m pip install --upgrade pip' command.
!dvc remote add origin https://dagshub.com/turacam2014/dvc5.dvc
!dvc remote modify origin --local auth basic
!dvc remote modify origin --local user turacam2014
!dvc remote modify origin --local password e6481150a542a76906a039707d3306809aaaa8fa
!dvc push -r origin
/bin/bash: dvc: command not found
/bin/bash: dvc: command not found
/bin/bash: dvc: command not found
/bin/bash: dvc: command not found
/bin/bash: dvc: command not found
#Correr etapas de dvc
!dvc run -n prepare \
-p prepare.seed,prepare.split \
-d src/prepare.py -d data/data.xml \
-o data/prepared \
python src/prepare.py data/data.xml
Running stage 'prepare':
> python src/prepare.py data/data.xml
Computing file/dir hashes (only done once) |0.00 [00:00, ?md5/s]
!
0%| | 0.00/? [00:00<?, ?B/s]
!
0%| | 0.00/? [00:00<?, ?B/s]
0% Transferring| |0/3 [00:00<?, ?file/s]
!
0%| |b786b6e6f80e2b3fcf17827ad18597.dir 0.00/? [00:00<?, ?B/s]
0%| |b786b6e6f80e2b3fcf17827ad18597.di0.00/137 [00:00<?, ?B/s]
Creating 'dvc.yaml'
Adding stage 'prepare' in 'dvc.yaml'
Generating lock file 'dvc.lock'
Updating lock file 'dvc.lock'
To track the changes with git, run:
git add dvc.lock dvc.yaml data/.gitignore
To enable auto staging, run:
dvc config core.autostage true
#Correr etapas de dvc
!dvc run -n featurize \
-p featurize.max_features,featurize.ngrams \
-d src/featurization.py -d data/prepared \
-o data/features \
python src/featurization.py data/prepared data/features
Running stage 'featurize':
> python src/featurization.py data/prepared data/features
The input data frame data/prepared/train.tsv size is (20017, 3)
The output matrix data/features/train.pkl size is (20017, 502) and data type is float64
The input data frame data/prepared/test.tsv size is (4983, 3)
The output matrix data/features/test.pkl size is (4983, 502) and data type is float64
Computing file/dir hashes (only done once) |0.00 [00:00, ?md5/s]
!
0%| | 0.00/? [00:00<?, ?B/s]
!
0%| | 0.00/? [00:00<?, ?B/s]
33% Transferring|██████████▎ |1/3 [00:29<00:59, 29.90s/file]
!
0%| |5e0fdd787b569b4fcb4bafa543d270.dir 0.00/? [00:00<?, ?B/s]
0%| |5e0fdd787b569b4fcb4bafa543d270.di0.00/137 [00:00<?, ?B/s]
Adding stage 'featurize' in 'dvc.yaml'
Updating lock file 'dvc.lock'
To track the changes with git, run:
git add data/.gitignore dvc.yaml dvc.lock
To enable auto staging, run:
dvc config core.autostage true
#Correr etapas de dvc
!dvc run -n train \
-p train.seed,train.n_est,train.min_split \
-d src/train.py -d data/features \
-o model.pkl \
python src/train.py data/features model.pkl
Running stage 'train':
> python src/train.py data/features model.pkl
Input matrix size (20017, 502)
X matrix size (20017, 500)
Y matrix size (20017,)
Adding stage 'train' in 'dvc.yaml'
Updating lock file 'dvc.lock'
To track the changes with git, run:
git add dvc.lock .gitignore dvc.yaml
To enable auto staging, run:
dvc config core.autostage true
#Correr etapas de dvc
!dvc run -n evaluate \
-d src/evaluate.py -d model.pkl -d data/features \
-M scores.json \
--plots-no-cache prc.json \
--plots-no-cache roc.json \
python src/evaluate.py model.pkl \
data/features scores.json prc.json roc.json
Running stage 'evaluate':
> python src/evaluate.py model.pkl data/features scores.json prc.json roc.json
Adding stage 'evaluate' in 'dvc.yaml'
Updating lock file 'dvc.lock'
To track the changes with git, run:
git add dvc.lock dvc.yaml
To enable auto staging, run:
dvc config core.autostage true
#Mostrar las métricas del pipeline
!dvc metrics show
Path avg_prec roc_auc
scores.json 0.52048 0.9032
#Si modificamos parámetros, código fuente, etc
#corremos un repro (reproducir prueba)
!dvc repro
'data/data.xml.dvc' didn't change, skipping
Running stage 'prepare':
> python src/prepare.py data/data.xml
Computing file/dir hashes (only done once) |0.00 [00:00, ?md5/s]
!
0%| | 0.00/? [00:00<?, ?B/s]
!
0%| | 0.00/? [00:00<?, ?B/s]
33% Transferring|██████████▎ |1/3 [00:30<01:01, 30.84s/file]
!
0%| |d584579b8a95a035a5643acd6f3476.dir 0.00/? [00:00<?, ?B/s]
0%| |d584579b8a95a035a5643acd6f3476.di0.00/137 [00:00<?, ?B/s]
Updating lock file 'dvc.lock'
Running stage 'featurize':
> python src/featurization.py data/prepared data/features
The input data frame data/prepared/train.tsv size is (17476, 3)
The output matrix data/features/train.pkl size is (17476, 602) and data type is float64
The input data frame data/prepared/test.tsv size is (7524, 3)
The output matrix data/features/test.pkl size is (7524, 602) and data type is float64
Computing file/dir hashes (only done once) |0.00 [00:00, ?md5/s]
!
0%| | 0.00/? [00:00<?, ?B/s]
!
0%| | 0.00/? [00:00<?, ?B/s]
33% Transferring|██████████▎ |1/3 [00:34<01:09, 34.84s/file]
!
0%| |9862cd6282bbedb44fa80886bfa235.dir 0.00/? [00:00<?, ?B/s]
0%| |9862cd6282bbedb44fa80886bfa235.di0.00/137 [00:00<?, ?B/s]
Updating lock file 'dvc.lock'
Running stage 'train':
> python src/train.py data/features model.pkl
Input matrix size (17476, 602)
X matrix size (17476, 600)
Y matrix size (17476,)
Updating lock file 'dvc.lock'
Running stage 'evaluate':
> python src/evaluate.py model.pkl data/features scores.json prc.json roc.json
Updating lock file 'dvc.lock'
To track the changes with git, run:
git add dvc.lock
To enable auto staging, run:
dvc config core.autostage true
Use `dvc push` to send your updates to remote storage.
!dvc metrics diff
Path Metric HEAD workspace Change
scores.json avg_prec - 0.582 -
scores.json roc_auc - 0.93109 -
!pip install Cython
Collecting Cython
Downloading Cython-0.29.25-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (1.9 MB)
|████████████████████████████████| 1.9 MB 19.5 MB/s
Installing collected packages: Cython
Successfully installed Cython-0.29.25
WARNING: You are using pip version 21.2.3; however, version 21.3.1 is available.
You should consider upgrading via the '/root/venv/bin/python -m pip install --upgrade pip' command.
# Mount Google Drive so it can be used as remote storage for DVC.
# `google.colab` only exists inside Google Colab; guard the import so the
# notebook does not crash with ModuleNotFoundError on other platforms.
try:
    from google.colab import drive
except ImportError:
    drive = None
    print("google.colab is not available; skipping Google Drive mount.")
else:
    drive.mount('/work/drive')
Execution error
ModuleNotFoundError: No module named 'google.colab'
!ls /datasets/dvc5
Untitled0.ipynb
# DEFINO MI REMOTO
# Con este comando podemos hacer el PUSH a un repositorio externo, en este caso el drive.
# Link de doc: https://dvc.org/doc/command-reference/remote/add
# Link para obtener un Token de Drive: https://developers.google.com/identity/protocols/oauth2
!dvc remote add -d storage /content/drive/MyDrive/dvc5/dvcstore
!git add .dvc/config
!git commit -m "Configure remote storage"
/bin/bash: dvc: command not found
fatal: not a git repository (or any parent up to mount point /)
Stopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).
fatal: not a git repository (or any parent up to mount point /)
Stopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).
!dvc push
/bin/bash: dvc: command not found
!dvc remote add origin https://dagshub.com/turacam2014/dvc5.dvc
!dvc remote modify origin --local auth basic
!dvc remote modify origin --local user turacam2014
!dvc remote modify origin --local password your_token
!dvc push -r origin
/bin/bash: dvc: command not found
/bin/bash: dvc: command not found
/bin/bash: dvc: command not found
/bin/bash: dvc: command not found
/bin/bash: dvc: command not found