import pandas as pd
import numpy as np
import great_expectations as ge
from great_expectations.data_context.types.base import DataContextConfig, DatasourceConfig, FilesystemStoreBackendDefaults
from great_expectations.data_context import BaseDataContext
from great_expectations.checkpoint import SimpleCheckpoint
from great_expectations.core.batch import RuntimeBatchRequest
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
# Run the Great Expectations CLI `init` command with the --yes and --v3-api
# flags. The leading "!" is an IPython/Jupyter shell escape, so this line is
# only valid when the file is executed as a notebook, not as a plain script.
!great_expectations --yes --v3-api init
# Toy order table: five rows, each column sampled independently at random,
# so the contents differ from run to run.
_catalog = ['camera', 'phone', 'computer', 'speaker', 'TV',
            'cable', 'movie', 'guitar', 'printer']
_quantity_pool = [*range(10), None]  # None is mixed in so some rows can lack a quantity
_date_pool = pd.date_range(start="2020-12-30", end="2021-01-8")

# Draw order matters for reproducibility under a fixed seed: products,
# then quantities, then dates.
products = np.random.choice(_catalog, size=5)
quantities = np.random.choice(_quantity_pool, size=5)
dates = np.random.choice(_date_pool, size=5)

df = pd.DataFrame(dict(products=products, quantities=quantities, dates=dates))
df  # bare expression: a notebook echoes the frame when this ends a cell
# Convert the plain frame with ge.from_pandas so the expect_* methods below
# can be called on it; the name `df` is rebound to the converted object.
df = ge.from_pandas(df)

# Pass rates below assume the five random draws per column made when df was built.
# Five draws from 9 products are all distinct: 9*8*7*6*5 / 9**5, about 26%.
df.expect_column_values_to_be_unique(column='products');
# No None among five draws from 11 candidates: (10/11)**5, about 62%.
df.expect_column_values_to_not_be_null(column='quantities');
# 8 of the 10 candidate dates lie inside the range: 0.8**5, about 33%.
df.expect_column_values_to_be_between(
    column='dates',
    min_value='2021-01-01',
    max_value='2021-01-8',
    parse_strings_as_datetimes=True,
);
# Assemble the data-context configuration in code: one datasource named
# "my_datasource" using the pandas execution engine and a runtime data
# connector, with store defaults rooted at /work/great_expectations.
_pandas_engine = {
    "class_name": "PandasExecutionEngine",
    "module_name": "great_expectations.execution_engine",
}
_runtime_connector = {
    "class_name": "RuntimeDataConnector",
    "batch_identifiers": ["default_identifier_name"],
}
_my_datasource = DatasourceConfig(
    class_name="Datasource",
    module_name="great_expectations.datasource",
    execution_engine=_pandas_engine,
    data_connectors={"default_runtime_data_connector_name": _runtime_connector},
)
_store_defaults = FilesystemStoreBackendDefaults(root_directory="/work/great_expectations")

data_context_config = DataContextConfig(
    datasources={"my_datasource": _my_datasource},
    store_backend_defaults=_store_defaults,
)

# Instantiate the context directly from the in-memory configuration object.
context = BaseDataContext(project_config=data_context_config)
# Pull the expectations recorded on df (failed ones included, since
# discard_failed_expectations=False) and store them under a fixed suite name.
_suite = df.get_expectation_suite(discard_failed_expectations=False)
context.save_expectation_suite(
    expectation_suite=_suite,
    expectation_suite_name='my_expectation_suite',
);

# Hand the in-memory frame to the runtime connector as the batch to validate.
_batch_request_kwargs = dict(
    datasource_name="my_datasource",
    data_connector_name="default_runtime_data_connector_name",
    data_asset_name="df",
    runtime_parameters={"batch_data": df},
    batch_identifiers={"default_identifier_name": "df"},
)
batch_request = RuntimeBatchRequest(**_batch_request_kwargs)
# Register a SimpleCheckpoint named "my_checkpoint" that is tied to the
# "my_expectation_suite" suite.
checkpoint_config = dict(
    name="my_checkpoint",
    config_version=1,
    class_name="SimpleCheckpoint",
    expectation_suite_name="my_expectation_suite",
)
context.add_checkpoint(**checkpoint_config);
# Run the registered checkpoint against the single runtime batch and keep the
# outcome in `results`, then build the data docs.
_validations = [dict(batch_request=batch_request)]
results = context.run_checkpoint(
    checkpoint_name="my_checkpoint",
    validations=_validations,
    run_id="my_run_id",
)
context.build_data_docs();
# Uncomment the next line to serve the generated data docs on port 8080; the
# documentation is then available at https://5119d502-592c-43bd-b99d-244e09f7080a.deepnoteproject.com
#!python -m http.server 8080 --directory great_expectations/uncommitted/data_docs/local_site