import duckdb
conn = duckdb.connect("SampleDB",read_only=False)
# connect to an in-memory temporary database
conn = duckdb.connect()
# run arbitrary SQL commands
conn.execute("CREATE TABLE test_table (i INTEGER, j STRING)")
conn.execute("INSERT INTO test_table VALUES (1, 'one'),(9,'nine')")
conn.execute("SELECT i from test_table ").fetchdf()
# we can use placeholders for parameters
conn.execute("INSERT INTO test_table VALUES (?, ?)", [2, 'two'])
conn.executemany("INSERT INTO test_table VALUES (?, ?)", [[3, 'three'], [4, 'four']])
conn.execute("SELECT * from test_table ").fetchdf()
conn.execute("SELECT * FROM test_table").fetchnumpy()
import pandas as pd
test_df = pd.DataFrame.from_dict({"i":[1, 2, 3, 4], "j":["one", "two", "three", "four"]})
# make this data frame available as a view in duckdb
conn.register("test_df", test_df)
conn.execute("SELECT j FROM test_df WHERE i > 1").fetchdf()
df = pd.read_csv("bank_data.csv")
conn.register("bank_df", df)
conn.execute("SELECT actual_recovery_amount FROM bank_df WHERE age > 27").fetchdf()
rel = conn.from_df(df)
rel.filter('age > 19').project('age + 1').order('sex').limit(2)
rel = conn.from_df(test_df)
rel
rel = duckdb.df(test_df)
rel
rel = conn.table("test_table")
rel
rel = duckdb.from_csv_auto("bank_data.csv")
rel
rel.alias
rel2 = rel.set_alias('bank_data')
rel2.alias
rel.type
rel.columns
rel.types
rel.filter('age > 18')
# project the relation, get some columns
rel.project('id, age')
rel.project('age + 1')
rel.order('sex')
rel.limit(2)
rel.filter('age > 19').project('age + 1').order('sex').limit(2)
rel.aggregate("sum(actual_recovery_amount)")
rel.aggregate("age, sum(actual_recovery_amount)")
rel.aggregate("sum(actual_recovery_amount)", "age")
rel.distinct()
rel.union(rel)
rel2 = duckdb.df(df)
rel.join(rel2, 'id')
print(rel.set_alias('a').join(rel.set_alias('b'), 'a.id=b.id'))
print(duckdb.filter(df, 'age > 1'))
print(duckdb.project(df, 'age +1'))
print(duckdb.order(df, 'sex'))
print(duckdb.limit(df, 2))
duckdb.filter(df, 'age > 1').project('age + 1').order('sex').limit(2)
# compute the query result from the relation
res = rel.execute()
print(res)
# res is a query result, you can call fetchdf() or fetchnumpy() or fetchone() on it
print(res.fetchone())
print(res.fetchall())
# convert a relation back to a pandas data frame
rel.to_df()
rel.create("test_table2")
# Inserting elements into table_3
conn.execute("CREATE TABLE test_table3 (i INTEGER, j STRING)")
print(conn.values([5, 'five']).insert_into("test_table3"))
rel_3 = conn.table("test_table3")
rel_3.insert([6,'six'])
rel_3
# create a SQL-accessible view of the relation
rel.create_view('test_view')
res = rel.query('my_name_for_rel', 'SELECT * FROM my_name_for_rel LIMIT 5')
res.fetchall()
res.fetchdf()
res = duckdb.query('SELECT * FROM test_df')
res.df()