# Emit a greeting, then assemble a longer one step by step in a variable.
print("hello world")
hello = "hello world!"
hello += " how are you?"
# Bare expressions: an interactive session echoes their value,
# a plain script evaluates and discards it.
hello
hello.title()
import pandas as pd
# Small example table: one row per city, one column per attribute.
cities = pd.DataFrame(
    data={
        "name": ["Athens", "Bratislava", "Copenhagen", "Dublin"],
        "area": [39, 367.6, 86.2, 115],
        "elevation": [170, 152, 14, 20],
        "population": [664046, 429564, 602481, 553165],
    },
)
cities
# Largest value found in the "area" column.
cities.area.max()
# Full row of the city holding that largest area.
cities.loc[cities.area.idxmax()]
# Column-wise averages, skipping the non-numeric "name" column.
cities.mean(numeric_only=True)
import altair as alt
# Minimal bar chart: one bar per city name, bar length taken from "area".
alt.Chart(cities).mark_bar().encode(
    x='name',
    y='area',
)
# Same bars again, now with a tooltip listing every column
# and a fixed 200x200 plotting area.
alt.Chart(cities).mark_bar().encode(
    alt.X('name'),
    alt.Y('area'),
    tooltip=['name', 'area', 'elevation', 'population'],
).properties(height=200, width=200)
# Fetch the Geonames country table straight from its web address.
# The file is tab-separated, which is the separator read_table uses by default.
# keep_default_na=False keeps the text "NA" as-is instead of turning it into
# a missing value; in this dataset it is the code for North America.
countries = pd.read_table(
    "http://www.geonames.org/countryInfoCSV",
    keep_default_na=False,
)
# Peek at the first rows of the loaded table:
countries.head()
# Note that the values in the continent column are abbreviated. To make them more meaningful, we replace them with their full names:
# Swap each two-letter code in the continent column for its full name,
# using a dictionary of find -> replace pairs.
countries = countries.assign(
    continent=countries["continent"].replace({
        "AF": "Africa",
        "AN": "Antarctica",
        "AS": "Asia",
        "EU": "Europe",
        "OC": "Oceania",
        "NA": "North America",
        "SA": "South America",
    })
)
# Keep only rows where both population and area are strictly positive
# (the scatter plot further down puts both on logarithmic axes).
countries = countries[
    (countries["population"] > 0) & (countries["areaInSqKm"] > 0)
]
# Scatter plot of area against population, both on logarithmic axes,
# one circle per row coloured by continent, with pan/zoom enabled.
alt.Chart(countries).mark_circle().encode(
    alt.X('areaInSqKm', scale=alt.Scale(type='log')),
    alt.Y('population', scale=alt.Scale(type='log')),
    alt.Color('continent'),
    tooltip=["name", "capital", "areaInSqKm", "population", "continent"],
).interactive().properties(width=600, height=400)