# Print a greeting to standard output.
print("hello world")

# Start from a short greeting and append a follow-up question to it.
hello = "hello world!"
hello += " how are you?"

# Bare expressions: their values are only shown by an interactive session
# (e.g. a notebook cell); `title()` returns a new string and leaves `hello`
# itself unchanged.
hello
hello.title()
import pandas as pd
# Small example table: one row per city, one column per attribute.
cities = pd.DataFrame(
    data={
        "name": ["Athens", "Bratislava", "Copenhagen", "Dublin"],
        "area": [39, 367.6, 86.2, 115],
        "elevation": [170, 152, 14, 20],
        "population": [664046, 429564, 602481, 553165],
    },
)

# Show the whole table (only visible in an interactive session).
cities

# Largest value found in the "area" column.
cities["area"].max()

# Full row of the city holding that largest area: `idxmax()` yields the
# index label of the maximum, which `.loc` then uses to select the row.
cities.loc[cities["area"].idxmax(), :]

# Column-wise averages, skipping the non-numeric "name" column.
cities.mean(numeric_only=True)
import altair as alt
# Basic bar chart: "name" on the x axis, "area" on the y axis.
alt.Chart(cities).mark_bar().encode(
    x=alt.X("name"),
    y=alt.Y("area"),
)

# Same bars again, now with a tooltip listing four columns and an explicit
# 200 x 200 chart size.
alt.Chart(cities).mark_bar().encode(
    alt.X("name"),
    alt.Y("area"),
    tooltip=["name", "area", "elevation", "population"],
).properties(width=200, height=200)
# Load the GeoNames country dataset directly from its URL (a web address).
# Two further parameters are passed to the reader:
#   sep="\t"               -> the file is tab-separated
#   keep_default_na=False  -> do not turn "NA" into 'not a number';
#                             in this dataset it refers to North America
countries = pd.read_csv(
    "http://www.geonames.org/countryInfoCSV",
    sep="\t",
    keep_default_na=False,
)

# Take a look at the first rows of the dataset.
countries.head()
# Note that the values in the continent column are abbreviated. To make them more meaningful, we replace them with their full names:
# Swap the two-letter codes in the "continent" column for full continent
# names, using a nested {column: {find: replace}} mapping.
countries = countries.replace(
    to_replace={
        "continent": {
            "AF": "Africa",
            "AN": "Antarctica",
            "AS": "Asia",
            "EU": "Europe",
            "OC": "Oceania",
            "NA": "North America",
            "SA": "South America",
        },
    },
)

# Keep only the rows where both population and area are strictly positive.
countries = countries[
    (countries["population"] > 0) & (countries["areaInSqKm"] > 0)
]
# Scatter plot of countries: area against population, both on logarithmic
# axes, coloured by continent, with a tooltip of key columns. `interactive()`
# is applied before the 600 x 400 size is set.
alt.Chart(countries).mark_circle().encode(
    x=alt.X("areaInSqKm", scale=alt.Scale(type="log")),
    y=alt.Y("population", scale=alt.Scale(type="log")),
    color=alt.Color("continent"),
    tooltip=["name", "capital", "areaInSqKm", "population", "continent"],
).interactive().properties(width=600, height=400)