import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Load the raw member data; the CSV is read as UTF-8.
air_data_path = 'air_data.csv'
air_data = pd.read_csv(air_data_path, encoding='utf-8')

# describe() builds the per-column summary (`percentiles` selects which
# quantiles it reports; an empty list is passed here). Transposing puts one
# source column on each row.
summary = air_data.describe(percentiles=[], include='all').T

# describe() only reports the non-null count, so the number of nulls is the
# total row count minus that figure.
summary['null'] = len(air_data) - summary['count']

# Keep the null count and the value range, then apply the display headers.
explore = summary[['null', 'max', 'min']]
explore.columns = ['空值数', '最大值', '最小值']
explore
空值数object
0.084.1%
3.02.3%
6 others13.6%
最大值object
728282.06.8%
29 others75%
Missing18.2%
MEMBER_NO
0.0
62988.0
FFP_DATE
0
nan
FIRST_FLIGHT_DATE
0
nan
GENDER
3
nan
FFP_TIER
0.0
6.0
WORK_CITY
2269
nan
WORK_PROVINCE
3248
nan
WORK_COUNTRY
26
nan
AGE
420.0
110.0
LOAD_TIME
0
nan
from datetime import datetime

# Parse the enrolment dates (formatted as YYYY/MM/DD) in a single vectorised
# call instead of running datetime.strptime once per row.
ffp = pd.to_datetime(air_data['FFP_DATE'], format='%Y/%m/%d')
# Enrolment year of every member; keeps the same index as air_data.
ffp_year = ffp.dt.year
# Histogram of the number of members enrolled per year.
fig = plt.figure(figsize=(8, 5))  # canvas size in inches
plt.hist(ffp_year, bins='auto', color='#0504aa')
plt.xlabel('年份')
plt.ylabel('入会人数')
plt.title('各年份会员入会人数')
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 21508 (\N{CJK UNIFIED IDEOGRAPH-5404}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 24180 (\N{CJK UNIFIED IDEOGRAPH-5E74}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 20221 (\N{CJK UNIFIED IDEOGRAPH-4EFD}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 20250 (\N{CJK UNIFIED IDEOGRAPH-4F1A}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 21592 (\N{CJK UNIFIED IDEOGRAPH-5458}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 20837 (\N{CJK UNIFIED IDEOGRAPH-5165}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 20154 (\N{CJK UNIFIED IDEOGRAPH-4EBA}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 25968 (\N{CJK UNIFIED IDEOGRAPH-6570}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
# Count members per gender once, using the Series method rather than the
# deprecated top-level pd.value_counts().
gender_counts = air_data['GENDER'].value_counts()
# .get() yields 0 instead of raising KeyError when a category is absent.
male = gender_counts.get('男', 0)
female = gender_counts.get('女', 0)
fig = plt.figure(figsize=(7, 4))
# autopct is the format of the percentage drawn inside each wedge.
plt.pie([male, female], labels=['男', '女'],
        colors=['lightskyblue', 'lightcoral'],
        autopct='%1.1f%%')
plt.title('会员性别比例')
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 24615 (\N{CJK UNIFIED IDEOGRAPH-6027}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 21035 (\N{CJK UNIFIED IDEOGRAPH-522B}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 27604 (\N{CJK UNIFIED IDEOGRAPH-6BD4}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 20363 (\N{CJK UNIFIED IDEOGRAPH-4F8B}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 30007 (\N{CJK UNIFIED IDEOGRAPH-7537}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 22899 (\N{CJK UNIFIED IDEOGRAPH-5973}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
# Count members per tier once (Series.value_counts replaces three calls to
# the deprecated top-level pd.value_counts). reindex fixes the order to
# tiers 4, 5, 6 and reports 0 for a tier with no members instead of
# raising KeyError.
tier_counts = air_data['FFP_TIER'].value_counts().reindex([4, 5, 6], fill_value=0)
lv_four, lv_five, lv_six = tier_counts.tolist()
# Bar chart with one bar per tier.
fig = plt.figure(figsize=(8, 5))
plt.bar(range(3), height=[lv_four, lv_five, lv_six],
        width=0.4, alpha=0.8, color='skyblue')
plt.xticks(range(3), ['4', '5', '6'])
plt.xlabel('会员等级')
plt.ylabel('会员人数')
plt.title('会员各级别人数')
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 32423 (\N{CJK UNIFIED IDEOGRAPH-7EA7}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 31561 (\N{CJK UNIFIED IDEOGRAPH-7B49}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
# Ages as 64-bit integers; rows with a missing AGE are left out.
age = air_data['AGE'].dropna().astype('int64')
# Box plot of the age distribution.
fig = plt.figure(figsize=(5, 10))
plt.boxplot(
    age,
    labels=['会员年龄'],                     # x-axis tick label
    boxprops=dict(facecolor='lightblue'),  # fill colour of the box
    patch_artist=True,                     # draw the box as a filled patch
)
plt.title('会员年龄分布箱线图')
# Horizontal grid lines only.
plt.grid(axis='y')
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 20250 (\N{CJK UNIFIED IDEOGRAPH-4F1A}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 21592 (\N{CJK UNIFIED IDEOGRAPH-5458}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 24180 (\N{CJK UNIFIED IDEOGRAPH-5E74}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 40836 (\N{CJK UNIFIED IDEOGRAPH-9F84}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 20998 (\N{CJK UNIFIED IDEOGRAPH-5206}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 24067 (\N{CJK UNIFIED IDEOGRAPH-5E03}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 31665 (\N{CJK UNIFIED IDEOGRAPH-7BB1}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 32447 (\N{CJK UNIFIED IDEOGRAPH-7EBF}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 22270 (\N{CJK UNIFIED IDEOGRAPH-56FE}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
# Flight-activity columns used by this box plot and the following ones.
lte, fc, sks = (air_data[column]
                for column in ('LAST_TO_END', 'FLIGHT_COUNT', 'SEG_KM_SUM'))
# Box plot of the LAST_TO_END column.
fig = plt.figure(figsize=(5, 8))
plt.boxplot(
    lte,
    labels=['时长'],                         # x-axis tick label
    boxprops=dict(facecolor='lightblue'),  # fill colour of the box
    patch_artist=True,                     # draw the box as a filled patch
)
plt.title('会员最后乘机至结束时长分布箱线图')
# Horizontal grid lines only.
plt.grid(axis='y')
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 20250 (\N{CJK UNIFIED IDEOGRAPH-4F1A}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 21592 (\N{CJK UNIFIED IDEOGRAPH-5458}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 26368 (\N{CJK UNIFIED IDEOGRAPH-6700}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 21518 (\N{CJK UNIFIED IDEOGRAPH-540E}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 20056 (\N{CJK UNIFIED IDEOGRAPH-4E58}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 26426 (\N{CJK UNIFIED IDEOGRAPH-673A}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 33267 (\N{CJK UNIFIED IDEOGRAPH-81F3}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 32467 (\N{CJK UNIFIED IDEOGRAPH-7ED3}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 26463 (\N{CJK UNIFIED IDEOGRAPH-675F}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 26102 (\N{CJK UNIFIED IDEOGRAPH-65F6}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 38271 (\N{CJK UNIFIED IDEOGRAPH-957F}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 20998 (\N{CJK UNIFIED IDEOGRAPH-5206}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 24067 (\N{CJK UNIFIED IDEOGRAPH-5E03}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 31665 (\N{CJK UNIFIED IDEOGRAPH-7BB1}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 32447 (\N{CJK UNIFIED IDEOGRAPH-7EBF}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 22270 (\N{CJK UNIFIED IDEOGRAPH-56FE}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
# Box plot of the FLIGHT_COUNT column.
fig = plt.figure(figsize=(5, 8))
plt.boxplot(
    fc,
    labels=['飞行次数'],                     # x-axis tick label
    boxprops=dict(facecolor='lightblue'),  # fill colour of the box
    patch_artist=True,                     # draw the box as a filled patch
)
plt.title('会员飞行次数分布箱线图')
# Horizontal grid lines only.
plt.grid(axis='y')
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 20250 (\N{CJK UNIFIED IDEOGRAPH-4F1A}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 21592 (\N{CJK UNIFIED IDEOGRAPH-5458}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 39134 (\N{CJK UNIFIED IDEOGRAPH-98DE}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 34892 (\N{CJK UNIFIED IDEOGRAPH-884C}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 27425 (\N{CJK UNIFIED IDEOGRAPH-6B21}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 25968 (\N{CJK UNIFIED IDEOGRAPH-6570}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 20998 (\N{CJK UNIFIED IDEOGRAPH-5206}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 24067 (\N{CJK UNIFIED IDEOGRAPH-5E03}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 31665 (\N{CJK UNIFIED IDEOGRAPH-7BB1}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 32447 (\N{CJK UNIFIED IDEOGRAPH-7EBF}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 22270 (\N{CJK UNIFIED IDEOGRAPH-56FE}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
# Box plot of the SEG_KM_SUM column.
fig = plt.figure(figsize=(5, 10))
plt.boxplot(
    sks,
    labels=['总飞行公里数'],                  # x-axis tick label
    boxprops=dict(facecolor='lightblue'),  # fill colour of the box
    patch_artist=True,                     # draw the box as a filled patch
)
plt.title('客户总飞行公里数箱线图')
# Horizontal grid lines only.
plt.grid(axis='y')
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 23458 (\N{CJK UNIFIED IDEOGRAPH-5BA2}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 25143 (\N{CJK UNIFIED IDEOGRAPH-6237}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 24635 (\N{CJK UNIFIED IDEOGRAPH-603B}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 39134 (\N{CJK UNIFIED IDEOGRAPH-98DE}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 34892 (\N{CJK UNIFIED IDEOGRAPH-884C}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 20844 (\N{CJK UNIFIED IDEOGRAPH-516C}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 37324 (\N{CJK UNIFIED IDEOGRAPH-91CC}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 25968 (\N{CJK UNIFIED IDEOGRAPH-6570}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 31665 (\N{CJK UNIFIED IDEOGRAPH-7BB1}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 32447 (\N{CJK UNIFIED IDEOGRAPH-7EBF}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 22270 (\N{CJK UNIFIED IDEOGRAPH-56FE}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
# Histogram of the EXCHANGE_COUNT column, split into five bins.
ec = air_data['EXCHANGE_COUNT']
fig = plt.figure(figsize=(8, 5))  # canvas size in inches
plt.hist(ec, color='#0504aa', bins=5)
plt.title('会员兑换积分次数分布直方图')
plt.xlabel('兑换次数')
plt.ylabel('会员人数')
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 20250 (\N{CJK UNIFIED IDEOGRAPH-4F1A}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 21592 (\N{CJK UNIFIED IDEOGRAPH-5458}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 20817 (\N{CJK UNIFIED IDEOGRAPH-5151}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 25442 (\N{CJK UNIFIED IDEOGRAPH-6362}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 31215 (\N{CJK UNIFIED IDEOGRAPH-79EF}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 20998 (\N{CJK UNIFIED IDEOGRAPH-5206}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 27425 (\N{CJK UNIFIED IDEOGRAPH-6B21}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 24067 (\N{CJK UNIFIED IDEOGRAPH-5E03}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 30452 (\N{CJK UNIFIED IDEOGRAPH-76F4}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 26041 (\N{CJK UNIFIED IDEOGRAPH-65B9}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 20154 (\N{CJK UNIFIED IDEOGRAPH-4EBA}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
# Box plot of the Points_Sum column.
ps = air_data['Points_Sum']
fig = plt.figure(figsize=(5, 8))
plt.boxplot(
    ps,
    labels=['总累计积分'],                    # x-axis tick label
    boxprops=dict(facecolor='lightblue'),  # fill colour of the box
    patch_artist=True,                     # draw the box as a filled patch
)
plt.title('客户总累计积分箱线图')
# Horizontal grid lines only.
plt.grid(axis='y')
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 23458 (\N{CJK UNIFIED IDEOGRAPH-5BA2}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 25143 (\N{CJK UNIFIED IDEOGRAPH-6237}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 24635 (\N{CJK UNIFIED IDEOGRAPH-603B}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 32047 (\N{CJK UNIFIED IDEOGRAPH-7D2F}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 35745 (\N{CJK UNIFIED IDEOGRAPH-8BA1}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 31215 (\N{CJK UNIFIED IDEOGRAPH-79EF}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 20998 (\N{CJK UNIFIED IDEOGRAPH-5206}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 31665 (\N{CJK UNIFIED IDEOGRAPH-7BB1}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 32447 (\N{CJK UNIFIED IDEOGRAPH-7EBF}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
/shared-libs/python3.9/py-core/lib/python3.9/site-packages/IPython/core/pylabtools.py:151: UserWarning: Glyph 22270 (\N{CJK UNIFIED IDEOGRAPH-56FE}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
# Gather the attributes to correlate. The explicit .copy() makes data_corr
# an independent frame, so adding columns below no longer triggers
# SettingWithCopyWarning.
data_corr = air_data[['FFP_TIER', 'FLIGHT_COUNT', 'LAST_TO_END',
                      'SEG_KM_SUM', 'EXCHANGE_COUNT', 'Points_Sum']].copy()
# Missing ages are replaced with 0 so the column can be stored as int64.
# NOTE(review): these 0 placeholders take part in the correlation below —
# confirm that this is intended.
age1 = air_data['AGE'].fillna(0)
data_corr['AGE'] = age1.astype('int64')
data_corr['ffp_year'] = ffp_year
# Pearson correlation matrix of all gathered attributes.
dt_corr = data_corr.corr(method='pearson')
print('相关性矩阵为:\n', dt_corr)
# Heat map of the matrix with each coefficient written in its cell.
plt.subplots(figsize=(10, 10))
sns.heatmap(dt_corr, annot=True, vmax=1, square=True, cmap='Blues')
/shared-libs/python3.9/py/lib/python3.9/site-packages/pandas/core/indexing.py:1597: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
self.obj[key] = value
/shared-libs/python3.9/py/lib/python3.9/site-packages/pandas/core/indexing.py:1676: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
self._setitem_single_column(ilocs[0], value, pi)
相关性矩阵为:
FFP_TIER FLIGHT_COUNT LAST_TO_END SEG_KM_SUM \
FFP_TIER 1.000000 0.582447 -0.206313 0.522350
FLIGHT_COUNT 0.582447 1.000000 -0.404999 0.850411
LAST_TO_END -0.206313 -0.404999 1.000000 -0.369509
SEG_KM_SUM 0.522350 0.850411 -0.369509 1.000000
EXCHANGE_COUNT 0.342355 0.502501 -0.169717 0.507819
Points_Sum 0.559249 0.747092 -0.292027 0.853014
AGE 0.076245 0.075309 -0.027654 0.087285
ffp_year -0.116510 -0.188181 0.117913 -0.171508
EXCHANGE_COUNT Points_Sum AGE ffp_year
FFP_TIER 0.342355 0.559249 0.076245 -0.116510
FLIGHT_COUNT 0.502501 0.747092 0.075309 -0.188181
LAST_TO_END -0.169717 -0.292027 -0.027654 0.117913
SEG_KM_SUM 0.507819 0.853014 0.087285 -0.171508
EXCHANGE_COUNT 1.000000 0.578581 0.032760 -0.216610
Points_Sum 0.578581 1.000000 0.074887 -0.163431
AGE 0.032760 0.074887 1.000000 -0.242579
ffp_year -0.216610 -0.163431 -0.242579 1.000000
cleanedfile = './data_cleaned.csv'  # where the cleaned data is written
# Load the raw data.
airline_data = pd.read_csv(air_data_path, encoding='utf-8')
print('原始数据的形状为:', airline_data.shape)
# Drop records whose fare is missing in either year.
airline_notnull = airline_data.loc[airline_data['SUM_YR_1'].notnull() &
                                   airline_data['SUM_YR_2'].notnull(), :]
print('删除缺失记录后数据的形状为:', airline_notnull.shape)
# A record is kept only when ALL of the following hold:
#   * at least one of the two yearly fares is non-zero,
#   * total distance is positive and the average discount is non-zero,
#   * AGE is not greater than 100 (a missing AGE compares False, so those
#     records are kept).
index1 = airline_notnull['SUM_YR_1'] != 0
index2 = airline_notnull['SUM_YR_2'] != 0
index3 = (airline_notnull['SEG_KM_SUM'] > 0) & (airline_notnull['avg_discount'] != 0)
index4 = airline_notnull['AGE'] > 100
airline = airline_notnull[(index1 | index2) & index3 & ~index4]
print('数据清洗后数据的形状为:', airline.shape)
# index=False keeps the row index out of the file; otherwise reading it back
# produces a spurious "Unnamed: 0" column.
airline.to_csv(cleanedfile, index=False, encoding='utf-8')
原始数据的形状为: (62988, 44)
删除缺失记录后数据的形状为: (62299, 44)
数据清洗后数据的形状为: (62043, 44)
# Reload the cleaned data from disk.
cleanedfile = './data_cleaned.csv'
airline = pd.read_csv(cleanedfile, encoding='utf-8')
# Columns needed to build the LRFMC attributes.
selected_columns = [
    'FFP_DATE', 'LOAD_TIME', 'LAST_TO_END',
    'FLIGHT_COUNT', 'SEG_KM_SUM', 'avg_discount',
]
airline_selection = airline[selected_columns]
print('筛选的属性前5行为:\n', airline_selection.head())
筛选的属性前5行为:
FFP_DATE LOAD_TIME LAST_TO_END FLIGHT_COUNT SEG_KM_SUM avg_discount
0 2006/11/02 2014/03/31 1 210 580717 0.961639
1 2007/02/19 2014/03/31 7 140 293678 1.252314
2 2007/02/01 2014/03/31 11 135 283712 1.254676
3 2008/08/22 2014/03/31 97 23 281336 1.090870
4 2009/04/10 2014/03/31 5 152 309928 0.970658
# Build attribute L: the time between FFP_DATE and LOAD_TIME.
L = pd.to_datetime(airline_selection['LOAD_TIME']) - \
    pd.to_datetime(airline_selection['FFP_DATE'])
# Read the whole-day count straight off the timedelta (no round-trip through
# strings) and express it in 30-day units.
L = L.dt.days / 30
# Combine L with the remaining attributes, selected by name rather than by
# position, and rename them to L/R/F/M/C.
airline_features = pd.concat(
    [L, airline_selection[['LAST_TO_END', 'FLIGHT_COUNT',
                           'SEG_KM_SUM', 'avg_discount']]],
    axis=1)
airline_features.columns = ['L', 'R', 'F', 'M', 'C']
print('构建的LRFMC属性前5行为:\n', airline_features.head())
# Standardise the five attributes.
from sklearn.preprocessing import StandardScaler
data = StandardScaler().fit_transform(airline_features)
# Saved positionally, so the array is stored under the key 'arr_0'.
np.savez('./airline_scale.npz', data)
print('标准化后LRFMC五个属性为:\n', data[:5, :])
构建的LRFMC属性前5行为:
L R F M C
0 90.200000 1 210 580717 0.961639
1 86.566667 7 140 293678 1.252314
2 87.166667 11 135 283712 1.254676
3 68.233333 97 23 281336 1.090870
4 60.533333 5 152 309928 0.970658
标准化后LRFMC五个属性为:
[[ 1.43579256 -0.94493902 14.03402401 26.76115699 1.29554188]
[ 1.30723219 -0.91188564 9.07321595 13.12686436 2.86817777]
[ 1.32846234 -0.88985006 8.71887252 12.65348144 2.88095186]
[ 0.65853304 -0.41608504 0.78157962 12.54062193 1.99471546]
[ 0.3860794 -0.92290343 9.92364019 13.89873597 1.34433641]]
from sklearn.cluster import KMeans  # k-means clustering
# Load the standardised data saved earlier.
airline_scale = np.load('./airline_scale.npz')['arr_0']
k = 5  # number of clusters
# KMeans no longer accepts n_jobs (passing it raises TypeError), so it is
# not supplied. n_init is pinned so the number of restarts does not depend
# on the library's default. The random seed is fixed at 123.
kmeans_model = KMeans(n_clusters=k, n_init=10, random_state=123)
fit_kmeans = kmeans_model.fit(airline_scale)  # train the model
# Inspect the clustering result.
kmeans_cc = kmeans_model.cluster_centers_  # cluster centres
print('各类聚类中心为:\n', kmeans_cc)
kmeans_labels = kmeans_model.labels_  # cluster label of every sample
print('各样本的类别标签为:\n', kmeans_labels)
r1 = pd.Series(kmeans_labels).value_counts()  # number of samples per cluster
print('最终每个类别的数目为:\n', r1)
# Put the cluster centres in a data frame. Row i of cluster_centers_ is the
# centre of label i, so the default 0..k-1 index already is the label;
# re-indexing by the order in which labels first appear would attach the
# wrong label to each centre.
cluster_center = pd.DataFrame(kmeans_cc,
                              columns=['ZL', 'ZR', 'ZF', 'ZM', 'ZC'])
print(cluster_center)
Execution error
TypeError: __init__() got an unexpected keyword argument 'n_jobs'
# Radar chart of the customer clusters.
labels = ['ZL', 'ZR', 'ZF', 'ZM', 'ZC']
# One legend entry per cluster, numbered from 1.
legen = [' customers' + str(i + 1) for i in cluster_center.index]
lstype = ['-', '--', (0, (3, 5, 1, 5, 1, 5)), ':', '-.']
n = len(labels)
# A radar outline has to be closed, so the first attribute is repeated at
# the end of every row. This is built as a separate array (reading only the
# first n columns) so cluster_center itself is not modified and the cell can
# be run again.
values = cluster_center.to_numpy()[:, :n]
centers = np.concatenate((values, values[:, :1]), axis=1)
# Split the circle into n equal angles and close it the same way.
angle = np.linspace(0, 2 * np.pi, n, endpoint=False)
angle = np.concatenate((angle, [angle[0]]))
# Draw on polar axes.
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, polar=True)
# One outline per cluster; line styles repeat if there are more clusters
# than styles.
for i, (center, name) in enumerate(zip(centers, legen)):
    ax.plot(angle, center, linestyle=lstype[i % len(lstype)],
            linewidth=2, label=name)
# One tick per attribute. The closing angle is left out so the number of
# tick positions matches the number of labels.
ax.set_thetagrids(angle[:-1] * 180 / np.pi, labels)
plt.title('Customer Profile Analysis')
plt.legend()