import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the scenic-spot data set.
df = pd.read_csv(r'D:\project\旅游景点票价预测\qunar\data.csv', encoding='utf-8')

# ---- Data preprocessing ----


def _rating_to_int(raw):
    """Return the first character of *raw* as an int; the filler value 0 maps to 0."""
    if raw == 0:
        return 0
    # presumably values look like "5A..." so the first character is the digit
    # -- confirm against the CSV
    return int(raw[0])


def _hot_to_float(text):
    """Parse the last space-separated token of *text* as a float with two decimals."""
    score = float(text.split(" ")[-1])
    return float(format(score, ".2f"))


# Scenic-spot rating: missing ratings become 0, everything else its leading digit.
df['lever'] = df['lever'].fillna(0)
df['lever'] = df['lever'].apply(_rating_to_int)

# Scenic-spot popularity, kept to two decimal places.
df['hot'] = df['hot'].apply(_hot_to_float)

# Split 'area' on the middle dot "·" (not an ASCII period) once, then pick the
# first, second and last parts.
area_parts = df['area'].apply(lambda text: text.split("·"))
df['province'] = area_parts.apply(lambda parts: parts[0])
df['city'] = area_parts.apply(lambda parts: parts[1])
df['mini_city'] = area_parts.apply(lambda parts: parts[-1])

# The raw column is no longer needed once it has been split.
df.drop('area', axis=1, inplace=True)
# Top 10 scenic spots by sales volume.
import seaborn as sns

# Sort by sales ('num'), highest first, and renumber the rows from 0.
num_top = df.sort_values(by='num', axis=0, ascending=False).reset_index(drop=True)
top10 = num_top.head(10)

# Font setup so that Chinese labels and minus signs render correctly.
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
plt.rcParams['axes.unicode_minus'] = False
sns.set(font='SimHei')
sns.set_context('talk')  # controls the default font size of labels in the plot

fig = plt.figure()  # pass figsize=(w, h) here to change the figure size
# x and y must be passed by keyword: seaborn 0.11 deprecated positional data
# vectors and seaborn 0.12+ raises TypeError for them.
sns.barplot(x=top10['name'], y=top10['num'])
plt.xticks(rotation=90)
# NOTE: when this runs as a plain script, nothing is displayed until
# plt.show() (or fig.savefig(...)) is called.
# Relationship between scenic-spot rating and province.
# A constant column of ones makes the grouped sum a row count per
# (province, rating) pair.
df['lever_sum'] = 1
var = df.groupby(['province', 'lever'])['lever_sum'].sum()
# unstack() turns the one-dimensional grouped result into a two-dimensional
# table (one row per province, one column per rating) so that each rating is
# drawn as its own bar.
var.unstack().plot.bar(
    figsize=(35, 10),
    stacked=False,
    color=['red', 'blue', 'green', 'black'],
)
# 老师,我在 PyCharm 上运行到最后一段时报错了,可是我的代码和视频里老师的代码是一样的。