# 绘制每个特征的分布
def plot_distribution(dataset, cols=5, width=20, height=15, hspace=0.2, wspace=0.5):
plt.style.use('seaborn-whitegrid')
fig = plt.figure(figsize=(width,height))
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=wspace, hspace=hspace)
rows = math.ceil(float(dataset.shape[1]) / cols)
for i, column in enumerate(dataset.columns):
ax = fig.add_subplot(rows, cols, i + 1)
ax.set_title(column)
if dataset.dtypes[column] == np.object:
g = sns.countplot(y=column, data=dataset)
substrings = [s.get_text()[:18] for s in g.get_yticklabels()]
g.set(yticklabels=substrings)
plt.xticks(rotation=25)
else:
g = sns.distplot(dataset[column])
plt.xticks(rotation=25)
plot_distribution(data, cols=3, width=20, height=20, hspace=0.45, wspace=0.5)
执行这段代码时,出现如下错误(这段代码完全copy老师的),同时显示了部分特征的分布图
ValueError: could not convert string to float: 'scott'
RuntimeError: Selected KDE bandwidth is 0. Cannot estiamte density.
完整的错误信息如下:




