xJB拜客生活常识网
pandas&matplotlib绘图基础
import matplotlib
import numpy as np
import pandas as pd

# Show up to 100 columns / rows when a DataFrame is displayed.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 100)

# Inspect the current matplotlib configuration (a bare expression: it is only
# rendered when it is the last expression of a notebook cell).
matplotlib.rcParams
# Use the SimHei font so that Chinese text is displayed.
matplotlib.rcParams['font.sans-serif'] = ['SimHei']
# Draw negative numbers with an ASCII hyphen instead of the Unicode minus sign.
matplotlib.rcParams['axes.unicode_minus'] = False

# Reload the font cache. `_rebuild` is a private helper that is not available
# in every matplotlib release, so a missing helper is tolerated instead of
# aborting the whole setup cell.
try:
    from matplotlib.font_manager import _rebuild
except ImportError:
    pass
else:
    _rebuild()
1.箱线图
import matplotlib.pyplot as plt
%matplotlib inline
# Draw one box plot per column. The column is passed as `y` because the first
# positional argument of DataFrame.plot is `x`, which is turned into the index
# rather than being the data that gets plotted.
df_2016.plot(y='population', kind='box')
df_2016.plot(y='gdp', kind='box')
def draw_boxplot(data):
    """Draw a box plot of ``data`` on a new figure titled '箱线图'.

    Args:
        data: Values forwarded unchanged to ``Axes.boxplot``.
    """
    _, ax1 = plt.subplots()
    ax1.set_title('箱线图')
    # `meanline` is only honoured when `showmeans` is enabled, so both are set
    # to actually render the mean as a line across the box.
    ax1.boxplot(data, showbox=True, showcaps=True, showmeans=True, meanline=True)
2.直方图
def draw_hist(data, bins=10, log=False):
    """Print a five-number summary of ``data`` and show its histogram.

    The summary (min, 25%, median, 75%, max) is printed first. The histogram
    is then drawn with blue bars, black bar edges and 70% opacity, showing raw
    counts (``density=False``), and displayed with ``plt.show()``.

    Args:
        data: Numeric values to summarise and plot.
        bins: Passed to ``plt.hist`` as its ``bins`` argument (the number of
            bars). Defaults to 10.
        log: Passed to ``plt.hist`` as its ``log`` argument. Defaults to False.
    """
    # One vectorised call instead of five separate percentile computations.
    q_min, q25, q50, q75, q_max = np.percentile(data, [0, 25, 50, 75, 100])
    print('min:', q_min, '\n25%:', q25, '\nmedian:', q50,
          '\n75%:', q75, '\nmax:', q_max, '\n')

    plt.hist(data, bins, log=log, density=False,
             facecolor="blue", edgecolor="black", alpha=0.7)
    # X-axis label.
    plt.xlabel("区间")
    # Y-axis label.
    plt.ylabel("频数/频率")
    # Figure title.
    plt.title("频数/频率分布直方图")
    plt.show()
# Demo: 1000 standard-normal samples scaled by 100, drawn with 40 bins and a
# linear count axis.
data = 100 * np.random.randn(1000)
draw_hist(data, bins=40, log=False)
3.柱状图
# Bar chart: X holds the bar positions, Y the bar heights.
X = list(range(6))
Y = [222, 42, 455, 664, 454, 334]
fig = plt.figure()
plt.bar(X, Y, width=0.4, color="green")
4.折线图
import matplotlib.pyplot as plt
%matplotlib inline
# Remove the thousands separators so the amounts can be parsed as numbers.
df_projects['totalamt'] = pd.to_numeric(df_projects['totalamt'].str.replace(',', ''))

# Sum the amounts per approval year and draw the result as a line chart.
# The grouped sum is a Series indexed by 'approvalyear', so its index supplies
# the x-axis and no x/y arguments are needed. The chain is parenthesised so it
# can legally continue on the following lines.
ax = (
    df_projects.groupby('approvalyear')['totalamt']
    .sum()
    .plot(title='Total Amount Approved per Year')
)
ax.set_xlabel('year')
ax.set_ylabel('amount $')
plt.show()
5.折线图:多系列
import matplotlib.pyplot as plt
%matplotlib inline
# Reshape the data set from wide form to long form: the identifying columns
# are kept, every other column name goes into 'year' and its value into 'GDP'.
df_melt = df.melt(
    id_vars=['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code'],
    var_name='year',
    value_name='GDP',
)
# Parse the year labels into datetime values.
df_melt['year'] = pd.to_datetime(df_melt['year'])
def plot_results(column_name):
    """Plot ``column_name`` over time for Afghanistan, Albania and Honduras.

    Reads the module-level ``df_melt`` frame and draws one line per country
    on a new 8x6 inch figure, with 'year' on the x-axis.

    Args:
        column_name: Name of the ``df_melt`` column to plot on the y-axis.
    """
    countries = ['Afghanistan', 'Albania', 'Honduras']
    fig, ax = plt.subplots(figsize=(8, 6))
    selected = df_melt[df_melt['Country Name'].isin(countries)]
    # Label every line from its own group key so the legend cannot drift out
    # of sync with the data (e.g. when one of the countries has no rows).
    for country, rows in selected.groupby('Country Name'):
        rows.plot('year', column_name, ax=ax, label=country)
    ax.legend()
# Draw the 'GDP' column for the three selected countries.
plot_results(column_name='GDP')
6.散点图
# Scatter plot of GDP against population; every point is annotated with the
# value from the 'Country Name' column.
x = list(df_2016['population'])
y = list(df_2016['gdp'])
text = df_2016['Country Name']

fig, ax = plt.subplots(figsize=(15, 10))
ax.scatter(x, y)
plt.title('GDP vs Population')
plt.xlabel('population')
plt.ylabel('GDP')

# Pair each label with its coordinates by position instead of indexing.
for txt, x_pos, y_pos in zip(text, x, y):
    ax.annotate(txt, (x_pos, y_pos))