def prepare_country_stats(oecd_bli, gdp_per_capita):
    """Combine OECD indicator data with GDP per capita, one row per country.

    Neither input frame is modified: the GDP table is renamed and re-indexed
    on a copy, so the function can be called repeatedly with the same inputs.

    Args:
        oecd_bli: DataFrame with at least the columns "INEQUALITY",
            "Country", "Indicator" and "Value".
        gdp_per_capita: DataFrame with at least the columns "Country" and
            "2015".

    Returns:
        A DataFrame indexed by country, sorted by ascending "GDP per capita",
        holding the columns "GDP per capita" and "Life satisfaction". The rows
        at positions 0, 1, 6, 8, 33, 34 and 35 of the sorted table are left
        out.

    Note:
        The kept positions are hard-coded against a 36-row merged table;
        a shorter table makes the positional lookup fail.
    """
    # Filter rows: keep only the records whose INEQUALITY is "TOT".
    oecd_bli = oecd_bli[oecd_bli["INEQUALITY"] == "TOT"]
    # Turn the long (Country, Indicator, Value) rows into one column per indicator.
    oecd_bli = oecd_bli.pivot(index="Country", columns="Indicator", values="Value")
    # Rename the column and index by country, without touching the caller's frame.
    gdp_per_capita = gdp_per_capita.rename(
        columns={"2015": "GDP per capita"}
    ).set_index("Country")
    # Join the two tables on their country index.
    full_country_stats = pd.merge(
        left=oecd_bli, right=gdp_per_capita, left_index=True, right_index=True
    )
    # Sort by GDP per capita.
    full_country_stats = full_country_stats.sort_values(by="GDP per capita")
    # Drop the rows that are not wanted; build the kept positions in
    # ascending order explicitly rather than relying on set iteration order.
    remove_indices = {0, 1, 6, 8, 33, 34, 35}
    keep_indices = [i for i in range(36) if i not in remove_indices]
    # Select rows by integer position.
    return full_country_stats[["GDP per capita", "Life satisfaction"]].iloc[keep_indices]


# What df.pivot() does:
#
#   Country   | Indicator  | Value          Country   | Dwe....... | Housing .. | ...
#   --------------------------------  ==>   ------------------------------------------
#   Australia | Dwe......  | 1.10           Australia | 1.10       | 20.00      | ...
#   Australia | Housing .. | 20.00          Austria   | ...
#   ...                                     ...
保存图片:
1
2
3
4
5
6
7
8
9
10
11
12
# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "fundamentals"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
# Create the output directory up front; an existing directory is not an error.
os.makedirs(IMAGES_PATH, exist_ok=True)


def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current matplotlib figure under IMAGES_PATH.

    Args:
        fig_id: File name without extension; also echoed in the log line.
        tight_layout: When true, call ``plt.tight_layout()`` before saving.
        fig_extension: File extension, also passed to ``savefig`` as the
            output format.
        resolution: Value passed to ``savefig`` as ``dpi``.
    """
    path = os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)
在图片上加注释:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# Scatter plot of life satisfaction against GDP per capita, with a handful of
# countries highlighted in red and labelled with an arrow.
sample_data.plot(kind='scatter', x="GDP per capita", y='Life satisfaction', figsize=(5, 3))
plt.axis([0, 60000, 0, 10])
position_text = {
    "Hungary": (5000, 1),  # (x, y) of the lower-left corner of the text box
    "Korea": (18000, 1.7),
    "France": (29000, 2.4),
    "Australia": (40000, 3.0),
    "United States": (52000, 3.8),
}
for country, pos_text in position_text.items():
    # NOTE(review): assumes each sample_data row unpacks to exactly
    # (GDP per capita, Life satisfaction), in that order - confirm.
    pos_data_x, pos_data_y = sample_data.loc[country]
    # Use a shorter label for the United States.
    country = "U.S." if country == "United States" else country
    # xy: arrow tip position, xytext: text box position,
    # shrink: shorten the arrow by 10%
    plt.annotate(
        country,
        xy=(pos_data_x, pos_data_y),
        xytext=pos_text,
        arrowprops=dict(facecolor='black', width=0.5, shrink=0.1, headwidth=5),
    )
    # Mark the annotated data point with a red dot.
    plt.plot(pos_data_x, pos_data_y, "ro")
plt.xlabel("GDP per capita (USD)")
save_fig('money_happy_scatterplot')
plt.show()