def rank_feature_by_corr(df, method='pearson'):
    """Rank the numerical features by their correlation with the label.

    Every column named in the module-level ``numerical_features`` is
    correlated with the label column ``df[label[0]]``; the features are
    then ordered by the absolute value of the coefficient, strongest first.

    Args:
        df: DataFrame containing the feature columns and the label column.
        method: Correlation method forwarded to ``.corr`` (``'pearson'`` by
            default, so the result is the Pearson coefficient unless the
            caller asks for something else).

    Returns:
        A DataFrame with the columns ``feature``, ``corr`` and ``abs``
        (absolute value of ``corr``), sorted by ``abs`` in descending order
        and re-indexed from 0.
    """
    # The label column is the same for every feature; select it once.
    target = df[label[0]]
    feature_corr = {
        feature: df[feature].corr(target, method=method)
        for feature in numerical_features
    }
    corr_df = pd.DataFrame(
        list(feature_corr.items()), columns=['feature', 'corr']
    )
    corr_df['abs'] = np.abs(corr_df['corr'])
    return corr_df.sort_values('abs', ascending=False).reset_index(drop=True)


def plot_feature_correlation(df, feature):
    """Show a scatter plot of ``feature`` against the label column.

    Histograms of both variables are drawn on the margins of the plot.

    Args:
        df: DataFrame containing ``feature`` and the label column
            ``label[0]``.
        feature: Name of the column plotted on the x axis.
    """
    fig = px.scatter(
        df,
        x=feature,
        y=label[0],
        marginal_x="histogram",
        marginal_y="histogram",
        width=600,
        height=450,
    )
    fig.show()


pcc_rank = rank_feature_by_corr(df)
# Plot the (up to) two features most strongly correlated with the label.
# head(2) avoids the KeyError that indexing rows 0 and 1 by position would
# raise when fewer than two numerical features are available.
for top_feature in pcc_rank['feature'].head(2):
    plot_feature_correlation(df, top_feature)
import plotly.figure_factory as ff


def plot_label_distribution_per_class(df, feature):
    """Plot the label distribution for every class of ``feature``.

    ``df`` is grouped by ``feature`` and one density curve plus rug is
    drawn per group from that group's values of the label column
    ``label[0]``.

    Args:
        df: DataFrame containing ``feature`` and the label column.
        feature: Name of the column whose distinct values define the groups.
    """
    hist_data = []
    group_labels = []
    for name, group in df.groupby(feature):
        values = group[label[0]].dropna()
        # Histograms are disabled below, so each group is drawn only as a
        # fitted density curve; that fit needs at least two distinct
        # observations, so groups without them are left out instead of
        # failing the whole figure.
        if values.nunique() < 2:
            continue
        hist_data.append(values)
        group_labels.append(name)

    # Create the distplot without histogram bars. No curve_type is passed,
    # so plotly's default curve type is used.
    fig = ff.create_distplot(hist_data, group_labels, show_hist=False)

    # Add title
    fig.update_layout(title_text='Curve and Rug Plot')
    fig.show()


plot_label_distribution_per_class(df, categorical_features[0])