当前位置：首页 > news >正文

灯箱网站开发创造自己的网站

news 2026/4/20 5:40:16

灯箱网站开发,创造自己的网站,淘宝做导航网站有哪些,360建站公司

摘要：事先说明：由于每次都要导入库和处理中文乱码问题，我都是在最前面先写好，后面的代码就不再写了。要是 copy 到自己本地的话，就要把下面的代码也 copy 下。 # 准备工作 import pandas as pd import numpy as np from matplotlib impor…

事先说明：由于每次都要导入库和处理中文乱码问题，我都是在最前面先写好，后面的代码就不再写了。要是 copy 到自己本地的话，就要把下面的代码也 copy 下。

# 准备工作
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import matplotlib

matplotlib.rc("font", family="FangSong")

First

需求：给定最流行的1000部电影的相关的数据，统计 Rating 和 runtime 的分布情况。

分析：
- 毫无疑问，分布情况肯定是直方图
- 把所有数据中是 runtime 和 Rating 的列选出来
- 求极差，设置组距
- 设置/绘制直方图

代码：

# 统计最流行1000部电影的Rating和runtime分布情况
file_path = "./IMDB-Movie-Data.csv"
df = pd.read_csv(file_path)
# print(df.head(1))
# print(df.info())

# rating,runtime分布情况
# 选择图形：直方图
# 准备数据
runtime_data = df["Runtime (Minutes)"].values

# 计算极差
max_runtime = runtime_data.max()
min_runtime = runtime_data.min()

# 计算组数
# print(max_runtime-min_runtime)
num_runtime = int((max_runtime-min_runtime)//5)

# 设置图形的大小
plt.figure(figsize=(20,8),dpi=200)
plt.hist(runtime_data,num_runtime)

_x = [min_runtime]
i = min_runtime
while i<=max_runtime+25:
    i = i+5
    _x.append(i)
plt.xticks(_x,rotation=45)
plt.title("时长runtime的分布直方图")

plt.show()

# 准备数据
Ratint_data = df["Rating"].values

max_Rating = Ratint_data.max()
min_Rating = Ratint_data.min()

num_Rating = int((max_Rating-min_Rating)//0.5)

plt.figure(figsize=(20,8),dpi=200)
plt.hist(Ratint_data,num_Rating)

# 设置不等宽组距
_x=[1.9,3.5]
i=3.5
while i<=max_Rating+0.5:
    i+=0.5
    _x.append(i)
plt.xticks(_x)
plt.title("评分Rating的分布直方图")

plt.show()

效果：

Second

需求：给定最流行的1000部电影的相关的数据，统计这些电影的类型。

分析：
- 毫无疑问，连续数据的分布用条形图
- 选出电影中类型的那一列数据
- 用相关方法把其变成列表
- 构造全零数组
- 遍历每个电影。如果有该类型，则赋值为1，否则不变
- 排序，绘制条形图

代码：

# 统计最流行1000部电影的类型

# 准备数据
file_path="IMDB-Movie-Data.csv"
df=pd.read_csv(file_path)
# print(df["Genre"].head())

# 统计电影的类型
temp_list=df["Genre"].str.split(",").tolist()
# print(temp_list)
genre_list=list(set(i for j in temp_list for i in j))
# print(genre_list)

# 构造全零的数组
zeros_df=pd.DataFrame(np.zeros((df.shape[0],len(genre_list))),columns=genre_list)
# print(zeros_df.head())

# 给每个电影存在的类型赋值为1
for i in range(df.shape[0]):
    zeros_df.loc[i,temp_list[i]]=1
# print(zeros_df.head())

# 统计每种类型的电影的和
genre_count=zeros_df.sum(axis=0)
# print(genre_count)

# 排序
genre_count=genre_count.sort_values()
# print(genre_count)
_x=genre_count.index
_y=genre_count.values
# print(_x,_y)

# 绘制条形图
plt.figure(figsize=(20,8),dpi=200)
plt.bar(range(len(_x)),_y)
plt.xticks(range(len(_x)),_x)
plt.xlabel("电影类型")
plt.ylabel("电影数量")
plt.title("最流行的1000部电影的分类")
plt.show()

效果：

思考学习：

某一列是字符串类型，并且有多个值。我们可以通过此题学到一种解决办法，以后可以套用：
- 用字符串方法进行切割，转化成列表
- 两层循环取出类型

# 通过字符串的方法进行切割
temp_list=df["Genre"].str.split(",").tolist()

# 套用两层循环，用set是去重
genre_list=list(set(i for j in temp_list for i in j))

对于某一特征有多个属性，而我们要统计属性的数量。我们可以通过此题学到一种解决办法，以后可以套用：
- 构造全零数组，维度根据实际情况来。一般情况下，0轴是样本数量，1轴是属性数量，列标签也是属性（包含所有属性），0表示没有这种属性
- 遍历每个样本的该特征的所有属性，如果有，则将该位置的值变为1
- 统计求和

# 构造全零的数组
zeros_df=pd.DataFrame(np.zeros((df.shape[0],len(genre_list))),columns=genre_list)
# print(zeros_df.head())

# 给每个电影存在的类型赋值为1
for i in range(df.shape[0]):
    zeros_df.loc[i,temp_list[i]]=1
# print(zeros_df.head())

# 统计每种类型的电影的和
genre_count=zeros_df.sum(axis=0)
# print(genre_count)

Third

需求：给定 Starbucks 所有店铺的相关数据：
- 求中美两国 Starbucks 的数量
- 绘制店铺总数前十的国家的图
- 绘制中国每个城市（省市）的店铺数量的图

分析：
- 统计中美两国 Starbucks 的数量
  - 用 pandas 自带的分组操作，按国家（Country）分类
  - 用聚合 count 方法
  - 选出中美两国
- 绘制店铺总数前十的国家的图
  - 根据第一问的数据进行排序
  - 绘制图形
- 绘制图形呈现中国每个城市的店铺数量
  - 找出中国的数据
  - 用 pandas 自带的分组操作，按省市（State/Province）分类
  - 用聚合 count 方法
  - 绘制图形

代码：

# 统计中美两国Starbucks的数量

# 准备数据
file_path="starbucks_store_worldwide.csv"
df=pd.read_csv(file_path)
# print(df.head())

# 根据国家分组
country_data=df.groupby(by="Country")
# print(country_data)
# for country,values in country_data:
#     print(country)
#     print(values)

# 测试，看country_data统计出来的是什么数据
# t=country_data["Ownership Type"]
# t=country_data["Brand"]
# print(t)
# for i in t:
#     print(i)

# 调用聚合方法，得到答案
# country_count=country_data["Ownership Type"].count().sort_values()
country_count=country_data["Brand"].count().sort_values()
# print(country_count)
print("美国Starbucks数量："+str(country_count["US"]))
print("中国Starbucks数量："+str(country_count["CN"]))

# 绘制店铺总数前十的国家的图
country_max=country_count[-10:]
# print(country_max)
_x=country_max.index
_y=country_max.values
# print(_x)
# print(_y)

plt.figure(figsize=(20,8),dpi=200)
plt.bar(range(len(_x)),_y)
plt.xticks(range(len(_x)),_x)
plt.title("starbucks店铺总数前十的国家")
plt.show()

# 绘制图形呈现中国每个城市的店铺数量
china_data=df[df["Country"]=="CN"]
# print(china_data)

china_province=china_data.groupby(by="State/Province")
# for province,values in china_province:
#     if(int(province)==31):
#         print(province)
#         print(values)

china_province=china_province["Brand"].count().sort_values()
# print(china_province)

_x=china_province.index
_y=china_province.values

plt.figure(figsize=(20,8),dpi=200)
plt.bar(range(len(_x)),_y)
plt.xticks(range(len(_x)),_x)
plt.title("中国每个城市的店铺数量")
plt.show()

效果：

思考学习：
- 学会使用 pandas 自带的分组操作，注意操作之后得到的迭代器（应该是迭代器，毕竟不能直接看数据，但是支持遍历等操作）
- 对于上一步得到的迭代器，使用聚合 count，可以直接统计出各个组内的数据数量

Fourth

需求：给出全球排名前10000本书相关数据，统计：
- 不同年份的书籍数量
- 不同年份的书籍的平均评分情况

分析：相信经过前面三个案例的练习，这个案例应该可以轻松解决。所以我就偷个懒，不写分析了。

代码：

# 不同年份书籍的数量
file_path="books.csv"
df=pd.read_csv(file_path)
year_data=df[pd.notnull(df["original_publication_year"])].groupby(by="original_publication_year").count()["id"]
# year_data=df.groupby(by="original_publication_year").count()["id"]
print(year_data)

# 不同年份的书籍平均评分
rating_data=df[pd.notnull(df["original_publication_year"])]
rating_mean=rating_data["average_rating"].groupby(by=rating_data["original_publication_year"]).mean()

_x=rating_mean.index
_y=rating_mean.values

plt.figure(figsize=(20,8),dpi=200)
plt.plot(range(len(_x)),_y)
plt.xticks(list(range(len(_x)))[::5],_x[::5].astype(int),rotation=45)
plt.title("不同年份的书籍平均评分")
plt.show()

效果：

查看全文

http://www.hkea.cn/news/14337339/