本文共 7433 字,大约阅读时间需要 24 分钟。
源数据(前12)
数据预处理(DATE格式转换):
import pandas as pd
unrate = pd.read_csv("UNRATE.csv")
unrate["DATE"] = pd.to_datetime(unrate["DATE"])
print(unrate.head(12))
         DATE  VALUE
0  1948-01-01    3.4
1  1948-02-01    3.8
2  1948-03-01    4.0
3  1948-04-01    3.9
4  1948-05-01    3.5
5  1948-06-01    3.6
6  1948-07-01    3.6
7  1948-08-01    3.9
8  1948-09-01    3.8
9  1948-10-01    3.7
10 1948-11-01    3.8
11 1948-12-01    4.0
绘制折线图:
import matplotlib.pyplot as plt
plt.plot()
plt.show()
firts_twelve = unrate = unrate[0:12]
plt.plot(firts_twelve["DATE"],firts_twelve["VALUE"])
plt.show()
x坐标标注旋转
plt.xticks(rotation = 45)
坐标轴与标题标注
plt.xlabel("Month")
plt.ylabel("Unemployment Rate")
plt.title("Monthly Unemployment Trend,1948")
子图操作:
fig = plt.figure()#新建绘图区域
ax1 = fig.add_subplot(4,3,1)
ax2 = fig.add_subplot(4,3,2)
ax3 = fig.add_subplot(4,3,6)
fig = plt.figure(figsize=(6,6))#指定画图区域大小
ax1 = fig.add_subplot(2,1,1)
ax2 = fig.add_subplot(2,1,2)
ax1.plot(np.arange(5),np.random.randint(1,5,5))
ax2.plot(np.arange(10),np.arange(10)*3)
plt.show()
同一坐标系下绘制多条线:
fig = plt.figure(figsize=(6,3))
plt.plot(unrate[0:12]['MONTH'], unrate[0:12]['VALUE'], c='red')
plt.plot(unrate[12:24]['MONTH'], unrate[12:24]['VALUE'], c='blue')
plt.show()
fig = plt.figure(figsize=(10,6))
colors = ['red', 'blue', 'green', 'orange', 'black']
for i in range(5):
    start_index = i*12
    end_index = (i+1)*12
    subset = unrate[start_index:end_index]
    plt.plot(subset['MONTH'], subset['VALUE'], c=colors[i])

plt.show()
曲线标签:
fig = plt.figure(figsize=(5,3))
colors = ['red', 'blue', 'green', 'orange', 'black']
for i in range(5):
    start_index = i*12
    end_index = (i+1)*12
    subset = unrate[start_index:end_index]
    label = str(1948 + i)
    plt.plot(subset['MONTH'], subset['VALUE'], c=colors[i], label=label)
plt.legend(loc='best')#标签定位
#print (help(plt.legend))
plt.show()
loc= best, upper right, upper left, lower left, lower right, right, center left, center right, lower center, upper center, center
完整折线图:
fig = plt.figure(figsize=(5,3))
colors = ['red', 'blue', 'green', 'orange', 'black']
for i in range(5):
    start_index = i*12
    end_index = (i+1)*12
    subset = unrate[start_index:end_index]
    label = str(1948 + i)
    plt.plot(subset['MONTH'], subset['VALUE'], c=colors[i], label=label)
plt.legend(loc='upper left')
plt.xlabel('Month, Integer')
plt.ylabel('Unemployment Rate, Percent')
plt.title('Monthly Unemployment Trends, 1948-1952')
plt.show()
绘制条形图:
import pandas as pd
reviews = pd.read_csv('fandango_scores.csv')
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
norm_reviews = reviews[cols]
print(norm_reviews[:1])
                             FILM  RT_user_norm  Metacritic_user_nom  \
0  Avengers: Age of Ultron (2015)           4.3                 3.55

   IMDB_norm  Fandango_Ratingvalue  Fandango_Stars
0        3.9                   4.5             5.0
import matplotlib.pyplot as plt
from numpy import arange
#The Axes.bar() method has 2 required parameters, left and height.
#We use the left parameter to specify the x coordinates of the left sides of the bar.
#We use the height parameter to specify the height of each bar
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
bar_heights = norm_reviews.ix[0, num_cols].values
print (bar_heights)
bar_positions = arange(5) + 0.75
print (bar_positions)
fig, ax = plt.subplots()
ax.bar(bar_positions, bar_heights, 0.5)
plt.show()
[4.3 3.55 3.9 4.5 5.0]
[0.75 1.75 2.75 3.75 4.75]
横向条形图:
import matplotlib.pyplot as plt
from numpy import arange
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
bar_widths = norm_reviews.ix[0, num_cols].values
bar_positions = arange(5) + 0.75
tick_positions = range(1,6)
fig, ax = plt.subplots()
ax.barh(bar_positions, bar_widths, 0.5)
ax.set_yticks(tick_positions)
ax.set_yticklabels(num_cols)
ax.set_ylabel('Rating Source')
ax.set_xlabel('Average Rating')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()
散点图:
fig, ax = plt.subplots()
ax.scatter(norm_reviews['Fandango_Ratingvalue'], norm_reviews['RT_user_norm'])
ax.set_xlabel('Fandango')
ax.set_ylabel('Rotten Tomatoes')
plt.show()
柱形图:
import pandas as pd
import matplotlib.pyplot as plt
reviews = pd.read_csv('fandango_scores.csv')
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue']
norm_reviews = reviews[cols]
print(norm_reviews[:5])
                             FILM  RT_user_norm  Metacritic_user_nom  \
0  Avengers: Age of Ultron (2015)           4.3                 3.55
1               Cinderella (2015)           4.0                 3.75
2                  Ant-Man (2015)           4.5                 4.05
3          Do You Believe? (2015)           4.2                 2.35
4   Hot Tub Time Machine 2 (2015)           1.4                 1.70

   IMDB_norm  Fandango_Ratingvalue
0       3.90                   4.5
1       3.55                   4.5
2       3.90                   4.5
3       2.70                   4.5
4       2.55                   3.0
fandango_distribution = norm_reviews['Fandango_Ratingvalue'].value_counts()
fandango_distribution = fandango_distribution.sort_index()
imdb_distribution = norm_reviews['IMDB_norm'].value_counts()
imdb_distribution = imdb_distribution.sort_index()
print(fandango_distribution)
print(imdb_distribution)
2.7     2
2.8     2
2.9     5
3.0     4
3.1     3
3.2     5
3.3     4
3.4     9
3.5     9
3.6     8
3.7     9
3.8     5
3.9    12
4.0     7
4.1    16
4.2    12
4.3    11
4.4     7
4.5     9
4.6     4
4.8     3
Name: Fandango_Ratingvalue, dtype: int64
2.00     1
2.10     1
2.15     1
2.20     1
2.30     2
2.45     2
2.50     1
2.55     1
2.60     2
2.70     4
2.75     5
2.80     2
2.85     1
2.90     1
2.95     3
3.00     2
3.05     4
3.10     1
3.15     9
3.20     6
3.25     4
3.30     9
3.35     7
3.40     1
3.45     7
3.50     4
3.55     7
3.60    10
3.65     5
3.70     8
3.75     6
3.80     3
3.85     4
3.90     9
3.95     2
4.00     1
4.05     1
4.10     4
4.15     1
4.20     2
4.30     1
Name: IMDB_norm, dtype: int64
fig, ax = plt.subplots()
#ax.hist(norm_reviews['Fandango_Ratingvalue'])#绘制柱形图
#ax.hist(norm_reviews['Fandango_Ratingvalue'],bins=20)#规定20条
ax.hist(norm_reviews['Fandango_Ratingvalue'], range=(4, 5),bins=20)#4到5范围内20条
plt.show()
fig = plt.figure(figsize=(5,20))
ax1 = fig.add_subplot(4,1,1)
ax2 = fig.add_subplot(4,1,2)
ax3 = fig.add_subplot(4,1,3)
ax4 = fig.add_subplot(4,1,4)
ax1.hist(norm_reviews['Fandango_Ratingvalue'], bins=20, range=(0, 5))
ax1.set_title('Distribution of Fandango Ratings')
ax1.set_ylim(0, 50)#y轴范围
plt.show()
箱型图:
fig, ax = plt.subplots()
ax.boxplot(norm_reviews['RT_user_norm'].values)
ax.set_xticklabels(['Rotten Tomatoes'])
ax.set_ylim(0, 5)
plt.show()
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue']
fig, ax = plt.subplots()
ax.boxplot(norm_reviews[num_cols].values)
ax.set_xticklabels(num_cols, rotation=90)
ax.set_ylim(0,5)
plt.show()
去坐标锯齿:
fig, ax = plt.subplots()
# Add your code here.
fig, ax = plt.subplots()
ax.tick_params(bottom="off", top="off", left="off", right="off")
plt.show()
去边框:
fig, ax = plt.subplots()
# Add your code here.
fig, ax = plt.subplots()
for key,spine in ax.spines.items():
    spine.set_visible(False)
plt.show()
RGB颜色通道:
cb_dark_blue = (0/255, 107/255, 164/255)
线宽:
ax.plot(women_degrees['Year'], women_degrees[major_cats[sp]], c=cb_dark_blue, label='Women', linewidth=10)
曲线标注:
ax.text(2005, 87, 'Men')
ax.text(2002, 8, 'Women')