# 基于简单移动平均的产量预测

(a) 通过计算滚动平均值从历史数据进行预测

(b) 与 (a) 相同，但具有额外的内在增长参数

``````# Forecast future values of a time series with a simple moving average.
#
# Each forecast value is the mean of the preceding length(past) values, so the
# first forecast is the mean of all historic values and later forecasts
# increasingly average over earlier forecasts.
#
# Arguments:
#   past   - numeric vector of historic values (at least one element); its
#            length is also the width of the moving-average window
#   length - number of future time steps to predict (a single non-negative
#            number; fractional values are truncated)
#
# Returns a numeric vector: the historic values followed by the predicted ones.
sma_forecast <- function(past, length) {
  # the argument "length" shadows base::length as a variable, so the function
  # is called through its namespace to keep the two visibly apart
  window <- base::length(past)
  stopifnot(
    is.numeric(past), window > 0,
    is.numeric(length), base::length(length) == 1, !is.na(length), length >= 0
  )
  horizon <- as.integer(length)

  # historic values followed by placeholders for the forecast
  prediction <- c(past, numeric(horizon))

  # seq_len() gives an empty loop when horizon is 0; a plain
  # (window + 1):length(prediction) would count downwards and overwrite history
  for (i in window + seq_len(horizon)) {
    prediction[i] <- mean(prediction[(i - window):(i - 1)])
  }

  prediction
}``````

``````# read in readxl
``````##   year   country    total
## 1 2018 Argentina   466649
## 2 2018   Austria   164900
## 3 2018   Belgium   308493
## 4 2018    Brazil  2879809
## 6 2018     China 27809196``````
``````# show the last rows of data_df to inspect the end of the data set
tail(data_df)``````
``````##     year    country    total
## 835 1999     Turkey   297862
## 836 1999    Ukraine     1918
## 837 1999         UK  1973519
## 838 1999        USA 13024978
## 839 1999 Uzbekistan    44433
## 840 1999     Others    11965``````
``````# keep only the rows that belong to the USA (dplyr)
library(dplyr)
data_df <- data_df %>% filter(country == "USA")
# draw the production time series with ggplot2; totals are shown in millions
library(ggplot2)
ggplot(data = data_df, mapping = aes(x = year, y = total / 1e6)) +
  geom_path(color = "red", size = 2) +
  labs(
    title = "US automotive industry production output",
    subtitle = "historical OICA data, for 1999 - 2018",
    x = "year",
    y = "output [millions of units]"
  ) +
  ylim(0, 15)``````
``````# build a ggplot2-friendly data frame holding the historical values and the forecast
library(dplyr)
# -- sort the historical data by ascending year so the newest value comes last
data_df <- data_df %>% arrange(year)
# -- forecast the coming years with the simple-moving-average function sma_forecast
forecast_years <- 10
predictionVals <- sma_forecast(past = data_df$total, length = forecast_years)
# -- one row per year: historical rows first, followed by the predicted rows;
#    the historic years are taken as they are and the forecast years continue
#    after the latest one, so gaps in the history cannot shift the labels
plot_df <- data.frame(
  year = c(data_df$year, max(data_df$year) + seq_len(forecast_years)),
  country = data_df$country[1],
  total = predictionVals,
  category = rep(c("history", "prediction"),
                 times = c(nrow(data_df), forecast_years)),
  stringsAsFactors = FALSE
)
# -- plot the content of the data frame with ggplot2, colored by category
ggplot(plot_df) +
  geom_point(
    mapping = aes(x = year, y = total / 1000000, color = category),
    size = 2
  ) +
  labs(
    title = "US automotive industry production output",
    subtitle = "A prediction from historic OICA data, based on moving average calculation"
  ) +
  xlab("year") +
  ylab("output [millions of units]") +
  ylim(0, 15)``````

(a) 分割训练和测试集以评估方法

(b) 不同国家、时间间隔和预测长度的评估方法

(c) 对不同于生产输出数据的数据进行测试预测

(d) ……