I have an MLP classifier that predicts stock market prices from daily sentiment scores (the 'compound', 'neg', 'neu' and 'pos' columns of my DataFrame). Now I want to try an RNN to do the same thing, but I'm getting stuck when fitting the model.
The MLP classifier was:
from sklearn.neural_network import MLPClassifier
from datetime import datetime, timedelta
import numpy as np
import pandas as pd

years = [2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016]
prediction_list = []
for year in years:
    # Splitting the training and testing data
    train_start_date = str(year) + '-01-01'
    train_end_date = str(year) + '-10-31'
    test_start_date = str(year) + '-11-01'
    test_end_date = str(year) + '-12-31'
    train = df.loc[train_start_date:train_end_date]
    test = df.loc[test_start_date:test_end_date]

    # Calculating the sentiment score
    sentiment_score_list = []
    for date, row in train.iterrows():
        sentiment_score = np.asarray([df.loc[date, 'compound'], df.loc[date, 'neg'],
                                      df.loc[date, 'neu'], df.loc[date, 'pos']])
        #sentiment_score = np.asarray([df.loc[date, 'neg'], df.loc[date, 'pos']])
        sentiment_score_list.append(sentiment_score)
    numpy_df_train = np.asarray(sentiment_score_list)

    sentiment_score_list = []
    for date, row in test.iterrows():
        sentiment_score = np.asarray([df.loc[date, 'compound'], df.loc[date, 'neg'],
                                      df.loc[date, 'neu'], df.loc[date, 'pos']])
        #sentiment_score = np.asarray([df.loc[date, 'neg'], df.loc[date, 'pos']])
        sentiment_score_list.append(sentiment_score)
    numpy_df_test = np.asarray(sentiment_score_list)

    # Generating the model
    mlpc = MLPClassifier(hidden_layer_sizes=(100, 200, 100), activation='relu',
                         solver='lbfgs', alpha=0.005, learning_rate_init=0.001,
                         shuffle=False)  # span = 20 # best 1
    mlpc.fit(numpy_df_train, train['prices'])
    prediction = mlpc.predict(numpy_df_test)
    prediction_list.append(prediction)
    #print(train_start_date + ' ' + train_end_date + ' ' + test_start_date + ' ' + test_end_date)

    idx = pd.date_range(test_start_date, test_end_date)
    #print(year)
    predictions_df_list = pd.DataFrame(data=prediction[0:], index=idx, columns=['prices'])
    difference_test_predicted_prices = offset_value(test_start_date, test, predictions_df_list)

    # Adding the offset to all the predicted price values
    predictions_df_list['prices'] = predictions_df_list['prices'] + difference_test_predicted_prices
    predictions_df_list

    # Smoothing the plot
    span = 20
    # modification for ewma
    sma = predictions_df_list['prices'].rolling(window=span, min_periods=span).mean()[:span]
    rest = predictions_df_list['prices'][span:]
    df_ewm = pd.concat([sma, rest]).ewm(span=span, adjust=False).mean()
    predictions_df_list['ewma'] = df_ewm
    # end of modifications

    predictions_df_list['actual_value'] = test['prices']
    # modification for actual_value_ewma
    sma = predictions_df_list['actual_value'].rolling(window=span, min_periods=span).mean()[:span]
    rest = predictions_df_list['actual_value'][span:]
    df_actual_value_ewma = pd.concat([sma, rest]).ewm(span=span, adjust=False).mean()
    #predictions_df_list['actual_value_ewma'] = pd.DataFrame.ewm(predictions_df_list['actual_value'], span=20)
    predictions_df_list['actual_value_ewma'] = df_actual_value_ewma

    # Changing column names
    predictions_df_list.columns = ['predicted_price', 'average_predicted_price',
                                   'actual_price', 'average_actual_price']
    predictions_df_list.plot()

    predictions_df_list_average = predictions_df_list[['average_predicted_price', 'average_actual_price']]
    predictions_df_list_average.plot()
    # predictions_df_list.show()
This works well. (The offset_value() helper isn't included here, but it does its job; a rough sketch of what it does follows.)
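For reference, a minimal sketch of what offset_value() does; the body below is an assumption based on how it is called in the loop, not the original helper:

def offset_value(test_start_date, test, predictions_df):
    # Hypothetical reconstruction (not the original helper): return the gap between
    # the first actual test price and the first predicted price, so the predicted
    # series can be shifted onto the same level as the actual prices.
    # test_start_date is kept only to match how the function is called above.
    first_actual = test['prices'].iloc[0]
    first_predicted = predictions_df['prices'].iloc[0]
    return first_actual - first_predicted

Now I want to do the same thing with an RNN, so I have this: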
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

# load the dataset
years = [2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016]
prediction_list = []
for year in years:
    # Splitting the training and testing data
    train_start_date = str(year) + '-01-01'
    train_end_date = str(year) + '-10-31'
    test_start_date = str(year) + '-11-01'
    test_end_date = str(year) + '-12-31'
    train = df.loc[train_start_date:train_end_date]
    test = df.loc[test_start_date:test_end_date]

    # Calculating the sentiment score
    sentiment_score_list = []
    for date, row in train.iterrows():
        sentiment_score = np.asarray([df.loc[date, 'compound'], df.loc[date, 'neg'],
                                      df.loc[date, 'neu'], df.loc[date, 'pos']])
        #sentiment_score = np.asarray([df.loc[date, 'neg'], df.loc[date, 'pos']])
        sentiment_score_list.append(sentiment_score)
    numpy_df_train = np.asarray(sentiment_score_list)

    sentiment_score_list = []
    for date, row in test.iterrows():
        sentiment_score = np.asarray([df.loc[date, 'compound'], df.loc[date, 'neg'],
                                      df.loc[date, 'neu'], df.loc[date, 'pos']])
        #sentiment_score = np.asarray([df.loc[date, 'neg'], df.loc[date, 'pos']])
        sentiment_score_list.append(sentiment_score)
    numpy_df_test = np.asarray(sentiment_score_list)

    # Create and fit the network (plain Dense layers for now, not yet an actual LSTM)
    model = Sequential()
    model.add(Dense(64, input_dim=4, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])
    model.fit(numpy_df_test, y_train,   # <-- the ValueError below is raised here
              epochs=20,
              batch_size=128)
    score = model.evaluate(x_test, y_test, batch_size=128)
and I get the following error:
ValueError: Input arrays should have the same number of samples as target arrays. Found 61 input samples and 2922 target samples.
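From the error, I gather the features and targets I pass to fit() do not have the same number of rows: numpy_df_test covers only the 61 days of one November–December test window, while the y_train I pass (built elsewhere, not shown above) spans the whole DataFrame, 2922 rows. A quick shape check right before model.fit(), inside the loop, illustrates what Keras is complaining about; here I use train['prices'] as a stand-in target the way the MLP version does, which is an assumption on my part:

# Sketch: check that features and targets have matching first dimensions
# before calling model.fit(). train['prices'] is a stand-in target that
# mirrors the MLP version; the y_train I actually pass is 2922 rows long.
X = numpy_df_train              # (number of training days, 4) sentiment features
y = train['prices'].values      # one label per training day -> same length as X

print(X.shape, y.shape)         # first dimensions must match for model.fit()

# The ValueError comes from pairing the 61-row test features with the
# 2922-row target, i.e.:
# model.fit(numpy_df_test, y_train)   # 61 input samples vs 2922 target samples

So: should I simply be fitting on numpy_df_train with a target of the same length (and reshaping the input to 3D once I switch to a real LSTM layer), or is something else going wrong here?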