Predicting Historical Volatility for the S&P500

Predicting Historical Volatility is easy with RapidMiner. The attached process uses RapidMiner to recreate the approach from the research paper "Options trading driven by volatility directional accuracy", which shows how to predict historical volatility (HV) for the S&P500. The idea was to predict the HV 5 trading days ahead, from Friday to Friday, and then compare it with the Implied Volatility (IV) of the S&P500. Depending on whether the directions of HV and IV converge or diverge, you would execute a specific type of option trade.

I did take some liberties with the research paper. At first I used a Neural Net algorithm to train the data and I got a greater than 50% directional accuracy. When I switched to an SVM with an RBF kernel, I got it over 60%. Then, when I added optimization for the Training and Testing Windows, gamma, and C parameters, I managed to get this over 70%.

I did test this "live" by paper trading it and managed to be right 7 out of 10 times. I did not execute any actual trades.

    <?xml version="1.0" encoding="UTF-8"?><process version="7.6.003">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="7.6.003" expanded="true" name="Process">
        <process expanded="true">
          <!-- Subprocess: reads the downloaded index CSV, lags the Close column by one row,
               derives a log-return attribute, and replaces missing numeric values with zero. -->
          <operator activated="true" class="subprocess" compatibility="7.6.003" expanded="true" height="82" name="Load S&amp;P500 Data for HV calc" width="90" x="45" y="34">
            <process expanded="true">
              <!-- Reads a comma-separated file from a hard-coded absolute path on the author's machine.
                   NOTE(review): the csv_file value must be changed to a local path before this process can run elsewhere. -->
              <operator activated="true" class="read_csv" compatibility="7.6.003" expanded="true" height="68" name="Read Downloaded S&amp;P500" width="90" x="45" y="34">
                <parameter key="csv_file" value="C:\Users\Thomas Ott\Downloads\^GSPC.csv"/>
                <parameter key="column_separators" value=","/>
                <parameter key="first_row_as_names" value="false"/>
                <list key="annotations">
                  <parameter key="0" value="Name"/>
                </list>
                <list key="data_set_meta_data_information">
                  <parameter key="0" value="Date.true.polynominal.attribute"/>
                  <parameter key="1" value="Open.true.real.attribute"/>
                  <parameter key="2" value="High.true.real.attribute"/>
                  <parameter key="3" value="Low.true.real.attribute"/>
                  <parameter key="4" value="Close.true.real.attribute"/>
                  <parameter key="5" value="Adj Close.true.real.attribute"/>
                  <parameter key="6" value="Volume.true.real.attribute"/>
                </list>
                <description align="center" color="transparent" colored="false" width="126">Due to Yahoo changes, must download CSV file manually for now!</description>
              </operator>
              <!-- Lags the Close attribute by 1; the next operator refers to the result as [Close-1]. -->
              <operator activated="true" class="series:lag_series" compatibility="7.4.000" expanded="true" height="82" name="Lag Series" width="90" x="179" y="34">
                <list key="attributes">
                  <parameter key="Close" value="1"/>
                </list>
                <description align="center" color="transparent" colored="false" width="126">Lag Series for LN calculation</description>
              </operator>
              <!-- Generates the attribute "Log Returns" as ln(Close / previous Close). -->
              <operator activated="true" class="generate_attributes" compatibility="7.6.003" expanded="true" height="82" name="Calc Natural Log" width="90" x="313" y="34">
                <list key="function_descriptions">
                  <parameter key="Log Returns" value="ln(Close/[Close-1])"/>
                </list>
              </operator>
              <!-- Inverted single-attribute selection: keeps everything except "^GSPC_CLOSE-1".
                   NOTE(review): the lagged attribute is referenced above as [Close-1], so this filter
                   presumably matches nothing and the lag helper column stays in the data. Confirm intent. -->
              <operator activated="true" class="select_attributes" compatibility="7.6.003" expanded="true" height="82" name="Select Attributes for ETL" width="90" x="447" y="34">
                <parameter key="attribute_filter_type" value="single"/>
                <parameter key="attribute" value="^GSPC_CLOSE-1"/>
                <parameter key="invert_selection" value="true"/>
              </operator>
              <!-- Replaces missing values in numeric attributes with zero. -->
              <operator activated="true" class="replace_missing_values" compatibility="7.6.003" expanded="true" height="103" name="Replace Missing Values" width="90" x="581" y="34">
                <parameter key="attribute_filter_type" value="value_type"/>
                <parameter key="value_type" value="numeric"/>
                <parameter key="default" value="zero"/>
                <list key="columns"/>
              </operator>
              <!-- Linear chain: read, lag, log return, select, replace missing, then subprocess output 1. -->
              <connect from_op="Read Downloaded S&amp;P500" from_port="output" to_op="Lag Series" to_port="example set input"/>
              <connect from_op="Lag Series" from_port="example set output" to_op="Calc Natural Log" to_port="example set input"/>
              <connect from_op="Calc Natural Log" from_port="example set output" to_op="Select Attributes for ETL" to_port="example set input"/>
              <connect from_op="Select Attributes for ETL" from_port="example set output" to_op="Replace Missing Values" to_port="example set input"/>
              <connect from_op="Replace Missing Values" from_port="example set output" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
            <description align="center" color="transparent" colored="false" width="126">Load CSV data</description>
          </operator>
          <!-- Subprocess: builds a 5-row window over the input, computes the standard deviation of the
               windowed log returns, scales it into "HV 5day Daily", keeps rows whose week-relative day
               number is 6, and adds a lagged copy and a 13-row moving average of the HV attribute. -->
          <operator activated="true" class="subprocess" compatibility="7.6.003" expanded="true" height="82" name="Prep Data for X-Val" width="90" x="179" y="34">
            <process expanded="true">
              <!-- Windowing with window_size 5; downstream operators reference the windowed attributes
                   with suffixes -0 to -4. create_label is not set on this operator. -->
              <operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing" width="90" x="45" y="34">
                <parameter key="window_size" value="5"/>
                <parameter key="label_attribute" value="^GSPC_CLOSE"/>
              </operator>
              <!-- Adds attribute "StDev": standard deviation across the five windowed Log Returns attributes. -->
              <operator activated="true" class="generate_aggregation" compatibility="7.6.003" expanded="true" height="82" name="Generate Aggregation" width="90" x="179" y="34">
                <parameter key="attribute_name" value="StDev"/>
                <parameter key="attribute_filter_type" value="subset"/>
                <parameter key="attributes" value="Log Returns-0|Log Returns-1|Log Returns-2|Log Returns-3|Log Returns-4"/>
                <parameter key="aggregation_function" value="standard_deviation"/>
              </operator>
              <!-- Duplicates the example set so two branches can be prepared and joined again on Date-0. -->
              <operator activated="true" class="multiply" compatibility="7.6.003" expanded="true" height="103" name="Multiply" width="90" x="313" y="34"/>
              <!-- Branch 2: keeps only Date-0 and StDev. -->
              <operator activated="true" class="select_attributes" compatibility="7.6.003" expanded="true" height="82" name="Select StDev and Date" width="90" x="514" y="187">
                <parameter key="attribute_filter_type" value="subset"/>
                <parameter key="attributes" value="Date-0|StDev"/>
              </operator>
              <!-- Branch 1: keeps the listed subset.
                   NOTE(review): the names with suffix -9 and the ^GSPC_CLOSE names are not produced by any
                   operator visible in this process (window size is 5); presumably only Date-0 and Close-0
                   actually match. Confirm. -->
              <operator activated="true" class="select_attributes" compatibility="7.6.003" expanded="true" height="82" name="Select Close and LN" width="90" x="514" y="34">
                <parameter key="attribute_filter_type" value="subset"/>
                <parameter key="attributes" value="Date-0|Date-9|Log Returns-9|^GSPC_CLOSE-0|^GSPC_CLOSE-9|Close-0"/>
              </operator>
              <!-- Adds "HVdummy" = StDev * sqrt(252); presumably annualises daily volatility using
                   252 trading days per year (assumption, not stated in the process). -->
              <operator activated="true" class="generate_attributes" compatibility="7.6.003" expanded="true" height="82" name="Generate HV dummy for manipulation" width="90" x="648" y="187">
                <list key="function_descriptions">
                  <parameter key="HVdummy" value="StDev*sqrt(252)"/>
                </list>
              </operator>
              <!-- Outer join of the two branches, keyed on Date-0 rather than on an id attribute. -->
              <operator activated="true" class="join" compatibility="7.6.003" expanded="true" height="82" name="Join" width="90" x="715" y="34">
                <parameter key="join_type" value="outer"/>
                <parameter key="use_id_attribute_as_key" value="false"/>
                <list key="key_attributes">
                  <parameter key="Date-0" value="Date-0"/>
                </list>
              </operator>
              <!-- Renames HVdummy to "HV 5day Daily", Close-0 to the close column name, and Date-0 to "Date". -->
              <operator activated="true" class="rename" compatibility="7.6.003" expanded="true" height="82" name="Rename" width="90" x="782" y="136">
                <parameter key="old_name" value="HVdummy"/>
                <parameter key="new_name" value="HV 5day Daily"/>
                <list key="rename_additional_attributes">
                  <parameter key="Close-0" value="S&amp;P500_Close"/>
                  <parameter key="Date-0" value="Date"/>
                </list>
              </operator>
              <!-- Keeps the final attribute subset.
                   NOTE(review): "HV 10day Daily" is not created anywhere in this subprocess; it looks like a leftover. -->
              <operator activated="true" class="select_attributes" compatibility="7.6.003" expanded="true" height="82" name="Select Final Attributes" width="90" x="916" y="136">
                <parameter key="attribute_filter_type" value="subset"/>
                <parameter key="attributes" value="Date|S&amp;P500_Close|HV 10day Daily|HV 5day Daily"/>
              </operator>
              <!-- Parses the nominal Date attribute using the pattern yyyy-MM-dd. -->
              <operator activated="true" class="nominal_to_date" compatibility="7.6.003" expanded="true" height="82" name="Nominal to Date" width="90" x="1050" y="136">
                <parameter key="attribute_name" value="Date"/>
                <parameter key="date_format" value="yyyy-MM-dd"/>
              </operator>
              <!-- Derives a numeric day-of-week value from Date while keeping the original Date attribute;
                   the filter below refers to the derived attribute as Date_day. -->
              <operator activated="true" class="date_to_numerical" compatibility="7.6.003" expanded="true" height="82" name="Date to Numerical" width="90" x="1184" y="136">
                <parameter key="attribute_name" value="Date"/>
                <parameter key="time_unit" value="day"/>
                <parameter key="day_relative_to" value="week"/>
                <parameter key="keep_old_attribute" value="true"/>
              </operator>
              <!-- Keeps only rows where Date_day equals 6; presumably Friday, matching the
                   Friday-to-Friday horizon described in the accompanying text. TODO confirm day numbering. -->
              <operator activated="true" class="filter_examples" compatibility="7.6.003" expanded="true" height="103" name="Filter Examples" width="90" x="1318" y="136">
                <list key="filters_list">
                  <parameter key="filters_entry_key" value="Date_day.eq.6"/>
                </list>
              </operator>
              <!-- Drops the helper attribute Date_day (inverted single selection). -->
              <operator activated="true" class="select_attributes" compatibility="7.6.003" expanded="true" height="82" name="Toss out Date_day" width="90" x="1452" y="136">
                <parameter key="attribute_filter_type" value="single"/>
                <parameter key="attribute" value="Date_day"/>
                <parameter key="invert_selection" value="true"/>
              </operator>
              <!-- Lags "HV 5day Daily" by 1 row; the result is later renamed from "HV 5day Daily-1" to "Naive HV". -->
              <operator activated="true" class="series:lag_series" compatibility="7.4.000" expanded="true" height="82" name="Lag HV" width="90" x="1586" y="136">
                <list key="attributes">
                  <parameter key="HV 5day Daily" value="1"/>
                </list>
              </operator>
              <!-- Moving average of "HV 5day Daily" with window width 13; later renamed to "HV MA13". -->
              <operator activated="true" class="series:moving_average" compatibility="7.4.000" expanded="true" height="82" name="Moving Average" width="90" x="1720" y="136">
                <parameter key="attribute_name" value="HV 5day Daily"/>
                <parameter key="window_width" value="13"/>
              </operator>
              <!-- Replaces missing values in the moving-average and lagged attributes with zero. -->
              <operator activated="true" class="replace_missing_values" compatibility="7.6.003" expanded="true" height="103" name="Replace Missing Values (2)" width="90" x="1854" y="136">
                <parameter key="attribute_filter_type" value="subset"/>
                <parameter key="attributes" value="average(HV 5day Daily)|HV 5day Daily-1"/>
                <parameter key="default" value="zero"/>
                <list key="columns"/>
              </operator>
              <!-- Gives the two derived attributes readable names: "HV MA13" and "Naive HV". -->
              <operator activated="true" class="rename" compatibility="7.6.003" expanded="true" height="82" name="Rename Attributes Humanely" width="90" x="1988" y="136">
                <parameter key="old_name" value="average(HV 5day Daily)"/>
                <parameter key="new_name" value="HV MA13"/>
                <list key="rename_additional_attributes">
                  <parameter key="HV 5day Daily-1" value="Naive HV"/>
                </list>
              </operator>
              <!-- Wiring: input 1 feeds Windowing and Generate Aggregation, Multiply splits into the two
                   select branches, Join merges them, and the remaining operators run as a linear chain to output 1. -->
              <connect from_port="in 1" to_op="Windowing" to_port="example set input"/>
              <connect from_op="Windowing" from_port="example set output" to_op="Generate Aggregation" to_port="example set input"/>
              <connect from_op="Generate Aggregation" from_port="example set output" to_op="Multiply" to_port="input"/>
              <connect from_op="Multiply" from_port="output 1" to_op="Select Close and LN" to_port="example set input"/>
              <connect from_op="Multiply" from_port="output 2" to_op="Select StDev and Date" to_port="example set input"/>
              <connect from_op="Select StDev and Date" from_port="example set output" to_op="Generate HV dummy for manipulation" to_port="example set input"/>
              <connect from_op="Select Close and LN" from_port="example set output" to_op="Join" to_port="left"/>
              <connect from_op="Generate HV dummy for manipulation" from_port="example set output" to_op="Join" to_port="right"/>
              <connect from_op="Join" from_port="join" to_op="Rename" to_port="example set input"/>
              <connect from_op="Rename" from_port="example set output" to_op="Select Final Attributes" to_port="example set input"/>
              <connect from_op="Select Final Attributes" from_port="example set output" to_op="Nominal to Date" to_port="example set input"/>
              <connect from_op="Nominal to Date" from_port="example set output" to_op="Date to Numerical" to_port="example set input"/>
              <connect from_op="Date to Numerical" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
              <connect from_op="Filter Examples" from_port="example set output" to_op="Toss out Date_day" to_port="example set input"/>
              <connect from_op="Toss out Date_day" from_port="example set output" to_op="Lag HV" to_port="example set input"/>
              <connect from_op="Lag HV" from_port="example set output" to_op="Moving Average" to_port="example set input"/>
              <connect from_op="Moving Average" from_port="example set output" to_op="Replace Missing Values (2)" to_port="example set input"/>
              <connect from_op="Replace Missing Values (2)" from_port="example set output" to_op="Rename Attributes Humanely" to_port="example set input"/>
              <connect from_op="Rename Attributes Humanely" from_port="example set output" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
            <description align="center" color="transparent" colored="false" width="126">Data Prep</description>
          </operator>
          <!-- Windows the prepared data with window_size 1 and creates a label from "HV 5day Daily".
               add_incomplete_windows is enabled. The "original" port of this operator is also wired
               to "Window for Prediction" at the top level. -->
          <operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Window for Training" width="90" x="313" y="187">
            <parameter key="window_size" value="1"/>
            <parameter key="create_label" value="true"/>
            <parameter key="label_attribute" value="HV 5day Daily"/>
            <parameter key="add_incomplete_windows" value="true"/>
          </operator>
          <!-- Grid search over four parameters: the Backtesting training and test window widths and the
               SVM kernel_gamma and C. Each combination is scored by the sliding-window validation inside;
               the Log operator records the parameter values and the resulting performance. -->
          <operator activated="true" class="optimize_parameters_grid" compatibility="7.6.003" expanded="true" height="124" name="Optimize Parameters (Grid)" width="90" x="447" y="34">
            <!-- Range values below are presumably [min;max;steps;scale]; verify against the operator documentation.
                 NOTE(review): the C range starts at 0; confirm that a zero complexity constant is intended. -->
            <list key="parameters">
              <parameter key="Backtesting.training_window_width" value="[6;12;4;linear]"/>
              <parameter key="Backtesting.test_window_width" value="[6;12;4;linear]"/>
              <parameter key="SVM for HV Calc.kernel_gamma" value="[.01;1000;5;logarithmic]"/>
              <parameter key="SVM for HV Calc.C" value="[0;1000;5;linear]"/>
            </list>
            <process expanded="true">
              <!-- Sliding-window validation with cumulative training enabled. The window widths set here
                   (6 and 6) are among the parameters the enclosing grid search varies. -->
              <operator activated="true" class="series:sliding_window_validation" compatibility="7.4.000" expanded="true" height="124" name="Backtesting" width="90" x="179" y="34">
                <parameter key="training_window_width" value="6"/>
                <parameter key="test_window_width" value="6"/>
                <parameter key="cumulative_training" value="true"/>
                <!-- Training side: fits an SVM with a radial kernel; kernel_gamma and C are varied by the grid search. -->
                <process expanded="true">
                  <operator activated="true" class="support_vector_machine" compatibility="7.6.003" expanded="true" height="124" name="SVM for HV Calc" width="90" x="179" y="34">
                    <parameter key="kernel_type" value="radial"/>
                    <parameter key="kernel_gamma" value="0.10000000000000002"/>
                    <parameter key="C" value="200.0"/>
                  </operator>
                  <connect from_port="training" to_op="SVM for HV Calc" to_port="training set"/>
                  <connect from_op="SVM for HV Calc" from_port="model" to_port="model"/>
                  <portSpacing port="source_training" spacing="0"/>
                  <portSpacing port="sink_model" spacing="0"/>
                  <portSpacing port="sink_through 1" spacing="0"/>
                </process>
                <!-- Testing side: applies the model to the test window and measures forecasting performance
                     with horizon 1, using prediction_trend_accuracy as the main criterion. -->
                <process expanded="true">
                  <operator activated="true" class="apply_model" compatibility="7.1.001" expanded="true" height="82" name="Apply Model In Testing" width="90" x="45" y="34">
                    <list key="application_parameters"/>
                  </operator>
                  <operator activated="true" class="series:forecasting_performance" compatibility="7.4.000" expanded="true" height="82" name="Forecast Performance" width="90" x="246" y="34">
                    <parameter key="horizon" value="1"/>
                    <parameter key="main_criterion" value="prediction_trend_accuracy"/>
                  </operator>
                  <connect from_port="model" to_op="Apply Model In Testing" to_port="model"/>
                  <connect from_port="test set" to_op="Apply Model In Testing" to_port="unlabelled data"/>
                  <connect from_op="Apply Model In Testing" from_port="labelled data" to_op="Forecast Performance" to_port="labelled data"/>
                  <connect from_op="Forecast Performance" from_port="performance" to_port="averagable 1"/>
                  <portSpacing port="source_model" spacing="0"/>
                  <portSpacing port="source_test set" spacing="0"/>
                  <portSpacing port="source_through 1" spacing="0"/>
                  <portSpacing port="sink_averagable 1" spacing="0"/>
                  <portSpacing port="sink_averagable 2" spacing="0"/>
                </process>
              </operator>
              <!-- Logs gamma, C, both window widths, the Backtesting performance value and the
                   cumulative_training setting. The filename parameter is set to "tmp". -->
              <operator activated="true" class="log" compatibility="7.6.003" expanded="true" height="82" name="Log" width="90" x="313" y="85">
                <parameter key="filename" value="tmp"/>
                <list key="log">
                  <parameter key="Gamma" value="operator.SVM for HV Calc.parameter.kernel_gamma"/>
                  <parameter key="C" value="operator.SVM for HV Calc.parameter.C"/>
                  <parameter key="Training Width" value="operator.Backtesting.parameter.training_window_width"/>
                  <parameter key="Testing Width" value="operator.Backtesting.parameter.test_window_width"/>
                  <parameter key="Forecast Perf" value="operator.Backtesting.value.performance"/>
                  <parameter key="Culm Training" value="operator.Backtesting.parameter.cumulative_training"/>
                </list>
              </operator>
              <!-- Wiring: input 1 feeds Backtesting; its model leaves on result 1; its performance
                   passes through Log to the performance port that the grid search reads. -->
              <connect from_port="input 1" to_op="Backtesting" to_port="training"/>
              <connect from_op="Backtesting" from_port="model" to_port="result 1"/>
              <connect from_op="Backtesting" from_port="averagable 1" to_op="Log" to_port="through 1"/>
              <connect from_op="Log" from_port="through 1" to_port="performance"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="source_input 2" spacing="0"/>
              <portSpacing port="sink_performance" spacing="0"/>
              <portSpacing port="sink_result 1" spacing="0"/>
              <portSpacing port="sink_result 2" spacing="0"/>
            </process>
          </operator>
          <!-- Windows the data for scoring with window_size 1. Unlike "Window for Training",
               create_label and label_attribute are not set on this operator. -->
          <operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Window for Prediction" width="90" x="447" y="289">
            <parameter key="window_size" value="1"/>
          </operator>
          <!-- Applies the model delivered on "result 1" of the grid search to the output of
               "Window for Prediction" (see the top-level connections). No application parameters are set. -->
          <operator activated="true" class="apply_model" compatibility="7.1.001" expanded="true" height="82" name="Apply SVM model" width="90" x="648" y="136">
            <list key="application_parameters"/>
          </operator>
          <!-- Adds an attribute named "Absolute Error", computed as
               (prediction(label) minus HV 5day Daily-0) divided by HV 5day Daily-0.
               NOTE(review): the expression contains no abs() call, so the value is a signed relative
               error despite the attribute name. Confirm which one is intended. -->
          <operator activated="true" class="generate_attributes" compatibility="7.6.003" expanded="true" height="82" name="Generate Attributes" width="90" x="782" y="136">
            <list key="function_descriptions">
              <parameter key="Absolute Error" value="([prediction(label)]-[HV 5day Daily-0])/[HV 5day Daily-0]"/>
            </list>
          </operator>
          <!-- Top-level wiring. Load feeds Prep, which feeds "Window for Training". Its example set
               output goes to the grid search; its "original" port goes to "Window for Prediction".
               Process results: 1 = grid search performance, 2 = grid search parameter set,
               3 = scored data with the generated error attribute. -->
          <connect from_op="Load S&amp;P500 Data for HV calc" from_port="out 1" to_op="Prep Data for X-Val" to_port="in 1"/>
          <connect from_op="Prep Data for X-Val" from_port="out 1" to_op="Window for Training" to_port="example set input"/>
          <connect from_op="Window for Training" from_port="example set output" to_op="Optimize Parameters (Grid)" to_port="input 1"/>
          <connect from_op="Window for Training" from_port="original" to_op="Window for Prediction" to_port="example set input"/>
          <connect from_op="Optimize Parameters (Grid)" from_port="performance" to_port="result 1"/>
          <connect from_op="Optimize Parameters (Grid)" from_port="parameter" to_port="result 2"/>
          <connect from_op="Optimize Parameters (Grid)" from_port="result 1" to_op="Apply SVM model" to_port="model"/>
          <connect from_op="Window for Prediction" from_port="example set output" to_op="Apply SVM model" to_port="unlabelled data"/>
          <connect from_op="Apply SVM model" from_port="labelled data" to_op="Generate Attributes" to_port="example set input"/>
          <connect from_op="Generate Attributes" from_port="example set output" to_port="result 3"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
          <portSpacing port="sink_result 4" spacing="0"/>
        </process>
      </operator>
    </process>

The data file is here: ^GSPC

Show Comments