Neural Market Trends

Predicting Historical Volatility for the S&P500

You can read this in 15 minutes. This post has been viewed 142 times.

Here’s a great process and tutorial on how to use RapidMiner to predict historical volatility. This process was adapted from the research paper “Options Trading Driven by Volatility Directional Accuracy” (right-click to save), published in Applied Economics, 2007, 39, pages 253-260.

The intent is to forecast the direction of historical volatility (HV) one week forward every Friday and compare it with the implied volatility (IV). If the HV is forecast to rise from last week and the IV drops, then execute some type of trade. After playing with this for a while, I was able to get a better level of forecast accuracy (around 70%), which is awesome IMHO.

<?xml version="1.0" encoding="UTF-8"?><process version="7.6.003">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.6.003" expanded="true" name="Process">
    <process expanded="true">
      <!-- Stage 1: read the manually downloaded index CSV and derive a daily "Log Returns" column. -->
      <operator activated="true" class="subprocess" compatibility="7.6.003" expanded="true" height="82" name="Load S&amp;P500 Data for HV calc" width="90" x="45" y="34">
        <process expanded="true">
          <!-- Reads a comma-separated file whose row 0 is annotated as the header (Name).
               Declared columns: Date (polynominal), Open, High, Low, Close, Adj Close, Volume (real).
               NOTE: csv_file is an absolute path on the author's machine; point it at your own copy of ^GSPC.csv. -->
          <operator activated="true" class="read_csv" compatibility="7.6.003" expanded="true" height="68" name="Read Downloaded S&amp;P500" width="90" x="45" y="34">
            <parameter key="csv_file" value="C:\Users\Thomas Ott\Downloads\^GSPC.csv"/>
            <parameter key="column_separators" value=","/>
            <parameter key="first_row_as_names" value="false"/>
            <list key="annotations">
              <parameter key="0" value="Name"/>
            </list>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="Date.true.polynominal.attribute"/>
              <parameter key="1" value="Open.true.real.attribute"/>
              <parameter key="2" value="High.true.real.attribute"/>
              <parameter key="3" value="Low.true.real.attribute"/>
              <parameter key="4" value="Close.true.real.attribute"/>
              <parameter key="5" value="Adj Close.true.real.attribute"/>
              <parameter key="6" value="Volume.true.real.attribute"/>
            </list>
            <description align="center" color="transparent" colored="false" width="126">Due to Yahoo changes, must download CSV file manually for now!</description>
          </operator>
          <!-- Lags Close by one row; the lagged column is referenced as [Close-1] by the next operator. -->
          <operator activated="true" class="series:lag_series" compatibility="7.4.000" expanded="true" height="82" name="Lag Series" width="90" x="179" y="34">
            <list key="attributes">
              <parameter key="Close" value="1"/>
            </list>
            <description align="center" color="transparent" colored="false" width="126">Lag Series for LN calculation</description>
          </operator>
          <!-- Generates "Log Returns" = ln(Close / previous Close). -->
          <operator activated="true" class="generate_attributes" compatibility="7.6.003" expanded="true" height="82" name="Calc Natural Log" width="90" x="313" y="34">
            <list key="function_descriptions">
              <parameter key="Log Returns" value="ln(Close/[Close-1])"/>
            </list>
          </operator>
          <!-- Drops the lagged helper column via an inverted single-attribute filter.
               The filter previously named "^GSPC_CLOSE-1", a column that the declared CSV schema
               (Date, Open, High, Low, Close, ...) never produces, so nothing was removed.
               It now targets "Close-1", the column actually created by Lag Series. -->
          <operator activated="true" class="select_attributes" compatibility="7.6.003" expanded="true" height="82" name="Select Attributes for ETL" width="90" x="447" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="Close-1"/>
            <parameter key="invert_selection" value="true"/>
          </operator>
          <!-- Replaces missing values in all numeric attributes with zero.
               Presumably this covers the first row, which has no previous close for its log return; confirm. -->
          <operator activated="true" class="replace_missing_values" compatibility="7.6.003" expanded="true" height="103" name="Replace Missing Values" width="90" x="581" y="34">
            <parameter key="attribute_filter_type" value="value_type"/>
            <parameter key="value_type" value="numeric"/>
            <parameter key="default" value="zero"/>
            <list key="columns"/>
          </operator>
          <connect from_op="Read Downloaded S&amp;P500" from_port="output" to_op="Lag Series" to_port="example set input"/>
          <connect from_op="Lag Series" from_port="example set output" to_op="Calc Natural Log" to_port="example set input"/>
          <connect from_op="Calc Natural Log" from_port="example set output" to_op="Select Attributes for ETL" to_port="example set input"/>
          <connect from_op="Select Attributes for ETL" from_port="example set output" to_op="Replace Missing Values" to_port="example set input"/>
          <connect from_op="Replace Missing Values" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
        <description align="center" color="transparent" colored="false" width="126">Load CSV data</description>
      </operator>
      <!-- Stage 2: turn daily log returns into a weekly series with a 5-day annualized
           historical volatility, a one-week lag of it, and a 13-week moving average. -->
      <operator activated="true" class="subprocess" compatibility="7.6.003" expanded="true" height="82" name="Prep Data for X-Val" width="90" x="179" y="34">
        <process expanded="true">
          <!-- Windows the daily rows with size 5; later operators reference the windowed columns
               as "Log Returns-0" .. "Log Returns-4", "Date-0" and "Close-0".
               NOTE(review): label_attribute still names "^GSPC_CLOSE", which is not a column of the
               declared CSV schema; it is presumably ignored because create_label is not set here. Confirm. -->
          <operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing" width="90" x="45" y="34">
            <parameter key="window_size" value="5"/>
            <parameter key="label_attribute" value="^GSPC_CLOSE"/>
          </operator>
          <!-- StDev = standard deviation across the five windowed log returns of each row. -->
          <operator activated="true" class="generate_aggregation" compatibility="7.6.003" expanded="true" height="82" name="Generate Aggregation" width="90" x="179" y="34">
            <parameter key="attribute_name" value="StDev"/>
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="Log Returns-0|Log Returns-1|Log Returns-2|Log Returns-3|Log Returns-4"/>
            <parameter key="aggregation_function" value="standard_deviation"/>
          </operator>
          <!-- Duplicates the data into two branches that are re-joined on Date-0 below. -->
          <operator activated="true" class="multiply" compatibility="7.6.003" expanded="true" height="103" name="Multiply" width="90" x="313" y="34"/>
          <!-- Branch 2: keep only the date key and the standard deviation. -->
          <operator activated="true" class="select_attributes" compatibility="7.6.003" expanded="true" height="82" name="Select StDev and Date" width="90" x="514" y="187">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="Date-0|StDev"/>
          </operator>
          <!-- Branch 1: keep the date key and the most recent close of each window.
               Stale names were removed from this subset: the "-9" columns cannot exist with a
               window size of 5, and no "^GSPC_CLOSE" column is produced by the CSV reader. -->
          <operator activated="true" class="select_attributes" compatibility="7.6.003" expanded="true" height="82" name="Select Close and LN" width="90" x="514" y="34">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="Date-0|Close-0"/>
          </operator>
          <!-- HVdummy = StDev * sqrt(252); 252 is presumably the number of trading days per year (annualization). -->
          <operator activated="true" class="generate_attributes" compatibility="7.6.003" expanded="true" height="82" name="Generate HV dummy for manipulation" width="90" x="648" y="187">
            <list key="function_descriptions">
              <parameter key="HVdummy" value="StDev*sqrt(252)"/>
            </list>
          </operator>
          <!-- Outer join of the two branches on Date-0 (explicit key, not the id attribute). -->
          <operator activated="true" class="join" compatibility="7.6.003" expanded="true" height="82" name="Join" width="90" x="715" y="34">
            <parameter key="join_type" value="outer"/>
            <parameter key="use_id_attribute_as_key" value="false"/>
            <list key="key_attributes">
              <parameter key="Date-0" value="Date-0"/>
            </list>
          </operator>
          <!-- Gives the working columns their final names: HVdummy becomes "HV 5day Daily",
               Close-0 becomes the index close column and Date-0 becomes "Date". -->
          <operator activated="true" class="rename" compatibility="7.6.003" expanded="true" height="82" name="Rename" width="90" x="782" y="136">
            <parameter key="old_name" value="HVdummy"/>
            <parameter key="new_name" value="HV 5day Daily"/>
            <list key="rename_additional_attributes">
              <parameter key="Close-0" value="S&amp;P500_Close"/>
              <parameter key="Date-0" value="Date"/>
            </list>
          </operator>
          <!-- Keeps the date, the close and the 5-day HV. The former "HV 10day Daily" entry was
               removed because no operator in this process generates such a column. -->
          <operator activated="true" class="select_attributes" compatibility="7.6.003" expanded="true" height="82" name="Select Final Attributes" width="90" x="916" y="136">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="Date|S&amp;P500_Close|HV 5day Daily"/>
          </operator>
          <!-- Parses the nominal Date column using the pattern yyyy-MM-dd. -->
          <operator activated="true" class="nominal_to_date" compatibility="7.6.003" expanded="true" height="82" name="Nominal to Date" width="90" x="1050" y="136">
            <parameter key="attribute_name" value="Date"/>
            <parameter key="date_format" value="yyyy-MM-dd"/>
          </operator>
          <!-- Derives the day of the week from Date while keeping the original Date column;
               the result is referenced below as Date_day. -->
          <operator activated="true" class="date_to_numerical" compatibility="7.6.003" expanded="true" height="82" name="Date to Numerical" width="90" x="1184" y="136">
            <parameter key="attribute_name" value="Date"/>
            <parameter key="time_unit" value="day"/>
            <parameter key="day_relative_to" value="week"/>
            <parameter key="keep_old_attribute" value="true"/>
          </operator>
          <!-- Keeps only rows with Date_day equal to 6. Presumably 6 is Friday (Sunday counted as 1),
               matching the weekly Friday forecast described in the article; confirm. -->
          <operator activated="true" class="filter_examples" compatibility="7.6.003" expanded="true" height="103" name="Filter Examples" width="90" x="1318" y="136">
            <list key="filters_list">
              <parameter key="filters_entry_key" value="Date_day.eq.6"/>
            </list>
          </operator>
          <!-- Removes the helper column Date_day again (inverted single-attribute filter). -->
          <operator activated="true" class="select_attributes" compatibility="7.6.003" expanded="true" height="82" name="Toss out Date_day" width="90" x="1452" y="136">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="Date_day"/>
            <parameter key="invert_selection" value="true"/>
          </operator>
          <!-- Lags the weekly HV by one row; the result is referenced below as "HV 5day Daily-1". -->
          <operator activated="true" class="series:lag_series" compatibility="7.4.000" expanded="true" height="82" name="Lag HV" width="90" x="1586" y="136">
            <list key="attributes">
              <parameter key="HV 5day Daily" value="1"/>
            </list>
          </operator>
          <!-- Moving average of the weekly HV with a window width of 13 rows;
               the result is referenced below as "average(HV 5day Daily)". -->
          <operator activated="true" class="series:moving_average" compatibility="7.4.000" expanded="true" height="82" name="Moving Average" width="90" x="1720" y="136">
            <parameter key="attribute_name" value="HV 5day Daily"/>
            <parameter key="window_width" value="13"/>
          </operator>
          <!-- Fills missing values in the lagged HV and the moving average with zero. -->
          <operator activated="true" class="replace_missing_values" compatibility="7.6.003" expanded="true" height="103" name="Replace Missing Values (2)" width="90" x="1854" y="136">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="average(HV 5day Daily)|HV 5day Daily-1"/>
            <parameter key="default" value="zero"/>
            <list key="columns"/>
          </operator>
          <!-- Final readable names: the moving average becomes "HV MA13", the lagged HV becomes "Naive HV". -->
          <operator activated="true" class="rename" compatibility="7.6.003" expanded="true" height="82" name="Rename Attributes Humanely" width="90" x="1988" y="136">
            <parameter key="old_name" value="average(HV 5day Daily)"/>
            <parameter key="new_name" value="HV MA13"/>
            <list key="rename_additional_attributes">
              <parameter key="HV 5day Daily-1" value="Naive HV"/>
            </list>
          </operator>
          <connect from_port="in 1" to_op="Windowing" to_port="example set input"/>
          <connect from_op="Windowing" from_port="example set output" to_op="Generate Aggregation" to_port="example set input"/>
          <connect from_op="Generate Aggregation" from_port="example set output" to_op="Multiply" to_port="input"/>
          <connect from_op="Multiply" from_port="output 1" to_op="Select Close and LN" to_port="example set input"/>
          <connect from_op="Multiply" from_port="output 2" to_op="Select StDev and Date" to_port="example set input"/>
          <connect from_op="Select StDev and Date" from_port="example set output" to_op="Generate HV dummy for manipulation" to_port="example set input"/>
          <connect from_op="Select Close and LN" from_port="example set output" to_op="Join" to_port="left"/>
          <connect from_op="Generate HV dummy for manipulation" from_port="example set output" to_op="Join" to_port="right"/>
          <connect from_op="Join" from_port="join" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_op="Select Final Attributes" to_port="example set input"/>
          <connect from_op="Select Final Attributes" from_port="example set output" to_op="Nominal to Date" to_port="example set input"/>
          <connect from_op="Nominal to Date" from_port="example set output" to_op="Date to Numerical" to_port="example set input"/>
          <connect from_op="Date to Numerical" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_op="Toss out Date_day" to_port="example set input"/>
          <connect from_op="Toss out Date_day" from_port="example set output" to_op="Lag HV" to_port="example set input"/>
          <connect from_op="Lag HV" from_port="example set output" to_op="Moving Average" to_port="example set input"/>
          <connect from_op="Moving Average" from_port="example set output" to_op="Replace Missing Values (2)" to_port="example set input"/>
          <connect from_op="Replace Missing Values (2)" from_port="example set output" to_op="Rename Attributes Humanely" to_port="example set input"/>
          <connect from_op="Rename Attributes Humanely" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
        <description align="center" color="transparent" colored="false" width="126">Data Prep</description>
      </operator>
      <!-- Windows the weekly series with size 1 and creates a label from "HV 5day Daily";
           incomplete windows are kept (add_incomplete_windows = true).
           The windowed output feeds the grid optimizer; the "original" port feeds Window for Prediction. -->
      <operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Window for Training" width="90" x="313" y="187">
        <parameter key="window_size" value="1"/>
        <parameter key="create_label" value="true"/>
        <parameter key="label_attribute" value="HV 5day Daily"/>
        <parameter key="add_incomplete_windows" value="true"/>
      </operator>
      <!-- Grid search over four parameters of the nested operators:
           Backtesting training and test window widths (6 to 12, linear),
           SVM kernel_gamma (.01 to 1000, logarithmic) and SVM C (0 to 1000, linear).
           Delivers the performance, the chosen parameter set and the model from Backtesting. -->
      <operator activated="true" class="optimize_parameters_grid" compatibility="7.6.003" expanded="true" height="124" name="Optimize Parameters (Grid)" width="90" x="447" y="34">
        <list key="parameters">
          <parameter key="Backtesting.training_window_width" value="[6;12;4;linear]"/>
          <parameter key="Backtesting.test_window_width" value="[6;12;4;linear]"/>
          <parameter key="SVM for HV Calc.kernel_gamma" value="[.01;1000;5;logarithmic]"/>
          <parameter key="SVM for HV Calc.C" value="[0;1000;5;linear]"/>
        </list>
        <process expanded="true">
          <!-- Sliding window validation with cumulative training. The window widths written here (12/12)
               are among the parameters listed in the enclosing grid. The first inner process trains
               the model, the second applies it to the test window and measures forecast performance. -->
          <operator activated="true" class="series:sliding_window_validation" compatibility="7.4.000" expanded="true" height="124" name="Backtesting" width="90" x="179" y="34">
            <parameter key="training_window_width" value="12"/>
            <parameter key="test_window_width" value="12"/>
            <parameter key="cumulative_training" value="true"/>
            <process expanded="true">
              <!-- Support vector machine with a radial kernel; kernel_gamma and C are also listed in the grid. -->
              <operator activated="true" class="support_vector_machine" compatibility="7.6.003" expanded="true" height="124" name="SVM for HV Calc" width="90" x="179" y="34">
                <parameter key="kernel_type" value="radial"/>
                <parameter key="kernel_gamma" value="0.01"/>
                <parameter key="C" value="400.0"/>
              </operator>
              <connect from_port="training" to_op="SVM for HV Calc" to_port="training set"/>
              <connect from_op="SVM for HV Calc" from_port="model" to_port="model"/>
              <portSpacing port="source_training" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
            </process>
            <process expanded="true">
              <!-- Applies the trained model to the test window. -->
              <operator activated="true" class="apply_model" compatibility="7.1.001" expanded="true" height="82" name="Apply Model In Testing" width="90" x="45" y="34">
                <list key="application_parameters"/>
              </operator>
              <!-- Forecasting performance with horizon 1; the main criterion is prediction_trend_accuracy. -->
              <operator activated="true" class="series:forecasting_performance" compatibility="7.4.000" expanded="true" height="82" name="Forecast Performance" width="90" x="246" y="34">
                <parameter key="horizon" value="1"/>
                <parameter key="main_criterion" value="prediction_trend_accuracy"/>
              </operator>
              <connect from_port="model" to_op="Apply Model In Testing" to_port="model"/>
              <connect from_port="test set" to_op="Apply Model In Testing" to_port="unlabelled data"/>
              <connect from_op="Apply Model In Testing" from_port="labelled data" to_op="Forecast Performance" to_port="labelled data"/>
              <connect from_op="Forecast Performance" from_port="performance" to_port="averagable 1"/>
              <portSpacing port="source_model" spacing="0"/>
              <portSpacing port="source_test set" spacing="0"/>
              <portSpacing port="source_through 1" spacing="0"/>
              <portSpacing port="sink_averagable 1" spacing="0"/>
              <portSpacing port="sink_averagable 2" spacing="0"/>
            </process>
          </operator>
          <!-- Logs gamma, C, both window widths, the Backtesting performance value and the
               cumulative_training setting; the filename parameter is "tmp". -->
          <operator activated="true" class="log" compatibility="7.6.003" expanded="true" height="82" name="Log" width="90" x="313" y="85">
            <parameter key="filename" value="tmp"/>
            <list key="log">
              <parameter key="Gamma" value="operator.SVM for HV Calc.parameter.kernel_gamma"/>
              <parameter key="C" value="operator.SVM for HV Calc.parameter.C"/>
              <parameter key="Training Width" value="operator.Backtesting.parameter.training_window_width"/>
              <parameter key="Testing Width" value="operator.Backtesting.parameter.test_window_width"/>
              <parameter key="Forecast Perf" value="operator.Backtesting.value.performance"/>
              <parameter key="Culm Training" value="operator.Backtesting.parameter.cumulative_training"/>
            </list>
          </operator>
          <connect from_port="input 1" to_op="Backtesting" to_port="training"/>
          <connect from_op="Backtesting" from_port="model" to_port="result 1"/>
          <connect from_op="Backtesting" from_port="averagable 1" to_op="Log" to_port="through 1"/>
          <connect from_op="Log" from_port="through 1" to_port="performance"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_performance" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
      <!-- Windows the un-windowed weekly series with size 1 and no label setting;
           its output is the unlabelled data scored by Apply SVM model. -->
      <operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Window for Prediction" width="90" x="447" y="289">
        <parameter key="window_size" value="1"/>
      </operator>
      <!-- Applies the model delivered by the grid optimizer to the prediction windows. -->
      <operator activated="true" class="apply_model" compatibility="7.1.001" expanded="true" height="82" name="Apply SVM model" width="90" x="648" y="136">
        <list key="application_parameters"/>
      </operator>
      <!-- Adds a column named "Absolute Error" computed as (prediction - actual HV) / actual HV.
           NOTE(review): as written this is a signed relative error, not an absolute error.
           The column name is part of the delivered result, so it is left unchanged here. -->
      <operator activated="true" class="generate_attributes" compatibility="7.6.003" expanded="true" height="82" name="Generate Attributes" width="90" x="782" y="136">
        <list key="function_descriptions">
          <parameter key="Absolute Error" value="([prediction(label)]-[HV 5day Daily-0])/[HV 5day Daily-0]"/>
        </list>
      </operator>
      <!-- Top-level wiring: load, then data prep, then Window for Training.
           The windowed set goes to the grid optimizer; the "original" port goes to Window for Prediction.
           Result 1 = optimizer performance, result 2 = optimizer parameter set,
           result 3 = predictions from the optimizer's model plus the generated error column. -->
      <connect from_op="Load S&amp;P500 Data for HV calc" from_port="out 1" to_op="Prep Data for X-Val" to_port="in 1"/>
      <connect from_op="Prep Data for X-Val" from_port="out 1" to_op="Window for Training" to_port="example set input"/>
      <connect from_op="Window for Training" from_port="example set output" to_op="Optimize Parameters (Grid)" to_port="input 1"/>
      <connect from_op="Window for Training" from_port="original" to_op="Window for Prediction" to_port="example set input"/>
      <connect from_op="Optimize Parameters (Grid)" from_port="performance" to_port="result 1"/>
      <connect from_op="Optimize Parameters (Grid)" from_port="parameter" to_port="result 2"/>
      <connect from_op="Optimize Parameters (Grid)" from_port="result 1" to_op="Apply SVM model" to_port="model"/>
      <connect from_op="Window for Prediction" from_port="example set output" to_op="Apply SVM model" to_port="unlabelled data"/>
      <connect from_op="Apply SVM model" from_port="labelled data" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_port="result 3"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
    </process>
  </operator>
</process>

The data file: ^GSPC.csv

♥Share the love: Digg, StumbleUpon, Reddit

Commentary