Sep 25 2009

Asian Call Option Pricing with Trust

Asian Call Options are valued based on their average price over a time-period. This program estimates an Asian Call Option that expires 250 periods from now, based on a stock currently at $50, with a $50 strike, and a volatility of 0.4. It runs 500,000 simulations, and averages their average price to get the price of the option.

Not too realistic, but fun to play with.

#include <vector_functions.h>
 
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
 
#include <thrust/transform_reduce.h>
 
// Mersenne Twister code reproduced & modified from: 
//										http://svn.jcornwall.me.uk/applications/MersenneTwisterGPU/
//										http://www.jcornwall.me.uk/2009/04/mersenne-twisters-in-cuda/
#include <cassert>
#include <cstdio>
#include <vector>
 
#define CUDA_SAFE_CALL(call)                                        \
  do {                                                              \
    cudaError err = call;                                           \
    if( cudaSuccess != err) {                                       \
      fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \
        __FILE__, __LINE__, cudaGetErrorString( err) );             \
      exit(EXIT_FAILURE);                                           \
    }                                                               \
  } while(0)
 
#define MT_MM     9
#define MT_NN     19
#define MT_WMASK  0xFFFFFFFFU
#define MT_UMASK  0xFFFFFFFEU
#define MT_LMASK  0x1U
#define MT_RNG_COUNT 32768
#define MT_SHIFT0 12
#define MT_SHIFTB 7
#define MT_SHIFTC 15
#define MT_SHIFT1 18
 
// Record format for MersenneTwister.dat, created by spawnTwisters.c
struct mt_struct_stripped {
	unsigned int matrix_a;
	unsigned int mask_b;
	unsigned int mask_c;
	unsigned int seed;
};
 
// Per-thread state object for a single twister.
struct MersenneTwisterState {
	unsigned int mt[MT_NN];
	int iState;
	unsigned int mti1;
};
 
// Preloaded, offline-generated seed data structure.
__device__ static mt_struct_stripped MT[MT_RNG_COUNT];
 
__device__ unsigned int MersenneTwisterGenerate(MersenneTwisterState &state, unsigned int threadID) {
	int iState1 = state.iState + 1;
	int iStateM = state.iState + MT_MM;
 
	if(iState1 >= MT_NN) iState1 -= MT_NN;
	if(iStateM >= MT_NN) iStateM -= MT_NN;
 
	unsigned int mti = state.mti1;
	state.mti1 = state.mt[iState1];
	unsigned int mtiM = state.mt[iStateM];
 
	unsigned int x = (mti & MT_UMASK) | (state.mti1 & MT_LMASK);
	x = mtiM ^ (x >> 1) ^ ((x & 1) ? MT[threadID].matrix_a : 0);
	state.mt[state.iState] = x;
	state.iState = iState1;
 
	// Tempering transformation.
	x ^= (x >> MT_SHIFT0);
	x ^= (x << MT_SHIFTB) & MT[threadID].mask_b;
	x ^= (x << MT_SHIFTC) & MT[threadID].mask_c;
	x ^= (x >> MT_SHIFT1);
 
	return x;
}
 
#define TWISTER_WARM_UP 0
 
__device__ void MersenneTwisterInitialize(MersenneTwisterState &state, unsigned int threadID) {
	state.mt[0] = MT[threadID].seed;
	for(int i = 1; i < MT_NN; ++ i) {
		state.mt[i] = (1812433253U * (state.mt[i - 1] ^ (state.mt[i - 1] >> 30)) + i) & MT_WMASK;
	}
 
	state.iState = 0;
	state.mti1 = state.mt[0];
 
	// warm up the twister
        #if TWISTER_WARM_UP
	for(int i = 0; i < 10000; ++ i) {
		MersenneTwisterGenerate(state, threadID);
	}
        #endif
 
}
 
// 
__global__ void TestMersenneTwister(float *outArr, int nNumbers) {
	unsigned int tid = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;
 
	MersenneTwisterState mtState;
	MersenneTwisterInitialize(mtState, tid);
 
	for(int i = tid; i < nNumbers; i += __mul24(blockDim.x, gridDim.x)) {
		// Make a floating-point number between 0...1 from integer 0...UINT_MAX.
		outArr[i] = float(MersenneTwisterGenerate(mtState, tid)) / 4294967295.0f;
	}
}
 
// Initialize seeds for the Mersenne Twister
void InitializeSeeds() {
	mt_struct_stripped *mtStripped = new mt_struct_stripped[MT_RNG_COUNT];
 
	FILE *datFile = fopen("MersenneTwister.dat", "rb");
	assert(datFile);
	assert(fread(mtStripped, sizeof(mt_struct_stripped) * MT_RNG_COUNT, 1, datFile));
	fclose(datFile);
 
	// Seed the structure with low-quality random numbers. Twisters will need "warming up"
	// before the RNG quality improves.
	srand(time(0));
	for(int i = 0; i < MT_RNG_COUNT; ++ i) {
		mtStripped[i].seed = rand();
	}
 
	// Upload the initial configurations to the GPU.
	CUDA_SAFE_CALL(cudaMemcpyToSymbol(MT, mtStripped, sizeof(mt_struct_stripped) * MT_RNG_COUNT, 0, cudaMemcpyHostToDevice));
	delete[] mtStripped;
}
 
// Generates random numbers on the GPU
void GenerateRandomNumbers(float *randomNumbers, int nRandomNumbers) {
	// Read offline-generated initial configuration file.
	float *randomNumbersDev;
	CUDA_SAFE_CALL(cudaMalloc((void **)&randomNumbersDev, sizeof(float) * nRandomNumbers));
 
	dim3 threads(512, 1);
	dim3 grid(MT_RNG_COUNT / 512, 1, 1);
 
	TestMersenneTwister<<<grid, threads>>>(randomNumbersDev, nRandomNumbers);
 
	CUDA_SAFE_CALL(cudaMemcpy(randomNumbers, randomNumbersDev, sizeof(float) * nRandomNumbers, cudaMemcpyDeviceToHost));
	CUDA_SAFE_CALL(cudaFree(randomNumbersDev));
}
// End Mersenne Twister Code
 
//Box Muller transform
#define PI 3.14159265358979f
__device__ 
void BoxMuller(float &u1, float &u2){
    float   r = sqrtf(-2.0f * logf(u1));
    float phi = 2 * PI * u2;
 
    u1 = (r * __cosf(phi));
	u2 = (r * __sinf(phi));
}
 
struct AsianOptionPricer {
	int term;
	float price0;
	float strike;
	float volatility;
	int numPeriods;
	float deltaT;
	float invariant;
 
	AsianOptionPricer(int t, float s, float v, float rfr, int p) : 
		term(t),
		strike(s), 
		volatility(v),
		numPeriods(p) {
			deltaT = (float)term / numPeriods;
			invariant = (rfr - volatility * volatility / 2) * deltaT;
	}
 
	__device__
	float operator()(float price0) const {
		__shared__ MersenneTwisterState mtState;
		unsigned int thrOffset = blockIdx.x * blockDim.x + threadIdx.x;
 
		MersenneTwisterInitialize(mtState, thrOffset);
 
		float sum = 0;
		float price = price0;
 
		// since we move the price two steps a loop, we only go from 0 to numPeriods/2
		for(int i = 0; i < (numPeriods) / 2; i++) {
			float u1 = float(MersenneTwisterGenerate(mtState, thrOffset)) / 4294967295.0f;
			float u2 = float(MersenneTwisterGenerate(mtState, thrOffset)) / 4294967295.0f;
 
			BoxMuller(u1, u2); //transform uniform into two independent standard normals
 
			// move the price a step
			price = price * __expf(invariant + volatility * u1 * sqrt(deltaT));
			sum = sum + price;
 
			// move the price a second step
			price = price * __expf(invariant + volatility * u2 * sqrt(deltaT));
			sum = sum + price;
		}
 
		float save = (sum / numPeriods) - strike;
		if(save < 0) {
			return 0.0;
		} else {
			return save;
		}
	}
};
 
int main() {
	float S0 = 50;
	int M = 500000;
 
	InitializeSeeds();
 
	thrust::device_vector<float> payoff(M); 
	thrust::fill(payoff.begin(), payoff.end(), S0);
 
	float r = 0.1;
	float T = 1;
	float sigma = 0.4;
 
	AsianOptionPricer pricer(T, 50, sigma, r, 250);
 
	float sum = thrust::transform_reduce(payoff.begin(), payoff.end(), pricer, 0.0f, thrust::plus<float>());
	printf("The price is: %f\n", (sum / M) * exp(-r * T));
 
	return 0;
}

Results:

macbook:asian option thrust choffstein$ ./price_asian_option_c++
Simulation with 500000 trials and 250 increments per trial took 14689312 milliseconds.
The price is: 5.59305
macbook:asian option thrust choffstein$ ./price_asian_option
Simulation with 500000 trials and 250 increments per trial took 991164 milliseconds.
The price is: 5.59295

Impressive speedup, no? About 7% of the time of my comparable C++ program (note: not exact replica). One should also note that the speedup is drastically reduced as the number of simulations decreases — most likely due to data copying speeds from to and from the GPU.

  • Share/Bookmark