Skip to content

Python for Quantitative Finance

🎯 Learning Objectives

  • Learn Python basics for finance
  • Master NumPy for numerical computing
  • Master Pandas for data manipulation
  • Learn to work with financial data

Python is the most popular language for quantitative finance. This chapter covers essential Python skills you'll use daily as a quant researcher/trader.

Python Basics Review

Data Types

# Numeric scalars
price = 150.50  # a float: has a fractional part
shares = 100    # an int: whole number of shares

# Text values
symbol = "AAPL"
company = "Apple Inc."

# An ordered, mutable sequence of prices
prices = [150.0, 151.5, 149.8, 152.3]

# A key -> value mapping describing a single instrument
stock_info = dict(
    symbol='AAPL',
    price=150.50,
    volume=50_000_000,
)

Control Flow

# If statements: branch on where the price sits relative to $150
if price > 150:
    print("Price is above $150")
elif price < 150:
    print("Price is below $150")
else:
    print("Price is exactly $150")

# Loops: the loop variable is deliberately not called `price`; reusing that
# name would overwrite the scalar `price` with the last element of the list.
for p in prices:
    print(f"Price: ${p}")

# List comprehensions: keep only the prices strictly above 150
high_prices = [p for p in prices if p > 150]

Functions

def calculate_return(price_today, price_yesterday):
    """Return the simple (arithmetic) return between two prices.

    The result is the price change expressed as a fraction of the earlier
    price, e.g. a move from 150 to 152 gives roughly 0.0133.
    """
    price_change = price_today - price_yesterday
    return price_change / price_yesterday

return_pct = calculate_return(price_today=152.0, price_yesterday=150.0)
print(f"Return: {return_pct:.4f}")  # 0.0133 = 1.33%

NumPy for Numerical Computing

Arrays

import numpy as np

# Build 1-D float arrays from plain Python lists
prices = np.array([150.0, 151.5, 149.8, 152.3, 153.0])
returns = np.array([0.01, -0.011, 0.017, 0.005])

# Summary statistics through the equivalent ndarray methods
# (NumPy's std defaults to ddof=0, i.e. the population standard deviation)
mean_price = prices.mean()
std_price = prices.std()
max_price = prices.max()
min_price = prices.min()

print(f"Mean: ${mean_price:.2f}")
print(f"Std Dev: ${std_price:.2f}")

Array Operations

# Arithmetic on an array is applied to every element
doubled = 2 * prices
squared = np.square(prices)

# NumPy math functions also work element by element
log_prices = np.log(prices)
exp_returns = np.exp(returns)

# Summary statistics of the returns (NumPy defaults to ddof=0)
mean, variance, std_dev = np.mean(returns), np.var(returns), np.std(returns)

Linear Algebra

# Portfolio return = weighted sum of asset returns (a dot product)
weights = np.array([0.4, 0.3, 0.3])
returns = np.array([0.05, 0.03, 0.02])
portfolio_return = weights @ returns

# Matrix operations: invert a 2x2 matrix
matrix = np.array([
    [1, 2],
    [3, 4],
])
inverse = np.linalg.inv(matrix)

Pandas for Data Manipulation

Series and DataFrames

import pandas as pd

# Create Series: each dict key becomes an index label, each value a data point
prices_series = pd.Series({
    '2023-01-01': 150.0,
    '2023-01-02': 151.5,
    '2023-01-03': 149.8,
    '2023-01-04': 152.3,
})

# Create DataFrame: one list per column, one row per calendar day
data = dict(
    Open=[150.0, 151.0, 149.5, 152.0],
    High=[151.5, 152.0, 150.5, 153.0],
    Low=[149.5, 150.5, 149.0, 151.5],
    Close=[151.0, 150.5, 150.0, 152.5],
    Volume=[50000000, 45000000, 55000000, 48000000],
)
df = pd.DataFrame(data, index=pd.date_range(start='2023-01-01', periods=4))

Reading Financial Data

import yfinance as yf

# Download one year of price history for a single ticker
ticker = yf.Ticker("AAPL")
data = ticker.history(period="1y")

# Basic info
print(data.head())      # first rows of the table
print(data.describe())  # per-column summary statistics
data.info()             # info() prints its own report and returns None,
                        # so wrapping it in print() would add a stray "None"

Data Selection and Filtering

# Select columns: one label yields a Series, a list of labels a DataFrame
closes = data.loc[:, 'Close']
high_low = data.loc[:, ['High', 'Low']]

# Filter rows with a boolean mask
high_volume = data.loc[data['Volume'] > 50000000]
price_above_150 = data.loc[data['Close'] > 150]

# Date filtering: label slice on the date index (both endpoints included)
recent_data = data.loc['2023-01-01':'2023-06-30']

Calculating Returns

# Simple returns: fractional change from the previous close
data['Returns'] = data['Close'].pct_change()

# Log returns: natural log of the ratio between consecutive closes
data['Log_Returns'] = np.log(data['Close'].div(data['Close'].shift(1)))

# Cumulative returns: compound the growth factors, then subtract the initial 1
data['Cumulative_Returns'] = data['Returns'].add(1).cumprod().sub(1)

Rolling Statistics

# Moving averages of the close over the last 20 and 50 rows
data['MA_20'] = data['Close'].rolling(20).mean()
data['MA_50'] = data['Close'].rolling(50).mean()

# Rolling volatility: 20-row standard deviation of returns,
# scaled by sqrt(252) to express it on an annual basis
data['Volatility_20'] = data['Returns'].rolling(20).std().mul(np.sqrt(252))

# Rolling max/min: highest high and lowest low over the last 20 rows
data['Rolling_High_20'] = data['High'].rolling(20).max()
data['Rolling_Low_20'] = data['Low'].rolling(20).min()

Grouping and Aggregation

# Group by month
# Simple returns compound rather than add: the return over a month is
# prod(1 + r) - 1, not sum(r) (summing is only exact for log returns).
# A MonthEnd offset object is used instead of the 'M' string alias, which
# pandas 2.2+ deprecates in favour of 'ME'; the object works on both.
monthly_returns = (1 + data['Returns']).resample(pd.offsets.MonthEnd()).prod() - 1
monthly_vol = data['Returns'].resample(pd.offsets.MonthEnd()).std()

# Multiple stocks: one 'Close' column per ticker, then per-ticker returns
stocks = ['AAPL', 'GOOGL', 'MSFT']
multi_data = yf.download(stocks, period="1y")['Close']
multi_returns = multi_data.pct_change()

Working with Financial Data

Data Cleaning

# Handle missing values
# Forward-fill first (carry the last known value across gaps), then drop the
# rows that are still incomplete, e.g. leading rows with nothing to carry
# forward. In the opposite order the fill would have nothing left to do.
# .ffill() replaces fillna(method='ffill'), which pandas 2.1+ deprecates.
data = data.ffill()   # Forward fill
data = data.dropna()  # Remove rows that still contain NaN

# Remove duplicates
# In a time series a duplicate is a repeated timestamp. drop_duplicates()
# compares column values only, so it would keep repeated timestamps and
# could discard distinct days that happen to share identical values.
data = data[~data.index.duplicated(keep='first')]

# Handle outliers
# Keep only returns within 1.5 * IQR of the quartiles (bounds inclusive)
Q1 = data['Returns'].quantile(0.25)
Q3 = data['Returns'].quantile(0.75)
IQR = Q3 - Q1
data_clean = data[data['Returns'].between(Q1 - 1.5 * IQR, Q3 + 1.5 * IQR)]

Time Series Operations

# Set date as index: make sure the index holds datetimes, not strings
data.index = pd.to_datetime(data.index)

# Resample to different frequencies, keeping the last row of each period.
# Month-end uses an offset object because the 'M' string alias is deprecated
# in pandas 2.2+ (renamed 'ME'); the object works on both old and new versions.
daily = data.resample('D').last()
weekly = data.resample('W').last()
monthly = data.resample(pd.offsets.MonthEnd()).last()

# Shift data (lag): the value from 1 and 5 rows earlier
data['Price_Lag1'] = data['Close'].shift(1)
data['Price_Lag5'] = data['Close'].shift(5)

# Lead data: the value from the NEXT row.
# NOTE: this is future information relative to each row. Use it only as a
# prediction target, never as a model input, or results will have
# look-ahead bias.
data['Price_Lead1'] = data['Close'].shift(-1)

Technical Indicators

# RSI (Relative Strength Index)
def calculate_rsi(prices, period=14):
    """Compute the RSI of a price series using simple rolling averages.

    This is the simple-moving-average form of RSI: average gain and average
    loss are plain rolling means over ``period`` price changes (no Wilder
    exponential smoothing is applied).

    Args:
        prices: pandas Series of prices in chronological order.
        period: Number of price changes in each averaging window.

    Returns:
        pandas Series aligned with ``prices``, on a 0-100 scale. The first
        ``period`` entries are NaN because a full window of ``period``
        changes needs ``period + 1`` prices. A window with no losses gives
        100; a window with neither gains nor losses gives NaN (0 / 0).
    """
    delta = prices.diff()

    # clip() keeps NaN as NaN. Replacing the leading NaN from diff() with 0
    # would count a non-existent "zero change" in the first window and emit
    # a value one row early from only period - 1 real changes.
    gain = delta.clip(lower=0)
    loss = (-delta).clip(lower=0)

    avg_gain = gain.rolling(window=period).mean()
    avg_loss = loss.rolling(window=period).mean()

    rs = avg_gain / avg_loss
    return 100 - (100 / (1 + rs))

data['RSI'] = calculate_rsi(prices=data['Close'])

# MACD: fast (span 12) EMA of the close minus the slow (span 26) EMA;
# the signal line is a span-9 EMA of the MACD line itself
exp1 = data['Close'].ewm(span=12, adjust=False).mean()
exp2 = data['Close'].ewm(span=26, adjust=False).mean()
data['MACD'] = exp1.sub(exp2)
data['Signal'] = data['MACD'].ewm(span=9, adjust=False).mean()

Data Visualization

Matplotlib Basics

import matplotlib.pyplot as plt

# Line plot: closing price against the date index
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(data.index, data['Close'])
ax.set_title('Stock Price Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Price ($)')
ax.grid(True)
plt.show()

# Multiple plots: price on the top panel, volume on the bottom panel
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 10))
price_ax, volume_ax = axes
price_ax.plot(data.index, data['Close'])
price_ax.set_title('Price')
volume_ax.plot(data.index, data['Volume'])
volume_ax.set_title('Volume')
fig.tight_layout()
plt.show()

Seaborn for Statistical Visualization

import seaborn as sns

# Distribution plot: histogram of returns with a kernel density curve
sns.histplot(data['Returns'], kde=True).set_title('Returns Distribution')
plt.show()

# Correlation heatmap: pairwise correlation of the per-ticker returns,
# with each cell annotated by its value
correlation = multi_returns.corr()
sns.heatmap(correlation, annot=True, cmap='coolwarm').set_title('Stock Correlation Matrix')
plt.show()

Candlestick Charts

from mplfinance.original_flavor import candlestick_ohlc
import matplotlib.dates as mdates

# Prepare data
# candlestick_ohlc expects rows of (time, open, high, low, close) with time
# given as a Matplotlib date number. The dates must therefore be a column of
# the array that is passed in: DataFrame.values never includes the index, so
# converting only the index would hand the function 4-column rows with no
# time value.
ohlc = data[['Open', 'High', 'Low', 'Close']].copy()
ohlc.insert(0, 'Date', mdates.date2num(ohlc.index))

fig, ax = plt.subplots(figsize=(12, 6))
candlestick_ohlc(ax, ohlc.values, width=0.6, colorup='green', colordown='red')
# Render the numeric x values as calendar dates
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
plt.title('Candlestick Chart')
plt.show()

Essential Libraries

yfinance - Yahoo Finance Data

import yfinance as yf

# Get stock info
ticker = yf.Ticker("AAPL")
info = ticker.info

# `info` is a plain dict and the available keys can vary by instrument
# (presumably e.g. funds or indices may lack 'sector'; verify for your
# tickers), so read with .get() instead of failing with a KeyError.
company_name = info.get('longName', 'N/A')
sector = info.get('sector', 'N/A')
market_cap = info.get('marketCap')

print(f"Company: {company_name}")
print(f"Sector: {sector}")
if market_cap is not None:
    print(f"Market Cap: ${market_cap:,}")
else:
    # The ',' format spec cannot be applied to a missing value
    print("Market Cap: N/A")

# Historical data
hist = ticker.history(period="5y")

scipy - Scientific Computing

from scipy import stats

# Statistical tests: one-sample t-test of the mean return against zero
returns = data['Returns'].dropna()
t_stat, p_value = stats.ttest_1samp(returns, popmean=0)
print(f"T-statistic: {t_stat:.4f}, P-value: {p_value:.4f}")

# Normal distribution: fit location (mean) and scale (std) to the returns
mu, sigma = stats.norm.fit(returns)
print(f"Mean: {mu:.4f}, Std: {sigma:.4f}")

scikit-learn - Machine Learning

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Prepare data
# Features are known on day t; the target is the return of day t+1.
# Features and target are assembled in one frame and cleaned together so
# rows stay aligned: RSI is NaN during its warm-up window and the shifted
# target is NaN on the last row, and LinearRegression rejects NaN inputs.
model_frame = data[['Volume', 'RSI']].copy()
model_frame['Target'] = data['Returns'].shift(-1)
model_frame = model_frame.dropna()

X = model_frame[['Volume', 'RSI']].values
y = model_frame['Target'].values

# Train model
# shuffle=False keeps chronological order: train on the earliest 80% and
# test on the most recent 20%. A shuffled split would train on days that
# come after the test days, which leaks future information.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)
model = LinearRegression()
model.fit(X_train, y_train)

# Predict
predictions = model.predict(X_test)

Best Practices

Code Organization

# Use functions for reusable code
def calculate_sharpe_ratio(returns, risk_free_rate=0.02, periods_per_year=252):
    """Compute the annualized Sharpe ratio of a series of periodic returns.

    Args:
        returns: pandas Series of per-period simple returns.
        risk_free_rate: Annual risk-free rate. It is divided by
            ``periods_per_year`` to get the per-period rate.
        periods_per_year: Number of return periods in a year, used for both
            the risk-free conversion and the annualization factor. The
            default of 252 corresponds to daily data; use 52 for weekly or
            12 for monthly returns.

    Returns:
        ``sqrt(periods_per_year) * mean(excess) / std(excess)``. The result
        is inf or NaN when the excess returns have zero standard deviation.
    """
    excess_returns = returns - risk_free_rate / periods_per_year
    # Volatility is taken from the excess returns. With a constant risk-free
    # rate this equals returns.std(), and it stays correct if a time-varying
    # rate is passed instead.
    sharpe = np.sqrt(periods_per_year) * excess_returns.mean() / excess_returns.std()
    return sharpe

# Use classes for complex strategies
class MovingAverageStrategy:
    """Moving-average crossover signals from a short and a long window."""

    def __init__(self, short_window=20, long_window=50):
        self.short_window, self.long_window = short_window, long_window

    def generate_signals(self, data):
        """Add moving-average and signal columns to ``data`` and return it.

        Writes 'Short_MA', 'Long_MA' and 'Signal' into the passed frame in
        place. 'Signal' is 1 where the short average is above the long one,
        -1 where it is below, and 0 otherwise (including rows where either
        average is still NaN).
        """
        closes = data['Close']
        short_ma = closes.rolling(self.short_window).mean()
        long_ma = closes.rolling(self.long_window).mean()
        data['Short_MA'] = short_ma
        data['Long_MA'] = long_ma

        # Comparisons against NaN are False, so warm-up rows end up as 0
        above = (short_ma > long_ma).astype('int64')
        below = (short_ma < long_ma).astype('int64')
        data['Signal'] = above - below
        return data

Performance Tips

# Use vectorized operations (fast)
# pct_change() is a pandas method, so it is called on a Series here;
# a plain NumPy array such as `prices` does not have it.
returns = data['Close'].pct_change()  # Fast

# Avoid loops when possible (slow)
# The loop starts at 1: at i = 0, prices[i-1] would wrap around to the
# last element.
# for i in range(1, len(prices)):
#     returns[i] = (prices[i] - prices[i-1]) / prices[i-1]  # Slow

# Use .loc for filtering: one indexing call selects the rows
filtered = data.loc[data['Volume'] > 1000000]

# Avoid chained indexing: it performs two separate lookups, and assigning
# through it may modify a temporary copy instead of `data`
# data[data['Volume'] > 1000000]['Close']  # Slow
# Prefer a single call: data.loc[data['Volume'] > 1000000, 'Close']

Practice Exercises

Exercise 1: Calculate Portfolio Metrics

Given a portfolio with:

  • Stock A: 40% weight, 10% return, 15% volatility
  • Stock B: 35% weight, 8% return, 12% volatility
  • Stock C: 25% weight, 6% return, 10% volatility
  • Correlations: A-B = 0.5, A-C = 0.3, B-C = 0.4

Calculate portfolio return and volatility.

Exercise 2: Build Technical Indicator

Create a function to calculate Bollinger Bands:

  • Middle band = 20-day moving average (MA)
  • Upper band = MA + 2 * (20-day standard deviation)
  • Lower band = MA - 2 * (20-day standard deviation)

Exercise 3: Analyze Stock Data

Download data for a stock and:

  1. Calculate daily returns
  2. Calculate 20-day and 50-day moving averages
  3. Calculate RSI
  4. Plot price with moving averages
  5. Plot returns distribution


Key Takeaways:

  • NumPy: Fast numerical computing with arrays
  • Pandas: Powerful data manipulation for time series
  • yfinance: Easy access to financial data
  • Vectorized operations are much faster than loops
  • Practice with real data to build skills


Previous: Financial Markets Basics | Next: Financial Data Analysis