Python for Quantitative Finance¶

🎯 Learning Objectives

Learn Python basics for finance
Master NumPy for numerical computing
Master Pandas for data manipulation
Learn to work with financial data

Python is the most popular language for quantitative finance. This chapter covers essential Python skills you'll use daily as a quant researcher/trader.

Python Basics Review¶

Data Types¶

# Numbers
price: float = 150.50  # float
shares: int = 100      # int

# Strings
symbol, company = "AAPL", "Apple Inc."

# Lists
prices = [150.0, 151.5, 149.8, 152.3]

# Dictionaries (built with keyword arguments; keys are the same strings)
stock_info = dict(symbol='AAPL', price=150.50, volume=50_000_000)

Control Flow¶

# If statements
if price > 150:
    print("Price is above $150")
elif price < 150:
    print("Price is below $150")
else:
    print("Price is exactly $150")

# Loops
# Use a dedicated loop variable: looping with `price` would overwrite the
# scalar `price` used above with the last element of `prices`.
for current_price in prices:
    print(f"Price: ${current_price}")

# List comprehensions
high_prices = [p for p in prices if p > 150]

Functions¶

def calculate_return(price_today, price_yesterday):
    """Return the simple return between two consecutive prices.

    The result is the price change expressed as a fraction of
    ``price_yesterday`` (for example, 0.0133 means +1.33%).
    """
    price_change = price_today - price_yesterday
    return price_change / price_yesterday


return_pct = calculate_return(152.0, 150.0)
print(f"Return: {return_pct:.4f}")  # 0.0133 = 1.33%

NumPy for Numerical Computing¶

Arrays¶

import numpy as np

# Create arrays
prices = np.array([150.0, 151.5, 149.8, 152.3, 153.0])
returns = np.array([0.01, -0.011, 0.017, 0.005])

# Array operations (method form of np.mean / np.std / np.max / np.min)
mean_price = prices.mean()
std_price = prices.std()  # NumPy's default here is the population std (ddof=0)
max_price = prices.max()
min_price = prices.min()

print(f"Mean: ${mean_price:.2f}")
print(f"Std Dev: ${std_price:.2f}")

Array Operations¶

# Element-wise operations
doubled = 2 * prices
squared = np.square(prices)

# Mathematical functions (applied element by element)
log_prices, exp_returns = np.log(prices), np.exp(returns)

# Statistical functions (method form; same defaults as np.mean/np.var/np.std)
mean = returns.mean()
variance = returns.var()
std_dev = returns.std()

Linear Algebra¶

# Dot product (portfolio return): weighted sum of the per-asset returns
weights = np.array([0.4, 0.3, 0.3])
returns = np.array([0.05, 0.03, 0.02])
portfolio_return = weights @ returns

# Matrix operations
matrix = np.array([[1, 2],
                   [3, 4]])
inverse = np.linalg.inv(matrix)

Pandas for Data Manipulation¶

Series and DataFrames¶

import pandas as pd

# Create Series (a dict supplies the index labels and values together)
prices_series = pd.Series({
    '2023-01-01': 150.0,
    '2023-01-02': 151.5,
    '2023-01-03': 149.8,
    '2023-01-04': 152.3,
})

# Create DataFrame: one column per key, one row per calendar day
data = dict(
    Open=[150.0, 151.0, 149.5, 152.0],
    High=[151.5, 152.0, 150.5, 153.0],
    Low=[149.5, 150.5, 149.0, 151.5],
    Close=[151.0, 150.5, 150.0, 152.5],
    Volume=[50000000, 45000000, 55000000, 48000000],
)
daily_index = pd.date_range(start='2023-01-01', periods=4)
df = pd.DataFrame(data, index=daily_index)

Reading Financial Data¶

import yfinance as yf

# Download stock data
ticker = yf.Ticker("AAPL")
data = ticker.history(period="1y")

# Basic info
print(data.head())
print(data.describe())
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() would emit an extra "None" line.
data.info()

Data Selection and Filtering¶

# Select columns
closes = data['Close']
high_low = data[['High', 'Low']]

# Filter by condition (a boolean mask keeps the rows where it is True)
high_volume = data[data['Volume'] > 50000000]
price_above_150 = data[data['Close'] > 150]

# Date filtering
# Use .loc so the slice is explicitly a label-based row slice on the
# DatetimeIndex (plain [] is otherwise column selection). Both endpoints are
# inclusive. Adjust the dates so they fall inside the downloaded period.
recent_data = data.loc['2023-01-01':'2023-06-30']

Calculating Returns¶

# Simple returns: fractional change from the previous close
data['Returns'] = data['Close'].pct_change()

# Log returns: natural log of the ratio of consecutive closes
price_ratio = data['Close'].div(data['Close'].shift(1))
data['Log_Returns'] = np.log(price_ratio)

# Cumulative returns: running product of (1 + simple return), minus 1
growth_factor = data['Returns'].add(1).cumprod()
data['Cumulative_Returns'] = growth_factor.sub(1)

Rolling Statistics¶

# Moving averages over the last 20 and 50 rows of 'Close'
close_prices = data['Close']
data['MA_20'] = close_prices.rolling(20).mean()
data['MA_50'] = close_prices.rolling(50).mean()

# Rolling volatility: 20-row standard deviation of returns scaled by
# sqrt(252) (presumably to annualise daily figures -- 252 trading days)
rolling_std_20 = data['Returns'].rolling(20).std()
data['Volatility_20'] = rolling_std_20 * np.sqrt(252)

# Rolling max/min over the last 20 rows
data['Rolling_High_20'] = data['High'].rolling(20).max()
data['Rolling_Low_20'] = data['Low'].rolling(20).min()

Grouping and Aggregation¶

# Group by month
# Simple returns compound over time, so the monthly return is the product of
# (1 + daily return) minus 1. Summing is only valid for log returns.
# NOTE: pandas >= 2.2 prefers the alias 'ME' (month end) over 'M'.
monthly_returns = (1 + data['Returns']).resample('M').prod() - 1
monthly_vol = data['Returns'].resample('M').std()

# Multiple stocks
stocks = ['AAPL', 'GOOGL', 'MSFT']
multi_data = yf.download(stocks, period="1y")['Close']
multi_returns = multi_data.pct_change()

Working with Financial Data¶

Data Cleaning¶

# Handle missing values
# Forward-fill first, so gaps take the last known value, and only then drop
# rows that are still NaN (e.g. leading rows with nothing to fill from).
# Calling dropna() first would leave the forward fill with nothing to do.
# DataFrame.ffill() replaces the deprecated fillna(method='ffill').
data = data.ffill()   # Forward fill
data = data.dropna()  # Remove remaining rows with NaN

# Remove duplicates
# De-duplicate on the timestamp index: drop_duplicates() compares row values
# and would also discard distinct dates that happen to hold identical values.
data = data[~data.index.duplicated(keep='first')]

# Handle outliers: keep rows whose return lies within 1.5 * IQR of the quartiles
Q1 = data['Returns'].quantile(0.25)
Q3 = data['Returns'].quantile(0.75)
IQR = Q3 - Q1
data_clean = data[(data['Returns'] >= Q1 - 1.5*IQR) &
                  (data['Returns'] <= Q3 + 1.5*IQR)]

Time Series Operations¶

# Set date as index (parse the existing index labels as datetimes)
data.index = pd.to_datetime(data.index)

# Resample to different frequencies, keeping the last row of each period
daily, weekly, monthly = (data.resample(rule).last() for rule in ('D', 'W', 'M'))

# Shift data (lag): value from 1 and 5 rows earlier
close_prices = data['Close']
data['Price_Lag1'] = close_prices.shift(periods=1)
data['Price_Lag5'] = close_prices.shift(periods=5)

# Lead data: value from the next row
data['Price_Lead1'] = close_prices.shift(periods=-1)

Technical Indicators¶

# RSI (Relative Strength Index)
def calculate_rsi(prices, period=14):
    """Compute the Relative Strength Index (RSI) of a price series.

    Gains and losses are averaged with a simple rolling mean over ``period``
    consecutive price changes (not Wilder's exponential smoothing).

    Args:
        prices: pandas Series of prices in chronological order.
        period: Number of price changes in each averaging window (>= 1).

    Returns:
        pandas Series aligned with ``prices`` holding RSI values between 0
        and 100. The first ``period`` entries are NaN because fewer than
        ``period`` price changes are available; a window with neither gains
        nor losses also yields NaN.

    Raises:
        ValueError: If ``period`` is less than 1.
    """
    if period < 1:
        raise ValueError("period must be at least 1")
    delta = prices.diff()
    # clip() preserves the leading NaN produced by diff(). Replacing it with 0
    # would count a fake "no change" observation and emit the first RSI one
    # row early, computed from only period-1 real price changes.
    gain = delta.clip(lower=0).rolling(window=period).mean()
    loss = (-delta).clip(lower=0).rolling(window=period).mean()
    # A zero average loss gives rs = inf, which maps to RSI = 100 below.
    rs = gain / loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

data['RSI'] = calculate_rsi(data['Close'])

# MACD: difference between the span-12 and span-26 exponential moving
# averages of 'Close', plus a span-9 EMA of that difference as the signal line
close_prices = data['Close']
exp1 = close_prices.ewm(span=12, adjust=False).mean()
exp2 = close_prices.ewm(span=26, adjust=False).mean()
data['MACD'] = exp1.sub(exp2)
data['Signal'] = data['MACD'].ewm(span=9, adjust=False).mean()

Data Visualization¶

Matplotlib Basics¶

import matplotlib.pyplot as plt

# Line plot
# Draw the 'Close' column against the DataFrame index on a new figure; the
# title, axis labels and grid are applied to that current figure.
plt.figure(figsize=(12, 6))
plt.plot(data.index, data['Close'])
plt.title('Stock Price Over Time')
plt.xlabel('Date')
plt.ylabel('Price ($)')
plt.grid(True)
plt.show()

# Multiple plots
# Two stacked axes (2 rows, 1 column): 'Close' on the first, 'Volume' on the
# second, both plotted against the DataFrame index.
fig, axes = plt.subplots(2, 1, figsize=(12, 10))
axes[0].plot(data.index, data['Close'])
axes[0].set_title('Price')
axes[1].plot(data.index, data['Volume'])
axes[1].set_title('Volume')
plt.tight_layout()  # adjust subplot spacing before the figure is shown
plt.show()

Seaborn for Statistical Visualization¶

import seaborn as sns

# Distribution plot
# Histogram of the 'Returns' column; kde=True requests a kernel density
# estimate curve in addition to the bars.
sns.histplot(data['Returns'], kde=True)
plt.title('Returns Distribution')
plt.show()

# Correlation heatmap
# Pairwise correlation between the columns of multi_returns, drawn as a
# colour-coded grid; annot=True asks for each value to be written in its cell.
correlation = multi_returns.corr()
sns.heatmap(correlation, annot=True, cmap='coolwarm')
plt.title('Stock Correlation Matrix')
plt.show()

Candlestick Charts¶

from mplfinance.original_flavor import candlestick_ohlc
import matplotlib.dates as mdates

# Prepare data
# candlestick_ohlc expects rows of (time, open, high, low, close) with time as
# a Matplotlib date number. The date must therefore be a column of the array:
# `.values` does not include the index, so storing the dates only in the index
# would make 'Open' be read as the time coordinate.
ohlc = data[['Open', 'High', 'Low', 'Close']].copy()
ohlc.insert(0, 'Date', mdates.date2num(ohlc.index))

fig, ax = plt.subplots(figsize=(12, 6))
candlestick_ohlc(ax, ohlc.values, width=0.6, colorup='green', colordown='red')
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
plt.title('Candlestick Chart')
plt.show()

Essential Libraries¶

yfinance - Yahoo Finance Data¶

import yfinance as yf

# Get stock info
ticker = yf.Ticker("AAPL")
info = ticker.info  # indexed below like a dict keyed by field name
# NOTE(review): each lookup raises KeyError if the field is absent -- confirm
# these keys are always present for the ticker, or switch to info.get(...).
print(f"Company: {info['longName']}")
print(f"Sector: {info['sector']}")
print(f"Market Cap: ${info['marketCap']:,}")  # ':,' adds thousands separators

# Historical data
hist = ticker.history(period="5y")

scipy - Scientific Computing¶

from scipy import stats

# Statistical tests: one-sample t-test of the returns against a mean of 0
returns = data['Returns'].dropna()
ttest_result = stats.ttest_1samp(returns, 0)
t_stat, p_value = ttest_result
print(f"T-statistic: {t_stat:.4f}, P-value: {p_value:.4f}")

# Normal distribution: fit location (mean) and scale (std) to the returns
fitted_params = stats.norm.fit(returns)
mu, sigma = fitted_params
print(f"Mean: {mu:.4f}, Std: {sigma:.4f}")

scikit-learn - Machine Learning¶

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Prepare data
# Pair each row's features with the NEXT row's return, then drop incomplete
# rows from features and target together so they stay aligned. RSI is NaN
# during its warm-up window and the last row has no next-period return;
# leaving those NaNs in X would make the model fit fail.
model_data = data[['Volume', 'RSI']].copy()
model_data['Target'] = data['Returns'].shift(-1)
model_data = model_data.dropna()
X = model_data[['Volume', 'RSI']].values
y = model_data['Target'].values

# Train model
# shuffle=False keeps chronological order: fit on the earlier 80% and test on
# the most recent 20%, so the model is never trained on data that comes after
# the observations it is evaluated on.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False)
model = LinearRegression()
model.fit(X_train, y_train)

# Predict
predictions = model.predict(X_test)

Best Practices¶

Code Organization¶

# Use functions for reusable code
def calculate_sharpe_ratio(returns, risk_free_rate=0.02, periods_per_year=252):
    """Compute the annualised Sharpe ratio of a series of periodic returns.

    Args:
        returns: pandas Series (or similar object with ``mean()`` and
            ``std()``) of per-period returns.
        risk_free_rate: Annual risk-free rate; it is divided by
            ``periods_per_year`` to get the per-period rate.
        periods_per_year: Number of return periods in a year. The default of
            252 matches the previous hard-coded value; pass e.g. 12 for
            monthly or 52 for weekly returns.

    Returns:
        The mean per-period excess return divided by the standard deviation
        of ``returns``, scaled by ``sqrt(periods_per_year)``.
    """
    excess_returns = returns - risk_free_rate / periods_per_year
    # Subtracting a constant does not change the dispersion, so the std of the
    # raw returns equals the std of the excess returns.
    sharpe = np.sqrt(periods_per_year) * excess_returns.mean() / returns.std()
    return sharpe

# Use classes for complex strategies
class MovingAverageStrategy:
    """Moving-average crossover signal generator.

    Compares a short and a long rolling mean of the 'Close' column and emits
    1, -1 or 0 depending on which one is higher.
    """

    def __init__(self, short_window=20, long_window=50):
        """Store the window lengths of the short and long moving averages."""
        self.short_window, self.long_window = short_window, long_window

    def generate_signals(self, data):
        """Add 'Short_MA', 'Long_MA' and 'Signal' columns to ``data``.

        ``data`` must have a 'Close' column. It is modified in place and the
        same object is returned. 'Signal' is 1 where the short average is
        above the long one, -1 where it is below, and 0 otherwise.
        """
        close = data['Close']
        short_ma = close.rolling(self.short_window).mean()
        long_ma = close.rolling(self.long_window).mean()
        data['Short_MA'] = short_ma
        data['Long_MA'] = long_ma
        # Comparisons involving NaN are False, so rows where either average is
        # not yet available end up with a signal of 0.
        is_above = (short_ma > long_ma).astype(int)
        is_below = (short_ma < long_ma).astype(int)
        data['Signal'] = is_above - is_below
        return data

Performance Tips¶

# Use vectorized operations (fast)
# pct_change() is a pandas method, so work on the 'Close' Series rather than
# a plain NumPy array (which has no pct_change()).
prices = data['Close']
returns = prices.pct_change()  # Fast

# Avoid loops when possible (slow)
# Start at 1: at i = 0 the index i-1 would wrap around to the last element.
# for i in range(1, len(prices)):
#     returns.iloc[i] = (prices.iloc[i] - prices.iloc[i-1]) / prices.iloc[i-1]  # Slow

# Use .loc for filtering: rows (and optionally columns) in a single step
filtered = data.loc[data['Volume'] > 1000000]
# e.g. data.loc[data['Volume'] > 1000000, 'Close'] selects rows and a column

# Avoid chained indexing: it indexes twice, and assigning through it may
# modify a temporary copy instead of `data`
# data[data['Volume'] > 1000000]['Close']

Practice Exercises¶

Exercise 1: Calculate Portfolio Metrics¶

Given a portfolio with:

- Stock A: 40% weight, 10% return, 15% volatility
- Stock B: 35% weight, 8% return, 12% volatility
- Stock C: 25% weight, 6% return, 10% volatility
- Correlations: A-B = 0.5, A-C = 0.3, B-C = 0.4

Calculate portfolio return and volatility.

Exercise 2: Build Technical Indicator¶

Create a function to calculate Bollinger Bands:

- Middle band = 20-day moving average (MA)
- Upper band = MA + 2 * standard deviation
- Lower band = MA - 2 * standard deviation

Exercise 3: Analyze Stock Data¶

Download data for a stock and:

1. Calculate daily returns
2. Calculate 20-day and 50-day moving averages
3. Calculate RSI
4. Plot price with moving averages
5. Plot returns distribution

Key Takeaways:

- NumPy: Fast numerical computing with arrays
- Pandas: Powerful data manipulation for time series
- yfinance: Easy access to financial data
- Vectorized operations are much faster than loops
- Practice with real data to build skills

Previous: Financial Markets Basics | Next: Financial Data Analysis