Financial Data Analysis¶
🎯 Learning Objectives
- Learn to analyze financial time series data
- Understand returns, volatility, and risk metrics
- Master data visualization for finance
- Learn statistical analysis techniques
This chapter covers essential techniques for analyzing financial data, calculating key metrics, and visualizing market behavior.
Returns Analysis¶
Simple Returns¶
import pandas as pd
import numpy as np
import yfinance as yf
# Get data
data = yf.download("AAPL", period="1y")['Close']
# Calculate simple returns
returns = data.pct_change().dropna()
# Basic statistics
print(f"Mean Daily Return: {returns.mean():.4f}")
print(f"Std Dev: {returns.std():.4f}")
print(f"Annualized Return: {returns.mean() * 252:.4f}")
print(f"Annualized Volatility: {returns.std() * np.sqrt(252):.4f}")
Log Returns¶
# Log returns (better for statistical analysis)
log_returns = np.log(data / data.shift(1)).dropna()
# Cumulative log returns
cumulative_log_returns = log_returns.cumsum()
Cumulative Returns¶
# Cumulative simple returns
cumulative_returns = (1 + returns).cumprod() - 1
# Plot cumulative returns
import matplotlib.pyplot as plt
plt.figure(figsize=(12, 6))
plt.plot(cumulative_returns.index, cumulative_returns)
plt.title('Cumulative Returns')
plt.ylabel('Cumulative Return')
plt.grid(True)
plt.show()
Volatility Analysis¶
Historical Volatility¶
# Rolling volatility (20-day window)
rolling_vol = returns.rolling(window=20).std() * np.sqrt(252)
# Plot volatility
plt.figure(figsize=(12, 6))
plt.plot(rolling_vol.index, rolling_vol)
plt.title('20-Day Rolling Volatility')
plt.ylabel('Annualized Volatility')
plt.grid(True)
plt.show()
Volatility Clustering¶
# Check for volatility clustering
from scipy import stats
# GARCH-like analysis
squared_returns = returns ** 2
autocorr = [squared_returns.autocorr(lag=i) for i in range(1, 21)]
plt.figure(figsize=(10, 6))
plt.bar(range(1, 21), autocorr)
plt.title('Autocorrelation of Squared Returns (Volatility Clustering)')
plt.xlabel('Lag')
plt.ylabel('Autocorrelation')
plt.show()
Risk Metrics¶
Value at Risk (VaR)¶
from scipy import stats
# Parametric VaR (assuming normal distribution)
confidence_level = 0.95
z_score = stats.norm.ppf(1 - confidence_level)
var_parametric = returns.mean() - z_score * returns.std()
# Historical VaR
var_historical = np.percentile(returns, (1 - confidence_level) * 100)
print(f"95% VaR (Parametric): {var_parametric:.4f}")
print(f"95% VaR (Historical): {var_historical:.4f}")
Conditional VaR (CVaR)¶
# CVaR (Expected Shortfall)
cvar = returns[returns <= var_historical].mean()
print(f"95% CVaR: {cvar:.4f}")
Maximum Drawdown¶
# Calculate drawdown
cumulative = (1 + returns).cumprod()
running_max = cumulative.expanding().max()
drawdown = (cumulative - running_max) / running_max
# Maximum drawdown
max_drawdown = drawdown.min()
print(f"Maximum Drawdown: {max_drawdown:.4f} ({max_drawdown*100:.2f}%)")
# Plot drawdown
plt.figure(figsize=(12, 6))
plt.fill_between(drawdown.index, drawdown, 0, alpha=0.3, color='red')
plt.title('Drawdown Over Time')
plt.ylabel('Drawdown')
plt.grid(True)
plt.show()
Performance Metrics¶
Sharpe Ratio¶
def sharpe_ratio(returns, risk_free_rate=0.02):
excess_returns = returns - risk_free_rate / 252
sharpe = np.sqrt(252) * excess_returns.mean() / returns.std()
return sharpe
sharpe = sharpe_ratio(returns)
print(f"Sharpe Ratio: {sharpe:.4f}")
Sortino Ratio¶
def sortino_ratio(returns, risk_free_rate=0.02):
excess_returns = returns - risk_free_rate / 252
downside_returns = excess_returns[excess_returns < 0]
downside_std = downside_returns.std()
sortino = np.sqrt(252) * excess_returns.mean() / downside_std
return sortino
sortino = sortino_ratio(returns)
print(f"Sortino Ratio: {sortino:.4f}")
Calmar Ratio¶
def calmar_ratio(returns):
annual_return = returns.mean() * 252
max_dd = abs(drawdown.min())
calmar = annual_return / max_dd
return calmar
calmar = calmar_ratio(returns)
print(f"Calmar Ratio: {calmar:.4f}")
Correlation Analysis¶
Pairwise Correlation¶
# Multiple stocks
stocks = ['AAPL', 'GOOGL', 'MSFT', 'AMZN']
multi_data = yf.download(stocks, period="1y")['Close']
multi_returns = multi_data.pct_change().dropna()
# Correlation matrix
correlation_matrix = multi_returns.corr()
print(correlation_matrix)
# Visualize
import seaborn as sns
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0)
plt.title('Stock Correlation Matrix')
plt.show()
Rolling Correlation¶
# Rolling correlation between two stocks
rolling_corr = multi_returns['AAPL'].rolling(window=60).corr(multi_returns['GOOGL'])
plt.figure(figsize=(12, 6))
plt.plot(rolling_corr.index, rolling_corr)
plt.title('60-Day Rolling Correlation: AAPL vs GOOGL')
plt.ylabel('Correlation')
plt.grid(True)
plt.show()
Distribution Analysis¶
Returns Distribution¶
# Plot returns distribution
plt.figure(figsize=(12, 6))
plt.hist(returns, bins=50, density=True, alpha=0.7, label='Returns')
plt.xlabel('Return')
plt.ylabel('Density')
plt.title('Returns Distribution')
plt.legend()
plt.grid(True)
plt.show()
# Test for normality
from scipy.stats import jarque_bera
jb_stat, jb_pvalue = jarque_bera(returns)
print(f"Jarque-Bera Test: Stat={jb_stat:.4f}, p-value={jb_pvalue:.4f}")
Q-Q Plot¶
from scipy import stats
# Q-Q plot against normal distribution
stats.probplot(returns, dist="norm", plot=plt)
plt.title('Q-Q Plot: Returns vs Normal Distribution')
plt.show()
Time Series Analysis¶
Stationarity Test¶
from statsmodels.tsa.stattools import adfuller
# Augmented Dickey-Fuller test
adf_result = adfuller(returns)
print(f"ADF Statistic: {adf_result[0]:.4f}")
print(f"p-value: {adf_result[1]:.4f}")
print(f"Critical Values: {adf_result[4]}")
if adf_result[1] < 0.05:
print("Returns are stationary")
else:
print("Returns are not stationary")
Autocorrelation¶
from statsmodels.tsa.stattools import acf, pacf
# Autocorrelation function
autocorr = acf(returns, nlags=20)
partial_autocorr = pacf(returns, nlags=20)
# Plot ACF and PACF
fig, axes = plt.subplots(2, 1, figsize=(12, 8))
axes[0].stem(range(len(autocorr)), autocorr)
axes[0].set_title('Autocorrelation Function (ACF)')
axes[0].set_xlabel('Lag')
axes[1].stem(range(len(partial_autocorr)), partial_autocorr)
axes[1].set_title('Partial Autocorrelation Function (PACF)')
axes[1].set_xlabel('Lag')
plt.tight_layout()
plt.show()
Portfolio Analysis¶
Portfolio Returns¶
# Portfolio with equal weights
weights = np.array([0.25, 0.25, 0.25, 0.25])
portfolio_returns = (multi_returns * weights).sum(axis=1)
# Portfolio statistics
print(f"Portfolio Mean Return: {portfolio_returns.mean():.4f}")
print(f"Portfolio Volatility: {portfolio_returns.std():.4f}")
print(f"Portfolio Sharpe Ratio: {sharpe_ratio(portfolio_returns):.4f}")
Efficient Frontier¶
# Generate random portfolios
num_portfolios = 10000
results = np.zeros((3, num_portfolios))
for i in range(num_portfolios):
weights = np.random.random(len(stocks))
weights /= weights.sum()
portfolio_return = np.sum(multi_returns.mean() * weights) * 252
portfolio_std = np.sqrt(np.dot(weights.T, np.dot(multi_returns.cov() * 252, weights)))
sharpe = portfolio_return / portfolio_std
results[0,i] = portfolio_return
results[1,i] = portfolio_std
results[2,i] = sharpe
# Plot efficient frontier
plt.figure(figsize=(12, 8))
plt.scatter(results[1,:], results[0,:], c=results[2,:], cmap='viridis')
plt.colorbar(label='Sharpe Ratio')
plt.xlabel('Volatility')
plt.ylabel('Expected Return')
plt.title('Efficient Frontier')
plt.show()
Key Takeaways¶
- Returns analysis: Simple vs log returns, cumulative returns
- Volatility: Historical volatility, volatility clustering
- Risk metrics: VaR, CVaR, maximum drawdown
- Performance: Sharpe, Sortino, Calmar ratios
- Correlation: Pairwise and rolling correlations
- Distribution: Normality tests, Q-Q plots
- Time series: Stationarity, autocorrelation
- Portfolio: Portfolio returns, efficient frontier
Previous: Python for Quantitative Finance | Next: Technical Analysis