// Master data analysis, visualization, statistics, and storytelling techniques to extract insights from data.
| name | data-science |
| description | Master data analysis, visualization, statistics, and storytelling techniques to extract insights from data. |
Analyze data and extract actionable insights using statistics and visualization.
First Steps
import pandas as pd
import numpy as np
# Load the dataset into a DataFrame.
df = pd.read_csv('data.csv')

# First look: sample rows, schema, summary statistics, missing values.
print(df.head())
# info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() would add a stray "None" line.
df.info()
print(df.describe())
print(df.isnull().sum())
Distribution Analysis
from scipy import stats
# T-test: compare the means of two independent groups.
# NOTE: ttest_ind assumes equal variances by default; pass equal_var=False
# for Welch's t-test when that assumption does not hold.
t_stat, p_value = stats.ttest_ind(group1, group2)

# P-value interpretation: below the 0.05 significance level the null
# hypothesis is rejected.
if p_value < 0.05:
    print("Reject null hypothesis")
Common Tests
Correlation Coefficient
Causal Inference
Categorical Data
Numerical Data
Relationships
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
# Matplotlib: Low-level, flexible
# Histogram of `data` in 30 bins on a figure created with figsize=(10, 6).
plt.figure(figsize=(10, 6))
plt.hist(data, bins=30)
plt.show()
# Seaborn: High-level, statistical
# Heatmap of `corr_matrix`; annot=True asks for annotated cells.
# Seaborn draws through Matplotlib, so plt.show() displays it.
sns.heatmap(corr_matrix, annot=True)
plt.show()
# Plotly: Interactive, web-ready
# Scatter plot of df columns 'x' vs 'y', colored by the 'category' column.
fig = px.scatter(df, x='x', y='y', color='category')
fig.show()
Components
Decomposition
from statsmodels.tsa.seasonal import seasonal_decompose
# Additive seasonal decomposition of `series`, using a seasonal period of 12.
result = seasonal_decompose(
    series,
    model='additive',
    period=12,
)
# Plot the decomposition result.
result.plot()
Forecasting
from scipy.stats import chi2_contingency
# 2x2 contingency table: one row per variant, columns are (success, fail).
control_row = [control_success, control_fail]
test_row = [test_success, test_fail]
contingency_table = [control_row, test_row]

# Chi-square test of independence between variant and outcome.
# NOTE: for a 2x2 table SciPy applies Yates' continuity correction by default.
chi2, p_value, dof, expected = chi2_contingency(contingency_table)
Key Metrics