Posts

Showing posts from May, 2025
 1. import pandas as pd import numpy as np import seaborn as sns import matplotlib.pyplot as plt from sklearn.datasets import fetch_california_housing data = fetch_california_housing(as_frame=True) housing_df = data.frame numerical_features = housing_df.select_dtypes(include=[np.number]).columns plt.figure(figsize=(15, 10)) for i, feature in enumerate(numerical_features):     plt.subplot(3, 3, i + 1)     sns.histplot(housing_df[feature], kde=True, bins=30, color='blue')     plt.title(f'Distribution of {feature}') plt.tight_layout() plt.show() plt.figure(figsize=(15, 10)) for i, feature in enumerate(numerical_features):     plt.subplot(3, 3, i + 1)     sns.boxplot(x=housing_df[feature], color='orange')     plt.title(f'Box Plot of {feature}') plt.tight_layout() plt.show() print("Outliers Detection:") outliers_summary = {} for feature in numerical_features:     Q1 = housing_df[feature].quantile(0.25)     Q...