# -*- coding: utf-8 -*-
"""1957_249_949

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1q6DU2jTXfNY0uMxaBV2w2niCrYcsW86S

Fit a random-forest regressor on per-row summaries of yearly internet-usage
figures and use it to forecast the years that follow the last observed one.
"""

import os

import numpy as np
import pandas as pd

# Locations and modelling choices used by main().
KAGGLE_INPUT_DIR = '/kaggle/input'
DATA_PATH = '/content/internet_usage.csv'
FEATURES = ['avg_usage', 'usage_change', 'rate_change']
TARGET_YEAR = 2023
FUTURE_YEARS = [2024, 2025]


def list_input_files(root=KAGGLE_INPUT_DIR):
    """Print the full path of every file found below *root*."""
    for dirname, _, filenames in os.walk(root):
        for filename in filenames:
            print(os.path.join(dirname, filename))


def clean_usage_table(raw):
    """Return a numeric, gap-free copy of *raw* plus its year column labels.

    Every column from the third one onward is treated as a year column
    (NOTE(review): this assumes the first two columns are identifiers --
    confirm against the CSV). Year columns are coerced to numbers, rows with
    no usable year value are dropped, and remaining gaps are filled with the
    column mean.

    Args:
        raw: DataFrame as read from the CSV; it is not modified.

    Returns:
        ``(data, year_cols)`` -- the cleaned frame and the list of year
        column labels in their original order.

    Raises:
        ValueError: if *raw* has no columns beyond the first two.
    """
    data = raw.copy()
    year_cols = list(data.columns[2:])
    if not year_cols:
        raise ValueError("expected at least one year column after the first two columns")

    data[year_cols] = data[year_cols].apply(pd.to_numeric, errors='coerce')
    data = data.dropna(subset=year_cols, how='all')
    data = data.fillna(data.mean(numeric_only=True))
    return data, year_cols


def build_features(usage):
    """Summarise a block of yearly columns into the model's three features.

    Args:
        usage: DataFrame holding only year columns, oldest first. Column
            labels must be convertible with ``int()``.

    Returns:
        DataFrame indexed like *usage* with columns ``avg_usage`` (row mean),
        ``usage_change`` (last column minus first) and ``rate_change``
        (``usage_change`` divided by the number of years spanned).

    Raises:
        ValueError: if fewer than two year columns are supplied or the first
            and last labels are the same year, because the rate of change
            would require a division by zero.
    """
    years = [int(col) for col in usage.columns]
    if len(years) < 2 or years[-1] == years[0]:
        raise ValueError("need at least two distinct year columns to compute a rate of change")

    summary = pd.DataFrame(index=usage.index)
    summary['avg_usage'] = usage.mean(axis=1)
    summary['usage_change'] = usage.iloc[:, -1] - usage.iloc[:, 0]
    summary['rate_change'] = summary['usage_change'] / (years[-1] - years[0])
    return summary


def train_usage_model(X, y):
    """Fit the random forest on an 80/20 split and return it with the hold-out.

    Returns:
        ``(model, X_test, y_test)``.
    """
    # Imported here so the pandas-only helpers in this module can be imported
    # without scikit-learn being installed.
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.model_selection import train_test_split

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    return model, X_test, y_test


def report_metrics(y_test, y_pred):
    """Print MSE, MAE and R-squared for the hold-out predictions."""
    from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(f"Mean Squared Error: {mse}")
    print(f"Mean Absolute Error: {mae}")
    print(f"R-squared: {r2}")


def plot_diagnostics(model, y_test, y_pred, feature_names):
    """Show the actual-vs-predicted scatter and the feature-importance bars."""
    # Plotting libraries are only needed when the figures are drawn.
    import matplotlib.pyplot as plt
    import seaborn as sns

    lowest, highest = min(y_test), max(y_test)
    plt.figure(figsize=(10, 6))
    plt.scatter(y_test, y_pred)
    plt.xlabel("Actual Values")
    plt.ylabel("Predicted Values")
    plt.title("Actual vs. Predicted Values")
    # Red diagonal marks where a perfect prediction would fall.
    plt.plot([lowest, highest], [lowest, highest], color='red')
    plt.show()

    plt.figure(figsize=(10, 6))
    sns.barplot(x=model.feature_importances_, y=feature_names)
    plt.title("Feature Importance")
    plt.show()


def predict_future_usage(model, data, features, future_years, year_cols=None):
    """Predict usage for each year in *future_years*.

    Args:
        model: fitted estimator exposing ``predict``.
        data: DataFrame with one row per entity. It is never modified.
        features: names of the feature columns the model was trained on.
        future_years: years to predict.
        year_cols: optional ordered year column labels of *data*, oldest
            first, whose LAST entry is the year the model was trained to
            predict. When given, the forecast is rolled forward one year at
            a time: the feature window (``len(year_cols) - 1`` columns wide)
            slides over the observed columns and the predictions made so
            far, and the features are rebuilt with ``build_features`` before
            each step. When omitted, ``data[features]`` is used unchanged for
            every year, so all years receive the same prediction.

    Returns:
        dict mapping each requested year to a Series of predictions indexed
        like *data* and named ``str(year)``.

    Raises:
        ValueError: with *year_cols*, if fewer than three columns are given
            or a requested year is not after the last observed year.
    """
    future_years = list(future_years)
    if not future_years:
        return {}

    if year_cols is None:
        # Static features: one prediction, labelled per year.
        static = pd.Series(model.predict(data[features]), index=data.index)
        return {year: static.rename(str(year)) for year in future_years}

    year_cols = list(year_cols)
    window_size = len(year_cols) - 1
    if window_size < 2:
        raise ValueError("year_cols must contain at least three year columns")
    last_known = int(year_cols[-1])
    if min(future_years) <= last_known:
        raise ValueError(f"future years must come after {last_known}")

    # Work on a private copy so predicted columns never leak into `data`.
    known = data[year_cols].copy()
    rolled = {}
    for year in range(last_known + 1, max(future_years) + 1):
        step = build_features(known.iloc[:, -window_size:])
        known[str(year)] = model.predict(step[list(features)])
        rolled[year] = known[str(year)]
    return {year: rolled[year] for year in future_years}


def main():
    """Load the CSV, train and evaluate the model, then print the forecasts."""
    list_input_files()

    data, year_cols = clean_usage_table(pd.read_csv(DATA_PATH))
    target = str(TARGET_YEAR)

    # Build the predictors only from years before the target so the target
    # value never appears inside its own features.
    history_cols = [col for col in year_cols if int(col) < TARGET_YEAR]
    X = build_features(data[history_cols])[FEATURES]
    y = data[target]

    model, X_test, y_test = train_usage_model(X, y)
    y_pred = model.predict(X_test)
    report_metrics(y_test, y_pred)
    plot_diagnostics(model, y_test, y_pred, X.columns)

    future_predictions = predict_future_usage(
        model, data, FEATURES, FUTURE_YEARS, year_cols=history_cols + [target]
    )
    print("\nFuture Predictions:")
    for year, predictions in future_predictions.items():
        print(f"Predictions for {year}:")
        print(predictions.head())


if __name__ == "__main__":
    main()