antitheft159 committed on
Commit
9c9dfe4
·
verified ·
1 Parent(s): fed8d5d

Upload 1957_249_949.py

Browse files
Files changed (1) hide show
  1. 1957_249_949.py +98 -0
1957_249_949.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """1957_249_949
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1q6DU2jTXfNY0uMxaBV2w2niCrYcsW86S
8
+ """
9
+
10
+ import numpy as np
11
+ import pandas as pd
12
+
13
+ import os
14
# Print the full path of every file found under the Kaggle input directory.
# os.walk silently yields nothing when the directory does not exist, so this
# loop is a no-op outside a Kaggle session.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
17
+
18
+ import pandas as pd
19
+ import numpy as np
20
+ from sklearn.model_selection import train_test_split
21
+ from sklearn.ensemble import RandomForestRegressor
22
+ from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
23
+ import matplotlib.pyplot as plt
24
+ import seaborn as sns
25
+
26
# Load the raw table from the CSV file.
data = pd.read_csv('/content/internet_usage.csv')

# Quick inspection of the first rows, last rows and summary statistics.
# In a notebook each expression renders as cell output; run as a plain script
# the results are evaluated and discarded.
data.head()

data.tail()

data.describe()
33
+
34
# --- Cleaning ---------------------------------------------------------------
# Every column after the first two is treated as a per-year value column whose
# header is the year itself (the int() conversion below relies on that).
# NOTE(review): assumes the two leading columns are identifiers - confirm
# against the CSV.
numeric_cols = data.columns[2:]
# Non-numeric cells become NaN instead of raising.
data[numeric_cols] = data[numeric_cols].apply(pd.to_numeric, errors='coerce')
# Drop rows that have no usable value in any year column ...
data = data.dropna(subset=numeric_cols, how='all')
# ... and fill the remaining gaps with the per-column mean.
data = data.fillna(data.mean(numeric_only=True))

# --- Target -----------------------------------------------------------------
target_year = 2023
target = str(target_year)

# --- Feature engineering ----------------------------------------------------
years = [int(col) for col in numeric_cols]

# Build the features from the non-target years only. Averaging over, or
# differencing against, the target column would leak the value being predicted
# into the model inputs and inflate every evaluation metric.
history_cols = [col for col in numeric_cols if col != target]
if len(history_cols) < 2:
    raise ValueError(
        "Need at least two year columns besides the target to build features"
    )
history_years = [int(col) for col in history_cols]
history = data[history_cols]

data['avg_usage'] = history.mean(axis=1)
# Change between the first and the last non-target year column.
data['usage_change'] = history.iloc[:, -1] - history.iloc[:, 0]
# Average change per year over that same span.
data['rate_change'] = data['usage_change'] / (history_years[-1] - history_years[0])

features = ['avg_usage', 'usage_change', 'rate_change']

X = data[features]
y = data[target]
50
+
51
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a 100-tree random forest on the training portion (seeded as well).
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predictions for the held-out rows, used by the evaluation below.
y_pred = model.predict(X_test)
57
+
58
# Score the held-out predictions with three regression metrics.
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"Mean Absolute Error: {mae}")
print(f"R-squared: {r2}")
65
+
66
# Scatter the held-out targets against the model's predictions.
plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_pred)
plt.xlabel("Actual Values")
plt.ylabel("Predicted Values")
plt.title("Actual vs. Predicted Values")
# Red y = x reference line spanning the observed target range: points on the
# line are perfect predictions. The bounds are computed once and reused for
# both axes.
lowest, highest = min(y_test), max(y_test)
plt.plot([lowest, highest], [lowest, highest], color='red')
plt.show()
73
+
74
# Importance score of each input column, as reported by the fitted model.
feature_importance = model.feature_importances_
feature_names = X.columns

# Horizontal bar chart: one bar per feature, bar length = importance.
plt.figure(figsize=(10, 6))
sns.barplot(x=feature_importance, y=feature_names)
plt.title("Feature Importance")
plt.show()
81
+
82
def predict_future_usage(model, data, features, future_years):
    """Predict a value per row for each requested year.

    For every year, ``model.predict`` is called on ``data[features]`` and the
    result is stored in ``data`` under the column ``str(year)``.

    Note:
        The feature columns are not updated between iterations, so a
        deterministic model yields the same values for every requested year.

    Args:
        model: Fitted estimator exposing ``predict(frame)`` that returns one
            value per row.
        data: DataFrame holding the ``features`` columns. It is modified in
            place: one column named ``str(year)`` is added (or overwritten)
            per requested year.
        features: Column labels passed to the model.
        future_years: Iterable of years to predict.

    Returns:
        dict mapping each year (as given) to a Series of predictions that
        shares ``data``'s index and is named ``str(year)``. Empty when
        ``future_years`` is empty.
    """
    predictions = {}
    for year in future_years:
        column = str(year)
        # Write straight into ``data``: the caller's frame receives the column
        # anyway, so copying the whole frame once per year is wasted work.
        data[column] = model.predict(data[features])
        # Return an independent Series so later edits to ``data`` do not alter
        # the reported predictions.
        predictions[year] = data[column].copy()

    return predictions
91
+
92
# Years to predict with the fitted model.
future_years = [2024, 2025]
future_predictions = predict_future_usage(model, data, features, future_years)

# Show the first few predicted rows for each requested year.
print("\nFuture Predictions:")
for year, predictions in future_predictions.items():
    print(f"Predictions for {year}:")
    print(predictions.head())