{ "cells": [ { "cell_type": "markdown", "id": "393947c2-a7b9-44cd-9818-1ad2f52fcacc", "metadata": {}, "source": [ "# Imports and data preparation" ] }, { "cell_type": "code", "execution_count": 1, "id": "faa42afd-3493-446a-9b6c-fe7714a046d1", "metadata": {}, "outputs": [], "source": [ "from tqdm import tqdm\n", "import pandas as pd\n", "import json\n", "import numpy as np\n", "import random \n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier\n", "from sklearn.svm import SVC\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.metrics import classification_report, roc_auc_score, accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef, make_scorer, jaccard_score\n", "from sklearn.model_selection import cross_validate\n", "from sklearn.svm import LinearSVR\n", "from utils.helpers import *" ] }, { "cell_type": "code", "execution_count": 2, "id": "d71073ba", "metadata": {}, "outputs": [], "source": [ "df = pd.read_parquet('data/dataset_01_all.parquet')" ] }, { "cell_type": "code", "execution_count": 3, "id": "390bad53", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | 18 | \n", "19 | \n", "20 | \n", "21 | \n", "22 | \n", "23 | \n", "24 | \n", "25 | \n", "26 | \n", "27 | \n", "... | \n", "159 | \n", "160 | \n", "161 | \n", "162 | \n", "163 | \n", "164 | \n", "soil | \n", "name | \n", "fold | \n", "train | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
5961 | \n", "0.0478 | \n", "0.0491 | \n", "0.0519 | \n", "0.0532 | \n", "0.0536 | \n", "0.0535 | \n", "0.0535 | \n", "0.0538 | \n", "0.0546 | \n", "0.0555 | \n", "... | \n", "0.1585 | \n", "0.1588 | \n", "0.1592 | \n", "0.1595 | \n", "0.1598 | \n", "0.1602 | \n", "1.0 | \n", "10801920.0 | \n", "1.0 | \n", "1.0 | \n", "
11667 | \n", "0.0424 | \n", "0.0441 | \n", "0.0467 | \n", "0.0487 | \n", "0.0486 | \n", "0.0487 | \n", "0.0483 | \n", "0.0488 | \n", "0.0499 | \n", "0.0507 | \n", "... | \n", "0.1504 | \n", "0.1507 | \n", "0.1509 | \n", "0.1512 | \n", "0.1515 | \n", "0.1518 | \n", "1.0 | \n", "10801920.0 | \n", "1.0 | \n", "1.0 | \n", "
7145 | \n", "0.0462 | \n", "0.0478 | \n", "0.0497 | \n", "0.0508 | \n", "0.0508 | \n", "0.0506 | \n", "0.0510 | \n", "0.0515 | \n", "0.0525 | \n", "0.0532 | \n", "... | \n", "0.1564 | \n", "0.1567 | \n", "0.1570 | \n", "0.1572 | \n", "0.1575 | \n", "0.1577 | \n", "1.0 | \n", "10801920.0 | \n", "1.0 | \n", "1.0 | \n", "
13835 | \n", "0.0452 | \n", "0.0460 | \n", "0.0482 | \n", "0.0489 | \n", "0.0493 | \n", "0.0496 | \n", "0.0499 | \n", "0.0506 | \n", "0.0520 | \n", "0.0524 | \n", "... | \n", "0.1549 | \n", "0.1552 | \n", "0.1556 | \n", "0.1559 | \n", "0.1562 | \n", "0.1565 | \n", "0.0 | \n", "10801920.0 | \n", "1.0 | \n", "1.0 | \n", "
5139 | \n", "0.0453 | \n", "0.0465 | \n", "0.0488 | \n", "0.0498 | \n", "0.0495 | \n", "0.0501 | \n", "0.0504 | \n", "0.0515 | \n", "0.0523 | \n", "0.0530 | \n", "... | \n", "0.1452 | \n", "0.1455 | \n", "0.1459 | \n", "0.1463 | \n", "0.1466 | \n", "0.1469 | \n", "1.0 | \n", "10801920.0 | \n", "1.0 | \n", "1.0 | \n", "
5 rows × 151 columns
\n", "\n", " | Index | \n", "Value | \n", "IndexPreds | \n", "Preds | \n", "Soil | \n", "CM | \n", "
---|---|---|---|---|---|---|
5133 | \n", "OSAVI | \n", "0.137800 | \n", "y_osavi | \n", "0 | \n", "1.0 | \n", "FP | \n", "
2014 | \n", "BSI | \n", "0.206537 | \n", "y_bsi | \n", "1 | \n", "1.0 | \n", "TP | \n", "
9500 | \n", "SR | \n", "2.381135 | \n", "y_sr | \n", "0 | \n", "0.0 | \n", "TN | \n", "
9432 | \n", "SR | \n", "1.592430 | \n", "y_sr | \n", "0 | \n", "0.0 | \n", "TN | \n", "
7955 | \n", "EVI | \n", "0.071112 | \n", "y_evi | \n", "1 | \n", "1.0 | \n", "TP | \n", "
2901 | \n", "BSI | \n", "0.348636 | \n", "y_bsi | \n", "0 | \n", "0.0 | \n", "TN | \n", "
4783 | \n", "OSAVI | \n", "0.085839 | \n", "y_osavi | \n", "1 | \n", "1.0 | \n", "TP | \n", "
9623 | \n", "SR | \n", "1.715224 | \n", "y_sr | \n", "0 | \n", "0.0 | \n", "TN | \n", "
5307 | \n", "OSAVI | \n", "0.305338 | \n", "y_osavi | \n", "0 | \n", "0.0 | \n", "TN | \n", "
7729 | \n", "EVI | \n", "0.238622 | \n", "y_evi | \n", "0 | \n", "0.0 | \n", "TN | \n", "