{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data Quality and Coverage\n",
    "\n",
    "Real-world time series often have gaps — sensor outages, missing transmissions, or maintenance windows. TimeDataModel provides built-in tools to visualize coverage and validate data integrity."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-03-01T13:36:32.963565Z",
     "iopub.status.busy": "2026-03-01T13:36:32.963423Z",
     "iopub.status.idle": "2026-03-01T13:36:33.048772Z",
     "shell.execute_reply": "2026-03-01T13:36:33.048484Z"
    }
   },
   "outputs": [
    {
     "ename": "ModuleNotFoundError",
     "evalue": "No module named 'timedatamodel'",
     "output_type": "error",
     "traceback": [
      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
      "\u001b[31mModuleNotFoundError\u001b[39m                       Traceback (most recent call last)",
      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 5\u001b[39m\n\u001b[32m      1\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mdatetime\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m datetime, timedelta, timezone\n\u001b[32m      3\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mnumpy\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mnp\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m5\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mtimedatamodel\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mtdm\u001b[39;00m\n\u001b[32m      7\u001b[39m base = datetime(\u001b[32m2024\u001b[39m, \u001b[32m1\u001b[39m, \u001b[32m15\u001b[39m, tzinfo=timezone.utc)\n\u001b[32m      8\u001b[39m week_hours = [base + timedelta(hours=i) \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[32m168\u001b[39m)]\n",
      "\u001b[31mModuleNotFoundError\u001b[39m: No module named 'timedatamodel'"
     ]
    }
   ],
   "source": [
    "from datetime import datetime, timedelta, timezone\n",
    "\n",
    "import numpy as np\n",
    "\n",
    "import timedatamodel as tdm\n",
    "\n",
    "base = datetime(2024, 1, 15, tzinfo=timezone.utc)\n",
    "week_hours = [base + timedelta(hours=i) for i in range(168)]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Coverage bars on a TimeSeries\n",
    "\n",
    "Create a week of hourly data with a simulated outage (hours 50-70 missing)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-03-01T13:36:33.050072Z",
     "iopub.status.busy": "2026-03-01T13:36:33.050005Z",
     "iopub.status.idle": "2026-03-01T13:36:33.063381Z",
     "shell.execute_reply": "2026-03-01T13:36:33.063042Z"
    }
   },
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'tdm' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
      "\u001b[31mNameError\u001b[39m                                 Traceback (most recent call last)",
      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[2]\u001b[39m\u001b[32m, line 9\u001b[39m\n\u001b[32m      2\u001b[39m values_full = (\u001b[32m100\u001b[39m + rng.normal(\u001b[32m0\u001b[39m, \u001b[32m15\u001b[39m, \u001b[32m168\u001b[39m)).tolist()\n\u001b[32m      4\u001b[39m values_with_gap = [\n\u001b[32m      5\u001b[39m     \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01mif\u001b[39;00m \u001b[32m50\u001b[39m <= i < \u001b[32m70\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m v\n\u001b[32m      6\u001b[39m     \u001b[38;5;28;01mfor\u001b[39;00m i, v \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(values_full)\n\u001b[32m      7\u001b[39m ]\n\u001b[32m----> \u001b[39m\u001b[32m9\u001b[39m ts_sensor = \u001b[43mtdm\u001b[49m.TimeSeries(\n\u001b[32m     10\u001b[39m     tdm.Frequency.PT1H,\n\u001b[32m     11\u001b[39m     timestamps=week_hours,\n\u001b[32m     12\u001b[39m     values=values_with_gap,\n\u001b[32m     13\u001b[39m     name=\u001b[33m\"\u001b[39m\u001b[33msensor_A\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m     14\u001b[39m     unit=\u001b[33m\"\u001b[39m\u001b[33mMW\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m     15\u001b[39m     data_type=tdm.DataType.MEASUREMENT,\n\u001b[32m     16\u001b[39m )\n\u001b[32m     18\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mHas missing: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mts_sensor.has_missing\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n\u001b[32m     19\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mTotal points: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(ts_sensor)\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m, missing: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28msum\u001b[39m(\u001b[32m1\u001b[39m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mfor\u001b[39;00m\u001b[38;5;250m \u001b[39mv\u001b[38;5;250m \u001b[39m\u001b[38;5;129;01min\u001b[39;00m\u001b[38;5;250m \u001b[39mts_sensor.values\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mif\u001b[39;00m\u001b[38;5;250m \u001b[39mv\u001b[38;5;250m \u001b[39m\u001b[38;5;129;01mis\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m)\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n",
      "\u001b[31mNameError\u001b[39m: name 'tdm' is not defined"
     ]
    }
   ],
   "source": [
    "rng = np.random.default_rng(42)\n",
    "values_full = (100 + rng.normal(0, 15, 168)).tolist()\n",
    "\n",
    "values_with_gap = [\n",
    "    None if 50 <= i < 70 else v\n",
    "    for i, v in enumerate(values_full)\n",
    "]\n",
    "\n",
    "ts_sensor = tdm.TimeSeries(\n",
    "    tdm.Frequency.PT1H,\n",
    "    timestamps=week_hours,\n",
    "    values=values_with_gap,\n",
    "    name=\"sensor_A\",\n",
    "    unit=\"MW\",\n",
    "    data_type=tdm.DataType.MEASUREMENT,\n",
    ")\n",
    "\n",
    "print(f\"Has missing: {ts_sensor.has_missing}\")\n",
    "print(f\"Total points: {len(ts_sensor)}, missing: {sum(1 for v in ts_sensor.values if v is None)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-03-01T13:36:33.064473Z",
     "iopub.status.busy": "2026-03-01T13:36:33.064399Z",
     "iopub.status.idle": "2026-03-01T13:36:33.070669Z",
     "shell.execute_reply": "2026-03-01T13:36:33.070312Z"
    }
   },
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'ts_sensor' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
      "\u001b[31mNameError\u001b[39m                                 Traceback (most recent call last)",
      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[3]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[43mts_sensor\u001b[49m.coverage_bar()\n",
      "\u001b[31mNameError\u001b[39m: name 'ts_sensor' is not defined"
     ]
    }
   ],
   "source": [
    "ts_sensor.coverage_bar()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Coverage bars on a TimeSeriesTable\n",
    "\n",
    "With multiple columns, each gets its own coverage row — making it easy to spot which signals have gaps."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-03-01T13:36:33.071638Z",
     "iopub.status.busy": "2026-03-01T13:36:33.071579Z",
     "iopub.status.idle": "2026-03-01T13:36:33.080321Z",
     "shell.execute_reply": "2026-03-01T13:36:33.079923Z"
    }
   },
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'tdm' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
      "\u001b[31mNameError\u001b[39m                                 Traceback (most recent call last)",
      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[4]\u001b[39m\u001b[32m, line 17\u001b[39m\n\u001b[32m      6\u001b[39m sensor_c = [\n\u001b[32m      7\u001b[39m     \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01mif\u001b[39;00m (\u001b[32m20\u001b[39m <= i < \u001b[32m30\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m \u001b[32m140\u001b[39m <= i < \u001b[32m155\u001b[39m) \u001b[38;5;28;01melse\u001b[39;00m v\n\u001b[32m      8\u001b[39m     \u001b[38;5;28;01mfor\u001b[39;00m i, v \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(values_full)\n\u001b[32m      9\u001b[39m ]\n\u001b[32m     11\u001b[39m vals = np.column_stack([\n\u001b[32m     12\u001b[39m     [v \u001b[38;5;28;01mif\u001b[39;00m v \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m np.nan \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m sensor_a],\n\u001b[32m     13\u001b[39m     [v \u001b[38;5;28;01mif\u001b[39;00m v \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m np.nan \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m sensor_b],\n\u001b[32m     14\u001b[39m     [v \u001b[38;5;28;01mif\u001b[39;00m v \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m np.nan \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m sensor_c],\n\u001b[32m     15\u001b[39m ])\n\u001b[32m---> \u001b[39m\u001b[32m17\u001b[39m table = \u001b[43mtdm\u001b[49m.TimeSeriesTable(\n\u001b[32m     18\u001b[39m     tdm.Frequency.PT1H,\n\u001b[32m     19\u001b[39m     timestamps=week_hours,\n\u001b[32m     20\u001b[39m     values=vals,\n\u001b[32m     21\u001b[39m     names=[\u001b[33m\"\u001b[39m\u001b[33msensor_A\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33msensor_B\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33msensor_C\u001b[39m\u001b[33m\"\u001b[39m],\n\u001b[32m     22\u001b[39m     units=[\u001b[33m\"\u001b[39m\u001b[33mMW\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mMW\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mMW\u001b[39m\u001b[33m\"\u001b[39m],\n\u001b[32m     23\u001b[39m )\n\u001b[32m     24\u001b[39m table.coverage_bar()\n",
      "\u001b[31mNameError\u001b[39m: name 'tdm' is not defined"
     ]
    }
   ],
   "source": [
    "sensor_a = values_with_gap\n",
    "sensor_b = [\n",
    "    None if 100 <= i < 130 else v\n",
    "    for i, v in enumerate(values_full)\n",
    "]\n",
    "sensor_c = [\n",
    "    None if (20 <= i < 30 or 140 <= i < 155) else v\n",
    "    for i, v in enumerate(values_full)\n",
    "]\n",
    "\n",
    "vals = np.column_stack([\n",
    "    [v if v is not None else np.nan for v in sensor_a],\n",
    "    [v if v is not None else np.nan for v in sensor_b],\n",
    "    [v if v is not None else np.nan for v in sensor_c],\n",
    "])\n",
    "\n",
    "table = tdm.TimeSeriesTable(\n",
    "    tdm.Frequency.PT1H,\n",
    "    timestamps=week_hours,\n",
    "    values=vals,\n",
    "    names=[\"sensor_A\", \"sensor_B\", \"sensor_C\"],\n",
    "    units=[\"MW\", \"MW\", \"MW\"],\n",
    ")\n",
    "table.coverage_bar()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Coverage bars on a TimeSeriesCube\n",
    "\n",
    "Cubes show one bar per label in the non-time dimension."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-03-01T13:36:33.081312Z",
     "iopub.status.busy": "2026-03-01T13:36:33.081259Z",
     "iopub.status.idle": "2026-03-01T13:36:33.089363Z",
     "shell.execute_reply": "2026-03-01T13:36:33.088986Z"
    }
   },
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'tdm' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
      "\u001b[31mNameError\u001b[39m                                 Traceback (most recent call last)",
      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[5]\u001b[39m\u001b[32m, line 7\u001b[39m\n\u001b[32m      1\u001b[39m cube_data = np.array([\n\u001b[32m      2\u001b[39m     [v \u001b[38;5;28;01mif\u001b[39;00m v \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m np.nan \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m sensor_a],\n\u001b[32m      3\u001b[39m     [v \u001b[38;5;28;01mif\u001b[39;00m v \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m np.nan \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m sensor_b],\n\u001b[32m      4\u001b[39m     [v \u001b[38;5;28;01mif\u001b[39;00m v \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m np.nan \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m sensor_c],\n\u001b[32m      5\u001b[39m ])\n\u001b[32m----> \u001b[39m\u001b[32m7\u001b[39m cube = \u001b[43mtdm\u001b[49m.TimeSeriesCube(\n\u001b[32m      8\u001b[39m     tdm.Frequency.PT1H,\n\u001b[32m      9\u001b[39m     dimensions=[\n\u001b[32m     10\u001b[39m         tdm.Dimension(\u001b[33m\"\u001b[39m\u001b[33msensor\u001b[39m\u001b[33m\"\u001b[39m, [\u001b[33m\"\u001b[39m\u001b[33mA\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mB\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mC\u001b[39m\u001b[33m\"\u001b[39m]),\n\u001b[32m     11\u001b[39m         tdm.Dimension(\u001b[33m\"\u001b[39m\u001b[33mvalid_time\u001b[39m\u001b[33m\"\u001b[39m, week_hours),\n\u001b[32m     12\u001b[39m     ],\n\u001b[32m     13\u001b[39m     values=cube_data,\n\u001b[32m     14\u001b[39m     name=\u001b[33m\"\u001b[39m\u001b[33msensor_grid\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m     15\u001b[39m     unit=\u001b[33m\"\u001b[39m\u001b[33mMW\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m     16\u001b[39m )\n\u001b[32m     17\u001b[39m cube.coverage_bar()\n",
      "\u001b[31mNameError\u001b[39m: name 'tdm' is not defined"
     ]
    }
   ],
   "source": [
    "cube_data = np.array([\n",
    "    [v if v is not None else np.nan for v in sensor_a],\n",
    "    [v if v is not None else np.nan for v in sensor_b],\n",
    "    [v if v is not None else np.nan for v in sensor_c],\n",
    "])\n",
    "\n",
    "cube = tdm.TimeSeriesCube(\n",
    "    tdm.Frequency.PT1H,\n",
    "    dimensions=[\n",
    "        tdm.Dimension(\"sensor\", [\"A\", \"B\", \"C\"]),\n",
    "        tdm.Dimension(\"valid_time\", week_hours),\n",
    "    ],\n",
    "    values=cube_data,\n",
    "    name=\"sensor_grid\",\n",
    "    unit=\"MW\",\n",
    ")\n",
    "cube.coverage_bar()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Coverage bars on a TimeSeriesCollection\n",
    "\n",
    "Collections map all series onto a shared global time range, so you can compare coverage across heterogeneous data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-03-01T13:36:33.090418Z",
     "iopub.status.busy": "2026-03-01T13:36:33.090362Z",
     "iopub.status.idle": "2026-03-01T13:36:33.097224Z",
     "shell.execute_reply": "2026-03-01T13:36:33.096838Z"
    }
   },
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'tdm' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
      "\u001b[31mNameError\u001b[39m                                 Traceback (most recent call last)",
      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[6]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m ts_short = \u001b[43mtdm\u001b[49m.TimeSeries(\n\u001b[32m      2\u001b[39m     tdm.Frequency.PT1H,\n\u001b[32m      3\u001b[39m     timestamps=week_hours[:\u001b[32m72\u001b[39m],\n\u001b[32m      4\u001b[39m     values=values_full[:\u001b[32m72\u001b[39m],\n\u001b[32m      5\u001b[39m     name=\u001b[33m\"\u001b[39m\u001b[33mshort_range\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m      6\u001b[39m     unit=\u001b[33m\"\u001b[39m\u001b[33mMW\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m      7\u001b[39m )\n\u001b[32m      9\u001b[39m collection = tdm.TimeSeriesCollection(\n\u001b[32m     10\u001b[39m     [ts_sensor, ts_short],\n\u001b[32m     11\u001b[39m     name=\u001b[33m\"\u001b[39m\u001b[33mSensor comparison\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m     12\u001b[39m )\n\u001b[32m     13\u001b[39m collection.coverage_bar()\n",
      "\u001b[31mNameError\u001b[39m: name 'tdm' is not defined"
     ]
    }
   ],
   "source": [
    "ts_short = tdm.TimeSeries(\n",
    "    tdm.Frequency.PT1H,\n",
    "    timestamps=week_hours[:72],\n",
    "    values=values_full[:72],\n",
    "    name=\"short_range\",\n",
    "    unit=\"MW\",\n",
    ")\n",
    "\n",
    "collection = tdm.TimeSeriesCollection(\n",
    "    [ts_sensor, ts_short],\n",
    "    name=\"Sensor comparison\",\n",
    ")\n",
    "collection.coverage_bar()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Validation\n",
    "\n",
    "`validate()` checks that timestamps are strictly increasing and that the step between consecutive timestamps matches the declared frequency. It returns a list of warning strings — empty means everything is fine."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-03-01T13:36:33.098168Z",
     "iopub.status.busy": "2026-03-01T13:36:33.098112Z",
     "iopub.status.idle": "2026-03-01T13:36:33.109355Z",
     "shell.execute_reply": "2026-03-01T13:36:33.108903Z"
    }
   },
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'ts_sensor' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
      "\u001b[31mNameError\u001b[39m                                 Traceback (most recent call last)",
      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[7]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m warnings = \u001b[43mts_sensor\u001b[49m.validate()\n\u001b[32m      2\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mWarnings for ts_sensor: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mwarnings\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n",
      "\u001b[31mNameError\u001b[39m: name 'ts_sensor' is not defined"
     ]
    }
   ],
   "source": [
    "warnings = ts_sensor.validate()\n",
    "print(f\"Warnings for ts_sensor: {warnings}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Catching problems\n",
    "\n",
    "Let's create a series with intentionally bad timestamps to trigger validation warnings."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-03-01T13:36:33.110296Z",
     "iopub.status.busy": "2026-03-01T13:36:33.110241Z",
     "iopub.status.idle": "2026-03-01T13:36:33.118222Z",
     "shell.execute_reply": "2026-03-01T13:36:33.117895Z"
    }
   },
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'tdm' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
      "\u001b[31mNameError\u001b[39m                                 Traceback (most recent call last)",
      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[8]\u001b[39m\u001b[32m, line 9\u001b[39m\n\u001b[32m      1\u001b[39m bad_timestamps = [\n\u001b[32m      2\u001b[39m     datetime(\u001b[32m2024\u001b[39m, \u001b[32m1\u001b[39m, \u001b[32m15\u001b[39m, \u001b[32m0\u001b[39m, tzinfo=timezone.utc),\n\u001b[32m      3\u001b[39m     datetime(\u001b[32m2024\u001b[39m, \u001b[32m1\u001b[39m, \u001b[32m15\u001b[39m, \u001b[32m1\u001b[39m, tzinfo=timezone.utc),\n\u001b[32m   (...)\u001b[39m\u001b[32m      6\u001b[39m     datetime(\u001b[32m2024\u001b[39m, \u001b[32m1\u001b[39m, \u001b[32m15\u001b[39m, \u001b[32m5\u001b[39m, tzinfo=timezone.utc),\n\u001b[32m      7\u001b[39m ]\n\u001b[32m----> \u001b[39m\u001b[32m9\u001b[39m ts_bad = \u001b[43mtdm\u001b[49m.TimeSeries(\n\u001b[32m     10\u001b[39m     tdm.Frequency.PT1H,\n\u001b[32m     11\u001b[39m     timestamps=bad_timestamps,\n\u001b[32m     12\u001b[39m     values=[\u001b[32m10.0\u001b[39m, \u001b[32m20.0\u001b[39m, \u001b[32m30.0\u001b[39m, \u001b[32m40.0\u001b[39m, \u001b[32m50.0\u001b[39m],\n\u001b[32m     13\u001b[39m     name=\u001b[33m\"\u001b[39m\u001b[33mbad_data\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m     14\u001b[39m )\n\u001b[32m     16\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m w \u001b[38;5;129;01min\u001b[39;00m ts_bad.validate():\n\u001b[32m     17\u001b[39m     \u001b[38;5;28mprint\u001b[39m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33m  WARNING: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mw\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n",
      "\u001b[31mNameError\u001b[39m: name 'tdm' is not defined"
     ]
    }
   ],
   "source": [
    "bad_timestamps = [\n",
    "    datetime(2024, 1, 15, 0, tzinfo=timezone.utc),\n",
    "    datetime(2024, 1, 15, 1, tzinfo=timezone.utc),\n",
    "    datetime(2024, 1, 15, 1, tzinfo=timezone.utc),  # duplicate!\n",
    "    datetime(2024, 1, 15, 4, tzinfo=timezone.utc),  # gap: skipped hours 2-3\n",
    "    datetime(2024, 1, 15, 5, tzinfo=timezone.utc),\n",
    "]\n",
    "\n",
    "ts_bad = tdm.TimeSeries(\n",
    "    tdm.Frequency.PT1H,\n",
    "    timestamps=bad_timestamps,\n",
    "    values=[10.0, 20.0, 30.0, 40.0, 50.0],\n",
    "    name=\"bad_data\",\n",
    ")\n",
    "\n",
    "for w in ts_bad.validate():\n",
    "    print(f\"  WARNING: {w}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Practical example: multi-sensor data feed audit\n",
    "\n",
    "Imagine you receive data from 5 sensors. Quickly assess which ones are reliable."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-03-01T13:36:33.119356Z",
     "iopub.status.busy": "2026-03-01T13:36:33.119304Z",
     "iopub.status.idle": "2026-03-01T13:36:33.127834Z",
     "shell.execute_reply": "2026-03-01T13:36:33.127407Z"
    }
   },
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'tdm' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
      "\u001b[31mNameError\u001b[39m                                 Traceback (most recent call last)",
      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[9]\u001b[39m\u001b[32m, line 16\u001b[39m\n\u001b[32m     13\u001b[39m         \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(start, end):\n\u001b[32m     14\u001b[39m             vals[i] = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m     15\u001b[39m     sensors.append(\n\u001b[32m---> \u001b[39m\u001b[32m16\u001b[39m         \u001b[43mtdm\u001b[49m.TimeSeries(\n\u001b[32m     17\u001b[39m             tdm.Frequency.PT1H,\n\u001b[32m     18\u001b[39m             timestamps=week_hours,\n\u001b[32m     19\u001b[39m             values=vals,\n\u001b[32m     20\u001b[39m             name=name,\n\u001b[32m     21\u001b[39m             unit=\u001b[33m\"\u001b[39m\u001b[33mMW\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m     22\u001b[39m         )\n\u001b[32m     23\u001b[39m     )\n\u001b[32m     25\u001b[39m audit = tdm.TimeSeriesCollection(sensors, name=\u001b[33m\"\u001b[39m\u001b[33mTurbine fleet audit\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m     26\u001b[39m audit.coverage_bar()\n",
      "\u001b[31mNameError\u001b[39m: name 'tdm' is not defined"
     ]
    }
   ],
   "source": [
    "gap_ranges = {\n",
    "    \"turbine_1\": [],\n",
    "    \"turbine_2\": [(30, 45)],\n",
    "    \"turbine_3\": [(10, 20), (80, 100)],\n",
    "    \"turbine_4\": [(0, 50)],\n",
    "    \"turbine_5\": [(60, 65), (120, 130), (150, 160)],\n",
    "}\n",
    "\n",
    "sensors = []\n",
    "for name, gaps in gap_ranges.items():\n",
    "    vals = values_full.copy()\n",
    "    for start, end in gaps:\n",
    "        for i in range(start, end):\n",
    "            vals[i] = None\n",
    "    sensors.append(\n",
    "        tdm.TimeSeries(\n",
    "            tdm.Frequency.PT1H,\n",
    "            timestamps=week_hours,\n",
    "            values=vals,\n",
    "            name=name,\n",
    "            unit=\"MW\",\n",
    "        )\n",
    "    )\n",
    "\n",
    "audit = tdm.TimeSeriesCollection(sensors, name=\"Turbine fleet audit\")\n",
    "audit.coverage_bar()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Summary\n",
    "\n",
    "- `coverage_bar()` is available on `TimeSeries`, `TimeSeriesTable`, `TimeSeriesCube`, and `TimeSeriesCollection`\n",
    "- It renders as a color-coded SVG in notebooks and Unicode blocks in terminals\n",
    "- `validate()` catches non-monotonic timestamps and frequency inconsistencies\n",
    "- `has_missing` is a quick boolean check for any gaps\n",
    "\n",
    "Next up: **nb_08** demonstrates I/O and interoperability with pandas, numpy, polars, JSON, and CSV."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.14.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
