{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7b57ccd0-a7f5-43a6-9938-68c775cf86f9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Follows the DBNL Quickstart at https://docs.dbnl.com/get-started/quickstart\n",
    "# %pip (rather than !pip) installs into the environment of the running kernel\n",
    "%pip install --upgrade dbnl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e99228e1-aeff-4d58-807a-eae4c94685f6",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from datetime import UTC, datetime, timedelta\n",
    "from getpass import getpass\n",
    "\n",
    "import dbnl\n",
    "import pandas as pd\n",
    "import pyarrow.parquet as pq\n",
    "\n",
    "# Make sure your version matches the docs at https://docs.dbnl.com/\n",
    "print(\"dbnl version:\", dbnl.__version__)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "26a736c9-a755-4e02-b4bb-fe8238498f6a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Login to DBNL (using default Sandbox url)\n",
    "# The token is taken from the DBNL_API_TOKEN environment variable, falling back\n",
    "# to a hidden prompt, so the secret is never written into the notebook file.\n",
    "dbnl.login(\n",
    "    api_url=\"http://localhost:8080/api\",\n",
    "    api_token=os.environ.get(\"DBNL_API_TOKEN\")\n",
    "    or getpass(\"DBNL API token (found at http://localhost:8080/tokens): \"),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1961d4ef-f120-4c98-bb26-c0e5b3119421",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get the quickstart project, or create it if needed.\n",
    "#   schedule: how often DBNL analyzes new data\n",
    "#   default_llm_model_name: the model from step (2) in the quickstart\n",
    "project = dbnl.get_or_create_project(\n",
    "    name=\"Quickstart Demo\",\n",
    "    schedule=\"daily\",\n",
    "    default_llm_model_name=\"quickstart_model\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "22dcdc85-02ec-4837-9736-a7e1a365af7c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fetch 14 days of real LLM conversation logs from the public S3 bucket\n",
    "base_path = \"s3://dbnl-demo-public/outing_agent_log_data\"\n",
    "day_dfs = []\n",
    "for i in range(1, 15):\n",
    "    table = pq.read_table(f\"{base_path}/day_{i:02d}.parquet\")\n",
    "    day_dfs.append(table.to_pandas(types_mapper=pd.ArrowDtype, ignore_metadata=True))\n",
    "\n",
    "# Shift all timestamps forward by a whole number of days so that the\n",
    "# newest record of the last day lands close to now and the data appears recent\n",
    "newest_ts = day_dfs[-1][\"timestamp\"].max()\n",
    "shift = timedelta(days=round((datetime.now(tz=UTC) - newest_ts) / timedelta(days=1)))\n",
    "for df in day_dfs:\n",
    "    df[\"timestamp\"] = df[\"timestamp\"] + shift"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cfc03866-28c4-437c-bd96-3b06c9525415",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Upload the data, DBNL needs at least 7 days to establish behavioral baselines\n",
    "print(\"Uploading data...\")\n",
    "print(f\"See status at: http://localhost:8080/ns/{project.namespace_id}/projects/{project.id}/status\")\n",
    "for idx, day_df in enumerate(day_dfs):\n",
    "    print(f\"{idx + 1} / {len(day_dfs)} publishing log data\")\n",
    "    # Each upload covers one whole day, starting at midnight of the day's earliest record.\n",
    "    # Series.min() is vectorized, and normalize() zeroes every sub-day field\n",
    "    # (replace(..., microsecond=0) would leave a nanosecond remainder behind).\n",
    "    data_start_t = pd.Timestamp(day_df[\"timestamp\"].min()).normalize()\n",
    "    data_end_t = data_start_t + timedelta(days=1)\n",
    "    try:\n",
    "        dbnl.log(\n",
    "            project_id=project.id,\n",
    "            data_start_time=data_start_t,\n",
    "            data_end_time=data_end_t,\n",
    "            data=day_df,\n",
    "        )\n",
    "    except Exception as e:\n",
    "        # Re-running this cell re-sends days that were already uploaded;\n",
    "        # skip those visibly, but surface any other failure.\n",
    "        if \"Data already exists\" in str(e):\n",
    "            print(f\"{idx + 1} / {len(day_dfs)} already uploaded, skipping\")\n",
    "            continue\n",
    "        raise\n",
    "\n",
    "print(\"You can now explore your data in DBNL!\")\n",
    "print(f\"http://localhost:8080/ns/{project.namespace_id}/projects/{project.id}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
