ServiceNow · hkhrouf · Apr 27, 2026 · Apr 27, 2026 · May 19, 2026 · May 19, 2026
diff --git a/data/notebooks/csvs/flag-125.csv b/data/notebooks/csvs/flag-125.csv
diff --git a/data/notebooks/csvs/flag-80.csv b/data/notebooks/csvs/flag-80.csv
diff --git a/data/notebooks/csvs/flag-81.csv b/data/notebooks/csvs/flag-81.csv
diff --git a/data/notebooks/csvs/flag-82.csv b/data/notebooks/csvs/flag-82.csv
diff --git a/data/notebooks/csvs/flag-83.csv b/data/notebooks/csvs/flag-83.csv
diff --git a/data/notebooks/csvs/flag-84.csv b/data/notebooks/csvs/flag-84.csv
diff --git a/data/notebooks/csvs/flag-85.csv b/data/notebooks/csvs/flag-85.csv
diff --git a/data/notebooks/csvs/flag-86.csv b/data/notebooks/csvs/flag-86.csv
diff --git a/data/notebooks/csvs/flag-87.csv b/data/notebooks/csvs/flag-87.csv
diff --git a/data/notebooks/csvs/flag-88.csv b/data/notebooks/csvs/flag-88.csv
diff --git a/data/notebooks/csvs/flag-89.csv b/data/notebooks/csvs/flag-89.csv
diff --git a/data/notebooks/csvs/flag-90.csv b/data/notebooks/csvs/flag-90.csv
diff --git a/data/notebooks/csvs/flag-91.csv b/data/notebooks/csvs/flag-91.csv
diff --git a/data/notebooks/csvs/flag-92.csv b/data/notebooks/csvs/flag-92.csv
diff --git a/data/notebooks/csvs/flag-93.csv b/data/notebooks/csvs/flag-93.csv
diff --git a/data/notebooks/flag-1.ipynb b/data/notebooks/flag-1.ipynb
@@ -16,7 +16,7 @@
     "\n",
     "Difficulty: 4 out of 5 - This analysis requires advanced data analysis skills due to the complex and multi-layered nature of the investigation needed to extract meaningful insights.\n",
     "\n",
-    "Category: Incidents Management"
+    "Category: Incident Management"
    ]
   },
   {
@@ -341,7 +341,43 @@
     }
    ],
    "source": [
-    "{\n    \"data_type\": \"descriptive\",\n    \"insight\": \"hardware incidents is significantly higher than others\",\n    \"insight_value\": {\n        \"x_val\": \"Hardware\",\n        \"y_val\": 336\n    },\n    \"plot\": {\n        \"plot_type\": \"histogram\",\n        \"title\": \"Incidents by Category\",\n        \"x_axis\": {\n            \"name\": \"Category\",\n            \"value\": [\n                \"Hardware\",\n                \"Software\",\n                \"Network\",\n                \"Inquiry / Help\",\n                \"Database\"\n            ],\n            \"description\": \"This represents the different categories of incidents.\"\n        },\n        \"y_axis\": {\n            \"name\": \"Number of Incidents\",\n            \"value\": [\n                336,\n                41,\n                51,\n                32,\n                40\n            ],\n            \"description\": \"This represents the number of incidents in each category.\"\n        },\n        \"description\": \"The histogram displays the distribution of incidents across different categories. Each bar represents a category and the length of the bar corresponds to the number of incidents in that category. The values are annotated on each bar. The 'Hardware' category has the highest number of incidents.\"\n    },\n    \"question\": \"What is the distribution of incidents across all categories?\",\n    \"actionable_insight\": \"With the Hardware category having the highest number of incidents, it could be beneficial to allocate more resources or provide additional training to the team handling this category to effectively manage and resolve these incidents.\"\n}"
+    "{\n",
+    "    \"data_type\": \"descriptive\",\n",
+    "    \"insight\": \"hardware incidents is significantly higher than others\",\n",
+    "    \"insight_value\": {\n",
+    "        \"x_val\": \"Hardware\",\n",
+    "        \"y_val\": 336\n",
+    "    },\n",
+    "    \"plot\": {\n",
+    "        \"plot_type\": \"histogram\",\n",
+    "        \"title\": \"Incidents by Category\",\n",
+    "        \"x_axis\": {\n",
+    "            \"name\": \"Category\",\n",
+    "            \"value\": [\n",
+    "                \"Hardware\",\n",
+    "                \"Software\",\n",
+    "                \"Network\",\n",
+    "                \"Inquiry / Help\",\n",
+    "                \"Database\"\n",
+    "            ],\n",
+    "            \"description\": \"This represents the different categories of incidents.\"\n",
+    "        },\n",
+    "        \"y_axis\": {\n",
+    "            \"name\": \"Number of Incidents\",\n",
+    "            \"value\": [\n",
+    "                336,\n",
+    "                41,\n",
+    "                51,\n",
+    "                32,\n",
+    "                40\n",
+    "            ],\n",
+    "            \"description\": \"This represents the number of incidents in each category.\"\n",
+    "        },\n",
+    "        \"description\": \"The histogram displays the distribution of incidents across different categories. Each bar represents a category and the length of the bar corresponds to the number of incidents in that category. The values are annotated on each bar. The 'Hardware' category has the highest number of incidents.\"\n",
+    "    },\n",
+    "    \"question\": \"What is the distribution of incidents across all categories?\",\n",
+    "    \"actionable_insight\": \"With the Hardware category having the highest number of incidents, it could be beneficial to allocate more resources or provide additional training to the team handling this category to effectively manage and resolve these incidents.\"\n",
+    "}"
    ]
   },
   {
@@ -411,7 +447,7 @@
    "source": [
     "{\n",
     "    \"data_type\": \"diagnostic\",\n",
-    "    \"insight\": \"Specific hardware issues related Printer Malfucntioning are predominantly mentioned in incident descriptions\",\n",
+    "    \"insight\": \"Specific hardware issues related Printer Malfunctioning are predominantly mentioned in incident descriptions\",\n",
     "    \"insight_value\": {\n",
     "        \"category\": \"Hardware\",\n",
     "        \"common_words\": [\"printer\", \"Issue\", \"working properly\", \"malfunctioning\", \"Australia\"]\n",
@@ -1017,4 +1053,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}
diff --git a/data/notebooks/flag-1.json b/data/notebooks/flag-1.json
@@ -46,11 +46,11 @@
             },
             "question": "What is the distribution of incidents across all categories?",
             "actionable_insight": "With the Hardware category having the highest number of incidents, it could be beneficial to allocate more resources or provide additional training to the team handling this category to effectively manage and resolve these incidents.",
-            "code": "plot = df.groupby(\"category\").size().plot(kind=\"barh\", color=sns.palettes.mpl_palette(\"Dark2\"))\n\nfig = plt.gcf()\n\n\nfor i in plot.patches:\n    # Get X and Y placement of label from rectangle\n    x_value = i.get_width()\n    y_value = i.get_y() + i.get_height() / 2\n\n    # Use X value as label and format number with one decimal place\n    label = \"{:.1f}\".format(x_value)\n\n    # Create annotation\n    plt.annotate(\n        label,                      \n        (x_value, y_value),         \n        xytext=(-10, 0),            \n        textcoords=\"offset points\", \n        ha='right',                 \n        va='center'                 \n    )\n\n# Set plot title\nplt.title('Incidents Distribution by Category')\n\n# Set x-axis label\nplt.xlabel('Category')\n\n# Set y-axis label\nplt.ylabel('Number of Incidents')\n\n# Display the figure\nplt.show()"
+            "code": "\nplot = df.groupby(\"category\").size().plot(kind=\"barh\", color=sns.palettes.mpl_palette(\"Dark2\"))\n\nfig = plt.gcf()\n\n\nfor i in plot.patches:\n    # Get X and Y placement of label from rectangle\n    x_value = i.get_width()\n    y_value = i.get_y() + i.get_height() / 2\n\n    # Use X value as label and format number with one decimal place\n    label = \"{:.1f}\".format(x_value)\n\n    # Create annotation\n    plt.annotate(\n        label,                      \n        (x_value, y_value),         \n        xytext=(-10, 0),            \n        textcoords=\"offset points\", \n        ha='right',                 \n        va='center'                 \n    )\n\n# Set plot title\nplt.title('Incidents Distribution by Category')\n\n# Set x-axis label\nplt.xlabel('Category')\n\n# Set y-axis label\nplt.ylabel('Number of Incidents')\n\n# Display the figure\nplt.show()"
         },
         {
             "data_type": "diagnostic",
-            "insight": "Specific hardware issues related Printer Malfucntioning are predominantly mentioned in incident descriptions",
+            "insight": "Specific hardware issues related Printer Malfunctioning are predominantly mentioned in incident descriptions",
             "insight_value": {
                 "category": "Hardware",
                 "common_words": [
@@ -104,7 +104,7 @@
                 },
                 "plot description": "The bar plot displays the frequency of the keyword 'Printer' in the incident descriptions. The length of the bar corresponds to the frequency of the keyword. The 'Printer' keyword has a high frequency."
             },
-            "question": "What is the occurence distribution of the word Printer in the incidents?",
+            "question": "What is the occurrence distribution of the word \"Printer\" in the incidents?",
             "actionable_insight": "The high frequency of 'Printer' in incident descriptions indicates a specific issue with printers. A focused investigation into the printer issues, possibly involving the printer manufacturer or service provider, could help in resolving these incidents.",
             "code": "# Count the frequency of 'Printer' in 'short_description'\nprinter_incidents = df['short_description'].apply(lambda x: 'Printer' in x).sum()\n\n# Create a DataFrame for plotting\ndf_plot = pd.DataFrame({'Keyword': ['Printer'], 'Frequency': [printer_incidents]})\n\n# Plot the frequency\nplot = df_plot.plot(kind='bar', x='Keyword', y='Frequency', legend=False, color='blue')\n\n# Get the current figure for further manipulation\nfig = plt.gcf()\n\n# Loop through the rectangles (i.e., bars)\nfor i in plot.patches:\n    # Get X and Y placement of label from rectangle\n    x_value = i.get_x() + i.get_width() / 2\n    y_value = i.get_height()\n\n    # Use Y value as label and format number with one decimal place\n    label = \"{:.1f}\".format(y_value)\n\n    # Create annotation\n    plt.annotate(\n        label,                      # Use `label` as label\n        (x_value, y_value),         # Place label at end of the bar\n        xytext=(0, 5),              # Shift text slightly above bar\n        textcoords=\"offset points\", # Interpret `xytext` as offset in points\n        ha='center',                # Horizontally align label \n        va='bottom'                 # Vertically align label at bottom\n    )\n\n# Set plot title\nplt.title('Frequency of Printer in Incident Descriptions')\n\n# Set x-axis label\nplt.xlabel('Keyword')\n\n# Set y-axis label\nplt.ylabel('Frequency')\n\n# Display the figure\nplt.show()"
         },
@@ -216,7 +216,7 @@
     ],
     "insights": [
         "hardware incidents is significantly higher than others",
-        "Specific hardware issues related Printer Malfucntioning are predominantly mentioned in incident descriptions",
+        "Specific hardware issues related Printer Malfunctioning are predominantly mentioned in incident descriptions",
         "Most of the hardware incidents are related to printer issues",
         "Most of the hardware incidents are occurring in the Australia location",
         "There is not a significant increase in hardware incidents over time, it is relatively stable and higher than others.",

diff --git a/data/notebooks/flag-10.ipynb b/data/notebooks/flag-10.ipynb
@@ -335,7 +335,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### **Question 2:** Is there a correlation between the volume of incidents and the ttr?"
+    "### **Question 2:** **Is there a correlation between the volume of incidents and the Time to Resolution (TTR)?**"
    ]
   },
   {
@@ -458,7 +458,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### **Question 3:** Is the increase in ttr uniform across all categories of incidents or is it more pronounced in a specific category?"
+    "### **Question 3:** **Is the increase in Time to Resolution (TTR) uniform across all categories of incidents or is it more pronounced in a specific category?**"
    ]
   },
   {
@@ -564,7 +564,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### **Question 4**: Are there any trends in the productivity of the human agents over time?"
+    "### **Question 4**: How does the productivity compare across different human agents? Is the number of incidents resolved evenly distributed among agents?"
    ]
   },
   {
@@ -614,7 +614,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -655,7 +655,7 @@
     "    },\n",
     "    \"description\": \"The bar chart displays the number of incidents resolved per agent. Each bar represents an agent and the height of the bar represents the number of incidents resolved by that agent. The number of incidents resolved is more or less uniform across all agents, indicating that productivity is fairly balanced.\"\n",
     "},\n",
-    "\"question\": \"Are there any trends in the productivity of the human agents over time? For instance, is there a decrease in the number of incidents resolved per agent over time?\",\n",
+    "\"question\": \"How does the productivity compare across different human agents? Is the number of incidents resolved evenly distributed among agents?\",\n",
     "\"actionable_insight\": \"The uniform productivity across all agents suggests that the workload is evenly distributed and all agents are equally productive. This is a positive indicator of good workload management. However, it would still be beneficial to continually monitor agent productivity and workload to ensure this balance is maintained.\"\n",
     "}"
    ]
@@ -695,4 +695,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}
diff --git a/data/notebooks/flag-10.json b/data/notebooks/flag-10.json
@@ -6,7 +6,8 @@
         "role": "Incidents Manager",
         "category": "Incident Management",
         "dataset_description": "The dataset comprises 500 entries simulating ServiceNow incidents table, detailing various attributes such as category, state, open and close dates, involved personnel, and incident specifics like description, and priority. It captures incident management activities with fields like 'opened_at', 'closed_at', 'assigned_to', 'short_description', and 'priority', reflecting the operational handling and urgency of issues across different locations and categories.",
-        "header": "Incident Resolution Time Trends Analysis (Flag 10)"
+        "header": "Incident Resolution Time Trends Analysis (Flag 10)",
+        "difficulty": 3
     },
     "insight_list": [
         {
@@ -55,7 +56,7 @@
                 },
                 "description": "The dual-axis line plot displays the correlation between the volume of incidents and the TTR. The red line represents the number of incidents and the blue line represents the average TTR. As the number of incidents increases, the TTR also tends to increase, indicating a positive correlation."
             },
-            "question": "Is there a correlation between the volume of incidents and the ttr?",
+            "question": "Is there a correlation between the volume of incidents and the Time to Resolution (TTR)?",
             "actionable_insight": "The positive correlation between the volume of incidents and the TTR suggests that as the volume of incidents increases, the TTR also tends to increase. This could be due to resource constraints or inefficiencies in handling a larger volume of incidents. It would be beneficial to assess capacity planning and process efficiency to manage high volume of incidents.",
             "code": "# Group by opened_at date and calculate count of incidents and average ttr\ndf['ttr'] = df['closed_at'] - df['opened_at']\n\n# Convert ttr to days\ndf['ttr_days'] = df['ttr'].dt.days\nincident_ttr_trend = df.groupby(df['opened_at'].dt.date).agg({'number':'count', 'ttr_days':'mean'})\n\n# Plot the trend\nfig, ax1 = plt.subplots(figsize=(10,6))\n\ncolor = 'tab:red'\nax1.set_xlabel('Opened At')\nax1.set_ylabel('Number of Incidents', color=color)\nax1.plot(incident_ttr_trend.index, incident_ttr_trend['number'], color=color)\nax1.tick_params(axis='y', labelcolor=color)\n\nax2 = ax1.twinx()  \ncolor = 'tab:blue'\nax2.set_ylabel('Average TTR (Days)', color=color)  \nax2.plot(incident_ttr_trend.index, incident_ttr_trend['ttr_days'], color=color)\nax2.tick_params(axis='y', labelcolor=color)\n\nfig.tight_layout()  \nplt.title('Correlation Between Volume of Incidents And TTR')\nplt.grid(True)\nplt.show()"
         },
@@ -75,7 +76,7 @@
                 },
                 "description": "The multiple line plot displays the trend of TTR across different categories over time. Each line represents a category and the points on the line represent the average TTR for incidents of that category opened on a particular date. The trend is uniform across all categories, indicating that the increase in TTR is not specific to any particular category."
             },
-            "question": "Is the increase in ttr uniform across all categories of incidents or is it more pronounced in a specific category?",
+            "question": "Is the increase in Time to Resolution (TTR) uniform across all categories of incidents or is it more pronounced in a specific category?",
             "actionable_insight": "The uniform increase in TTR across all categories suggests that the issue is not specific to any particular category. This could indicate a systemic issue in the incident management process. It would be beneficial to investigate the overall process and identify areas for improvement to reduce the TTR.",
             "code": "# Group by category and opened_at date, then calculate average ttr\ncategory_ttr_trend = df.groupby(['category', df['opened_at'].dt.date])['ttr_days'].mean().reset_index()\n\n# Plot the trend for each category\nfig, ax = plt.subplots(figsize=(10,6))\n\nfor category in category_ttr_trend['category'].unique():\n    ax.plot(category_ttr_trend[category_ttr_trend['category'] == category]['opened_at'], \n            category_ttr_trend[category_ttr_trend['category'] == category]['ttr_days'], \n            label=category)\n\nplt.title('Trend of TTR Across Categories Over Time')\nplt.xlabel('Opened At')\nplt.ylabel('Average TTR (Days)')\nplt.legend(loc='best')\nplt.grid(True)\nplt.show()",
             "insight_value": {}
@@ -96,7 +97,7 @@
                 },
                 "description": "The bar chart displays the number of incidents resolved per agent. Each bar represents an agent and the height of the bar represents the number of incidents resolved by that agent. The number of incidents resolved is more or less uniform across all agents, indicating that productivity is fairly balanced."
             },
-            "question": "Are there any trends in the productivity of the human agents over time? For instance, is there a decrease in the number of incidents resolved per agent over time?",
+            "question": "How does the productivity compare across different human agents? Is the number of incidents resolved evenly distributed among agents?",
             "actionable_insight": "The uniform productivity across all agents suggests that the workload is evenly distributed and all agents are equally productive. This is a positive indicator of good workload management. However, it would still be beneficial to continually monitor agent productivity and workload to ensure this balance is maintained.",
             "code": "agent_incident_count = df.groupby('assigned_to')['number'].count()\n\n# Plot the histogram\nagent_incident_count.plot(kind='bar', figsize=(10,6))\n\nplt.title('Number of Incidents Resolved Per Agent')\nplt.xlabel('Agent')\nplt.ylabel('Number of Incidents Resolved')\nplt.grid(True)\nplt.xticks(rotation=45)\nplt.show()",
             "insight_value": {}