Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
278 changes: 278 additions & 0 deletions src/components/Learn/tours/firstPipeline.tour.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,278 @@
{
"id": "first-pipeline",
"displayName": "Guided Tour: Build Your First Pipeline",
"requiresEditor": true,
"steps": [
{
"selector": "[data-tour-anchor=\"no-spotlight\"]",
"content": "Let's build your first pipeline!\n\nA pipeline is a visual graph of three kinds of nodes: **task nodes** that do the work, **input nodes** that pass run-time parameters, and **output nodes** that capture results.\n\nIn this tour we'll connect some components into a working pipeline that loads data, trains a model, and makes predictions.",
"position": "center"
},
{
"selector": "[data-folder-name=\"Standard library\"]",
"mutationObservables": [
"[data-dock-window-content=\"component-library\"]"
],
"resizeObservables": ["[data-dock-window-content=\"component-library\"]"],
"content": "Let's start by opening the **Standard library** folder in the Component Library.",
"position": "right",
"stepInteraction": true,
"interaction": "expand-folder",
"targetFolderName": "Standard library"
},
{
"selector": "[data-folder-name=\"Quick start\"]",
"mutationObservables": [
"[data-dock-window-content=\"component-library\"]"
],
"resizeObservables": ["[data-dock-window-content=\"component-library\"]"],
"content": "Now open **Quick start** to see three premade components.",
"position": "right",
"stepInteraction": true,
"interaction": "expand-folder",
"targetFolderName": "Quick start"
},
{
"selector": "[data-component-name=\"Chicago Taxi Trips dataset\"]",
"mutationObservables": [
"[data-dock-window-content=\"component-library\"]"
],
"resizeObservables": ["[data-dock-window-content=\"component-library\"]"],
"content": "Drag the **Chicago Taxi Trips dataset** onto the canvas. It becomes a **task**, one step in your pipeline.\n\nA task is a unit of execution: it takes inputs, runs some code, and produces outputs.",
"position": "right",
"stepInteraction": true,
"interaction": "add-task",
"targetTaskName": "Chicago Taxi Trips dataset"
},
{
"selector": "[data-tour=\"library-search\"]",
"mutationObservables": [
"[data-dock-window-content=\"component-library\"]"
],
"resizeObservables": ["[data-dock-window-content=\"component-library\"]"],
"content": "When you know what you're looking for, the **search box** is faster than browsing folders.\n\nType **predict** to filter the library down to matching components.",
"position": "right",
"stepInteraction": true,
"interaction": "library-search",
"targetSearchTerm": "predict"
},
{
"selector": "[data-component-name*=\"xgboost predict on csv\" i]",
"mutationObservables": [
"[data-dock-window-content=\"component-library\"]"
],
"resizeObservables": ["[data-dock-window-content=\"component-library\"]"],
"content": "Drag **Xgboost predict on CSV** from the search results onto the canvas.",
"position": "right",
"stepInteraction": true,
"interaction": "add-task",
"targetTaskName": "Xgboost predict on CSV"
},
{
"selector": "[data-dock-window-content=\"component-library\"]",
"highlightedSelectors": [
"[data-dock-window=\"component-library\"]",
"[data-dock-window-content=\"component-library\"]"
],
"ringSelectors": [
"[data-component-name*=\"train xgboost model on csv\" i]"
],
"mutationObservables": [
"[data-dock-window-content=\"component-library\"]"
],
"resizeObservables": ["[data-dock-window-content=\"component-library\"]"],
"content": "Now add **Train XGBoost model on CSV** to the canvas.\n\nFind it however you like. Search for **train**, or clear the search box and browse Quick start.",
"stepInteraction": true,
"interaction": "add-task",
"targetTaskName": "Train XGBoost model on CSV"
},
{
"selector": "[data-tour=\"editor-canvas\"]",
"highlightedSelectors": ["[data-tour=\"editor-canvas\"]"],
"resizeObservables": ["[data-tour=\"editor-canvas\"]"],
"content": "You now have three tasks on the canvas, but they're not connected yet.\n\nTasks pass data through **edges** that link one task's output to another task's input. Whatever a task produces flows along the edge to the next step; Tangle handles the actual storage and transfer behind the scenes.",
"position": [16, 80]
},
{
"selector": "[data-tour=\"editor-canvas\"]",
"highlightedSelectors": ["[data-tour=\"editor-canvas\"]"],
"resizeObservables": ["[data-tour=\"editor-canvas\"]"],
"content": "Before we start connecting things, take a moment to **lay out your tasks**. Click and drag each task to reposition it so nothing overlaps. The clearer the layout, the easier the next few steps will be.",
"position": [16, 80]
},
{
"selector": "[data-tour=\"editor-canvas\"]",
"highlightedSelectors": ["[data-tour=\"editor-canvas\"]"],
"ringSelectors": [
"[data-task-name=\"Chicago Taxi Trips dataset\"] [data-handleid=\"output_Table\"]",
"[data-task-name=\"Train XGBoost model on CSV\"] [data-handleid=\"input_training_data\"]"
],
"resizeObservables": ["[data-tour=\"editor-canvas\"]"],
"content": "Let's make the first connection.\n\nDrag from the **dataset's** `Table` **output** (right side) to the **train task's** `training_data` **input** (left side). This feeds your data into the training step.",
"position": [16, 80],
"stepInteraction": true,
"interaction": "connect-edge",
"targetEdge": {
"sourceTaskName": "Chicago Taxi Trips dataset",
"sourcePortName": "Table",
"targetTaskName": "Train XGBoost model on CSV",
"targetPortName": "training_data"
}
},
{
"selector": "[data-tour=\"editor-canvas\"]",
"highlightedSelectors": ["[data-tour=\"editor-canvas\"]"],
"ringSelectors": [
"[data-task-name=\"Chicago Taxi Trips dataset\"] [data-handleid=\"output_Table\"]",
"[data-task-name=\"Xgboost predict on CSV\"] [data-handleid=\"input_data\"]"
],
"resizeObservables": ["[data-tour=\"editor-canvas\"]"],
"content": "The prediction step needs the same data.\n\nDrag from the **dataset's** `Table` **output** to the **predict task's** `data` **input**.",
"position": [16, 80],
"stepInteraction": true,
"interaction": "connect-edge",
"targetEdge": {
"sourceTaskName": "Chicago Taxi Trips dataset",
"sourcePortName": "Table",
"targetTaskName": "Xgboost predict on CSV",
"targetPortName": "data"
}
},
{
"selector": "[data-tour=\"editor-canvas\"]",
"highlightedSelectors": ["[data-tour=\"editor-canvas\"]"],
"ringSelectors": [
"[data-task-name=\"Train XGBoost model on CSV\"] [data-handleid=\"output_model\"]",
"[data-task-name=\"Xgboost predict on CSV\"] [data-handleid=\"input_model\"]"
],
"resizeObservables": ["[data-tour=\"editor-canvas\"]"],
"content": "One more edge.\n\nDrag from the **train task's** `model` **output** to the **predict task's** `model` **input**. This hands what the trainer learned to the predictor.",
"position": [16, 80],
"stepInteraction": true,
"interaction": "connect-edge",
"targetEdge": {
"sourceTaskName": "Train XGBoost model on CSV",
"sourcePortName": "model",
"targetTaskName": "Xgboost predict on CSV",
"targetPortName": "model"
}
},
{
"selector": "[data-folder-name=\"Inputs & Outputs\"]",
"ringSelectors": ["[data-component-name=\"Output Node\"]"],
"mutationObservables": [
"[data-dock-window-content=\"component-library\"]"
],
"resizeObservables": ["[data-dock-window-content=\"component-library\"]"],
"content": "Let's expose the model's predictions at the pipeline boundary.\n\nOpen the **Inputs & Outputs** folder in the Component Library and drag an **Output Node** onto the canvas.\n\nOutput nodes capture task results so they're easy to find after the pipeline runs.",
"position": "right",
"stepInteraction": true,
"interaction": "add-output",
"targetComponentName": "Output Node",
"resetLibrarySearch": true
},
{
"selector": "[data-tour=\"editor-canvas\"]",
"highlightedSelectors": ["[data-tour=\"editor-canvas\"]"],
"ringSelectors": [
"[data-task-name=\"Xgboost predict on CSV\"] [data-handleid=\"output_predictions\"]",
"[data-tour-node=\"output\"]"
],
"resizeObservables": ["[data-tour=\"editor-canvas\"]"],
"content": "Now connect the predict task to your new Output node.\n\nDrag from the **predict task's** `predictions` **output** (right side) to the **Output node's** input handle (left side of the new node).",
"position": [16, 80],
"stepInteraction": true,
"interaction": "connect-edge",
"targetEdge": {
"sourceTaskName": "Xgboost predict on CSV",
"sourcePortName": "predictions"
}
},
{
"selector": "[data-tour=\"editor-canvas\"]",
"highlightedSelectors": ["[data-tour=\"editor-canvas\"]"],
"ringSelectors": [
"[data-task-name=\"Chicago Taxi Trips dataset\"] [data-handleid=\"input_Limit\"]"
],
"resizeObservables": ["[data-tour=\"editor-canvas\"]"],
"content": "Now for the other end: make the dataset's row count configurable.\n\nHold **Cmd** (or **Alt**) and drag from the dataset's `Limit` **input handle** (highlighted) onto an empty area of the canvas. This shortcut creates an **Input node** already connected to that handle.\n\nInput nodes are pipeline-level parameters set at submission time, so the same pipeline can be re-run with different settings.",
"position": [16, 80],
"stepInteraction": true,
"interaction": "add-input"
},
{
"selector": "[data-tour=\"editor-canvas\"]",
"highlightedSelectors": [
"[data-tour=\"editor-canvas\"]",
"[data-window-id=\"context-panel\"]"
],
"ringSelectors": [
"[data-tour-node=\"task\"][data-task-name=\"Train XGBoost model on CSV\"]"
],
"resizeObservables": [
"[data-tour=\"editor-canvas\"]",
"[data-window-id=\"context-panel\"]"
],
"content": "Last thing: there's one required argument on the training step.\n\n**Click the Train XGBoost task** to select it. Its details will appear in the **Task Properties** panel on the right.",
"position": [16, 80],
"stepInteraction": true,
"interaction": "select-task",
"targetTaskName": "Train XGBoost model on CSV"
},
{
"selector": "[data-window-id=\"context-panel\"]",
"highlightedSelectors": [
"[data-window-id=\"context-panel\"]",
"[data-dock-window-content=\"context-panel\"]"
],
"mutationObservables": [
"[data-window-id=\"context-panel\"]",
"[data-dock-window-content=\"context-panel\"]"
],
"resizeObservables": [
"[data-window-id=\"context-panel\"]",
"[data-dock-window-content=\"context-panel\"]"
],
"targetWindowId": "context-panel",
"content": "Task Properties lists every input the task accepts. Each one is an **argument** you can set directly, leave at its default, or feed from another source.\n\nArguments marked with a `*` are required.",
"position": "left"
},
{
"selector": "[data-window-id=\"context-panel\"]",
"highlightedSelectors": [
"[data-window-id=\"context-panel\"]",
"[data-dock-window-content=\"context-panel\"]"
],
"ringSelectors": ["[data-argument-name=\"label_column_name\"]"],
"mutationObservables": [
"[data-window-id=\"context-panel\"]",
"[data-dock-window-content=\"context-panel\"]"
],
"resizeObservables": [
"[data-window-id=\"context-panel\"]",
"[data-dock-window-content=\"context-panel\"]"
],
"targetWindowId": "context-panel",
"content": "Find the `label_column_name` argument (highlighted) and type **tips**. That tells XGBoost which column of the dataset to predict.",
"position": "left",
"stepInteraction": true,
"interaction": "set-argument",
"targetArgumentName": "label_column_name"
},
{
"selector": "[data-dock-window=\"runs-and-submission\"]",
"highlightedSelectors": [
"[data-dock-window=\"runs-and-submission\"]",
"[data-dock-window-content=\"runs-and-submission\"]"
],
"mutationObservables": [
"[data-dock-window-content=\"runs-and-submission\"]"
],
"resizeObservables": [
"[data-dock-window-content=\"runs-and-submission\"]"
],
"content": "Your pipeline is done. Three tasks, a configurable input, an output for the results, and the one required argument set.\n\nOpen **Runs and submission** in the left sidebar to run it. Use **Save as new pipeline** in the menu bar to keep this one.",
"position": "right"
}
]
}
28 changes: 27 additions & 1 deletion src/components/Learn/tours/registry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,34 @@ import type { StepType } from "@reactour/tour";
import { publicAsset } from "@/utils/publicAsset";

/**
 * A single guided-tour step: reactour's `StepType` plus the optional metadata
 * that tour definitions (e.g. `firstPipeline.tour.json`) attach to a step.
 *
 * Every extra field is optional, so a plain informational step only needs the
 * base `StepType` properties.
 */
export type TourStep = StepType & {
  /**
   * The kind of user action this step is tied to. Tour JSON pairs each value
   * with one of the `target*` fields below, e.g. "expand-folder" with
   * `targetFolderName` or "connect-edge" with `targetEdge`.
   */
  interaction?:
    | "undock-window"
    | "redock-window"
    | "select-task"
    | "add-task"
    | "add-input"
    | "add-output"
    | "connect-edge"
    | "expand-folder"
    | "library-search"
    | "set-argument";
  /** Id of the window the step refers to, e.g. "context-panel". */
  targetWindowId?: string;
  /** Component Library folder name, e.g. "Standard library". */
  targetFolderName?: string;
  /** Task argument name, e.g. "label_column_name". */
  targetArgumentName?: string;
  /** Text the user is asked to type into the library search, e.g. "predict". */
  targetSearchTerm?: string;
  /** Name of the task the step is about, e.g. "Chicago Taxi Trips dataset". */
  targetTaskName?: string;
  /** Name of the library component the step is about, e.g. "Output Node". */
  targetComponentName?: string;
  // targetTaskName / targetPortName are optional. When omitted, any new
  // binding from the source side counts (useful when the target is an IO
  // node with an auto-generated entity id we can't predict in JSON).
  targetEdge?: {
    sourceTaskName: string;
    sourcePortName: string;
    targetTaskName?: string;
    targetPortName?: string;
  };
  /**
   * CSS selectors for additional elements to emphasise during the step.
   * NOTE(review): presumably rendered as rings around the matches; confirm
   * against the tour renderer.
   */
  ringSelectors?: string[];
  /**
   * NOTE(review): presumably asks the tour to clear the Component Library
   * search when the step starts; confirm against the consumer.
   */
  resetLibrarySearch?: boolean;
  /**
   * Alternative step text.
   * NOTE(review): the condition under which it replaces `content` is not
   * visible here; confirm against the consumer.
   */
  fallbackContent?: string;
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@ interface IONodeSidebarItemProps {
}

export const IONodeSidebarItem = ({ nodeType }: IONodeSidebarItemProps) => {
const displayName = nodeType === "input" ? "Input Node" : "Output Node";
const onDragStart = useCallback(
(event: DragEvent) => {
event.dataTransfer.setData(
Expand All @@ -298,12 +299,11 @@ export const IONodeSidebarItem = ({ nodeType }: IONodeSidebarItemProps) => {
)}
draggable
onDragStart={onDragStart}
data-component-name={displayName}
>
<div className="flex items-center gap-2">
<Icon name="File" className="text-gray-400 shrink-0" />
<span className="truncate text-xs text-gray-800">
{nodeType === "input" ? "Input Node" : "Output Node"}
</span>
<span className="truncate text-xs text-gray-800">{displayName}</span>
</div>
</li>
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ const SearchRequestInput = ({ value, onChange }: SearchRequestProps) => {

return (
<InlineStack align="space-between" gap="2" className="w-full">
<div className="relative flex-1">
<div className="relative flex-1" data-tour="library-search">
<InputGroup
className="px-2 gap-2"
prefixElement={
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ const SearchInput = ({
}: SearchInputProps) => {
return (
<div className="px-2 pb-2 pt-1 flex items-center justify-between gap-2">
<div className="relative w-full">
<div className="relative w-full" data-tour="library-search">
<div className="absolute inset-y-0 left-0 flex items-center pl-2.5 z-10 pointer-events-none">
<Search className="h-3.5 w-3.5 text-gray-400" />
</div>
Expand Down
18 changes: 18 additions & 0 deletions src/providers/TourProvider/tourPipelineLifecycle.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import yaml from "js-yaml";

import type { TourDefinition } from "@/components/Learn/tours/registry";
import {
deleteEntry,
findByStorageKey,
} from "@/services/pipelineStorage/pipelineRegistry";
import type { PipelineStorageService } from "@/services/pipelineStorage/PipelineStorageService";
import { defaultPipelineYamlWithName } from "@/utils/constants";

Expand All @@ -18,6 +22,20 @@ export async function deleteTourPipelineByName(
}
}

/**
 * Sweeps an orphaned registry entry stored under the storage key `name`.
 *
 * Why it is needed: PipelineFile.deleteFile() performs driver.delete and then
 * deleteEntry as two separate awaits. When a fast remount lands between them
 * (or an earlier delete was interrupted), the driver no longer has the file
 * but the registry still holds the entry, so resolvePipelineByName returns
 * undefined while assertStorageKeyUnique still trips.
 *
 * This is best-effort: a failure is reported with console.warn and is never
 * rethrown.
 *
 * @param name Storage key of the registry entry to look up and remove.
 * @returns A promise that resolves once the sweep has been attempted.
 */
export async function clearStaleTourRegistryEntry(name: string): Promise<void> {
  try {
    const staleEntry = await findByStorageKey(name);
    if (!staleEntry) {
      // Nothing registered under this key, so there is nothing to sweep.
      return;
    }
    await deleteEntry(staleEntry.id);
  } catch (cause) {
    console.warn(`Failed to clear stale registry entry "${name}":`, cause);
  }
}

export async function cleanupOrphanTourPipelines(
storage: PipelineStorageService,
keep?: string | null,
Expand Down
Loading
Loading