Skip to content

Commit 6f83d49

Browse files
authored
[FEAT] Expose dataset as json (huggingface#404)
* refactor: moving out the config generation
* create new endpoint route for configuration
* chore: add extra patterns to gitignore
* open dataset in a new tab
* copy url link
* review UI
* parse accept header
* review hover styles
* revert style
* renames
* chore: Explicit function call
* fix build
* revert styles
* review styles
1 parent dd0553a commit 6f83d49

File tree

9 files changed

+206
-141
lines changed

9 files changed

+206
-141
lines changed

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
# Development
88
node_modules
9-
.env
9+
*.env
1010
*.local
1111

1212
# Cache
@@ -50,3 +50,5 @@ lerna-debug.log*
5050
.data
5151
data
5252

53+
# Extra repositories
54+
repos

src/features/datasets/dataset-name.tsx

Lines changed: 42 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import {
66
useVisibleTask$,
77
} from '@builder.io/qwik';
88
import { server$ } from '@builder.io/qwik-city';
9+
import { LuLink } from '@qwikest/icons/lucide';
910
import { Input } from '~/components';
1011
import { useClickOutside } from '~/components/hooks/click/outside';
1112
import { Tooltip } from '~/components/ui/tooltip/tooltip';
@@ -20,6 +21,7 @@ export const DatasetName = component$(() => {
2021
error: '',
2122
name: '',
2223
displayName: activeDataset.value.name,
24+
copied: false,
2325
});
2426

2527
const { updateOnActiveDataset } = useDatasetsStore();
@@ -99,35 +101,51 @@ export const DatasetName = component$(() => {
99101

100102
return (
101103
<div class="w-fit max-w-1/2">
102-
{state.isEditing ? (
103-
<Input
104-
ref={inputRef}
105-
type="text"
106-
value={state.name}
107-
onInput$={handleChange}
108-
onKeyDown$={handleKeyDown}
109-
class="text-md h-6 font-bold p-0 border-none outline-none leading-none max-w-96"
110-
style={{
111-
width: `${state.name.length}ch`,
112-
}}
113-
/>
114-
) : isNameTruncated.value ? (
115-
<Tooltip text={state.name} floating="bottom-end">
104+
<div class="flex w-fit items-center gap-2">
105+
<Tooltip text={state.copied ? 'Copied' : 'Copy link'} floating="bottom">
106+
<LuLink
107+
class={[
108+
'text-neutral-500 hover:text-neutral-600 cursor-pointer',
109+
].join(' ')}
110+
onClick$={$(() => {
111+
navigator.clipboard.writeText(location.href);
112+
state.copied = true;
113+
setTimeout(() => {
114+
state.copied = false;
115+
}, 1200);
116+
})}
117+
/>
118+
</Tooltip>
119+
{state.isEditing ? (
120+
<Input
121+
ref={inputRef}
122+
type="text"
123+
value={state.name}
124+
onInput$={handleChange}
125+
onKeyDown$={handleKeyDown}
126+
class="text-md h-6 font-bold leading-none p-0 border-none outline-none max-w-96"
127+
style={{
128+
width: `${state.name.length}ch`,
129+
}}
130+
/>
131+
) : isNameTruncated.value ? (
132+
<Tooltip text={state.name}>
133+
<h1
134+
class="text-md h-6 font-bold leading-none mt-2 w-96 truncate text-ellipsis whitespace-nowrap"
135+
onClick$={handleEditClick}
136+
>
137+
{state.displayName}
138+
</h1>
139+
</Tooltip>
140+
) : (
116141
<h1
117-
class="text-md font-bold h-6 mt-2 leading-none w-96 truncate text-ellipsis whitespace-nowrap"
142+
class="flex h-12 w-full text-md h-6 font-bold leading-none mt-2 text-ellipsis whitespace-nowrap"
118143
onClick$={handleEditClick}
119144
>
120145
{state.displayName}
121146
</h1>
122-
</Tooltip>
123-
) : (
124-
<h1
125-
class="text-md font-bold h-6 mt-2 leading-none w-fit truncate text-ellipsis whitespace-nowrap"
126-
onClick$={handleEditClick}
127-
>
128-
{state.displayName}
129-
</h1>
130-
)}
147+
)}
148+
</div>
131149
<p class="text-red-300 absolute">{state.error}</p>
132150
</div>
133151
);

src/features/export/save-dataset.tsx

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
import { component$ } from '@builder.io/qwik';
2+
23
import { cn } from '@qwik-ui/utils';
34
import { LuDownload } from '@qwikest/icons/lucide';
45
import { Label, Popover, buttonVariants } from '~/components';
6+
import { Tooltip } from '~/components/ui/tooltip/tooltip';
57
import { useSession } from '~/loaders';
68
import { TEMPORAL_ID, useDatasetsStore } from '~/state';
79
import { ExportToHub } from './export-to-hub';
@@ -16,15 +18,17 @@ export const SaveDataset = component$(() => {
1618
<Popover.Trigger
1719
class={cn(
1820
buttonVariants({ look: 'secondary', size: 'sm' }),
19-
'disabled:cursor-not-allowed bg-white',
21+
'disabled:cursor-not-allowed bg-white text-neutral-500 hover:text-neutral-600 dark:bg-slate-800 dark:text-slate-400 dark:hover:text-slate-300',
2022
)}
2123
disabled={
2224
activeDataset.value.columns.filter((c) => c.id !== TEMPORAL_ID)
2325
.length === 0
2426
}
2527
>
26-
<Label class="flex items-center">
27-
<LuDownload class="w-4 h-4" />
28+
<Label class="flex items-center cursor-pointer">
29+
<Tooltip text="Download">
30+
<LuDownload class="w-4 h-4" />
31+
</Tooltip>
2832
</Label>
2933
</Popover.Trigger>
3034
<Popover.Panel class="w-86 text-sm shadow-lg p-2">

src/routes/home/dataset/[id]/index.tsx

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
import { component$ } from '@builder.io/qwik';
2-
import type { DocumentHead } from '@builder.io/qwik-city';
2+
import type {
3+
DocumentHead,
4+
RequestEvent,
5+
RequestHandler,
6+
} from '@builder.io/qwik-city';
37
import { Login } from '~/components/ui/login/Login';
48
import { MobileBanner } from '~/components/ui/mobile/banner';
59
import { Tips } from '~/components/ui/tips/tips';
@@ -11,6 +15,16 @@ import { Table } from '~/features/table';
1115
import { Username } from '~/features/user/username';
1216
import { useSession } from '~/loaders';
1317
import { ActiveDatasetProvider } from '~/state';
18+
import { datasetAsJson } from './json/utils';
19+
20+
/**
 * Content-negotiating GET handler for the dataset page.
 *
 * When the request's `Accept` header mentions `application/json`, the dataset
 * is answered as JSON through `datasetAsJson`. Otherwise the handler returns
 * without writing a response, leaving the request to the rest of the route.
 *
 * @param event - The incoming request event.
 */
export const onGet: RequestHandler = async (event: RequestEvent) => {
  // `Headers.get` matches header names case-insensitively, so a single lookup
  // covers both the `Accept` and `accept` spellings.
  const acceptHeader = event.request.headers.get('accept');

  if (acceptHeader?.includes('application/json')) {
    await datasetAsJson(event);
  }
};
1428

1529
export default component$(() => {
1630
const session = useSession();
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
import type { RequestEvent, RequestHandler } from '@builder.io/qwik-city';
2+
import { datasetAsJson } from './utils';
3+
4+
/**
 * GET handler for the dataset JSON route: every request is answered with the
 * JSON representation written by `datasetAsJson`.
 *
 * @param event - The incoming request event.
 */
export const onGet: RequestHandler = (event: RequestEvent) =>
  datasetAsJson(event);
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import type { RequestEvent } from '@builder.io/qwik-city';
2+
import { getDatasetById } from '~/services';
3+
import { generateDatasetConfig } from '~/usecases/create-dataset-config';
4+
5+
/**
 * Writes the JSON representation of the dataset identified by the `id` route
 * parameter to the response.
 *
 * Responds with 404 and an `error` message when no dataset is found.
 * Otherwise responds with 200 and the dataset's `id`, `name`, `createdBy` and
 * `createdAt`, merged with the output of `generateDatasetConfig`.
 *
 * @param event - Request event providing the route params and the `json`
 *   responder.
 */
export const datasetAsJson = async (event: RequestEvent): Promise<void> => {
  const dataset = await getDatasetById(event.params.id);

  if (!dataset) {
    event.json(404, {
      error: 'Dataset not found',
    });
    return;
  }

  const config = await generateDatasetConfig(dataset);

  event.json(200, {
    id: dataset.id,
    name: dataset.name,
    // This key was previously misspelled as `cretedBy`.
    createdBy: dataset.createdBy,
    createdAt: dataset.createdAt,
    ...config,
  });
};
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
import { materializePrompt } from '~/services/inference/materialize-prompt';
2+
import { getColumnCellByIdx, getRowCells } from '~/services/repository';
3+
import type { Cell, Column, Dataset } from '~/state';
4+
import { collectValidatedExamples } from '~/usecases/collect-examples';
5+
6+
/** Exported configuration of a single processed column. */
interface DatasetColumnConfig {
  modelName?: string;
  modelProvider?: string;
  userPrompt?: string;
  prompt?: string;
  searchEnabled?: boolean;
  columnsReferences?: string[];
}

/**
 * Builds the exportable configuration of a dataset, keyed by column name.
 *
 * Columns without a `process`, or whose process has no model name, no model
 * provider and no prompt, are left out. Referenced column ids are translated
 * to column names when the referenced column exists in the dataset; otherwise
 * the id itself is kept.
 *
 * @param dataset - The dataset whose columns are inspected.
 * @returns An object whose `columns` map holds one entry per configured column.
 */
export async function generateDatasetConfig(dataset: Dataset): Promise<{
  columns: Record<string, DatasetColumnConfig>;
}> {
  const columnConfigs: Record<string, DatasetColumnConfig> = {};

  for (const column of dataset.columns) {
    const { process: columnProcess } = column;
    if (!columnProcess) continue;

    // Skip columns with empty model configuration
    if (
      !columnProcess.modelName &&
      !columnProcess.modelProvider &&
      !columnProcess.prompt
    ) {
      continue;
    }

    const prompt = await promptTemplateForColumn(column);

    columnConfigs[column.name] = {
      modelName: columnProcess.modelName,
      modelProvider: columnProcess.modelProvider,
      // `userPrompt` is the prompt as stored on the column; `prompt` is the
      // template produced by `promptTemplateForColumn`.
      userPrompt: columnProcess.prompt,
      prompt,
      searchEnabled: columnProcess.searchEnabled,
      columnsReferences: columnProcess.columnsReferences?.map((colId) => {
        const refColumn = dataset.columns.find((c) => c.id === colId);
        // `||` is deliberate: an empty name also falls back to the id.
        return refColumn?.name || colId;
      }),
    };
  }

  return { columns: columnConfigs };
}
50+
51+
/**
 * Loads the cells of row 0 for the given columns and returns them as a map
 * from column name to cell value.
 *
 * Cells that carry no `column` are skipped instead of raising a `TypeError`.
 *
 * @param columnsReferences - Column references passed to `getRowCells` as `columns`.
 * @returns An object keyed by column name holding each cell's value.
 */
async function getFirstRowData(columnsReferences: string[]) {
  const firstRowCells = await getRowCells({
    rowIdx: 0,
    columns: columnsReferences,
  });

  return Object.fromEntries(
    firstRowCells.flatMap((cell) =>
      cell.column ? [[cell.column.name, cell.value] as const] : [],
    ),
  );
}
60+
61+
/**
 * Produces the prompt template for a column.
 *
 * The column's prompt is materialized together with its validated cells
 * (as examples) and a placeholder map in which every key of the first-row
 * data of the referenced columns maps to `{{key}}`.
 *
 * @param column - The column to build the template for.
 * @returns The materialized prompt, or `undefined` when the column has no
 *   process, no prompt, or is of type `image`.
 */
const promptTemplateForColumn = async (
  column: Column,
): Promise<string | undefined> => {
  const { process } = column;
  if (!process || !process.prompt) return undefined;

  // Image columns do not have prompt templates
  if (column.type === 'image') return undefined;

  // Fetch complete cell data for validated cells
  const validatedCells = await Promise.all(
    column.cells
      .filter((cell) => cell.validated)
      .map((cell) =>
        getColumnCellByIdx({
          idx: cell.idx,
          columnId: column.id,
        }),
      ),
  );

  const examples = await collectValidatedExamples({
    validatedCells: validatedCells.filter(
      (cell): cell is Cell => cell !== null,
    ),
    columnsReferences: process.columnsReferences,
  });

  // The first row is only read to learn which keys exist; its values are
  // never used.
  const firstRowData = process.columnsReferences?.length
    ? await getFirstRowData(process.columnsReferences)
    : {};

  // Map each key to itself wrapped in {{}} so the result stays a template.
  const placeholders: Record<string, string> = {};
  for (const key of Object.keys(firstRowData)) {
    placeholders[key] = `{{${key}}}`;
  }

  return materializePrompt({
    instruction: process.prompt,
    data: placeholders,
    examples: examples?.length ? examples : undefined,
  });
};

0 commit comments

Comments
 (0)