Skip to content

Commit f84f4d3

Browse files
Add extract_metadata to VLM table extractor for LLM stat extraction (#1543)
* Add extract_metadata to VLM table extractor for LLM stat extraction
* Address comments
* Fix lint
* Fix unit tests
* Fix lint
1 parent 56d2be4 commit f84f4d3

File tree

2 files changed

+22
-5
lines changed

2 files changed

+22
-5
lines changed

lib/sycamore/sycamore/llms/chained_llm.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import logging
2-
from typing import Optional, Callable
2+
from typing import Optional, Callable, Any
33

44
from sycamore.llms import LLM
55
from sycamore.llms.config import LLMModel, ChainedModel
@@ -50,6 +50,13 @@ def chain(self) -> list[LLM]:
5050
"""
5151
return self._chain
5252

53+
# TODO implement this method for ChainedLLM
54+
def generate_metadata(
55+
self, *, prompt: RenderedPrompt, llm_kwargs: Optional[dict] = None, model: Optional[LLMModel] = None
56+
) -> dict[str, Any]:
57+
output = self.generate(prompt=prompt, llm_kwargs=llm_kwargs, model=model)
58+
return {"output": output}
59+
5360
def generate(
5461
self, *, prompt: RenderedPrompt, llm_kwargs: Optional[dict] = None, model: Optional[LLMModel] = None
5562
) -> str:

lib/sycamore/sycamore/transforms/table_structure/extract.py

Lines changed: 14 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -540,9 +540,17 @@ def __init__(self, llm: LLM, prompt_str: str = EXTRACT_TABLE_STRUCTURE_PROMPT):
540540
self.prompt_str = prompt_str
541541

542542
def extract(self, element: TableElement, doc_image: Image.Image) -> TableElement:
543+
ret: dict = self.extract_metadata(element, doc_image)
544+
table_element = ret.get("output")
545+
assert isinstance(table_element, TableElement)
546+
return table_element
547+
548+
def extract_metadata(
549+
self, element: TableElement, doc_image: Image.Image, llm_kwargs: Optional[dict] = None
550+
) -> dict[str, Any]:
543551
# We need a bounding box to be able to do anything.
544552
if element.bbox is None:
545-
return element
553+
return {"output": element}
546554

547555
cropped_image, _ = _crop_bbox(doc_image, element.bbox)
548556

@@ -561,22 +569,24 @@ def response_checker(response: str) -> bool:
561569
self.llm.response_checker = response_checker
562570

563571
try:
564-
res: str = self.llm.generate(prompt=prompt)
572+
res_with_md: dict[str, Any] = self.llm.generate_metadata(prompt=prompt, llm_kwargs=llm_kwargs)
565573

574+
res = res_with_md.pop("output")
566575
if res.startswith("```html"):
567576
res = res[7:].rstrip("`")
568577
res = res.strip()
569578

570579
table = Table.from_html(res)
571580
element.table = table
572-
return element
581+
res_with_md.update({"output": element})
582+
return res_with_md
573583
except Exception as e:
574584
tb_str = "".join(traceback.format_exception(type(e), e, e.__traceback__))
575585
logging.warning(
576586
f"Failed to extract a table due to:\n{tb_str}\nReturning the original element without a table."
577587
)
578588

579-
return element
589+
return {"output": element}
580590

581591

582592
DEFAULT_TABLE_STRUCTURE_EXTRACTOR = TableTransformerStructureExtractor

0 commit comments

Comments
 (0)