@@ -540,9 +540,17 @@ def __init__(self, llm: LLM, prompt_str: str = EXTRACT_TABLE_STRUCTURE_PROMPT):
540540 self .prompt_str = prompt_str
541541
def extract(self, element: TableElement, doc_image: Image.Image) -> TableElement:
    """Return the table element produced by ``extract_metadata``.

    Calls ``self.extract_metadata(element, doc_image)`` and hands back
    the value stored under the ``"output"`` key of the returned dict,
    discarding any other keys in that dict.

    Args:
        element: The table element to process.
        doc_image: The image passed through to ``extract_metadata``.

    Returns:
        The value found under ``"output"``.

    Raises:
        AssertionError: If the ``"output"`` value is missing or is not a
            ``TableElement`` (only when assertions are enabled).
    """
    result: dict = self.extract_metadata(element, doc_image)
    extracted = result.get("output")
    # Narrow the loosely-typed dict value back to the declared return type.
    assert isinstance(extracted, TableElement)
    return extracted
547+
548+ def extract_metadata (
549+ self , element : TableElement , doc_image : Image .Image , llm_kwargs : Optional [dict ] = None
550+ ) -> dict [str , Any ]:
543551 # We need a bounding box to be able to do anything.
544552 if element .bbox is None :
545- return element
553+ return { "output" : element }
546554
547555 cropped_image , _ = _crop_bbox (doc_image , element .bbox )
548556
@@ -561,22 +569,24 @@ def response_checker(response: str) -> bool:
561569 self .llm .response_checker = response_checker
562570
563571 try :
564- res : str = self .llm .generate (prompt = prompt )
572+ res_with_md : dict [ str , Any ] = self .llm .generate_metadata (prompt = prompt , llm_kwargs = llm_kwargs )
565573
574+ res = res_with_md .pop ("output" )
566575 if res .startswith ("```html" ):
567576 res = res [7 :].rstrip ("`" )
568577 res = res .strip ()
569578
570579 table = Table .from_html (res )
571580 element .table = table
572- return element
581+ res_with_md .update ({"output" : element })
582+ return res_with_md
573583 except Exception as e :
574584 tb_str = "" .join (traceback .format_exception (type (e ), e , e .__traceback__ ))
575585 logging .warning (
576586 f"Failed to extract a table due to:\n { tb_str } \n Returning the original element without a table."
577587 )
578588
579- return element
589+ return { "output" : element }
580590
581591
582592DEFAULT_TABLE_STRUCTURE_EXTRACTOR = TableTransformerStructureExtractor
0 commit comments