From 545d921bcf21ddb322d398ba4c71392b8f01abb0 Mon Sep 17 00:00:00 2001 From: zxstty Date: Thu, 27 Mar 2025 11:48:48 +0000 Subject: [PATCH] =?UTF-8?q?update=20data=5Fchain/parser/handler/pdf=5Fpars?= =?UTF-8?q?er.py.=20=E4=BF=AE=E5=A4=8Dtable=E9=87=8D=E5=A4=8D=E8=A7=A3?= =?UTF-8?q?=E6=9E=90=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: zxstty --- data_chain/parser/handler/pdf_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data_chain/parser/handler/pdf_parser.py b/data_chain/parser/handler/pdf_parser.py index 70d1fbd..f155265 100644 --- a/data_chain/parser/handler/pdf_parser.py +++ b/data_chain/parser/handler/pdf_parser.py @@ -74,11 +74,11 @@ class PdfService(BaseService): table_df = table.to_pandas() table_lines = self.split_table(table_df) for line in table_lines: - table_data.extend([{ + table_data.append({ "text": line, "bbox": table_bbox, "type": "table", - } for line in table_lines]) + }) page.apply_redactions() -- Gitee