您尚未登录。

楼主 # 2023-09-21 21:43:27

演技担当黄晓明
会员
注册时间: 2017-10-17
已发帖子: 184
积分: 122.5

Python 合并 PDF 文件

Files.py

#!/usr/bin/python2.6
# -*- coding: utf-8 -*-

import pandas as pd
import os


def get_abs_path(relative_path: str) -> str:
    """Return *relative_path* joined onto the current working directory.

    The result is produced by ``os.path.join`` only; it is not normalised
    (``..`` segments and redundant separators are kept as given).
    """
    return os.path.join(os.getcwd(), relative_path)


def find_file(search_path, readsub=False, include_str=None, filter_strs=None):
    """Collect the absolute paths of files found under *search_path*.

    :param search_path: directory to scan.
    :param readsub: when true, descend into sub-directories at every depth;
        when false, only the files directly inside *search_path* are returned.
    :param include_str: if given, keep only files whose name contains it.
    :param filter_strs: files whose name contains any of these substrings
        are skipped; ``None`` means no exclusions.
    :return: list of absolute file paths. Entries of each directory are
        visited in sorted name order, so the result is reproducible.
    """
    if filter_strs is None:
        filter_strs = []

    files = []
    # os.listdir gives entries in arbitrary order; sort for a stable result.
    for name in sorted(os.listdir(search_path)):
        abs_path = os.path.abspath(os.path.join(search_path, name))
        if os.path.isfile(abs_path):
            # Skip names that lack the required substring.
            if include_str is not None and include_str not in name:
                continue
            # Skip names that contain any excluded substring.
            if any(filter_str in name for filter_str in filter_strs):
                continue
            files.append(abs_path)
        elif readsub and os.path.isdir(abs_path):
            # Forward readsub so the walk continues below the first level;
            # the isdir check avoids calling listdir on broken links or
            # other non-directory entries.
            files += find_file(
                abs_path,
                readsub,
                include_str=include_str,
                filter_strs=filter_strs,
            )

    return files


def get_info(str_path1: str) -> dict:
    """Extract the ``Date`` and ``Money`` fields from a file name.

    Only the base name of *str_path1* is examined. A ``.早.``, ``.午.`` or
    ``.晚.`` marker is rewritten so that it ends with ``-``, every ``.pdf``
    occurrence is removed, and the remainder is split on ``-``.

    :return: ``{"Date": ..., "Money": ...}`` built from the first two parts
        (dots in the date part become dashes), or an empty dict when the
        name does not split into at least two parts.
    """
    stem = os.path.basename(str_path1)
    # Applied in this order; each marker becomes a "-" field separator.
    substitutions = (
        (".早.", "早-"),
        (".午.", "午-"),
        (".晚.", "晚-"),
        (".pdf", ""),
    )
    for old, new in substitutions:
        stem = stem.replace(old, new)

    parts = stem.split("-")
    if len(parts) <= 1:
        return {}
    return {"Date": parts[0].replace(".", "-"), "Money": parts[1]}

pdfcombine.py

#!/usr/bin/python2.6
# -*- coding: utf-8 -*-

import os

from PyPDF2 import PdfReader, PdfWriter

import Files

# Merge every PDF located directly in the current directory into one file.
merger = PdfWriter()

# find_file matches ".pdf" anywhere in the name, so keep only names that
# really end with it; sort so the merged page order does not depend on the
# directory listing order.
files = sorted(
    path
    for path in Files.find_file("./", False, ".pdf")
    if path.endswith(".pdf")
)
for pdf in files:
    pdf_reader = PdfReader(pdf)
    for page in pdf_reader.pages:
        merger.add_page(page)

# Build the output path with os.path.join so it works on every platform
# (the backslash form is only a path on Windows), and create the directory
# first because write() does not create missing parents.
output_dir = os.path.join(".", "output")
os.makedirs(output_dir, exist_ok=True)
merger.write(os.path.join(output_dir, "all.pdf"))
merger.close()

离线

页脚

工信部备案:粤ICP备20025096号 Powered by FluxBB

感谢为中文互联网持续输出优质内容的各位老铁们。 QQ: 516333132, 微信(wechat): whycan_cn (哇酷网/挖坑网/填坑网) service@whycan.cn