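# ---- File.py ----  (NOTE(review): pdfcombine.py below does `import Files`; confirm the real module filename)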
#!/usr/bin/python2.6
# -*- coding: utf-8 -*-
import pandas as pd
import os
def get_abs_path(relative_path: str) -> str:
    """Return *relative_path* joined onto the current working directory.

    The join is done with ``os.path.join``, so the result is not normalized
    (``..`` segments are kept) and an already-absolute *relative_path* is
    returned as ``os.path.join`` resolves it.

    :param relative_path: path expressed relative to the working directory
    :return: the working directory joined with *relative_path*
    """
    return os.path.join(os.getcwd(), relative_path)
def find_file(search_path, readsub=False, include_str=None, filter_strs=None):
    """Collect the absolute paths of the files found under *search_path*.

    No de-duplication is performed: files that share a name but live in
    different directories are all returned.

    :param search_path: directory to scan
    :param readsub: when true, descend into subdirectories at every depth;
        when false, only files directly inside *search_path* are returned
    :param include_str: if not None, keep only files whose name contains
        this substring
    :param filter_strs: drop files whose name contains any of these
        substrings; None means no exclusions
    :return: list of absolute file paths
    """
    if filter_strs is None:
        filter_strs = []
    files = []
    # os.listdir gives no ordering guarantee; sort so results are reproducible.
    for name in sorted(os.listdir(search_path)):
        abs_path = os.path.abspath(os.path.join(search_path, name))
        if os.path.isfile(abs_path):
            # Skip names that lack the required substring.
            if include_str is not None and include_str not in name:
                continue
            # Skip names that contain any excluded substring.
            if any(filter_str in name for filter_str in filter_strs):
                continue
            files.append(abs_path)
        elif readsub and os.path.isdir(abs_path):
            # Forward readsub so the search keeps descending below the first
            # level; the isdir check keeps broken symlinks and special files
            # from reaching os.listdir.
            files += find_file(
                abs_path,
                readsub=readsub,
                include_str=include_str,
                filter_strs=filter_strs,
            )
    return files
def get_info(str_path1: str) -> dict:
    """Split a file name into its "Date" and "Money" parts.

    Only the base name of *str_path1* is used. A ".早." / ".午." / ".晚."
    marker (morning / noon / evening) is rewritten to "<marker>-", every
    ".pdf" occurrence is removed, and the result is split on "-".

    :param str_path1: path or file name, e.g. "2023.01.05.早.100.pdf"
    :return: ``{"Date": ..., "Money": ...}`` built from the first two pieces
        (dots in the date become dashes), or an empty dict when the name
        yields fewer than two pieces
    """
    stem = os.path.basename(str_path1)
    # Applied in this order; each marker becomes the "-" boundary between
    # the leading and trailing piece of the name.
    rewrites = (
        (".早.", "早-"),
        (".午.", "午-"),
        (".晚.", "晚-"),
        (".pdf", ""),
    )
    for old, new in rewrites:
        stem = stem.replace(old, new)

    pieces = stem.split("-")
    if len(pieces) <= 1:
        return {}
    # The leading piece is the date. The original comment called the trailing
    # piece a time, but it is stored under "Money" — confirm the intended meaning.
    return {
        "Date": pieces[0].replace(".", "-"),
        "Money": pieces[1],
    }
# ---- pdfcombine.py ----
#!/usr/bin/python2.6
# -*- coding: utf-8 -*-
from PyPDF2 import PdfReader, PdfWriter
import Files
import os

# Merge every PDF directly inside the current directory into one document.
# The destination is built with os.path.join so the separator is correct on
# every platform; on Windows this is the same ".\output\all.pdf" as before.
OUTPUT_DIR = os.path.join(".", "output")
OUTPUT_PATH = os.path.join(OUTPUT_DIR, "all.pdf")

merger = PdfWriter()
try:
    # Non-recursive search, so a previously written output/all.pdf is not
    # picked up again. Sorted because directory listing order is arbitrary
    # and the page order of the result should be reproducible.
    files = sorted(Files.find_file("./", False, ".pdf"))
    for pdf in files:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            merger.add_page(page)

    # Create the output directory if it is missing.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    merger.write(OUTPUT_PATH)
finally:
    merger.close()
# 离线 ("offline") — stray non-code text; presumably residue from the page this listing was copied from