import pandas as pd
from datetime import timedelta
def format_date(date: int) -> str:
"""
将形如 20241101 的整数日期转换为 2024-11-01 的字符串格式。
:param date: 整数形式的日期,例如 20241101。
:return: 字符串形式的日期,例如 '2024-11-01'。
"""
# 将整数转为字符串,方便切片处理
date_str = str(date)
if len(date_str) != 8:
raise ValueError("输入的日期格式不正确,应该是形如 20241101 的 8 位数字。")
# 按格式拼接
formatted_date = f"{date_str[:4]}-{date_str[4:6]}-{date_str[6:]}"
return formatted_date
def match_funds(data, time_window=10, tolerance=0.05, threshold=100000):
data["日期"] = pd.to_datetime(data["日期"])
# 按天汇总
daily_summary = data.groupby("日期").apply(
lambda x: pd.Series({
"收入总额": x.loc[x["交易类型"] == "收", "金额"].sum(),
"支出总额": x.loc[x["交易类型"] == "支", "金额"].sum()
}),
include_groups=False
).reset_index()
# 结果存储
results = []
# 滑动窗口匹配逻辑
n = len(daily_summary)
i = 0
while i < n:
start_date = daily_summary.loc[i, "日期"]
income_sum = daily_summary.loc[i, "收入总额"]
expense_sum = daily_summary.loc[i, "支出总额"]
matched = False
# 检查当天的收支金额是否满足条件
net_amount = income_sum - expense_sum
percentage_difference = abs(net_amount) / income_sum if income_sum > 0 else float("inf")
if max(income_sum, expense_sum) >= threshold and percentage_difference <= tolerance:
# 记录匹配结果
results.append({
"收入开始日期": start_date,
"支出结束日期": start_date,
"收入累计": income_sum,
"支出累计": expense_sum,
"净额": net_amount,
"差异占比": percentage_difference
})
matched = True
i += 1 # 跳到下一天继续
continue # 跳过后续累加逻辑
# 在时间窗口内累加后续日期的收入和支出
for j in range(i + 1, n):
end_date = daily_summary.loc[j, "日期"]
if (end_date - start_date).days > time_window:
break
income_sum += daily_summary.loc[j, "收入总额"]
expense_sum += daily_summary.loc[j, "支出总额"]
# 检查是否满足金额阈值
if max(income_sum, expense_sum) < threshold:
continue # 如果不满足阈值,跳过当前窗口
# 检查是否满足差异条件
net_amount = income_sum - expense_sum
percentage_difference = abs(net_amount) / income_sum if income_sum > 0 else float("inf")
if percentage_difference <= tolerance:
# 记录匹配结果
results.append({
"收入开始日期": start_date,
"支出结束日期": end_date,
"收入累计": income_sum,
"支出累计": expense_sum,
"净额": net_amount,
"差异占比": percentage_difference
})
matched = True
i = j # 跳到匹配的结束日期之后
break
if not matched:
i += 1 # 如果未匹配,跳到下一天继续
# 转换结果为DataFrame
result_df = pd.DataFrame(results)
return result_df
if __name__ == "__main__":
# 定义数据列表
data = [
['A公司', '20200624', '支', 1964.00, 'B公司', '外购纸品费'],
['B公司', '20200807', '收', 23436.20, 'A公司', '汇款业务撤销'],
['B公司', '20200807', '收', 23629.20, 'A公司', '汇款业务撤销'],
['A公司', '20200807', '支', 23629.20, 'B公司', '品质改造'],
['A公司', '20200807', '支', 23436.20, 'B公司', '品质改造'],
['A公司', '20200811', '支', 23436.20, 'B公司', '物业费'],
['A公司', '20200811', '支', 23629.20, 'B公司', '物业费'],
['A公司', '20200824', '支', 3126.00, 'B公司', '品质提升'],
['A公司', '20200914', '支', 1692.00, 'B公司', '物业费'],
['A公司', '20201030', '支', 5808.00, 'B公司', '物业人工费'],
['A公司', '20201030', '支', 23541.80, 'B公司', '物业人工费'],
['A公司', '20201030', '支', 24008.00, 'B公司', '物业人工费']
]
# 定义列名
columns = ['付款户名', '交易日期', '交易方向', '交易金额', '收款户名', '备注']
# 创建DataFrame
df = pd.DataFrame(data, columns=columns)
check_company = 'A公司'
data = []
for index,row in df.iterrows():
row_data = []
date = row['交易日期']
date = format_date(date)
amount = row['交易金额']
note = row['备注']
if row['收款户名'] == check_company:
company = check_company
type = '收'
opponent = row['付款户名']
row_data = [date,company,opponent,type,amount,note]
data.append(row_data)
elif row['付款户名'] == check_company:
company = check_company
type = '支'
opponent = row['收款户名']
row_data = [date,company,opponent,type,amount,note]
data.append(row_data)
data = pd.DataFrame(data,columns=['日期','公司名称','对方单位','交易类型','金额','备注'])
company = check_company
opponents = data['对方单位'].unique()
output = []
for opponent in opponents:
df = data[data['对方单位']==opponent]
df = df.reset_index()
result = match_funds(df,10,0.05,10000)
if not result.empty:
result.insert(0,'对方单位',opponent)
result.insert(0,'公司名称',company)
output.append(result)
df_output = pd.concat(output)
df_output = df_output.sort_values(by='收入累计',ascending=False)
df_output.reset_index()
print(df_output)