# payment_record_analysis/app.py
from flask import Flask, render_template, jsonify, request, session, redirect, url_for
import pandas as pd
from datetime import datetime, timedelta
from functools import lru_cache, wraps
import logging
import calendar
import json
import os
from werkzeug.utils import secure_filename
import shutil
import uuid
import threading
from time import sleep
import atexit
import numpy as np
from secrets import token_hex
import itertools

app = Flask(__name__)

# Session cookie settings, applied right after the app is created.
app.config.update(
    SESSION_COOKIE_SECURE=False,  # False for the local development environment
    SESSION_COOKIE_HTTPONLY=True,  # keep the cookie out of reach of JavaScript
    SESSION_COOKIE_SAMESITE='Lax',  # CSRF mitigation
    PERMANENT_SESSION_LIFETIME=timedelta(minutes=30)  # session expiry time
)

# Read the secret key from the environment; generate a fresh random one when it is not set.
# NOTE(review): a generated key changes on every process start, so existing
# session cookies presumably stop validating after a restart — confirm this is acceptable.
app.secret_key = os.environ.get('FLASK_SECRET_KEY') or token_hex(32)

# Logging configuration: DEBUG level, written both to app.log and to the console.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('app.log'),  # file handler
        logging.StreamHandler()  # keep console output
    ]
)
logger = logging.getLogger(__name__)

# Static-asset version control
@app.context_processor
def inject_static_version():
    """Expose ``STATIC_VERSION`` to templates so static assets can be cache-busted.

    The version is the integer modification time of ``static/css/style.css``.
    When that file is missing or cannot be inspected, a minute-resolution
    timestamp of the current time is used instead.
    """
    try:
        stylesheet = os.path.join(app.root_path, 'static', 'css', 'style.css')
        # getmtime raises for a missing file, which lands in the fallback below
        stamp = str(int(os.path.getmtime(stylesheet)))
    except Exception:
        stamp = datetime.now().strftime('%Y%m%d%H%M')

    return {'STATIC_VERSION': stamp}

# File-upload configuration
UPLOAD_FOLDER = '/tmp/flask_uploads'  # temp directory (noted by the author as the one recommended for PythonAnywhere)
ALLOWED_EXTENSIONS = {'csv', 'xlsx'}  # lower-case extensions accepted by allowed_file()

app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # 16MB

def allowed_file(filename):
    """Return True when *filename* ends in an extension from ALLOWED_EXTENSIONS.

    Only the text after the last dot is considered, compared case-insensitively.
    A name without any dot is rejected.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS

def get_session_dir():
    """Return the upload directory of the current session, creating it if needed.

    A session without ``user_id`` gets a random id and a ``created_at``
    timestamp. A session older than 30 minutes has its directory removed, is
    cleared, and is given a fresh id before the (new) directory is returned.

    Returns:
        str: path of ``<UPLOAD_FOLDER>/<user_id>``; the directory exists on return.
    """
    def _start_session():
        # Random, non-guessable id plus the creation time used for expiry checks
        session['user_id'] = f"user_{token_hex(16)}"
        session['created_at'] = datetime.now().timestamp()

    if 'user_id' not in session:
        _start_session()

    # Expire sessions older than 30 minutes
    if 'created_at' in session:
        session_age = datetime.now().timestamp() - session['created_at']
        if session_age > 1800:
            old_session_dir = os.path.join(app.config['UPLOAD_FOLDER'], session['user_id'])
            # Best-effort cleanup: parallel requests of the same session may
            # race on the removal, so a failure here must not break the request.
            try:
                shutil.rmtree(old_session_dir)
            except FileNotFoundError:
                pass
            except OSError as e:
                logger.warning(f"Failed to remove expired session dir {old_session_dir}: {str(e)}")
            session.clear()
            _start_session()

    session_dir = os.path.join(app.config['UPLOAD_FOLDER'], session['user_id'])

    # exist_ok avoids FileExistsError when two requests create the directory
    # at the same time; mode 0o700 keeps it private to the server user.
    os.makedirs(session_dir, mode=0o700, exist_ok=True)

    return session_dir

def user_cache(f):
    """Decorator caching the wrapped function's result per user for 5 minutes.

    The cache key is the session's ``user_id`` only (call arguments are not
    part of the key). Demo sessions and sessions without a ``user_id`` bypass
    the cache entirely. The returned wrapper exposes ``clear_cache(user_id)``
    to drop a single user's entry.
    """
    cache = {}

    @wraps(f)
    def decorated_function(*args, **kwargs):
        # Demo mode never uses the cache
        if session.get('is_demo'):
            return f(*args, **kwargs)

        # Without a user id there is nothing to key the cache on
        user_id = session.get('user_id')
        if not user_id:
            return f(*args, **kwargs)

        # Serve a still-fresh entry (younger than 5 minutes)
        entry = cache.get(user_id)
        if entry is not None:
            stored_at, payload = entry
            if datetime.now().timestamp() - stored_at < 300:
                return payload

        # Compute and store a new entry
        result = f(*args, **kwargs)
        cache[user_id] = (datetime.now().timestamp(), result)

        # Evict every entry (any user) that has outlived the 5-minute window
        now = datetime.now().timestamp()
        stale = [key for key, (stored_at, _) in cache.items() if now - stored_at > 300]
        for key in stale:
            del cache[key]

        return result

    def clear_cache(user_id):
        cache.pop(user_id, None)

    decorated_function.clear_cache = clear_cache

    return decorated_function

@user_cache
def load_alipay_data():
    """Load and merge all bill files available to the current session.

    In demo mode (``session['is_demo']``) the bundled ``sample_data.csv`` from
    the static folder is returned. Otherwise every ``.csv`` / ``.xlsx`` file in
    the session directory is parsed: CSV files are treated as WeChat or Alipay
    exports depending on ``detect_file_source``; XLSX files are treated as
    WeChat exports. Each parsed frame gets the derived columns ``月份``,
    ``日期``, ``是否退款`` and ``来源``. Files that fail to parse are logged and
    skipped.

    Returns:
        pandas.DataFrame: all parsed rows concatenated and sorted by ``交易时间``
        (or the demo frame as read).

    Raises:
        FileNotFoundError: the demo sample file is missing, or no file yielded data.
        Exception: any other error is logged and re-raised.
    """
    try:
        # Demo-mode branch
        if session.get('is_demo'):
            sample_file = os.path.join(app.static_folder, 'sample_data.csv')
            if not os.path.exists(sample_file):
                raise FileNotFoundError("示例数据文件不存在")

            df = pd.read_csv(sample_file)
            df['交易时间'] = pd.to_datetime(df['交易时间'])
            df['月份'] = df['交易时间'].dt.strftime('%Y-%m')
            df['日期'] = df['交易时间'].dt.strftime('%Y-%m-%d')
            # Make sure the demo data also carries these columns
            if '是否退款' not in df.columns:
                 df['是否退款'] = False
            if '来源' not in df.columns:
                 df['来源'] = '示例数据'
            return df

        session_dir = get_session_dir()
        all_data = []

        # Read every file in the session directory
        # NOTE(review): the extension checks below are case-sensitive, while
        # allowed_file() lower-cases the extension — confirm uploads are saved
        # with lower-case extensions.
        for filename in os.listdir(session_dir):
            filepath = os.path.join(session_dir, filename)

            # CSV files
            if filename.endswith('.csv'):
                try:
                    # Decide whether this is a WeChat bill (CSV format)
                    is_wechat_csv = (detect_file_source(filepath) == 'wechat')

                    if is_wechat_csv:
                        # WeChat CSV handling: locate the header row first
                        with open(filepath, encoding='utf-8-sig') as f:
                            lines = f.readlines()
                            header_row = None
                            for i, line in enumerate(lines):
                                if '交易时间' in line and '交易类型' in line:
                                    header_row = i
                                    break

                        if header_row is not None:
                            df = pd.read_csv(filepath, encoding='utf-8-sig', skiprows=header_row)

                            # Map column names onto the Alipay-style names used downstream
                            df = df.rename(columns={
                                '交易类型': '交易分类',
                                '商品': '商品说明',
                                '金额(元)': '金额',
                                '当前状态': '交易状态',
                                '支付方式': '收/付款方式'
                            })

                            # Clean the amount column (strip '¥' and thousands separators)
                            df['金额'] = df['金额'].astype(str).str.replace('¥', '').str.replace(',', '').astype(float)

                            # Derive time columns
                            df['交易时间'] = pd.to_datetime(df['交易时间'])
                            df['月份'] = df['交易时间'].dt.strftime('%Y-%m')
                            df['日期'] = df['交易时间'].dt.strftime('%Y-%m-%d')

                            # Flag refunds (status mentions refund / closed / revoked) and make their amount negative
                            df['是否退款'] = df['交易状态'].astype(str).str.contains('退款|关闭|撤销', case=False, na=False)
                            df.loc[df['是否退款'], '金额'] = -df.loc[df['是否退款'], '金额'].abs()

                            # Ensure the required columns exist
                            if '交易对方' not in df.columns:
                                df['交易对方'] = '未知'
                            if '收/支' not in df.columns:
                                df['收/支'] = '/'

                            df['来源'] = '微信'
                            all_data.append(df)

                    else:
                        # Alipay CSV handling (GBK by default)
                        try:
                            # Open as GBK first (Alipay exports are usually GBK)
                            with open(filepath, encoding='gbk') as f:
                                lines = f.readlines()
                        except UnicodeDecodeError:
                            # Fall back to UTF-8 when GBK decoding fails
                            with open(filepath, encoding='utf-8') as f:
                                lines = f.readlines()

                        header_row = None
                        status_row = None
                        # Scan up to (and including) the first line containing '交易时间'
                        # NOTE(review): `not status_row` is also true for row index 0,
                        # so a match on the very first line can be overwritten later.
                        for i, line in enumerate(lines):
                            if '交易状态' in line and not status_row:
                                status_row = i
                            if '交易时间' in line:
                                header_row = i
                                break

                        if header_row is not None:
                            # Read the data (same encoding fallback as above, simplified; pandas read_csv can be wrapped in try-except too)
                            try:
                                df = pd.read_csv(filepath, encoding='gbk', skiprows=header_row)
                                status_df = pd.read_csv(filepath, encoding='gbk', skiprows=status_row, nrows=1)
                            except UnicodeDecodeError:
                                df = pd.read_csv(filepath, encoding='utf-8', skiprows=header_row)
                                status_df = pd.read_csv(filepath, encoding='utf-8', skiprows=status_row, nrows=1)

                            # ... (original Alipay processing logic) ...
                            # NOTE(review): this takes the FIRST column name of the row found
                            # at status_row as the status column — verify against a real export.
                            status_column = status_df.columns[0]

                            # Pre-processing: derive time columns
                            df['交易时间'] = pd.to_datetime(df['交易时间'])
                            df['月份'] = df['交易时间'].dt.strftime('%Y-%m')
                            df['日期'] = df['交易时间'].dt.strftime('%Y-%m-%d')

                            # Flag refunded / closed transactions and negate their amount
                            df['是否退款'] = df[status_column].isin(['退款成功', '交易关闭'])
                            df.loc[df['是否退款'], '金额'] = -df.loc[df['是否退款'], '金额']

                            # Tag the data source
                            df['来源'] = '支付宝'

                            all_data.append(df)

                except Exception as e:
                    # A broken file is logged and skipped; other files are still processed
                    logger.error(f"Error processing CSV file {filename}: {str(e)}")
                    continue

            # XLSX files (WeChat)
            elif filename.endswith('.xlsx'):
                try:
                    # WeChat bills usually start with 16 lines of notes; line 17 is the header.
                    # Auto-detecting the header row would also be possible, but a fixed layout is assumed for now.
                    df = pd.read_excel(filepath, header=16, engine='openpyxl')

                    # Check that this is a valid WeChat bill (key columns present)
                    if '交易时间' in df.columns and '金额(元)' in df.columns:
                        # Map column names to match the Alipay format
                        df = df.rename(columns={
                            '交易类型': '交易分类',  # WeChat's transaction type corresponds to Alipay's transaction category
                            '商品': '商品说明',
                            '金额(元)': '金额',
                            '当前状态': '交易状态',
                            '支付方式': '收/付款方式'
                        })

                        # Clean the amount column (strip '¥' and ',')
                        df['金额'] = df['金额'].astype(str).str.replace('¥', '').str.replace(',', '').astype(float)

                        # Derive time columns
                        df['交易时间'] = pd.to_datetime(df['交易时间'])
                        df['月份'] = df['交易时间'].dt.strftime('%Y-%m')
                        df['日期'] = df['交易时间'].dt.strftime('%Y-%m-%d')

                        # Flag refunds
                        # WeChat statuses include: '支付成功', '已全额退款', '已退款', etc.
                        df['是否退款'] = df['交易状态'].astype(str).str.contains('退款|关闭|撤销', case=False, na=False)

                        # Make refund amounts negative (consistent with the Alipay logic)
                        df.loc[df['是否退款'], '金额'] = -df.loc[df['是否退款'], '金额'].abs()

                        # Ensure all required columns exist
                        if '交易对方' not in df.columns:
                            df['交易对方'] = '未知'
                        if '收/支' not in df.columns: # WeChat bills usually have '收/支'
                            df['收/支'] = '/' # or infer it

                        # Tag the data source
                        df['来源'] = '微信'

                        all_data.append(df)
                    else:
                        logger.warning(f"File {filename} does not look like a standard WeChat bill.")

                except Exception as e:
                    # A broken file is logged and skipped; other files are still processed
                    logger.error(f"Error processing Excel file {filename}: {str(e)}")
                    continue

        if not all_data:
            # Error message updated so that it no longer mentions CSV only
            raise FileNotFoundError("未找到任何支付宝(.csv)或微信(.xlsx)账单文件")

        # Merge all data and sort chronologically
        combined_df = pd.concat(all_data, ignore_index=True)
        combined_df = combined_df.sort_values('交易时间')

        return combined_df

    except Exception as e:
        logger.error(f"Error loading data: {str(e)}")
        raise

def validate_dataframe(df):
    """Check that *df* has the columns and dtypes the analyses rely on.

    Args:
        df: pandas DataFrame to validate.

    Raises:
        ValueError: a required column is missing, or ``金额`` is not numeric.
    """
    required_columns = ('交易时间', '收/支', '金额', '交易分类', '商品说明')

    # Collect missing columns in declaration order so the message is stable
    absent = []
    for column in required_columns:
        if column not in df.columns:
            absent.append(column)
    if absent:
        raise ValueError(f"数据缺少必需列: {', '.join(absent)}")

    # The amount column must be numeric
    amount_is_numeric = pd.api.types.is_numeric_dtype(df['金额'])
    if not amount_is_numeric:
        raise ValueError("'金额'列必须是数值类型")

def check_data_exists(f):
    """Decorator redirecting to the ``settings`` endpoint when no bill data is uploaded.

    The wrapped view runs directly in demo mode or when the current endpoint is
    ``settings``. Otherwise the session must have a ``user_id`` and its session
    directory must contain at least one ``.csv`` or ``.xlsx`` file.
    """
    @wraps(f)
    def decorated_function(*args, **kwargs):
        # Demo mode and the settings page itself need no data check
        if session.get('is_demo') or request.endpoint == 'settings':
            return f(*args, **kwargs)

        if 'user_id' not in session:
            return redirect(url_for('settings'))

        session_dir = get_session_dir()
        has_data = os.path.exists(session_dir) and any(
            name.endswith(('.csv', '.xlsx')) for name in os.listdir(session_dir)
        )
        if has_data:
            return f(*args, **kwargs)
        return redirect(url_for('settings'))

    return decorated_function

@app.route('/')
@app.route('/index')
def index():
    """Render ``index.html`` (served at both ``/`` and ``/index``) with active_page='index'."""
    return render_template(
        'index.html',
        active_page='index',
    )

@app.route('/yearly')
@check_data_exists
def yearly():
    """Render ``yearly.html`` with active_page='yearly'; guarded by check_data_exists."""
    return render_template(
        'yearly.html',
        active_page='yearly',
    )

@app.route('/monthly')
@check_data_exists
def monthly():
    """Render ``monthly.html`` with active_page='monthly'; guarded by check_data_exists."""
    return render_template(
        'monthly.html',
        active_page='monthly',
    )

@app.route('/category')
@check_data_exists
def category():
    """Render ``category.html`` with active_page='category'; guarded by check_data_exists."""
    return render_template(
        'category.html',
        active_page='category',
    )

@app.route('/time')
@check_data_exists
def time():
    """Render ``time.html`` with active_page='time'; guarded by check_data_exists."""
    return render_template(
        'time.html',
        active_page='time',
    )

@app.route('/transactions')
@check_data_exists
def transactions():
    """Render ``transactions.html`` with active_page='transactions'; guarded by check_data_exists."""
    return render_template(
        'transactions.html',
        active_page='transactions',
    )

@app.route('/insights')
@check_data_exists
def insights():
    """Render ``insights.html`` with active_page='insights'; guarded by check_data_exists."""
    return render_template(
        'insights.html',
        active_page='insights',
    )

@app.route('/analysis')
@check_data_exists
def analysis():
    """Render ``analysis.html`` with active_page='analysis'; guarded by check_data_exists."""
    return render_template(
        'analysis.html',
        active_page='analysis',
    )

@app.route('/api/analysis')
def get_analysis():
    """Run every analysis over the (optionally filtered) data and return it as JSON.

    Query parameters:
        year (int): keep only transactions of that year.
        min_amount (float): keep rows with ``金额 >= min_amount``.
        max_amount (float): keep rows with ``金额 < max_amount`` (exclusive).
    A missing or zero value leaves the corresponding filter off.

    Returns:
        JSON ``{'success': True, 'data': {...}}`` with one entry per analysis,
        or ``{'success': False, 'error': <message>}`` when anything fails.
    """
    try:
        df = load_alipay_data()

        # Optional year filter
        year = request.args.get('year', type=int)
        if year:
            df = df[df['交易时间'].dt.year == year]

        # Optional amount-range filter
        min_amount = request.args.get('min_amount', type=float)
        max_amount = request.args.get('max_amount', type=float)

        if min_amount:
            df = df[df['金额'] >= min_amount]
        if max_amount:
            df = df[df['金额'] < max_amount]

        # Each analysis is evaluated in listing order on the same filtered frame.
        payload = {
            # Merchants, spending scenarios, spending habits
            'merchant_analysis': analyze_merchants(df),
            'scenario_analysis': analyze_scenarios(df),
            'habit_analysis': analyze_habits(df),
            # Advanced insights
            'latte_factor': analyze_latte_factor(df),
            'nighttime_analysis': analyze_nighttime_spending(df),
            'subscription_analysis': analyze_subscriptions(df),
            'inflation_analysis': analyze_inflation(df),
            'brand_loyalty': analyze_brand_loyalty(df),
            # Phase 2 insights
            'sankey_data': analyze_sankey(df),
            'engel_coefficient': analyze_engel_coefficient(df),
            'weekend_monday': analyze_weekend_vs_monday(df),
            'story_data': generate_story_data(df),
            # Smart tags
            'tags': generate_smart_tags(df),
            # Payment methods
            'payment_analysis': analyze_payment_methods(df),
            # Advanced visualisation chart data
            'chord_data': generate_chord_data(df),
            'funnel_data': generate_funnel_data(df),
            'quadrant_data': generate_quadrant_data(df),
            'radar_data': generate_radar_data(df),
            'wordcloud_data': generate_wordcloud_data(df),
            'themeriver_data': generate_themeriver_data(df),
            # Research-style chart data
            'boxplot_data': generate_boxplot_data(df),
            'heatmap_data': generate_heatmap_data(df),
            'pareto_data': generate_pareto_data(df),
        }

        return jsonify({'success': True, 'data': payload})

    except Exception as e:
        logger.error(f"Analysis error: {str(e)}")
        return jsonify({'success': False, 'error': str(e)})

@app.route('/api/monthly_analysis')
def monthly_analysis():
    """Return income/expense statistics for one month, compared with the previous month.

    Query parameters:
        year (int, required), month (int, required, 1-12): the month to analyse.
        min_amount (float): keep rows with ``金额 >= min_amount``.
        max_amount (float): keep rows with ``金额 < max_amount`` (exclusive).
        A missing or zero amount bound leaves that filter off.

    Returns:
        JSON with ``stats`` (totals, counts, month-over-month comparisons),
        ``daily_data`` (one value per calendar day of the month),
        ``categories`` and ``categories_source``. Invalid ``year``/``month``
        yields ``{'success': False, 'error': ...}`` with HTTP 400; any other
        failure yields the same envelope with the exception message.
    """
    try:
        year = request.args.get('year', type=int)
        month = request.args.get('month', type=int)
        min_amount = request.args.get('min_amount', type=float)
        max_amount = request.args.get('max_amount', type=float)

        # Reject missing/invalid parameters up front instead of failing later
        # inside calendar.monthrange with an opaque TypeError message.
        if (year is None or month is None
                or not 1 <= year <= 9999 or not 1 <= month <= 12):
            return jsonify({
                'success': False,
                'error': 'year 和 month 参数必填，且必须是有效的年份和月份'
            }), 400

        df = load_alipay_data()

        # Rows of the requested month
        current_month_df = df[
            (df['交易时间'].dt.year == year) &
            (df['交易时间'].dt.month == month)
        ]

        # Rows of the previous month (January wraps to December of the year before)
        last_month = month - 1 if month > 1 else 12
        last_year = year if month > 1 else year - 1
        last_month_df = df[
            (df['交易时间'].dt.year == last_year) &
            (df['交易时间'].dt.month == last_month)
        ]

        # Apply the amount filter to both months
        if min_amount:
            current_month_df = current_month_df[current_month_df['金额'] >= min_amount]
            last_month_df = last_month_df[last_month_df['金额'] >= min_amount]
        if max_amount:
            current_month_df = current_month_df[current_month_df['金额'] < max_amount]
            last_month_df = last_month_df[last_month_df['金额'] < max_amount]

        def _split(frame):
            """Return (expense rows, income rows) of *frame*, refunds excluded."""
            not_refund = ~frame['是否退款']
            return (
                frame[(frame['收/支'] == '支出') & not_refund],
                frame[(frame['收/支'] == '收入') & not_refund],
            )

        current_expense_df, current_income_df = _split(current_month_df)
        last_expense_df, last_income_df = _split(last_month_df)

        # Totals for the requested month
        current_expense = current_expense_df['金额'].sum()
        current_income = current_income_df['金额'].sum()
        current_balance = current_income - current_expense

        # Totals for the previous month
        last_expense = last_expense_df['金额'].sum()
        last_income = last_income_df['金额'].sum()
        last_balance = last_income - last_expense

        # Per-day totals
        daily_expenses = current_expense_df.groupby(
            current_expense_df['交易时间'].dt.date
        )['金额'].sum()
        daily_incomes = current_income_df.groupby(
            current_income_df['交易时间'].dt.date
        )['金额'].sum()

        # Per-category totals
        expense_categories = current_expense_df.groupby('交易分类')['金额'].sum()
        income_categories = current_income_df.groupby('交易分类')['金额'].sum()

        # Per-category totals broken down by data source
        expense_source = current_expense_df.groupby(['来源', '交易分类'])['金额'].sum().reset_index().to_dict('records')
        income_source = current_income_df.groupby(['来源', '交易分类'])['金额'].sum().reset_index().to_dict('records')

        # Every calendar day of the month, so days without transactions show as 0
        last_day = calendar.monthrange(year, month)[1]
        all_dates = [
            datetime(year, month, day).date()
            for day in range(1, last_day + 1)
        ]
        daily_expenses = daily_expenses.reindex(all_dates, fill_value=0)
        daily_incomes = daily_incomes.reindex(all_dates, fill_value=0)
        date_labels = [d.strftime('%Y-%m-%d') for d in all_dates]

        return jsonify({
            'success': True,
            'data': {
                'stats': {
                    'balance': float(current_balance),
                    'total_expense': float(current_expense),
                    'total_income': float(current_income),
                    'expense_count': int(len(current_expense_df)),
                    'income_count': int(len(current_income_df)),
                    # 'rate' is None when the previous value is 0 (no meaningful percentage)
                    'comparisons': {
                        'balance': {
                            'change': float(current_balance - last_balance),
                            'rate': float((current_balance - last_balance) / abs(last_balance) * 100) if last_balance != 0 else None
                        },
                        'expense': {
                            'change': float(current_expense - last_expense),
                            'rate': float((current_expense - last_expense) / last_expense * 100) if last_expense != 0 else None
                        },
                        'income': {
                            'change': float(current_income - last_income),
                            'rate': float((current_income - last_income) / last_income * 100) if last_income != 0 else None
                        }
                    }
                },
                'daily_data': {
                    'expense': {
                        'dates': date_labels,
                        'amounts': daily_expenses.values.tolist()
                    },
                    'income': {
                        'dates': date_labels,
                        'amounts': daily_incomes.values.tolist()
                    }
                },
                'categories': {
                    'expense': {
                        'names': expense_categories.index.tolist(),
                        'amounts': expense_categories.values.tolist()
                    },
                    'income': {
                        'names': income_categories.index.tolist(),
                        'amounts': income_categories.values.tolist()
                    }
                },
                'categories_source': {
                    'expense': expense_source,
                    'income': income_source
                }
            }
        })

    except Exception as e:
        logger.error(f"Error in monthly analysis: {str(e)}")
        return jsonify({'success': False, 'error': str(e)})

@app.route('/api/category_expenses')
def category_expenses():
    """Return total expense per category, largest first, in ECharts-friendly form.

    Returns:
        JSON ``{'categories': [...], 'amounts': [...]}`` on success. On failure
        (e.g. no bill data available) a JSON error envelope
        ``{'success': False, 'error': ...}`` with HTTP 500 is returned instead
        of an unhandled exception.
    """
    try:
        df = load_alipay_data()

        # Sum expense amounts per category, sorted descending
        category_stats = df[df['收/支'] == '支出'].groupby('交易分类').agg({
            '金额': 'sum'
        }).sort_values('金额', ascending=False)

        # Shape the data for ECharts
        data = {
            'categories': category_stats.index.tolist(),
            'amounts': category_stats['金额'].tolist()
        }

        return jsonify(data)

    except Exception as e:
        logger.error(f"获取分类支出时出错: {str(e)}", exc_info=True)
        return jsonify({
            'success': False,
            'error': f'获取分类支出失败: {str(e)}'
        }), 500

@app.route('/api/transactions')
def get_transactions():
    """Return a filtered, paginated list of income/expense transactions as JSON.

    Query parameters:
        page (int, default 1), per_page (int, default 20; values below 1 are
        treated as 1).
        type: exact value of the ``收/支`` column to keep.
        search: literal, case-insensitive substring looked up in ``商品说明``,
            ``交易对方`` and ``交易分类``.
        year, month, hour (int), date (``YYYY-MM-DD`` string), category.
        min_amount / max_amount (float, both inclusive; missing or 0 = no bound).

    Rows whose ``收/支`` is neither ``收入`` nor ``支出`` and rows flagged as
    refunds are always excluded. Results are sorted newest first.

    Returns:
        JSON with ``transactions`` and ``pagination``; on failure a JSON error
        envelope with HTTP 500.
    """
    try:
        df = load_alipay_data()

        # Pagination parameters
        page = request.args.get('page', 1, type=int)
        per_page = request.args.get('per_page', 20, type=int)  # 20 rows per page by default
        # per_page <= 0 would divide by zero / produce negative page counts below
        per_page = max(1, per_page)

        # Filter parameters
        year = request.args.get('year', type=int)
        month = request.args.get('month', type=int)
        date = request.args.get('date')
        hour = request.args.get('hour', type=int)
        category = request.args.get('category')
        min_amount = request.args.get('min_amount', type=float)
        max_amount = request.args.get('max_amount', type=float)

        # Transaction type (income / expense); local name avoids shadowing builtin `type`
        trans_type = request.args.get('type')

        # Free-text search
        search_query = request.args.get('search')

        # Apply filters
        if trans_type:
            df = df[df['收/支'] == trans_type]

        if search_query:
            # regex=False: the query is user input and must be matched literally,
            # otherwise characters such as "(" or "*" raise re.error.
            def _matches(column):
                return df[column].astype(str).str.contains(
                    search_query, case=False, na=False, regex=False
                )

            mask = _matches('商品说明') | _matches('交易对方') | _matches('交易分类')
            df = df[mask]

        if year:
            df = df[df['交易时间'].dt.year == year]
        if month:
            df = df[df['交易时间'].dt.month == month]
        if date:
            df = df[df['日期'] == date]
        if hour is not None:  # hour 0 is a valid filter value
            df = df[df['交易时间'].dt.hour == hour]
        if category:
            df = df[df['交易分类'] == category]
        if min_amount:
            df = df[df['金额'] >= min_amount]
        if max_amount:
            df = df[df['金额'] <= max_amount]

        # Drop transactions that count as neither income nor expense
        df = df[df['收/支'].isin(['收入', '支出'])]

        # Drop refunded transactions
        df = df[~df['是否退款']]

        # Newest first
        df = df.sort_values('交易时间', ascending=False)

        # Total record and page counts (ceiling division)
        total_records = len(df)
        total_pages = (total_records + per_page - 1) // per_page

        # Clamp the page number into the valid range (page 1 when there are no rows)
        page = max(1, min(page, total_pages))

        # Slice out the current page
        start_idx = (page - 1) * per_page
        end_idx = min(start_idx + per_page, total_records)
        page_df = df.iloc[start_idx:end_idx]

        # Convert rows to plain dicts; a missing/NaN counterparty becomes ''
        transactions = []
        for _, row in page_df.iterrows():
            transactions.append({
                'time': row['交易时间'].strftime('%Y-%m-%d %H:%M:%S'),
                'description': str(row['商品说明']),
                'category': str(row['交易分类']),
                'type': str(row['收/支']),
                'amount': float(row['金额']),
                'status': str(row['交易状态']),
                'counterparty': str(row.get('交易对方', '')) if pd.notna(row.get('交易对方')) else ''
            })

        # Response including pagination info
        return jsonify({
            'success': True,
            'transactions': transactions,
            'pagination': {
                'current_page': page,
                'per_page': per_page,
                'total_pages': total_pages,
                'total_records': total_records
            }
        })

    except Exception as e:
        logger.error(f"获取交易记录时出错: {str(e)}", exc_info=True)
        return jsonify({
            'success': False,
            'error': f'获取交易记录失败: {str(e)}'
        }), 500

@app.route('/api/summary')
def summary():
    """Return overall income/expense totals plus the displayed month's expense as JSON.

    The displayed month is the current calendar month when it has expense
    data, otherwise the latest month that has. ``prev_monthly_avg`` carries the
    second-to-last month's expense (or the displayed expense when only one
    month exists). When there are no expense rows at all, the expense-related
    figures are reported as 0 instead of failing.
    """
    df = load_alipay_data()

    # Current calendar month, formatted like the '月份' column
    current_month = datetime.now().strftime('%Y-%m')

    # Basic totals (cast to float so the values are plain JSON numbers)
    expense_df = df[df['收/支'] == '支出']
    total_expense = float(expense_df['金额'].sum())
    total_income = float(df[df['收/支'] == '收入']['金额'].sum())

    # Expense per month
    monthly_expenses = expense_df.groupby('月份')['金额'].sum()

    # No expense rows: index[-1] would raise IndexError and mean() would be NaN
    if monthly_expenses.empty:
        return jsonify({
            'total_expense': round(total_expense, 2),
            'total_income': round(total_income, 2),
            'balance': round(total_income - total_expense, 2),
            'monthly_avg': 0.0,
            'current_month_expense': 0.0,
            'prev_monthly_avg': 0.0,
            'month_count': 0,
            'transaction_count': 0,
            'current_month': current_month,
            'has_current_month_data': False
        })

    # Latest month with expense data
    latest_month = monthly_expenses.index[-1]
    latest_month_expense = monthly_expenses[latest_month]

    # Expense of the current calendar month, if there is any data for it
    current_month_expense = monthly_expenses.get(current_month)
    has_current_month_data = current_month_expense is not None

    # Month and amount to display
    display_month = current_month if has_current_month_data else latest_month
    display_expense = current_month_expense if has_current_month_data else latest_month_expense

    # Month-over-month reference value (second-to-last month when available)
    if len(monthly_expenses) > 1:
        prev_month_expense = monthly_expenses.iloc[-2]
    else:
        prev_month_expense = display_expense

    return jsonify({
        'total_expense': round(total_expense, 2),
        'total_income': round(total_income, 2),
        'balance': round(total_income - total_expense, 2),
        'monthly_avg': round(float(monthly_expenses.mean()), 2),
        'current_month_expense': round(float(display_expense), 2),
        'prev_monthly_avg': round(float(prev_month_expense), 2),
        'month_count': len(monthly_expenses),
        'transaction_count': len(expense_df),
        'current_month': display_month,
        'has_current_month_data': has_current_month_data
    })

@app.route('/api/daily_data')
def daily_data():
    """Build the heat-map payload: per-day expense, income and transaction counts.

    Optional query parameters:
        year   -- integer; keep only rows whose transaction time is in that year.
        filter -- 'large' keeps amounts above 1000, 'small' keeps amounts of
                  1000 or less; any other value keeps everything.

    The response also carries the 20/40/60/80% quantiles of the daily
    expense and daily income totals (empty lists when there is no data).
    """
    try:
        df = load_alipay_data()

        # Read query parameters.
        year = request.args.get('year', type=int)
        filter_type = request.args.get('filter', 'all')

        # Amount-based filter.
        if filter_type == 'large':
            df = df[df['金额'] > 1000]
        elif filter_type == 'small':
            df = df[df['金额'] <= 1000]

        # Year filter, only when a year was supplied.
        if year:
            df = df[df['交易时间'].dt.year == year]

        # Keep only income and expense rows; anything else is excluded.
        df = df[df['收/支'].isin(['收入', '支出'])]

        # One row per (date, direction) with the summed amount and row count.
        per_day = (
            df.groupby(['日期', '收/支'])
            .agg({'金额': 'sum', '交易时间': 'count'})
            .reset_index()
        )

        expense_series = []
        income_series = []
        count_series = []
        amount_targets = (('支出', expense_series), ('收入', income_series))

        for day, rows in per_day.groupby('日期'):
            # Amount entries: at most one row per direction for a given day.
            for direction, bucket in amount_targets:
                matched = rows[rows['收/支'] == direction]
                if not matched.empty:
                    bucket.append([day, float(matched['金额'].iloc[0])])

            # Transaction count covers income and expense together.
            count_series.append([day, int(rows['交易时间'].sum())])

        def _cut_points(series):
            """Return the rounded 20/40/60/80% quantiles of the series' amounts."""
            amounts = [entry[1] for entry in series]
            if not amounts:
                return []
            return [
                round(float(q), 2)
                for q in np.quantile(amounts, [0.2, 0.4, 0.6, 0.8])
            ]

        expense_quantiles = _cut_points(expense_series)
        income_quantiles = _cut_points(income_series)

        return jsonify({
            'expense': expense_series,
            'income': income_series,
            'transaction': count_series,
            'expense_quantiles': expense_quantiles,
            'income_quantiles': income_quantiles
        })

    except Exception as e:
        logger.error(f"Error in daily data: {str(e)}")
        return jsonify({'error': str(e)}), 500

@app.route('/api/category_detail/<month>/<category>')
def category_detail(month, category):
    """Return the expense rows of one category in one month, largest first.

    Args:
        month: value compared against the '月份' column.
        category: value compared against the '交易分类' column.

    Returns:
        A JSON list of objects with time, description, counterparty,
        amount (rounded to 2 decimals) and status.
    """
    df = load_alipay_data()

    # Rows that are expenses of the requested category in the requested month.
    is_match = (
        (df['月份'] == month)
        & (df['交易分类'] == category)
        & (df['收/支'] == '支出')
    )

    # Sort by amount descending, then keep only the columns that are returned.
    wanted_columns = ['交易时间', '商品说明', '交易对方', '金额', '交易状态']
    ranked = df[is_match].sort_values('金额', ascending=False)[wanted_columns]

    # Convert each record to the response shape.
    payload = []
    for record in ranked.to_dict('records'):
        payload.append({
            'time': record['交易时间'].strftime('%Y-%m-%d %H:%M:%S'),
            'description': record['商品说明'],
            'counterparty': record['交易对方'],
            'amount': round(float(record['金额']), 2),
            'status': record['交易状态']
        })

    return jsonify(payload)

@app.route('/api/top_transactions')
def get_top_transactions():
    """Return the largest expense transactions at or above a minimum amount.

    Query parameters:
        limit      -- maximum number of records to return (default 10).
        min_amount -- minimum amount for a record to qualify (default 1000).

    On success the JSON body is {'success': True, 'transactions': [...]};
    any exception is logged and reported as a 500 with 'success': False.
    """
    try:
        # Read and convert the query parameters.
        query = request.args
        limit = int(query.get('limit', 10))
        min_amount = float(query.get('min_amount', 1000))

        df = load_alipay_data()

        # Expenses only, then those meeting the amount threshold.
        expenses = df[df['收/支'] == '支出'].copy()
        qualifying = expenses[expenses['金额'] >= min_amount]

        # The `limit` largest rows by amount.
        ranked = qualifying.nlargest(limit, '金额')

        def _serialize(row):
            """Convert one transaction row into the response dictionary."""
            stamp = row['交易时间']
            return {
                'time': stamp.strftime('%Y-%m-%d %H:%M:%S'),
                'date': stamp.strftime('%Y-%m-%d'),
                'category': row['交易分类'],
                'description': row['商品说明'],
                'amount': float(row['金额']),
                'status': row['交易状态'],
                'counterparty': row.get('交易对方', '')
            }

        transactions = [_serialize(row) for _, row in ranked.iterrows()]

        return jsonify({
            'success': True,
            'transactions': transactions
        })

    except Exception as e:
        logger.error(f"获取大额交易记录时出错: {str(e)}", exc_info=True)
        return jsonify({
            'success': False,
            'error': f'获取大额交易记录失败: {str(e)}'
        }), 500

@app.route('/api/category_trend/<category>')
def category_trend(category):
    df = load_alipay_data()

    # 获取指定分类的月度数据
    category_df = df[
        (df['收/支'] == '支出') &
        (df['交易分类'] == category)
    ]

    # 按月份分组计算各项指标
    monthly_stats = category_df.groupby('月份').agg({
        '金额': ['sum', 'count', 'mean'],  # 总金额、交易次数、平均金额
        '交易时间': lambda x: len(x.dt.date.unique())  # 有交易的天数
    }).round(2)

    # 重命名列
    monthly_stats.columns = ['total', 'transactions', 'avg_amount', 'active_days']

    # 计算日均支出（总金额/当月有交易的天数）
    monthly_stats['daily_avg'] = (monthly_stats['total'] / monthly_stats['active_days']).round(2)

    # 计算环比变化率
    monthly_stats['mom_rate'] = (monthly_stats['total'].pct_change() * 100).round(2)

    # 计算占比
    total_expense = df[
        (df['收/支'] == '支出')
    ].groupby('月份')['金额'].sum()
    monthly_stats['percentage'] = (monthly_stats['total'] / total_expense * 100).round(2)

    # 准备返回数据
    result = {
        'months': monthly_stats.index.tolist(),
        'total': monthly_stats['total'].tolist(),
        'transactions': monthly_stats['transactions'].tolist(),
        'avg_amount': monthly_stats['avg_amount'].tolist(),
        'daily_avg': monthly_stats['daily_avg'].tolist(),
        'mom_rate': monthly_stats['mom_rate'].fillna(0).tolist(),
        'percentage': monthly_stats['percentage'].tolist(),
        'summary': {
            'total_amount': category_df['金额'].sum().round(2),
            'total_transactions': len(category_df),
            'max_month': monthly_stats['total'].idxmax(),
            'max_amount': monthly_stats['total'].max().round(2),