ㄱㄱ

카테고리 없음
ㄱㄱ

저기여... 2025. 10. 23. 11:20
**결론**: 한국식 날짜 형식("2025. 9. 23. 오전 9:44")을 파싱하여 일자별·요일별·시간별(0~23시) 대화량을 분석하는 코드입니다.

**구현**:
```python
import re
from datetime import datetime
from collections import defaultdict, Counter

def analyze_chat_statistics(file_path):
    """
    Parse a chat log and count messages per day, per weekday and per hour.

    Every non-blank line is searched for a Korean-style timestamp such as
    "2025. 9. 23. 오전 9:44". Lines without a timestamp are ignored. Lines
    whose timestamp is not a valid 12-hour date/time are reported on stdout
    and skipped.

    Args:
        file_path: Path of the UTF-8 encoded text file to read.

    Returns:
        A dict with the keys ``daily_count`` (``defaultdict(int)`` keyed by
        ``YYYY-MM-DD``), ``hourly_count`` (``Counter`` keyed by hour 0-23),
        ``weekday_count`` (``Counter`` keyed by 월..일), ``total_messages``
        (int) and ``date_range`` (dict holding the earliest and latest
        ``datetime`` under ``start``/``end``, both ``None`` when nothing
        matched). Returns ``None`` when the file cannot be opened or read;
        the error is printed instead of raised.
    """

    # Containers for the collected statistics.
    stats = {
        'daily_count': defaultdict(int),      # messages per calendar day
        'hourly_count': Counter(),            # messages per hour (0-23)
        'weekday_count': Counter(),           # messages per weekday
        'total_messages': 0,
        'date_range': {'start': None, 'end': None}
    }

    # Matches "2025. 9. 23. 오전 9:44"; compiled once, outside the line loop.
    timestamp_re = re.compile(
        r'(\d{4})\.\s*(\d{1,2})\.\s*(\d{1,2})\.\s*(오전|오후)\s*(\d{1,2}):(\d{2})'
    )
    # Indexed by datetime.weekday(), where Monday is 0.
    weekday_names = ['월', '화', '수', '목', '금', '토', '일']

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            for line_num, line in enumerate(f, 1):
                line = line.strip()
                if not line:
                    continue

                match = timestamp_re.search(line)
                if not match:
                    continue

                try:
                    year = int(match.group(1))
                    month = int(match.group(2))
                    day = int(match.group(3))
                    ampm = match.group(4)
                    hour = int(match.group(5))
                    minute = int(match.group(6))

                    # With an 오전/오후 marker the hour must be on the 12-hour
                    # clock. Without this check "오후 13" fails later (25h)
                    # while "오전 13" would be silently counted as 13:00.
                    if hour > 12:
                        raise ValueError(
                            f"12시간제 시각이 아닙니다: {ampm} {hour}시"
                        )

                    # Convert the 12-hour reading to a 24-hour value.
                    if ampm == '오후' and hour != 12:
                        hour += 12
                    elif ampm == '오전' and hour == 12:
                        hour = 0

                    # datetime() validates month/day/minute ranges for us.
                    date_obj = datetime(year, month, day, hour, minute)
                    date_key = date_obj.strftime('%Y-%m-%d')
                except ValueError as e:
                    print(f"⚠️  {line_num}번째 줄 파싱 오류: {e}")
                    print(f"    내용: {line[:100]}")
                    continue

                # Update the counters only after the timestamp fully parsed.
                stats['total_messages'] += 1
                stats['daily_count'][date_key] += 1
                stats['weekday_count'][weekday_names[date_obj.weekday()]] += 1
                stats['hourly_count'][hour] += 1

                # Track the earliest and latest timestamps seen so far.
                date_range = stats['date_range']
                if date_range['start'] is None or date_obj < date_range['start']:
                    date_range['start'] = date_obj
                if date_range['end'] is None or date_obj > date_range['end']:
                    date_range['end'] = date_obj

    except FileNotFoundError:
        print(f"❌ 파일을 찾을 수 없습니다: {file_path}")
        return None
    except Exception as e:
        # Deliberate catch-all boundary: any other read failure (permissions,
        # decoding, ...) is reported and turned into a None result.
        print(f"❌ 파일 읽기 오류: {e}")
        return None

    return stats


def _bar(count, peak, width=40):
    """Return a '█' bar for ``count``, scaled so that ``peak`` fills ``width``."""
    if count <= 0 or peak <= 0:
        return ''
    return '█' * (count * width // peak)


def _hour_label(hour, pad=False):
    """Format a 0-23 hour as a 12-hour Korean label such as "오후 3시"."""
    meridiem = "오전" if hour < 12 else "오후"
    twelve_hour = hour % 12 or 12  # 0 and 12 are both shown as 12
    if pad:
        return f"{meridiem} {twelve_hour:2d}시"
    return f"{meridiem} {twelve_hour}시"


def print_statistics(stats):
    """
    Print a formatted report of the chat statistics to stdout.

    Args:
        stats: Dict with the keys ``total_messages``, ``date_range``
            (``start``/``end``), ``daily_count``, ``weekday_count`` and
            ``hourly_count``, or ``None``.

    Returns:
        None. When ``stats`` is ``None`` or holds no messages, a single
        "no data" line is printed instead of the report.
    """

    if stats is None or stats['total_messages'] == 0:
        print("분석할 데이터가 없습니다.")
        return

    total = stats['total_messages']
    daily = stats['daily_count']
    by_weekday = stats['weekday_count']
    by_hour = stats['hourly_count']
    first = stats['date_range']['start']
    last = stats['date_range']['end']

    def percent(count):
        return (count / total * 100) if total > 0 else 0

    print("\n" + "="*60)
    print("📊 대화 통계 분석 결과")
    print("="*60)

    # Overall totals
    print(f"\n📝 전체 메시지 수: {total:,}개")

    if first:
        print(f"📅 기간: {first.strftime('%Y-%m-%d')} ~ {last.strftime('%Y-%m-%d')}")
        # Count calendar days, not elapsed 24h periods: 23:00 one day to
        # 01:00 the next spans two dates although the timedelta is 0 days.
        days_diff = last.toordinal() - first.toordinal() + 1
        print(f"   (총 {days_diff}일간)")
        print(f"📈 일평균: {total / days_diff:.1f}개")

    # Busiest days (top 10)
    if daily:
        print("\n" + "-"*60)
        print("📆 일자별 대화량 (상위 10일)")
        print("-"*60)
        top_days = sorted(daily.items(), key=lambda item: item[1], reverse=True)[:10]
        peak = top_days[0][1]  # sorted descending, so the first entry is the max
        for date, count in top_days:
            print(f"{date}: {count:4d}개 {_bar(count, peak)}")

    # Per-weekday breakdown, always listed Monday through Sunday
    if by_weekday:
        print("\n" + "-"*60)
        print("📅 요일별 대화량")
        print("-"*60)
        peak = max(by_weekday.values())
        for day in ['월', '화', '수', '목', '금', '토', '일']:
            count = by_weekday.get(day, 0)
            print(f"{day}요일: {count:4d}개 ({percent(count):5.1f}%) {_bar(count, peak)}")

    if by_hour:
        # Per-hour breakdown, one row for each of the 24 hours
        print("\n" + "-"*60)
        print("🕐 시간별 대화량 (매 시간)")
        print("-"*60)
        peak = max(by_hour.values())
        for hour in range(24):
            count = by_hour.get(hour, 0)
            print(f"{_hour_label(hour, pad=True)}: {count:4d}개 "
                  f"({percent(count):5.1f}%) {_bar(count, peak)}")

        # Summary over four six-hour periods
        print("\n" + "-"*60)
        print("⏰ 시간대 구간별 요약")
        print("-"*60)
        periods = (
            ('새벽 (00-06시)', range(0, 6)),
            ('오전 (06-12시)', range(6, 12)),
            ('오후 (12-18시)', range(12, 18)),
            ('저녁 (18-24시)', range(18, 24)),
        )
        for period, hours in periods:
            count = sum(by_hour.get(h, 0) for h in hours)
            print(f"{period}: {count:5d}개 ({percent(count):5.1f}%)")

        # Busiest single hour (first one wins on ties)
        busiest_hour, busiest_count = max(by_hour.items(), key=lambda item: item[1])
        print(f"\n🔥 가장 활발한 시간: {_hour_label(busiest_hour)} ({busiest_count}개)")

    # Busiest weekday (first one wins on ties)
    if by_weekday:
        busiest_day, busiest_count = max(by_weekday.items(), key=lambda item: item[1])
        print(f"🔥 가장 활발한 요일: {busiest_day}요일 ({busiest_count}개)")

    print("\n" + "="*60)


# Usage example: runs only when this file is executed as a script.
if __name__ == "__main__":
    # Set this to the path of the chat log you want to analyze.
    file_path = "chat_log.txt"

    print("🔍 파일 분석 중...")
    print_statistics(analyze_chat_statistics(file_path))
```

**주요 변경사항**:
1. ✅ **날짜 형식**: "2025. 9. 23. 오전 9:44" 파싱
2. ✅ **오전/오후 변환**: 자동으로 24시간 형식으로 변환
3. ✅ **매 시간별 분석**: 0시~23시까지 각 시간별 통계
4. ✅ **시간 표시**: 출력 시 "오전 9시", "오후 3시" 형식으로 표시
5. ✅ **퍼센트 추가**: 각 시간대와 요일별 비율 표시
6. ✅ **최다 시간/요일**: 가장 활발한 시간대와 요일 표시

**사용 방법**:
```bash
python chat_stats.py
```

위 코드를 `chat_stats.py`로 저장하고, `file_path` 값만 실제 대화 파일 경로로 수정하면 바로 실행됩니다! 📊