# coding: utf-8

# Input file format : Twilog archive (2021/02)

import re

# Two-digit year whose study tweets are summed.
targetYear = 21

# Twilog archive (CSV) to read.
fileName = 'bokupi210209.csv'

# Compiled once at import time so the per-line loop does not repeat the lookup.
# Study tweets look like "#mandarin #study <minutes>"; group 3 is the minutes.
_STUDY_PATTERN = re.compile(r'(#mandarin\s*)(#study\s*)(\d+)(\s*)')
# Group 1 is the two-digit year at the start of the timestamp
# (presumably "YYMMDD HHMMSS" -- confirm against the archive format).
_DATE_PATTERN = re.compile(r'(\d{2})(\d{4} \d{6})')


def sum_study_minutes(lines, target_year):
    """Return the total study minutes tweeted in *target_year*.

    Args:
        lines: Iterable of raw text lines from the archive (an open text
            file works).
        target_year: Two-digit year (e.g. ``21``) compared against the
            first two digits matched in the date column.

    Returns:
        The sum of the minute counts of every matching study tweet, or 0
        when nothing matches.
    """
    total = 0
    for line in lines:
        items = line.split(',')

        # A regular row has exactly three comma-separated fields; anything
        # else is not a regular row (maybe a part of a tweet) and is skipped.
        if len(items) != 3:
            continue

        date_match = _DATE_PATTERN.search(items[1])
        study_match = _STUDY_PATTERN.search(items[2])
        if not (date_match and study_match):
            continue

        if int(date_match.group(1)) == target_year:
            total += int(study_match.group(3))
    return total


def main():
    """Read the configured archive and print the total study minutes."""
    # The context manager closes the file even if reading fails part-way.
    with open(fileName, 'r', encoding="utf-8_sig") as f:
        total_minutes = sum_study_minutes(f, targetYear)

    print('reached to end of file')
    print('合計時間 ', total_minutes, '分')


if __name__ == '__main__':
    main()