# coding: utf-8
"""Sum the study minutes recorded in a Twilog tweet archive.

Input file format: Twilog archive (2021/02).

Each line of the archive is split on commas.  A line counts only when it
splits into exactly three fields, the second field (the tweet date) starts
a ``YY`` + four digits + space + six digits run whose ``YY`` equals
``targetYear``, and the third field (the tweet text) contains
``#mandarin #study <minutes>``.  The minutes of all such lines are added up
and printed.
"""
import re

# Two-digit year whose tweets are counted.
targetYear = 21
# Archive file read when the script is run directly.
fileName = 'bokupi210209.csv'

# Compiled once at import time instead of on every line of the archive.
# Group 3 of STUDY_PATTERN is the number of minutes studied.
STUDY_PATTERN = re.compile(r'(#mandarin\s*)(#study\s*)(\d+)(\s*)')
# Group 1 of DATE_PATTERN is the two-digit year; the remaining digits are
# presumably MMDD hhmmss (not used by this script).
DATE_PATTERN = re.compile(r'(\d{2})(\d{4} \d{6})')


def study_minutes_in_line(line, target_year):
    """Return the study minutes recorded on one archive line.

    Args:
        line: One raw text line of the archive.
        target_year: Two-digit year (int) the tweet date must carry.

    Returns:
        The minutes found after ``#mandarin #study`` when the line is a
        regular three-field record dated in ``target_year``; otherwise 0.
    """
    items = line.split(',')
    # A line that does not split into exactly three fields is not a regular
    # record; it may be a continuation of a multi-line tweet.
    # NOTE(review): a tweet whose text contains a comma also ends up here and
    # is skipped.  The csv module would handle quoted fields - confirm the
    # archive's quoting before switching.
    if len(items) != 3:
        return 0

    tweet_date, tweet_text = items[1], items[2]
    match_date = DATE_PATTERN.search(tweet_date)
    match_study = STUDY_PATTERN.search(tweet_text)
    if not (match_date and match_study):
        return 0
    if int(match_date.group(1)) != target_year:
        return 0
    return int(match_study.group(3))


def total_study_minutes(lines, target_year):
    """Return the sum of study minutes over an iterable of archive lines.

    Args:
        lines: Iterable of raw text lines (an open text file works).
        target_year: Two-digit year (int) the tweet date must carry.

    Returns:
        Total minutes as an int; 0 when no line qualifies.
    """
    return sum(study_minutes_in_line(line, target_year) for line in lines)


def main():
    """Read ``fileName`` and print the total study minutes for ``targetYear``."""
    # "utf-8_sig" strips a leading byte-order mark if the archive has one.
    # The with-block guarantees the file is closed even if parsing raises.
    with open(fileName, 'r', encoding="utf-8_sig") as f:
        total_minutes = total_study_minutes(f, targetYear)
    print('reached to end of file')
    # Runtime text is Japanese for "total time <n> minutes".
    print('合計時間 ', total_minutes, '分')


if __name__ == '__main__':
    main()