🐍 Python

231221 THU 파이썬 복습 (2) CRM 타이밍 분석

행팽 2023. 12. 21. 15:06

1. 데이터 타입 확인

import pandas as pd
# Load the comma-separated access log into a DataFrame.
sparta_data = pd.read_csv('/content/access_detail.csv')
sparta_data.head()

# type() - check what kind of object the `access_date` column stores,
# using the row labelled 1 as a sample.
print(type(sparta_data.loc[1, 'access_date']))

 

 

 

2. 요일별, 시간별 수강생 수 구하기

  • 문자를 날짜/시간 데이터로 변환
  • 요일 데이터, 시간 데이터 추가
import pandas as pd
# Load the comma-separated access log into a DataFrame.
sparta_data = pd.read_csv('/content/access_detail.csv')

# Convert the `access_date` strings into datetime values.
# The pattern lives in `date_format` so the built-in format() is not shadowed.
date_format = '%Y-%m-%dT%H:%M:%S.%f'
sparta_data['access_date_time'] = pd.to_datetime(sparta_data['access_date'], format=date_format)
sparta_data.tail(5)

# Add weekday-name and hour-of-day columns derived from the parsed timestamp.
sparta_data['access_date_time_weekday'] = sparta_data['access_date_time'].dt.day_name()
sparta_data['access_date_time_hour'] = sparta_data['access_date_time'].dt.hour
sparta_data.tail(5)

# Group by weekday and count the non-null user_id values in each group.
weeks = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
weekdata = sparta_data.groupby('access_date_time_weekday')['user_id'].count()

# Arrange the counts in calendar order. reindex() conforms the Series to the
# given label order; a weekday with no rows becomes 0 instead of raising.
weekdata = weekdata.reindex(weeks, fill_value=0)

# Count non-null user_id values per hour, in ascending hour order.
hourdata = sparta_data.groupby('access_date_time_hour')['user_id'].count().sort_index()

weekdata, hourdata

 

 

 

3. 요일별 수강생 막대그래프

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Load the comma-separated access log into a DataFrame.
sparta_data = pd.read_csv('/content/access_detail.csv')

# Convert the `access_date` strings into datetime values.
# The pattern lives in `date_format` so the built-in format() is not shadowed.
date_format = '%Y-%m-%dT%H:%M:%S.%f'
sparta_data['access_date_time'] = pd.to_datetime(sparta_data['access_date'], format=date_format)
sparta_data.tail(5)

# Add weekday-name and hour-of-day columns derived from the parsed timestamp.
sparta_data['access_date_time_weekday'] = sparta_data['access_date_time'].dt.day_name()
sparta_data['access_date_time_hour'] = sparta_data['access_date_time'].dt.hour
sparta_data.tail(5)

# Group by weekday and count the non-null user_id values in each group.
weeks = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
weekdata = sparta_data.groupby('access_date_time_weekday')['user_id'].count()

# Arrange the counts in calendar order. reindex() conforms the Series to the
# given label order; a weekday with no rows becomes 0 instead of raising.
weekdata = weekdata.reindex(weeks, fill_value=0)

# Count non-null user_id values per hour, in ascending hour order.
hourdata = sparta_data.groupby('access_date_time_hour')['user_id'].count().sort_index()

# Bar chart of the per-weekday counts.
plt.figure(figsize=(10,5))
plt.bar(weekdata.index, weekdata)
# Title: "Number of students who completed a lecture, by weekday".
plt.title('요일별 수강 완료 수강생 수')
plt.xlabel('요일')          # "Weekday"
plt.ylabel('수강생(명)')    # "Students (persons)"
plt.xticks(rotation=90)     # rotate the weekday tick labels by 90 degrees
plt.show()

 

 

 

4. 시간별 수강생 선그래프

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Load the comma-separated access log into a DataFrame.
sparta_data = pd.read_csv('/content/access_detail.csv')

# Convert the `access_date` strings into datetime values.
# The pattern lives in `date_format` so the built-in format() is not shadowed.
date_format = '%Y-%m-%dT%H:%M:%S.%f'
sparta_data['access_date_time'] = pd.to_datetime(sparta_data['access_date'], format=date_format)
sparta_data.tail(5)

# Add weekday-name and hour-of-day columns derived from the parsed timestamp.
sparta_data['access_date_time_weekday'] = sparta_data['access_date_time'].dt.day_name()
sparta_data['access_date_time_hour'] = sparta_data['access_date_time'].dt.hour
sparta_data.tail(5)

# Group by weekday and count the non-null user_id values in each group.
weeks = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
weekdata = sparta_data.groupby('access_date_time_weekday')['user_id'].count()

# Arrange the counts in calendar order. reindex() conforms the Series to the
# given label order; a weekday with no rows becomes 0 instead of raising.
weekdata = weekdata.reindex(weeks, fill_value=0)

# Count non-null user_id values per hour, in ascending hour order.
hourdata = sparta_data.groupby('access_date_time_hour')['user_id'].count().sort_index()

# Line chart of the per-hour counts.
plt.figure(figsize=(10,5))
plt.plot(hourdata.index, hourdata)
# Title: "Number of users who completed a lecture, by hour".
plt.title('시간별 수강 완료 사용자 수')
plt.xlabel('시간')          # "Hour"
plt.ylabel('사용자(명)')    # "Users (persons)"
plt.xticks(np.arange(24))   # one tick for each hour 0..23
plt.show()

 

 

 

5. 요일별 접속 시간 히트맵

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Load the comma-separated access log into a DataFrame.
sparta_data = pd.read_csv('/content/access_detail.csv')

# Convert the `access_date` strings into datetime values.
# The pattern lives in `date_format` so the built-in format() is not shadowed.
date_format = '%Y-%m-%dT%H:%M:%S.%f'
sparta_data['access_date_time'] = pd.to_datetime(sparta_data['access_date'], format=date_format)
sparta_data.tail(5)

# Add weekday-name and hour-of-day columns derived from the parsed timestamp.
sparta_data['access_date_time_weekday'] = sparta_data['access_date_time'].dt.day_name()
sparta_data['access_date_time_hour'] = sparta_data['access_date_time'].dt.hour
sparta_data.tail(5)

# Group by weekday and count the non-null user_id values in each group.
weeks = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
weekdata = sparta_data.groupby('access_date_time_weekday')['user_id'].count()

# Arrange the counts in calendar order. reindex() conforms the Series to the
# given label order; a weekday with no rows becomes 0 instead of raising.
weekdata = weekdata.reindex(weeks, fill_value=0)

# Count non-null user_id values per hour, in ascending hour order.
hourdata = sparta_data.groupby('access_date_time_hour')['user_id'].count().sort_index()

# Build the pivot table.
# values  : the column whose entries are aggregated into each cell
# index   : the column whose values become the rows
# columns : the column whose values become the column headers
# aggfunc : the function used to collapse each group of rows
# The rows are then put in calendar order with reindex(); a weekday that is
# missing from the data shows up as an all-NaN row instead of raising.
sparta_data_pivot_table = pd.pivot_table(
    sparta_data,
    values='user_id',
    index=['access_date_time_weekday'],
    columns=['access_date_time_hour'],
    aggfunc="count",
).reindex(weeks)

# Draw the heatmap.
plt.figure(figsize=(14,5))
plt.pcolor(sparta_data_pivot_table)
# Ticks sit at x.5 so each label is centred on its cell.
plt.xticks(np.arange(0.5, len(sparta_data_pivot_table.columns), 1), sparta_data_pivot_table.columns)
plt.yticks(np.arange(0.5, len(sparta_data_pivot_table.index), 1), sparta_data_pivot_table.index)

# Title: "Heatmap of completion time by weekday".
plt.title('요일별 종료 시간 히트맵')
plt.xlabel('시간')  # "Hour"
plt.ylabel('요일')  # "Weekday"
plt.colorbar()  # colour bar showing which colour corresponds to which count
plt.show()

 

 

 

6. 분석 결과

화요일 저녁 시간에 수강 독려 문자 전송