6 手机数据处理
在这个例子中,我们将介绍如何使用TransBigData来处理手机数据。
首先,导入 TransBigData,并使用 pandas 读取数据。
[1]:
import pandas as pd
import transbigdata as tbd
# Load the sample records, parse the compact `stime` stamps (format
# %Y%m%d%H%M) into datetimes, and order the rows by user and then by time.
data = (
    pd.read_csv(r'data/mobiledata_sample.csv')
    .assign(stime=lambda frame: pd.to_datetime(frame['stime'], format='%Y%m%d%H%M'))
    .sort_values(by=['user_id', 'stime'])
)
# Preview the first rows of the sorted records.
data.head()
[1]:
user_id | stime | longitude | latitude | date | |
---|---|---|---|---|---|
78668 | 00466ab30de56db7efbd04991b680ae1 | 2018-06-01 00:00:00 | 121.43 | 30.175 | 20180601 |
78669 | 00466ab30de56db7efbd04991b680ae1 | 2018-06-01 03:35:00 | 121.43 | 30.175 | 20180601 |
78670 | 00466ab30de56db7efbd04991b680ae1 | 2018-06-01 04:25:00 | 121.43 | 30.175 | 20180601 |
78671 | 00466ab30de56db7efbd04991b680ae1 | 2018-06-01 05:15:00 | 121.43 | 30.175 | 20180601 |
78289 | 00466ab30de56db7efbd04991b680ae1 | 2018-06-01 06:05:00 | 121.43 | 30.175 | 20180601 |
从手机轨迹数据中识别停留和移动信息
在处理手机数据时,TransBigData 首先将数据对应到网格,并将同一网格内的数据视为处于同一位置,以避免因定位误差而把同一位置识别为多个不同的位置。
[3]:
# Obtain the gridding parameters.
# NOTE(review): the four numbers look like a lon/lat bounding box; confirm the
# expected order and the unit of `accuracy` against the TransBigData docs.
grid_bounds = [121.860, 29.295, 121.862, 29.301]
params = tbd.area_to_params(grid_bounds, accuracy=500)

# Identify stay and move information from the mobile phone trajectory data.
# `col` names the columns of `data` passed to traj_stay_move.
traj_cols = ['user_id', 'stime', 'longitude', 'latitude']
stay, move = tbd.traj_stay_move(data, params, col=traj_cols)
[4]:
# Preview the first five identified stay records.
stay.head(n=5)
[4]:
user_id | stime | LONCOL | LATCOL | etime | lon | lat | duration | stayid | |
---|---|---|---|---|---|---|---|---|---|
78668 | 00466ab30de56db7efbd04991b680ae1 | 2018-06-01 00:00:00 | -83 | 196 | 2018-06-01 07:21:00 | 121.430 | 30.175 | 26460.0 | 0 |
78303 | 00466ab30de56db7efbd04991b680ae1 | 2018-06-01 07:36:00 | -81 | 191 | 2018-06-01 10:38:00 | 121.444 | 30.152 | 10920.0 | 1 |
78364 | 00466ab30de56db7efbd04991b680ae1 | 2018-06-01 10:38:00 | -81 | 191 | 2018-06-01 12:02:00 | 121.444 | 30.152 | 5040.0 | 2 |
78399 | 00466ab30de56db7efbd04991b680ae1 | 2018-06-01 12:20:00 | -83 | 196 | 2018-06-01 13:04:00 | 121.430 | 30.175 | 2640.0 | 3 |
78471 | 00466ab30de56db7efbd04991b680ae1 | 2018-06-01 14:34:00 | -60 | 189 | 2018-06-01 16:06:00 | 121.551 | 30.143 | 5520.0 | 4 |
[5]:
# Preview the first five identified move records.
move.head(n=5)
[5]:
user_id | SLONCOL | SLATCOL | stime | slon | slat | etime | elon | elat | ELONCOL | ELATCOL | duration | moveid | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
78668 | 00466ab30de56db7efbd04991b680ae1 | -83 | 196 | 2018-06-01 00:00:00 | 121.430 | 30.175 | 2018-06-01 00:00:00 | 121.430 | 30.175 | -83.0 | 196.0 | 0.0 | 0 |
78668 | 00466ab30de56db7efbd04991b680ae1 | -83 | 196 | 2018-06-01 07:21:00 | 121.430 | 30.175 | 2018-06-01 07:36:00 | 121.444 | 30.152 | -81.0 | 191.0 | 900.0 | 1 |
78303 | 00466ab30de56db7efbd04991b680ae1 | -81 | 191 | 2018-06-01 10:38:00 | 121.444 | 30.152 | 2018-06-01 10:38:00 | 121.444 | 30.152 | -81.0 | 191.0 | 0.0 | 2 |
78364 | 00466ab30de56db7efbd04991b680ae1 | -81 | 191 | 2018-06-01 12:02:00 | 121.444 | 30.152 | 2018-06-01 12:20:00 | 121.430 | 30.175 | -83.0 | 196.0 | 1080.0 | 3 |
78399 | 00466ab30de56db7efbd04991b680ae1 | -83 | 196 | 2018-06-01 13:04:00 | 121.430 | 30.175 | 2018-06-01 14:34:00 | 121.551 | 30.143 | -60.0 | 189.0 | 5400.0 | 4 |
居住地和工作地识别
[6]:
# Identify each user's home location from the stay records.
# `stay_cols` lists the user id, the stay start/end times, and the columns
# that describe the location (grid indices plus lon/lat).
stay_cols = ['user_id', 'stime', 'etime', 'LONCOL', 'LATCOL', 'lon', 'lat']
home = tbd.mobile_identify_home(
    stay,
    col=stay_cols,
    start_hour=8,
    end_hour=20,
)
home.head()
[6]:
user_id | LONCOL | LATCOL | lon | lat | |
---|---|---|---|---|---|
2036 | fcc3a9e9df361667e00ee5c16cb08922 | -147 | 292 | 121.103 | 30.610 |
2019 | f71e9d7d78e6f5bc9539d141e3a5a1c4 | -216 | 330 | 120.745 | 30.778 |
2001 | f6b65495b63574c2eb73c7e63ae38252 | -225 | -286 | 120.699 | 28.011 |
1982 | f1f4224a60da630a0b83b3a231022123 | 102 | 157 | 122.387 | 30.000 |
1942 | e96739aedb70a8e5c4efe4c488934b43 | -223 | 278 | 120.708 | 30.546 |
[7]:
# Identify each user's work location from the stay records.
# NOTE(review): workdaystart=0 / workdayend=4 presumably select Monday-Friday
# and minhour a minimum duration in hours; confirm against the TransBigData docs.
stay_cols = ['user_id', 'stime', 'etime', 'LONCOL', 'LATCOL', 'lon', 'lat']
work = tbd.mobile_identify_work(
    stay,
    col=stay_cols,
    minhour=3,
    start_hour=8,
    end_hour=20,
    workdaystart=0,
    workdayend=4,
)
work.head()
[7]:
user_id | LONCOL | LATCOL | lon | lat | |
---|---|---|---|---|---|
0 | fcc3a9e9df361667e00ee5c16cb08922 | -148 | 292 | 121.097 | 30.610 |
1 | f71e9d7d78e6f5bc9539d141e3a5a1c4 | -219 | 325 | 120.732 | 30.757 |
3 | f1f4224a60da630a0b83b3a231022123 | 103 | 153 | 122.390 | 29.982 |
5 | e1a1dfb5a77578c889bd3368ffe1d30f | -62 | 138 | 121.540 | 29.915 |
6 | e0e30d88fc4f4b8a1d649baf9dd1274e | -436 | -35 | 119.614 | 29.137 |
[8]:
# Optional: drop every work record that is identical to a home record
# (same user and same location), so that `work` keeps only the work places
# that differ from the user's home.
#
# A left merge with indicator=True labels each work row in the `_merge`
# column as 'both' (an identical row exists in `home`) or 'left_only'
# (no such row). This avoids writing a temporary marker column into `home`,
# so `home` is left untouched by this cell.
match_cols = ['user_id', 'LONCOL', 'LATCOL', 'lon', 'lat']
work_vs_home = pd.merge(
    work,
    home[match_cols],
    on=match_cols,
    how='left',
    indicator=True,
)
# Keep the rows without a matching home record, then remove the helper column.
work = work_vs_home[work_vs_home['_merge'] == 'left_only'].drop(columns=['_merge'])
绘制活动图
[9]:
# Plot one user's activity; each distinct location gets its own colour.
uid = 'fcc3a9e9df361667e00ee5c16cb08922'

# Label every stay with its grid cell as "LONCOL,LATCOL"; the label is stored
# in the `group` column of `stay`.
lon_label = stay['LONCOL'].astype(str)
lat_label = stay['LATCOL'].astype(str)
stay['group'] = lon_label + ',' + lat_label

# Restrict the plot to the stays of the selected user.
user_stay = stay[stay['user_id'] == uid]
tbd.plot_activity(user_stay, figsize=(20, 5))