Posted by DK on Thu 29 Oct 04:38
report abuse | download | new post
#!/usr/bin/python
"""Clean up sleep events from the yfd tab-delimited export.

Reads 'ethandataOct28.txt', keeps only the 'gnight' / 'gmorning' rows and
builds ``duration_table``, a numpy object array with one row per night and
three columns: sleep duration, gnight time and gmorning time.
"""
import csv
from datetime import datetime, timedelta

import numpy as np
import pytz
from pytz import timezone

# Layout of the 'time' column in the export.
TIME_FORMAT = "%Y-%m-%d %H:%M:%S"

# Structured dtype for the four tab-separated columns.
types = {'names': ('name', 'value', 'unit', 'time'),
         'formats': ('S10', 'S5', 'S5', 'S20')}

# Read everything inside a context manager so the handle is always closed.
with open('ethandataOct28.txt') as data_file:
    data_file.readline()  # skip the first line (presumably a header row)
    # loadtxt squeezes a single data row down to a 0-d array, which would
    # break the boolean-mask indexing below; force at least one dimension.
    ethan = np.atleast_1d(np.loadtxt(data_file, dtype=types, delimiter='\t'))

# b'...' literals are plain str on Python 2 and match the bytes that the 'S'
# fields hold on Python 3, so the comparisons work on either interpreter.
sleep_mask = np.logical_or(ethan['name'] == b'gnight',
                           ethan['name'] == b'gmorning')
sleep_index = np.where(sleep_mask)[0]
sleep_raw = ethan[sleep_mask]

# Find potential errors in the data (i.e., there are 2 gnight's in a row).
# This only warns; processing continues with the data as-is.
errors_index = np.where(sleep_raw['name'][:-1] == sleep_raw['name'][1:])
errors = sleep_raw[errors_index]
if errors.size:
    print("There are duplicate gnight/gmorning entries. Check the data!")

# Two-column (name, time) view of the sleep rows, then the same rows with the
# time strings parsed into datetime objects.
sleep_clean = np.column_stack((sleep_raw['name'], sleep_raw['time']))
time_stringlist = sleep_clean[:, 1].tolist()
time_objectlist = [
    datetime.strptime(stamp.decode('ascii'), TIME_FORMAT)
    for stamp in time_stringlist
]
# dtype=object keeps the array well-formed even when there are no events.
time_objectarray = np.array(time_objectlist, dtype=object)
sleep_events = np.column_stack((sleep_clean[:, 0], time_objectarray))

# Pair each gmorning with the gnight that follows it in the file so that the
# difference is time asleep rather than time awake.
# NOTE(review): this pairing only yields positive durations if the rows are
# ordered newest-first -- confirm against the export format.
if sleep_events.shape[0] and sleep_events[0, 0] == b'gmorning':
    gmorning_objects = sleep_events[::2, 1]
    gnight_objects = sleep_events[1::2, 1]
else:
    gmorning_objects = sleep_events[1::2, 1]
    gnight_objects = sleep_events[2::2, 1]

# A trailing event without a partner leaves one slice longer than the other;
# drop the unpaired entry so the subtraction is element-for-element instead
# of failing (or silently broadcasting a length-1 array).
pair_count = min(len(gmorning_objects), len(gnight_objects))
gmorning_objects = gmorning_objects[:pair_count]
gnight_objects = gnight_objects[:pair_count]

sleep_durations = gmorning_objects - gnight_objects
duration_table = np.column_stack((sleep_durations,
                                  gnight_objects, gmorning_objects))
Submit a correction or amendment below (click here to make a fresh posting)
After submitting an amendment, you'll be able to view the differences between the old and new posts easily.