I'm trying to select only the values from the flightID column where DepDelay and ArrDelay are both greater than 15.
I tried this:
delay = data.loc[(data['ArrDelay'] > 15) & (data['DepDelay'] > 15), ['FlightID']]
But it did not work; it raised the following KeyError:
KeyError Traceback (most recent call last)
<ipython-input-28-11d40b8188f5> in <module>()
1 #delay = data.loc['FlightID']
2
----> 3 delay = data.loc[(data['ArrDelay'] > 15) & (data['DepDelay'] > 15), ['FlightID']]
C:\Programmes\lib\site-packages\pandas\core\indexing.py in __getitem__(self, key)
1365 except (KeyError, IndexError):
1366 pass
-> 1367 return self._getitem_tuple(key)
1368 else:
1369 # we by definition only have the 0th axis
C:\Programmes\lib\site-packages\pandas\core\indexing.py in _getitem_tuple(self, tup)
861
862 # no multi-index, so validate all of the indexers
--> 863 self._has_valid_tuple(tup)
864
865 # ugly hack for GH #836
C:\Programmes\lib\site-packages\pandas\core\indexing.py in _has_valid_tuple(self, key)
202 if i >= self.obj.ndim:
203 raise IndexingError('Too many indexers')
--> 204 if not self._has_valid_type(k, i):
205 raise ValueError("Location based indexing can only have "
206 "[{types}] types"
C:\Programmes\lib\site-packages\pandas\core\indexing.py in _has_valid_type(self, key, axis)
1470 raise KeyError(
1471 u"None of [{key}] are in the [{axis}]".format(
-> 1472 key=key, axis=self.obj._get_axis_name(axis)))
1473 else:
1474
KeyError: "None of [['FlightID']] are in the [columns]"
I was hoping to get this output:
flightID DepDelay ArrDelay
BBYYEUVY67527 20.0 64.0
MUPXAQFN40227 17.0 52.0
KTAMHIFO10843 16.0 18.0
(or only the three values in the flightID column)
Sample data:
flightID Month ArrTime ActualElapsedTime DepDelay ArrDelay
BBYYEUVY67527 1 1514.0 58.0 20.0 64.0
MUPXAQFN40227 1 37.0 120.0 17.0 52.0
LQLYUIMN79169 1 916.0 166.0 NA -25.0
KTAMHIFO10843 1 NaN NaN 5.0 NaN
BOOXJTEY23623 1 NaN NaN 4.0 NaN
BBYYEUVY67527 2 1514.0 58.0 NA 64.0
MUPXAQFN40227 2 37.0 120.0 NA 52.0
LQLYUIMN79169 2 916.0 166.0 NA -25.0
KTAMHIFO10843 2 NaN NaN 16.0 18.0
BOOXJTEY23623 2 NaN NaN 4 NaN