UPDATE:
j = (df.groupby(['ID','Location','Country','Latitude','Longitude'])
.apply(lambda x: x[['timestamp','tide']].to_dict('records'))
.reset_index()
.rename(columns={0:'Tide-Data'})
.to_json(orient="records"))
Result (formatted):
In [103]: print(json.dumps(json.loads(j), indent=2, sort_keys=True))
[
{
"Country": "FRA",
"ID": 1,
"Latitude": 48.383,
"Location": "BREST",
"Longitude": -4.495,
"Tide-Data": [
{
"tide": 6905.0,
"timestamp": "1807-01-01"
},
{
"tide": 6931.0,
"timestamp": "1807-02-01"
},
{
"tide": 6896.0,
"timestamp": "1807-03-01"
},
{
"tide": 6953.0,
"timestamp": "1807-04-01"
},
{
"tide": 7043.0,
"timestamp": "1807-05-01"
}
]
},
{
"Country": "DEU",
"ID": 7,
"Latitude": 53.867,
"Location": "CUXHAVEN 2",
"Longitude": 8.717,
"Tide-Data": [
{
"tide": 7093.0,
"timestamp": "1843-01-01"
},
{
"tide": 6688.0,
"timestamp": "1843-02-01"
},
{
"tide": 6493.0,
"timestamp": "1843-03-01"
},
{
"tide": 6723.0,
"timestamp": "1843-04-01"
},
{
"tide": 6533.0,
"timestamp": "1843-05-01"
}
]
},
{
"Country": "DEU",
"ID": 8,
"Latitude": 53.899,
"Location": "WISMAR 2",
"Longitude": 11.458,
"Tide-Data": [
{
"tide": 6957.0,
"timestamp": "1848-07-01"
},
{
"tide": 6944.0,
"timestamp": "1848-08-01"
},
{
"tide": 7084.0,
"timestamp": "1848-09-01"
},
{
"tide": 6898.0,
"timestamp": "1848-10-01"
},
{
"tide": 6859.0,
"timestamp": "1848-11-01"
}
]
},
{
"Country": "NLD",
"ID": 9,
"Latitude": 51.918,
"Location": "MAASSLUIS",
"Longitude": 4.25,
"Tide-Data": [
{
"tide": 6880.0,
"timestamp": "1848-02-01"
},
{
"tide": 6700.0,
"timestamp": "1848-03-01"
},
{
"tide": 6775.0,
"timestamp": "1848-04-01"
},
{
"tide": 6580.0,
"timestamp": "1848-05-01"
},
{
"tide": 6685.0,
"timestamp": "1848-06-01"
}
]
},
{
"Country": "USA",
"ID": 10,
"Latitude": 37.807,
"Location": "SAN FRANCISCO",
"Longitude": -122.465,
"Tide-Data": [
{
"tide": 6909.0,
"timestamp": "1854-07-01"
},
{
"tide": 6940.0,
"timestamp": "1854-08-01"
},
{
"tide": 6961.0,
"timestamp": "1854-09-01"
},
{
"tide": 6952.0,
"timestamp": "1854-10-01"
},
{
"tide": 6952.0,
"timestamp": "1854-11-01"
}
]
}
]
OLD answer:
You can do it using groupby()
, apply()
and to_json()
methods:
j = (df.groupby(['ID','Location','Country','Latitude','Longitude'], as_index=False)
.apply(lambda x: dict(zip(x.timestamp,x.tide)))
.reset_index()
.rename(columns={0:'Tide-Data'})
.to_json(orient="records"))
Output:
In [112]: print(json.dumps(json.loads(j), indent=2, sort_keys=True))
[
{
"Country": "FRA",
"ID": 1,
"Latitude": 48.383,
"Location": "BREST",
"Longitude": -4.495,
"Tide-Data": {
"1807-01-01": 6905.0,
"1807-02-01": 6931.0,
"1807-03-01": 6896.0,
"1807-04-01": 6953.0,
"1807-05-01": 7043.0
}
},
{
"Country": "DEU",
"ID": 7,
"Latitude": 53.867,
"Location": "CUXHAVEN 2",
"Longitude": 8.717,
"Tide-Data": {
"1843-01-01": 7093.0,
"1843-02-01": 6688.0,
"1843-03-01": 6493.0,
"1843-04-01": 6723.0,
"1843-05-01": 6533.0
}
},
{
"Country": "DEU",
"ID": 8,
"Latitude": 53.899,
"Location": "WISMAR 2",
"Longitude": 11.458,
"Tide-Data": {
"1848-07-01": 6957.0,
"1848-08-01": 6944.0,
"1848-09-01": 7084.0,
"1848-10-01": 6898.0,
"1848-11-01": 6859.0
}
},
{
"Country": "NLD",
"ID": 9,
"Latitude": 51.918,
"Location": "MAASSLUIS",
"Longitude": 4.25,
"Tide-Data": {
"1848-02-01": 6880.0,
"1848-03-01": 6700.0,
"1848-04-01": 6775.0,
"1848-05-01": 6580.0,
"1848-06-01": 6685.0
}
},
{
"Country": "USA",
"ID": 10,
"Latitude": 37.807,
"Location": "SAN FRANCISCO",
"Longitude": -122.465,
"Tide-Data": {
"1854-07-01": 6909.0,
"1854-08-01": 6940.0,
"1854-09-01": 6961.0,
"1854-10-01": 6952.0,
"1854-11-01": 6952.0
}
}
]
PS if you don’t care of idents you can write directly to JSON file:
(df.groupby(['ID','Location','Country','Latitude','Longitude'], as_index=False)
.apply(lambda x: dict(zip(x.timestamp,x.tide)))
.reset_index()
.rename(columns={0:'Tide-Data'})
.to_json('/path/to/file_name.json', orient="records"))