
Migrate data from Home Assistant SQLite to InfluxDB

I noticed some data was missing from my InfluxDB permanent storage that was still present in Home Assistant’s SQLite database, in two places: states and statistics. I want to port the data from one to the other.

Find data in SQLite

Connect to the database. Use -readonly so nothing gets destroyed by accident.

sudo sqlite3 -readonly /var/lib/homeassistant/home-assistant_v2.db
.mode column
.headers on
.width 30 10

Show tables and structures

.tables
PRAGMA table_info(states);
PRAGMA table_info(states_meta);
PRAGMA table_info(statistics);
PRAGMA table_info(statistics_meta);

Yields:

sqlite> PRAGMA table_info(statistics);
cid|name|type|notnull|dflt_value|pk
0|id|INTEGER|1||1
1|created|DATETIME|0||0
2|start|DATETIME|0||0
3|mean|FLOAT|0||0
4|min|FLOAT|0||0
5|max|FLOAT|0||0
6|last_reset|DATETIME|0||0
7|state|FLOAT|0||0
8|sum|FLOAT|0||0
9|metadata_id|INTEGER|0||0
10|created_ts|FLOAT|0||0
11|start_ts|FLOAT|0||0
12|last_reset_ts|FLOAT|0||0
sqlite> PRAGMA table_info(statistics_meta);
cid|name|type|notnull|dflt_value|pk
0|id|INTEGER|1||1
1|statistic_id|VARCHAR(255)|0||0
2|source|VARCHAR(32)|0||0
3|unit_of_measurement|VARCHAR(255)|0||0
4|has_mean|BOOLEAN|0||0
5|has_sum|BOOLEAN|0||0
6|name|VARCHAR(255)|0||0

Get data from states

It seems we need state and last_updated_ts (or last_changed_ts). entity_id is empty in the states table; it is stored in states_meta and linked via metadata_id. See also this post (home-assistant.io). The states table looks like this (a query to list the available entity_ids follows the dump):

cid|name|type|notnull|dflt_value|pk
0|state_id|INTEGER|1||1
1|domain|VARCHAR(64)|0||0
2|entity_id|VARCHAR(255)|0||0
3|state|VARCHAR(255)|0||0
4|attributes|TEXT|0||0
5|event_id|INTEGER|0||0
6|last_changed|DATETIME|0||0
7|last_updated|DATETIME|0||0
8|created|DATETIME|0||0
9|old_state_id|INTEGER|0||0
10|attributes_id|INTEGER|0||0
11|origin_idx|INTEGER|0||0
12|context_id|VARCHAR(36)|0||0
13|context_user_id|VARCHAR(36)|0||0
14|context_parent_id|VARCHAR(36)|0||0
15|last_updated_ts|FLOAT|0||0
16|last_changed_ts|FLOAT|0||0
17|context_id_bin|BLOB|0||0
18|context_user_id_bin|BLOB|0||0
19|context_parent_id_bin|BLOB|0||0
20|metadata_id|INTEGER|0||0
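
To see which entity_ids are available to export in the first place, states_meta can be listed directly (a sketch; the LIKE pattern is just an example):

SELECT
  metadata_id,
  entity_id
FROM states_meta
WHERE entity_id LIKE 'sensor.%'
ORDER BY entity_id;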

Explore what data is stored

SELECT
  *,
  states_meta.entity_id
FROM states
INNER JOIN states_meta ON states.metadata_id=states_meta.metadata_id
LIMIT 1;

yields:

state_id|domain|entity_id|state|attributes|event_id|last_changed|last_updated|created|old_state_id|attributes_id|origin_idx|context_id|context_user_id|context_parent_id|last_updated_ts|last_changed_ts|context_id_bin|context_user_id_bin|context_parent_id_bin|metadata_id|metadata_id|entity_id|entity_id
101258448|||1231.496|||||||83007|0||||1708654322.53454||?ӻ?fۓ?<?r???z|||40|40|sensor.smeter_production2|sensor.smeter_production2

Find the state changes associated with specific entity_ids and get the timestamps in nanoseconds, to prepare for InfluxDB:

SELECT
  state,
  printf("%.0f", last_updated_ts*1000000000) as timestamp,
  states_meta.entity_id
FROM states
INNER JOIN states_meta ON states.metadata_id=states_meta.metadata_id
WHERE states_meta.entity_id = 'sensor.compr_in_power_43141' OR states_meta.entity_id = 'sensor.compr_in_energy' OR states_meta.entity_id = 'sensor.int_el_add_energy' OR states_meta.entity_id = 'sensor.int_el_add_power_43084';

Get data from statistics

Explore what data is stored; see also the docs (home-assistant.io). One addition to the docs: created_ts seems to be the end of the statistics integration window, the moment at which the statistic_id sensor has the value state (start_ts being the beginning of the window).
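
A quick sanity check of that claim, run in the same sqlite3 session; for the hourly statistics table the difference should come out at roughly 3600 seconds plus a small processing delay:

SELECT
  datetime(start_ts, 'unixepoch') AS window_start,
  datetime(created_ts, 'unixepoch') AS row_created,
  printf('%.0f', created_ts - start_ts) AS seconds_apart
FROM statistics
ORDER BY created_ts DESC
LIMIT 3;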

SELECT
  count(*)
FROM statistics;

SELECT
  MAX(metadata_id)
FROM statistics;

.mode csv

SELECT
  *
FROM statistics
INNER JOIN statistics_meta ON statistics.metadata_id=statistics_meta.id
WHERE statistics_meta.statistic_id = 'sensor.compr_in_power_43141' OR statistics_meta.statistic_id = 'sensor.compr_in_energy' OR statistics_meta.statistic_id = 'sensor.int_el_add_energy' OR statistics_meta.statistic_id = 'sensor.int_el_add_power_43084'
LIMIT 10;

SELECT
  state,
  mean,
  sum,
  created_ts,
  start_ts,
  statistic_id
FROM statistics
INNER JOIN statistics_meta ON statistics.metadata_id=statistics_meta.id
WHERE statistics_meta.statistic_id = 'sensor.compr_in_power_43141' OR statistics_meta.statistic_id = 'sensor.compr_in_energy' OR statistics_meta.statistic_id = 'sensor.int_el_add_energy' OR statistics_meta.statistic_id = 'sensor.int_el_add_power_43084'
ORDER BY created_ts DESC
LIMIT 40;

Looks like:

statistic_id   sensor.int_el_add_energy  sensor.compr_in_energy  sensor.compr_in_power_43141  sensor.int_el_add_power_43084
state          52.66                     663.71
mean                                                             0.558742289913889            0.0
sum            52.66                     663.21
created_ts     1714122010.38259          1714122010.38236        1714122010.38231             1714122010.3821    == Fri 26 Apr 2024 11:00:10 AM CEST
start_ts       1714118400.0              1714118400.0            1714118400.0                 1714118400.0       == Fri 26 Apr 2024 10:00:00 AM CEST

Export data

Saving SQLite to CSV (stackoverflow.com) is supported out of the box:

.mode csv

.separator "," "\n"

.headers on

.once /home/tim/export-nibe2.csv

SELECT
  state,
  printf("%.0f", last_updated_ts) as timestamp,
  states_meta.entity_id
FROM states
INNER JOIN states_meta ON states.metadata_id=states_meta.metadata_id
WHERE states_meta.entity_id = 'sensor.compr_in_power_43141' OR states_meta.entity_id = 'sensor.compr_in_energy' OR states_meta.entity_id = 'sensor.int_el_add_energy' OR states_meta.entity_id = 'sensor.int_el_add_power_43084';


.once /home/tim/export-nibe-stats-power2.csv

SELECT
  mean,
  printf("%.0f", created_ts) as timestamp,
  statistic_id
FROM statistics
INNER JOIN statistics_meta ON statistics.metadata_id=statistics_meta.id
WHERE statistics_meta.statistic_id = 'sensor.compr_in_power_43141' OR statistics_meta.statistic_id = 'sensor.int_el_add_power_43084';

.once /home/tim/export-nibe-stats-energy2.csv

SELECT
  state,
  printf("%.0f", created_ts) as timestamp,
  statistic_id
FROM statistics
INNER JOIN statistics_meta ON statistics.metadata_id=statistics_meta.id
WHERE statistics_meta.statistic_id = 'sensor.compr_in_energy' OR statistics_meta.statistic_id = 'sensor.int_el_add_energy';

Import to InfluxDB

For InfluxDB we need the line protocol (influxdata.com):

// Syntax
<measurement>[,<tag_key>=<tag_value>[,<tag_key>=<tag_value>]] <field_key>=<field_value>[,<field_key>=<field_value>] [<timestamp>]

// Example
myMeasurement,tag1=value1,tag2=value2 fieldKey="fieldValue" 1556813561098000000

In my case I want (one line per sensor, i.e. two energy sensors and two power sensors):

energyv3,quantity=electricity,source=nibe,type=consumption,uniqueid=<uniqueid> value=<value>*3600*1000 <timestamp>
energyv3,quantity=electricity,source=nibe,type=consumption,uniqueid=<uniqueid> value=<value>*3600*1000 <timestamp>

powerv3,quantity=electricity,source=nibe,type=consumption,uniqueid=<uniqueid> value=<value>*1000 <timestamp>
powerv3,quantity=electricity,source=nibe,type=consumption,uniqueid=<uniqueid> value=<value>*1000 <timestamp>
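
To make the scaling concrete: I assume the sensors report kWh and kW, so ×3600×1000 converts kWh to joules and ×1000 converts kW to watts. A hypothetical energy reading of 663.71 at timestamp 1714118400 would then end up as (uniqueid in its final form, i.e. after the sed renames further down):

energyv3,quantity=electricity,source=nibe,type=consumption,uniqueid=compr_in_energy_43141 value=2389356000 1714118400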

So with some awk-fu (stackoverflow.com) this gives (stackexchange.com):

N.B. Ensure you use GNU awk in case you need >32-bit integers. I ran into an issue when apparently I had mawk installed, which has 32-bit ints, and my results looked weird.
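
A quick way to check which awk you have and whether it copes with the large scaled values (the scaled energy readings already exceed the 32-bit range):

# GNU awk prints 2389356000 here; an awk limited to 32-bit integers prints something else
awk 'BEGIN{printf "%d\n", 663.71*3600*1000}'
# on Debian/Ubuntu this shows which awk the symlink actually points at
readlink -f "$(command -v awk)"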

grep _energy /home/tim/export-nibe2.csv | grep -v "^unavailable" | awk -F"," '{$1=sprintf("%d", $1*3600*1000); print "energyv3,quantity=electricity,source=nibe,type=consumption,uniqueid="$3" value="$1" "$2}' > export-nibe.txt
grep _power_ /home/tim/export-nibe2.csv | grep -v "^unavailable" | awk -F"," '{$1=sprintf("%d", $1*1000); print "powerv3,quantity=electricity,source=nibe,type=consumption,uniqueid="$3" value="$1" "$2}' >> export-nibe.txt

grep -v "^unavailable" /home/tim/export-nibe-stats-energy2.csv | awk -F"," '{$1=sprintf("%d", $1*3600*1000); $2=$2-10; print "energyv3,quantity=electricity,source=nibe,type=consumption,uniqueid="$3" value="$1" "$2}' > export-nibe.txt
grep -v "^unavailable" /home/tim/export-nibe-stats-power2.csv | awk -F"," '{$1=sprintf("%d", $1*1000); $2=$2-10; print "powerv3,quantity=electricity,source=nibe,type=consumption,uniqueid="$3" value="$1" "$2}' >> export-nibe.txt

sed -i -e 's/sensor.compr_in_energy/compr_in_energy_43141/' export-nibe.txt
sed -i -e 's/sensor.int_el_add_energy/int_el_add_energy_43084/' export-nibe.txt
sed -i -e 's/sensor.compr_in_power_43141/compr_in_power_43141/' export-nibe.txt
sed -i -e 's/sensor.int_el_add_power_43084/int_el_add_power_43084/' export-nibe.txt

Now insert into InfluxDB (I use a temporary measurement for testing, to check that ingestion works as expected):

curl -i -u ${INFLUX_USERNAME}:${INFLUX_PASSWORD} -XPOST "http://localhost:8086/write?db=smarthomev3&precision=s" --data-binary @export-nibe.txt
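
For the temporary-measurement test, one option is to sed the measurement names to a _temp_ prefix (as done further down in the fixes), post, inspect, and then either re-post the renamed file or copy server-side with InfluxQL's SELECT INTO. A sketch of the server-side route with the InfluxDB 1.x CLI; GROUP BY * keeps the tags as tags instead of demoting them to fields:

# inspect the temporary measurement first
influx -username ${INFLUX_USERNAME} -password ${INFLUX_PASSWORD} -database smarthomev3 \
  -execute 'SELECT count(value) FROM "_temp_energyv3"'
# copy it into the real measurement; GROUP BY * preserves the tags
influx -username ${INFLUX_USERNAME} -password ${INFLUX_PASSWORD} -database smarthomev3 \
  -execute 'SELECT * INTO energyv3 FROM "_temp_energyv3" GROUP BY *'
# and clean up
influx -username ${INFLUX_USERNAME} -password ${INFLUX_PASSWORD} -database smarthomev3 \
  -execute 'DROP MEASUREMENT "_temp_energyv3"'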

Bonus: upload Nibe USB log to InfluxDB

Nibe can log data to a USB stick, with one file per day named YYMMDD01.LOG. The data is stored as TSV, and the first lines look like this (a sketch for converting and uploading the log follows the transposed view below):

Divisors		1	1	10	10	10	10	10	10	10	10	10	10	10	10	100	1	10	10	10	10	1	1	1	1
Date	Time	version(43001)	R-version(44331)	BT1(40004)	BT2(40008)	EB100-EP14-BT3(40012)	BT6(40014)	BT7(40013)	EB100-EP14-BT10(40015)	EB100-EP14-BT11(40016)	EB100-EP14-BT12(40017)	EB100-EP14-BT14(40018)	EB100-EP14-BT17(40022)	BT25(40071)	BT50(40033)	Tot.Int.Add(43084)	Alarm number(45001)	Calc. Supply(43009)	Degree Minutes(40940)	BT1-Average(40067)	compr. freq. act.(43136)	Relays PCA-Base(43514)	GP1-speed EP14(43437)	GP2-speed EP14(43439)	Prio(43086)
2024-01-15	22:59:00	9443	5	-7	236	212	545	572	-35	-59	231	492	3	205	203	0	0	200	-2715	0	380	7	100	69	30
2024-01-15	22:59:30	9443	5	-7	236	212	545	572	-34	-58	232	492	4	206	204	0	0	200	-2709	0	369	7	100	69	30
2024-01-15	23:00:00	9443	5	-7	231	213	545	571	-35	-56	232	492	-31	206	203	0	0	200	-2709	0	370	7	100	69	30

Transposing for human readers:

Date	2024-01-15	2024-01-15	2024-01-15	
Time	22:59:00	22:59:30	23:00:00
1	version(43001)	9443	9443	9443
1	R-version(44331)	5	5	5
10	BT1(40004)	-7	-7	-7
10	BT2(40008)	236	236	231
10	EB100-EP14-BT3(40012)	212	212	213
10	BT6(40014)	545	545	545
10	BT7(40013)	572	572	571
10	EB100-EP14-BT10(40015)	-35	-34	-35
10	EB100-EP14-BT11(40016)	-59	-58	-56
10	EB100-EP14-BT12(40017)	231	232	232
10	EB100-EP14-BT14(40018)	492	492	492
10	EB100-EP14-BT17(40022)	3	4	-31
10	BT25(40071)	205	206	206
10	BT50(40033)	203	204	203
100	Tot.Int.Add(43084)	0	0	0
1	Alarm number(45001)	0	0	0
10	Calc. Supply(43009)	200	200	200
10	Degree Minutes(40940)	-2715	-2709	-2709
10	BT1-Average(40067)	0	0	0
10	compr. freq. act.(43136)	380	369	370
1	Relays PCA-Base(43514)	7	7	7
1	GP1-speed EP14(43437)	100	100	100
1	GP2-speed EP14(43439)	69	69	69
1	Prio(43086)	30	30	30
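
To actually get such a log into InfluxDB: the Divisors row gives the scale factor for each column, and Date + Time give the timestamp. A rough conversion sketch, assuming GNU awk (for mktime), that the log timestamps are local time, and using a measurement name (nibelog) and tag key (register) of my own choosing; the file name matches the sample day above:

gawk -F'\t' '
{ sub(/\r$/, "") }                        # the logger may use CRLF line endings
NR == 1 {                                 # "Divisors" row: scale factor per data column
  for (i = 3; i <= NF; i++) div[i] = $i
  next
}
NR == 2 {                                 # header row: column names
  for (i = 3; i <= NF; i++) {
    name[i] = $i
    gsub(/ /, "_", name[i])               # line protocol tag values must not contain raw spaces
  }
  next
}
NF > 2 {
  ts = $1 " " $2
  gsub(/[-:]/, " ", ts)                   # "2024-01-15 22:59:30" -> "2024 01 15 22 59 30"
  epoch = mktime(ts)                      # gawk: interpreted as local time
  for (i = 3; i <= NF; i++)
    if (div[i] + 0 != 0)
      printf "nibelog,register=%s value=%s %d\n", name[i], $i / div[i], epoch
}' 24011501.LOG > nibelog.txt

curl -i -u ${INFLUX_USERNAME}:${INFLUX_PASSWORD} -XPOST "http://localhost:8086/write?db=smarthomev3&precision=s" --data-binary @nibelog.txt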

Fix wrong insertion

From 2024-04-23T21:21:00Z == 1713907260

To 2024-04-24T13:49:00Z == 1713966540

sort -t, -k 2 export-nibe.csv > export-nibe-gap.csv
grep -n 1713966 export-nibe-gap.csv  -> 7906
grep -n 171390 export-nibe-gap.csv -> 7812
head -n 7906 export-nibe-gap.csv | tail -n +7812 > export-nibe-gap2.csv
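
An alternative to picking line numbers: filter the gap window by the timestamp value itself, no sort needed. A sketch, assuming the timestamp in column 2 is in seconds (scale the bounds by 1e9 if the export is in nanoseconds):

# keep only rows whose timestamp falls inside the gap window
awk -F',' '$2 + 0 >= 1713907260 && $2 + 0 <= 1713966540' export-nibe.csv > export-nibe-gap2.csv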

grep _energy /home/tim/export-nibe-gap2.csv | grep -v "^unavailable" | awk -F"," '{$1=sprintf("%d", $1*3600*1000); print "energyv3,quantity=electricity,source=nibe,type=consumption,uniqueid="$3" value="$1" "$2}' > export-nibe-gap2.txt
grep _power_ /home/tim/export-nibe-gap2.csv | grep -v "^unavailable" | awk -F"," '{$1=sprintf("%d", $1*1000); print "powerv3,quantity=electricity,source=nibe,type=consumption,uniqueid="$3" value="$1" "$2}' >> export-nibe-gap2.txt

curl -i -u ${INFLUX_USERNAME}:${INFLUX_PASSWORD} -XPOST "http://localhost:8086/write?db=smarthomev3&precision=ns" --data-binary @export-nibe-gap2.txt

Then, in the influx CLI, delete the points that went into the temporary measurement with the wrong (un-renamed) uniqueid:

delete from _temp_energyv3 where uniqueid='sensor.compr_in_energy'

Fix wrong key value

> select * from energyv3 where source='nibe' and uniqueid='int_el_add_energy_43084' order by time asc limit 2
name: energyv3
time                 quantity    source type        unique_id uniqueid                value
----                 --------    ------ ----        --------- --------                -----
2024-04-24T13:50:00Z electricity nibe   consumption           int_el_add_energy_43084 189576000
2024-04-24T13:51:00Z electricity nibe   consumption           int_el_add_energy_43084 189576000
tim@proteus:~$ date +%s -d 2024-04-24T13:50:00Z
1713966600
1713966498438867000

sort -t, -k 2 export-nibe.csv > export-nibe-gap.csv
head -n 7906 export-nibe-gap.csv > export-nibe-head.csv

grep _energy /home/tim/export-nibe-head.csv | grep -v "^unavailable" | awk -F"," '{$1=sprintf("%d", $1*3600*1000); print "energyv3,quantity=electricity,source=nibe,type=consumption,uniqueid="$3" value="$1" "$2}' > export-nibe-head.txt
grep _power_ /home/tim/export-nibe-head.csv | grep -v "^unavailable" | awk -F"," '{$1=sprintf("%d", $1*1000); print "powerv3,quantity=electricity,source=nibe,type=consumption,uniqueid="$3" value="$1" "$2}' >> export-nibe-head.txt

cat export-nibe-head.txt > export-nibe-merged.txt
cat export-nibe.txt >> export-nibe-merged.txt

sed -i -e 's/sensor.compr_in_energy/compr_in_energy_43141/' export-nibe-merged.txt
sed -i -e 's/sensor.int_el_add_energy/int_el_add_energy_43084/' export-nibe-merged.txt

sort export-nibe-merged.txt | uniq > export-nibe-unique.txt

sed -i -e 's/powerv3,/_temp_powerv3,/' export-nibe-unique.txt
sed -i -e 's/energyv3,/_temp_energyv3,/' export-nibe-unique.txt

Drop the last nine characters of every line: with nanosecond timestamps this leaves second precision, matching precision=s in the final write below.

cat export-nibe-unique.txt | rev | cut -c10- | rev > export-nibe-unique-second.txt


sed -i -e 's/_temp_powerv3,/powerv3,/' export-nibe-unique-second.txt
sed -i -e 's/_temp_energyv3,/energyv3,/' export-nibe-unique-second.txt

curl -i -u ${INFLUX_USERNAME}:${INFLUX_PASSWORD} -XPOST "http://localhost:8086/write?db=smarthomev3&precision=s" --data-binary @export-nibe-unique-second.txt
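
A quick way to check that the corrected points landed where expected (a sketch with the InfluxDB 1.x CLI):

# count imported points per uniqueid in both measurements
influx -username ${INFLUX_USERNAME} -password ${INFLUX_PASSWORD} -database smarthomev3 \
  -execute "SELECT count(value) FROM energyv3 WHERE source='nibe' GROUP BY uniqueid"
influx -username ${INFLUX_USERNAME} -password ${INFLUX_PASSWORD} -database smarthomev3 \
  -execute "SELECT count(value) FROM powerv3 WHERE source='nibe' GROUP BY uniqueid"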

#Home-Assistant #Influxdb #Linux #Sqlite