Binary Databases

Let's look at our NHL database in binary format.  The first program uses the standard C++ iostream library write and read functions to write both string and numeric data in binary form.

Listing 1a.  Simple NHL binary database

// Binary database using streams

#include <fstream.h>
#include <string.h>

// One row of the standings table.  The member order matters: main( )
// fills these records with positional aggregate initializers.
struct team
{
	char * name;                  // null-terminated team name
	int gp, wins, losses, ties;   // gp is recomputed as wins + losses + ties on read
	int gf, ga;                   // presumably goals for / goals against
	int pts;                      // recomputed as two per win plus one per tie on read
};

// Text-mode inserter for a team record.  Emits the name followed by
// gp, wins, losses, ties, gf, ga and pts, in that order, with a single
// space between neighbouring fields.  No newline is appended.
// Returns ostr so that insertions can be chained.
ostream &
operator<<(ostream & ostr, const team & theTeam)
{
	const char SEP = ' ';

	ostr << theTeam.name;
	ostr << SEP << theTeam.gp;
	ostr << SEP << theTeam.wins;
	ostr << SEP << theTeam.losses;
	ostr << SEP << theTeam.ties;
	ostr << SEP << theTeam.gf;
	ostr << SEP << theTeam.ga;
	ostr << SEP << theTeam.pts;

	return ostr;
}

// Read one team record, in the binary layout produced by write( ),
// from istr into theTeam.
//
// Record layout: an int holding the name length, that many name
// characters (no terminating null), then wins, losses, ties, gf and
// ga as raw ints.  gp and pts are not stored; they are recomputed.
//
// theTeam.name is always set to a freshly allocated, null-terminated
// array which the caller must release with delete [].  Whatever
// theTeam.name held before the call is overwritten without being
// freed.
//
// If the record is truncated, or its length field is negative, the
// stream is left in a failed state and theTeam is set to an empty
// name with all-zero statistics.  Callers should therefore test istr
// after the call.
//
// Returns istr.
istream &
read( istream & istr, team & theTeam )
{
	int length = 0;
	istr.read( (char *) &length, sizeof(length) );
	if ( ! istr || length < 0 )
	{
		// A failed read leaves no usable length, and a negative
		// length can only come from a corrupt file.  Fall back to an
		// empty name and flag the stream so the caller can tell.
		length = 0;
		istr.clear( istr.rdstate( ) | ios::failbit );
	}

	theTeam.name = new char [ length + 1 ];
	istr.read( theTeam.name, length );
	// Add the terminating null character to the string
	theTeam.name[length] = '\0';

	istr.read( (char *) &theTeam.wins, sizeof(theTeam.wins) );
	istr.read( (char *) &theTeam.losses, sizeof(theTeam.losses) );
	istr.read( (char *) &theTeam.ties, sizeof(theTeam.ties) );
	istr.read( (char *) &theTeam.gf, sizeof(theTeam.gf) );
	istr.read( (char *) &theTeam.ga, sizeof(theTeam.ga) );

	if ( ! istr )
	{
		// Something above came up short.  Throw away whatever was
		// partially read rather than hand back indeterminate data.
		theTeam.name[0] = '\0';
		theTeam.wins = theTeam.losses = theTeam.ties = 0;
		theTeam.gf = theTeam.ga = 0;
	}

	// Derived fields: every game is a win, a loss or a tie, and a
	// win is worth two points while a tie is worth one.
	theTeam.gp = theTeam.wins + theTeam.losses + theTeam.ties;
	theTeam.pts = theTeam.wins + theTeam.wins + theTeam.ties;

	return istr;
}

// Binary writer for a team record; the counterpart of read( ).
// Emits the length of the name as a raw int, the name's characters
// without a terminating null, and then wins, losses, ties, gf and ga
// as raw ints in that order.  gp and pts are not written.
// Returns ostr.
ostream &
write( ostream & ostr, const team & theTeam )
{
	// Length-prefixed name, so the reader knows how many bytes to take.
	int nameLen = strlen( theTeam.name );
	ostr.write( (const char *) &nameLen, sizeof(nameLen) );
	ostr.write( theTeam.name, nameLen );

	// The stored statistics, listed in file order.
	const int * const stored[] =
	{
		&theTeam.wins, &theTeam.losses, &theTeam.ties,
		&theTeam.gf, &theTeam.ga
	};
	const unsigned int N_STORED = sizeof(stored) / sizeof(stored[0]);

	for ( unsigned int k = 0 ; k < N_STORED ; k++ )
	{
		ostr.write( (const char *) stored[k], sizeof(*stored[k]) );
	}

	return ostr;
}

int
main( )
{
	// Save the following array to a file in binary form and
	//  the restore the contents.  This whole mechanism is called
	//  "persistence".  Now spaces are allowed in team names.

	const int N_TEAMS = 6;
	const team NHL[N_TEAMS] =
	{
		{ "Toronto",  56, 32, 20, 4, 181, 168, 68 },
		{ "Montreal", 59, 23, 28, 8, 139, 154, 54 },
		{ "Detroit",  59, 31, 23, 5, 175, 147, 67 },
		{ "New York", 57, 23, 27, 7, 158, 159, 53 },
		{ "Chicago",  59, 16, 32, 8, 131, 190, 40 },
		{ "Boston",   56, 23, 24, 9, 142, 132, 55 }
	};

	// A true C++ constant string.  Both the pointer (* const) and
	// what it points to (const char) are constant.  An alternative
	// form is:
	//
	//	const char FILENAME [] = "nhl.bin";
	//
	// which is the same, because the name of an array variable is
	// a constant pointer which points to the first element of the
	// array.

	const char * const FILENAME = "nhl.bin";

	ofstream out( FILENAME, ios::out | ios::trunc );
	if ( ! out )
	{
		cerr << "error saving " << '"' << FILENAME << '"' << endl;
	}
	else
	{
		// Write out the array length (number of records), so
		// we know how many records to read back.

		out.write( (const char *) &N_TEAMS, sizeof(N_TEAMS) );
		for ( int i = 0 ; i < N_TEAMS ; i++ )
		{
			// Write out each array element
			write( out, NHL[i] );
		}
	}
	out.close( );

	ifstream in( FILENAME );
	if ( ! in )
	{
		cerr << "error restoring " << '"' << FILENAME << '"' << endl;
	}
	else
	{
		// We need to allocate an array dynamically because we 
		// don't normally know in advance how many records are
		// in the database.

		team * teamArr;
		int n_teams = 0;
		in.read( (char *) &n_teams, sizeof(n_teams) );
		if ( in.good( ) )
		{
			teamArr = new team [n_teams];

			// Read in each record and write it in text form
			// to cout.

			for ( int i = 0 ; i < n_teams ; i++ )
			{
				read( in, teamArr[i] );
				cout << teamArr[i] << endl;
			}
		}
	}
	return 0;
}

Download this program

The contents of the file written by this program are shown below as a hex dump.  All ints are stored as four bytes, least-significant byte first; each char is one byte; and there are no column separator or row separator characters.

Listing 1b.  Hex dump of simple NHL binary database

00000000:  06 00 00 00 07 00 00 00 54 6f 72 6f 6e 74 6f 20   ........Toronto 
00000010:  00 00 00 14 00 00 00 04 00 00 00 b5 00 00 00 a8   ................
00000020:  00 00 00 08 00 00 00 4d 6f 6e 74 72 65 61 6c 17   .......Montreal.
00000030:  00 00 00 1c 00 00 00 08 00 00 00 8b 00 00 00 9a   ................
00000040:  00 00 00 07 00 00 00 44 65 74 72 6f 69 74 1f 00   .......Detroit..
00000050:  00 00 17 00 00 00 05 00 00 00 af 00 00 00 93 00   ................
00000060:  00 00 08 00 00 00 4e 65 77 20 59 6f 72 6b 17 00   ......New York..
00000070:  00 00 1b 00 00 00 07 00 00 00 9e 00 00 00 9f 00   ................
00000080:  00 00 07 00 00 00 43 68 69 63 61 67 6f 10 00 00   ......Chicago...
00000090:  00 20 00 00 00 08 00 00 00 83 00 00 00 be 00 00   . ..............
000000a0:  00 06 00 00 00 42 6f 73 74 6f 6e 17 00 00 00 18   .....Boston.....
000000b0:  00 00 00 09 00 00 00 8e 00 00 00 84 00 00 00      ...............
000000bf

Because we are using ints to store our numeric data, the database is actually larger than the ASCII version (191 bytes vs. 153 bytes).  To improve this we can use short, which is at least 16 bits and, on most platforms, exactly 16 bits (two bytes) in size.

Listing 2a.  Simple NHL binary database using 2-byte integers

// Binary database using streams

// Alternate version:  uses "short" instead of "int" for more storage
// efficiency.  A "short" is at least 16 bits (exactly 16 on most
// platforms) and can therefore hold at least -32767 to +32767, which
// is well within the range required by our hockey information.

#include <fstream.h>
#include <string.h>

// One row of the standings table, using short for every numeric field
// to shrink the stored record.  The member order matters: main( )
// fills these records with positional aggregate initializers.
struct team
{
	char * name;                    // null-terminated team name
	short gp, wins, losses, ties;   // gp is recomputed as wins + losses + ties on read
	short gf, ga;                   // presumably goals for / goals against
	short pts;                      // recomputed as two per win plus one per tie on read
};

// Text-mode inserter for a team record.  Prints the name and then
// gp, wins, losses, ties, gf, ga and pts, each preceded by one space.
// No newline is appended.  Returns ostr so insertions can be chained.
ostream &
operator<<(ostream & ostr, const team & theTeam)
{
	// Numeric columns in output order.
	const short columns[] =
	{
		theTeam.gp, theTeam.wins, theTeam.losses, theTeam.ties,
		theTeam.gf, theTeam.ga, theTeam.pts
	};
	const unsigned int N_COLUMNS = sizeof(columns) / sizeof(columns[0]);

	ostr << theTeam.name;
	for (unsigned int k = 0; k < N_COLUMNS; k++)
	{
		ostr << ' ' << columns[k];
	}

	return ostr;
}

// Read one team record, in the binary layout produced by write(),
// from istr into theTeam.
//
// Record layout: a short holding the name length, that many name
// characters (no terminating null), then wins, losses, ties, gf and
// ga as raw shorts.  gp and pts are not stored; they are recomputed.
//
// theTeam.name is always set to a freshly allocated, null-terminated
// array which the caller must release with delete [].  Whatever
// theTeam.name held before the call is overwritten without being
// freed.
//
// If the record is truncated, or its length field is negative, the
// stream is left in a failed state and theTeam is set to an empty
// name with all-zero statistics.  Callers should therefore test istr
// after the call.
//
// Returns istr.
istream &
read(istream & istr, team & theTeam)
{
	short length = 0;
	istr.read((char *)&length, sizeof(length));
	if (!istr || length < 0)
	{
		// A failed read leaves no usable length, and a negative
		// length can only come from a corrupt file.  Fall back to an
		// empty name and flag the stream so the caller can tell.
		length = 0;
		istr.clear(istr.rdstate() | ios::failbit);
	}

	theTeam.name = new char [length + 1];
	istr.read(theTeam.name, length);
	// Add the terminating null character to the string
	theTeam.name[length] = '\0';

	istr.read((char *)&theTeam.wins, sizeof(theTeam.wins));
	istr.read((char *)&theTeam.losses, sizeof(theTeam.losses));
	istr.read((char *)&theTeam.ties, sizeof(theTeam.ties));
	istr.read((char *)&theTeam.gf, sizeof(theTeam.gf));
	istr.read((char *)&theTeam.ga, sizeof(theTeam.ga));

	if (!istr)
	{
		// Something above came up short.  Throw away whatever was
		// partially read rather than hand back indeterminate data.
		theTeam.name[0] = '\0';
		theTeam.wins = theTeam.losses = theTeam.ties = 0;
		theTeam.gf = theTeam.ga = 0;
	}

	// Derived fields: every game is a win, a loss or a tie, and a
	// win is worth two points while a tie is worth one.
	theTeam.gp = theTeam.wins + theTeam.losses + theTeam.ties;
	theTeam.pts = theTeam.wins + theTeam.wins + theTeam.ties;

	return istr;
}

// Binary writer for a team record; the counterpart of read().
// Emits the length of the name as a raw short, the name's characters
// without a terminating null, and then wins, losses, ties, gf and ga
// as raw shorts in that order.  gp and pts are not written.
//
// The length prefix is a short, so at most 32767 name characters can
// be described portably.  A longer name is truncated to that many
// characters; converting its full length to short would otherwise
// yield a wrong (possibly negative) prefix and byte count.
//
// Returns ostr.
ostream &
write(ostream & ostr, const team & theTeam)
{
	const size_t MAX_NAME = 32767;

	size_t nameLen = strlen(theTeam.name);
	if (nameLen > MAX_NAME)
	{
		nameLen = MAX_NAME;
	}

	const short length = (short) nameLen;
	ostr.write((const char *)&length, sizeof(length));
	ostr.write(theTeam.name, length);

	ostr.write((const char *)&theTeam.wins, sizeof(theTeam.wins));
	ostr.write((const char *)&theTeam.losses, sizeof(theTeam.losses));
	ostr.write((const char *)&theTeam.ties, sizeof(theTeam.ties));
	ostr.write((const char *)&theTeam.gf, sizeof(theTeam.gf));
	ostr.write((const char *)&theTeam.ga, sizeof(theTeam.ga));

	return ostr;
}

int
main()
{
	const short N_TEAMS = 6;
	const team NHL[N_TEAMS] =
	{
		{ "Toronto",  56, 32, 20, 4, 181, 168, 68 },
		{ "Montreal", 59, 23, 28, 8, 139, 154, 54 },
		{ "Detroit",  59, 31, 23, 5, 175, 147, 67 },
		{ "New York", 57, 23, 27, 7, 158, 159, 53 },
		{ "Chicago",  59, 16, 32, 8, 131, 190, 40 },
		{ "Boston",   56, 23, 24, 9, 142, 132, 55 }
	};

	const char FILENAME [] = "nhl2.bin";
	ofstream out(FILENAME, ios::out | ios::trunc);
	if (!out)
	{
		cerr << "error saving " << '"' << FILENAME << '"' << endl;
	}
	else
	{
		out.write((char *)&N_TEAMS, sizeof(N_TEAMS));
		for (int i = 0; i < N_TEAMS; i++)
		{
			write(out, NHL[i]);
		}
	}
	out.close();

	ifstream in(FILENAME);
	if (!in)
	{
		cerr << "error restoring " << '"' << FILENAME << '"' << endl;
	}
	else
	{
		team * teamArr;
		short n_teams;

		in.read((char *)&n_teams, sizeof(n_teams));
		teamArr = new team [n_teams];

		for (int i = 0; i < n_teams; i++)
		{
			read(in, teamArr[i]);
			cout << teamArr[i] << endl;
		}
	}

	return 0;
}

Download this program
Download an alternate, object-based version of this program

Listing 2b.  Hex dump of smaller NHL binary database

00000000:  06 00 07 00 54 6f 72 6f 6e 74 6f 20 00 14 00 04   ....Toronto ....
00000010:  00 b5 00 a8 00 08 00 4d 6f 6e 74 72 65 61 6c 17   .......Montreal.
00000020:  00 1c 00 08 00 8b 00 9a 00 07 00 44 65 74 72 6f   ...........Detro
00000030:  69 74 1f 00 17 00 05 00 af 00 93 00 08 00 4e 65   it............Ne
00000040:  77 20 59 6f 72 6b 17 00 1b 00 07 00 9e 00 9f 00   w York..........
00000050:  07 00 43 68 69 63 61 67 6f 10 00 20 00 08 00 83   ..Chicago.. ....
00000060:  00 be 00 06 00 42 6f 73 74 6f 6e 17 00 18 00 09   .....Boston.....
00000070:  00 8e 00 84 00                                    .....
00000075

Now the database is smaller than the ASCII version (117 bytes vs. 153 bytes).  Each team record contains six integer fields (the name length plus five statistics), and each of them shrinks from four bytes to two, so we save 12 bytes per team.  While this number does not seem significant for such a small application, for a larger database with, say, one million rows, a 12-megabyte difference is significant!


Back to the COMP435 page