diff -Nru mysql-5.0.67.orig/include/my_time.h mysql-5.0.67.microslow_and_userstats/include/my_time.h
--- mysql-5.0.67.orig/include/my_time.h	Mon Aug  4 15:19:12 2008
+++ mysql-5.0.67.microslow_and_userstats/include/my_time.h	Wed Sep  3 12:11:39 2008
@@ -140,7 +140,7 @@
 int my_date_to_str(const MYSQL_TIME *l_time, char *to);
 int my_datetime_to_str(const MYSQL_TIME *l_time, char *to);
 int my_TIME_to_str(const MYSQL_TIME *l_time, char *to);
-
+ulonglong my_timer(ulonglong *ltime, ulonglong frequency);
 C_MODE_END
 
 #endif /* _my_time_h_ */
diff -Nru mysql-5.0.67.orig/include/mysql_com.h mysql-5.0.67.microslow_and_userstats/include/mysql_com.h
--- mysql-5.0.67.orig/include/mysql_com.h	Mon Aug  4 15:19:12 2008
+++ mysql-5.0.67.microslow_and_userstats/include/mysql_com.h	Wed Sep  3 12:07:46 2008
@@ -106,6 +106,8 @@
 					   thread */
 #define REFRESH_MASTER          128     /* Remove all bin logs in the index
 					   and truncate the index */
+#define REFRESH_TABLE_STATS     256     /* Refresh table stats hash table */
+#define REFRESH_INDEX_STATS     512     /* Refresh index stats hash table */
 
 /* The following can't be set with mysql_refresh() */
 #define REFRESH_READ_LOCK	16384	/* Lock tables for read */
diff -Nru mysql-5.0.67.orig/innobase/buf/buf0buf.c mysql-5.0.67.microslow_and_userstats/innobase/buf/buf0buf.c
--- mysql-5.0.67.orig/innobase/buf/buf0buf.c	Mon Aug  4 15:19:12 2008
+++ mysql-5.0.67.microslow_and_userstats/innobase/buf/buf0buf.c	Wed Sep  3 12:11:39 2008
@@ -37,6 +37,7 @@
 #include "log0log.h"
 #include "trx0undo.h"
 #include "srv0srv.h"
+#include "thr0loc.h"
 
 /*
 		IMPLEMENTATION OF THE BUFFER POOL
@@ -1086,6 +1087,31 @@
 	return(block);
 }
 
+/* Counts a page get in trx->distinct_page_access, one bitmap bit per hash */
+static inline void _increment_page_get_statistics(buf_block_t* block, trx_t* trx)
+{
+	ulint		block_hash;
+	ulint		block_hash_byte;
+	byte		block_hash_mask;
+
+	ut_ad(block);
+
+	/* Nothing to account when no trx or no bitmap was supplied */
+	if (!trx || !trx->distinct_page_access_hash)
+		return;
+
+	/* Map (space, offset) to one of the DPAH_SIZE * 8 bitmap bits */
+	block_hash = ut_hash_ulint((block->space << 20) + block->space +
+					block->offset, DPAH_SIZE << 3);
+	block_hash_byte = block_hash >> 3;
+	block_hash_mask = (byte) (0x01 << (block_hash & 0x07));
+	ut_ad(block_hash_byte < DPAH_SIZE);
+	/* Bit still clear: first get of a page hashing to this bit */
+	if ((trx->distinct_page_access_hash[block_hash_byte] & block_hash_mask) == 0)
+		trx->distinct_page_access++;
+	trx->distinct_page_access_hash[block_hash_byte] |= block_hash_mask;
+}
+
 /************************************************************************
 This is the general function used to get access to a database page. */
 
@@ -1108,6 +1134,11 @@
 	ulint		fix_type;
 	ibool		success;
 	ibool		must_read;
+	trx_t*          trx;
+	ulint           sec;
+	ulint           ms;
+	ib_longlong     start_time;
+	ib_longlong     finish_time;
 	
 	ut_ad(mtr);
 	ut_ad((rw_latch == RW_S_LATCH)
@@ -1119,6 +1150,7 @@
 #ifndef UNIV_LOG_DEBUG
 	ut_ad(!ibuf_inside() || ibuf_page(space, offset));
 #endif
+	trx = thr_local_get_trx(os_thread_get_curr_id());
 	buf_pool->n_page_gets++;
 loop:
 	block = NULL;
@@ -1148,7 +1180,7 @@
 			return(NULL);
 		}
 
-		buf_read_page(space, offset);
+		buf_read_page(space, offset, trx);
 
 #ifdef UNIV_DEBUG
 		buf_dbg_counter++;
@@ -1261,6 +1293,11 @@
 		        /* Let us wait until the read operation
 			completes */
 
+			if (trx)
+			{
+				ut_usectime(&sec, &ms);
+				start_time = (ib_longlong)sec * 1000000 + ms;
+			}
 		        for (;;) {
 				mutex_enter(&block->mutex);
 
@@ -1276,6 +1313,12 @@
 				       break;
 				}
 			}
+                	if (trx)
+			{
+				ut_usectime(&sec, &ms);
+        	        	finish_time = (ib_longlong)sec * 1000000 + ms;
+                		trx->io_reads_wait_timer += (ulint)(finish_time - start_time);
+			}
 		}
 
 		fix_type = MTR_MEMO_BUF_FIX;
@@ -1296,12 +1339,15 @@
 		/* In the case of a first access, try to apply linear
 		read-ahead */
 
-		buf_read_ahead_linear(space, offset);
+		buf_read_ahead_linear(space, offset, trx);
 	}
 
 #ifdef UNIV_IBUF_DEBUG
 	ut_a(ibuf_count_get(block->space, block->offset) == 0);
 #endif
+
+	_increment_page_get_statistics(block, trx);
+	
 	return(block->frame);		
 }
 
@@ -1326,6 +1372,7 @@
 	ibool		accessed;
 	ibool		success;
 	ulint		fix_type;
+	trx_t*          trx;
 
 	ut_ad(mtr && block);
 	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
@@ -1440,7 +1487,7 @@
 		read-ahead */
 
 		buf_read_ahead_linear(buf_frame_get_space_id(guess),
-					buf_frame_get_page_no(guess));
+					buf_frame_get_page_no(guess), thr_local_get_trx(os_thread_get_curr_id()));
 	}
 
 #ifdef UNIV_IBUF_DEBUG
@@ -1448,6 +1495,9 @@
 #endif
 	buf_pool->n_page_gets++;
 
+	trx = thr_local_get_trx(os_thread_get_curr_id());
+	_increment_page_get_statistics(block, trx);
+
 	return(TRUE);
 }
 
@@ -1470,6 +1520,7 @@
 	buf_block_t*	block;
 	ibool		success;
 	ulint		fix_type;
+	trx_t*		trx;
 
 	ut_ad(mtr);
 	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
@@ -1559,6 +1610,9 @@
 #endif
 	buf_pool->n_page_gets++;
 
+	trx = thr_local_get_trx(os_thread_get_curr_id());
+	_increment_page_get_statistics(block, trx);
+
 	return(TRUE);
 }
 
diff -Nru mysql-5.0.67.orig/innobase/buf/buf0rea.c mysql-5.0.67.microslow_and_userstats/innobase/buf/buf0rea.c
--- mysql-5.0.67.orig/innobase/buf/buf0rea.c	Mon Aug  4 15:19:12 2008
+++ mysql-5.0.67.microslow_and_userstats/innobase/buf/buf0rea.c	Wed Sep  3 12:11:39 2008
@@ -70,7 +70,8 @@
 			treat the tablespace as dropped; this is a timestamp we
 			use to stop dangling page reads from a tablespace
 			which we have DISCARDed + IMPORTed back */
-	ulint	offset)	/* in: page number */
+	ulint	offset,	/* in: page number */
+	trx_t*  trx)
 {
 	buf_block_t*	block;
 	ulint		wake_later;
@@ -140,10 +141,10 @@
 
 	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
 
-	*err = fil_io(OS_FILE_READ | wake_later,
+	*err = _fil_io(OS_FILE_READ | wake_later,
 			sync, space,
 			offset, 0, UNIV_PAGE_SIZE,
-			(void*)block->frame, (void*)block);
+			(void*)block->frame, (void*)block, trx);
 	ut_a(*err == DB_SUCCESS);
 
 	if (sync) {
@@ -174,8 +175,9 @@
 			the page at the given page number does not get
 			read even if we return a value > 0! */
 	ulint	space,	/* in: space id */
-	ulint	offset)	/* in: page number of a page which the current thread
+	ulint	offset,	/* in: page number of a page which the current thread
 			wants to access */
+	trx_t*  trx)
 {
 	ib_longlong	tablespace_version;
 	buf_block_t*	block;
@@ -270,7 +272,7 @@
 		if (!ibuf_bitmap_page(i)) {
 			count += buf_read_page_low(&err, FALSE, ibuf_mode
 					| OS_AIO_SIMULATED_WAKE_LATER,
-				        space, tablespace_version, i);
+				        space, tablespace_version, i, trx);
 			if (err == DB_TABLESPACE_DELETED) {
 				ut_print_timestamp(stderr);
 				fprintf(stderr,
@@ -314,7 +316,8 @@
 			/* out: number of page read requests issued: this can
 			be > 1 if read-ahead occurred */
 	ulint	space,	/* in: space id */
-	ulint	offset)	/* in: page number */
+	ulint	offset,	/* in: page number */
+	trx_t*  trx)
 {
 	ib_longlong	tablespace_version;
 	ulint		count;
@@ -323,13 +326,13 @@
 
 	tablespace_version = fil_space_get_version(space);
 
-	count = buf_read_ahead_random(space, offset);
+	count = buf_read_ahead_random(space, offset, trx);
 
 	/* We do the i/o in the synchronous aio mode to save thread
 	switches: hence TRUE */
 
 	count2 = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
-					tablespace_version, offset);
+					tablespace_version, offset, trx);
         srv_buf_pool_reads+= count2;
 	if (err == DB_TABLESPACE_DELETED) {
 	        ut_print_timestamp(stderr);
@@ -374,8 +377,9 @@
 /*==================*/
 			/* out: number of page read requests issued */
 	ulint	space,	/* in: space id */
-	ulint	offset)	/* in: page number of a page; NOTE: the current thread
+	ulint	offset,	/* in: page number of a page; NOTE: the current thread
 			must want access to this page (see NOTE 3 above) */
+	trx_t*  trx)
 {
 	ib_longlong	tablespace_version;
 	buf_block_t*	block;
@@ -556,7 +560,7 @@
 		if (!ibuf_bitmap_page(i)) {
 			count += buf_read_page_low(&err, FALSE, ibuf_mode
 					| OS_AIO_SIMULATED_WAKE_LATER,
-					space, 	tablespace_version, i);
+					space, 	tablespace_version, i, trx);
 			if (err == DB_TABLESPACE_DELETED) {
 				ut_print_timestamp(stderr);
 				fprintf(stderr,
@@ -625,10 +629,10 @@
 	for (i = 0; i < n_stored; i++) {
 		if ((i + 1 == n_stored) && sync) {
 			buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE,
-				space_ids[i], space_versions[i], page_nos[i]);
+				space_ids[i], space_versions[i], page_nos[i], NULL);
 		} else {
 			buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE,
-				space_ids[i], space_versions[i], page_nos[i]);
+				space_ids[i], space_versions[i], page_nos[i], NULL);
 		}
 
 		if (err == DB_TABLESPACE_DELETED) {
@@ -704,11 +708,11 @@
 
 		if ((i + 1 == n_stored) && sync) {
 			buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
-					tablespace_version, page_nos[i]);
+					tablespace_version, page_nos[i], NULL);
 		} else {
 			buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
 					| OS_AIO_SIMULATED_WAKE_LATER,
-				       space, tablespace_version, page_nos[i]);
+				       space, tablespace_version, page_nos[i], NULL);
 		}
 	}
 	
diff -Nru mysql-5.0.67.orig/innobase/fil/fil0fil.c mysql-5.0.67.microslow_and_userstats/innobase/fil/fil0fil.c
--- mysql-5.0.67.orig/innobase/fil/fil0fil.c	Mon Aug  4 15:19:13 2008
+++ mysql-5.0.67.microslow_and_userstats/innobase/fil/fil0fil.c	Wed Sep  3 12:11:39 2008
@@ -3527,7 +3527,7 @@
 			node->name, node->handle, buf,
 			offset_low, offset_high,
 			UNIV_PAGE_SIZE * n_pages,
-			NULL, NULL);
+			NULL, NULL, NULL);
 #endif
 		if (success) {
 			node->size += n_pages;
@@ -3851,7 +3851,7 @@
 Reads or writes data. This operation is asynchronous (aio). */
 
 ulint
-fil_io(
+_fil_io(
 /*===*/
 				/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
 				if we are trying to do i/o on a tablespace
@@ -3877,8 +3877,9 @@
 	void*	buf,		/* in/out: buffer where to store read data
 				or from where to write; in aio this must be
 				appropriately aligned */
-	void*	message)	/* in: message for aio handler if non-sync
+	void*	message,	/* in: message for aio handler if non-sync
 				aio used, else ignored */
+	trx_t*  trx)
 {
 	fil_system_t*	system		= fil_system;
 	ulint		mode;
@@ -4018,7 +4019,7 @@
 #else
 	/* Queue the aio request */
 	ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
-				offset_low, offset_high, len, node, message);
+				offset_low, offset_high, len, node, message, trx);
 #endif
 	ut_a(ret);
 
diff -Nru mysql-5.0.67.orig/innobase/include/buf0rea.h mysql-5.0.67.microslow_and_userstats/innobase/include/buf0rea.h
--- mysql-5.0.67.orig/innobase/include/buf0rea.h	Mon Aug  4 15:19:13 2008
+++ mysql-5.0.67.microslow_and_userstats/innobase/include/buf0rea.h	Wed Sep  3 12:11:39 2008
@@ -10,6 +10,7 @@
 #define buf0rea_h
 
 #include "univ.i"
+#include "trx0types.h"
 #include "buf0types.h"
 
 /************************************************************************
@@ -25,7 +26,8 @@
 			/* out: number of page read requests issued: this can
 			be > 1 if read-ahead occurred */
 	ulint	space,	/* in: space id */
-	ulint	offset);/* in: page number */
+	ulint	offset,	/* in: page number */
+	trx_t*  trx);
 /************************************************************************
 Applies linear read-ahead if in the buf_pool the page is a border page of
 a linear read-ahead area and all the pages in the area have been accessed.
@@ -55,8 +57,9 @@
 /*==================*/
 			/* out: number of page read requests issued */
 	ulint	space,	/* in: space id */
-	ulint	offset);/* in: page number of a page; NOTE: the current thread
+	ulint	offset,	/* in: page number of a page; NOTE: the current thread
 			must want access to this page (see NOTE 3 above) */
+	trx_t*  trx);
 /************************************************************************
 Issues read requests for pages which the ibuf module wants to read in, in
 order to contract the insert buffer tree. Technically, this function is like
diff -Nru mysql-5.0.67.orig/innobase/include/fil0fil.h mysql-5.0.67.microslow_and_userstats/innobase/include/fil0fil.h
--- mysql-5.0.67.orig/innobase/include/fil0fil.h	Mon Aug  4 15:19:13 2008
+++ mysql-5.0.67.microslow_and_userstats/innobase/include/fil0fil.h	Wed Sep  3 12:11:39 2008
@@ -534,8 +534,11 @@
 /************************************************************************
 Reads or writes data. This operation is asynchronous (aio). */
 
+#define fil_io(type, sync, space_id, block_offset, byte_offset, len, buf, message) \
+	_fil_io(type, sync, space_id, block_offset, byte_offset, len, buf, message, NULL)
+
 ulint
-fil_io(
+_fil_io(
 /*===*/
 				/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
 				if we are trying to do i/o on a tablespace
@@ -561,8 +564,9 @@
 	void*	buf,		/* in/out: buffer where to store read data
 				or from where to write; in aio this must be
 				appropriately aligned */
-	void*	message);	/* in: message for aio handler if non-sync
+	void*	message,	/* in: message for aio handler if non-sync
 				aio used, else ignored */
+	trx_t*  trx);
 /************************************************************************
 Reads data from a space to a buffer. Remember that the possible incomplete
 blocks at the end of file are ignored: they are not taken into account when
diff -Nru mysql-5.0.67.orig/innobase/include/os0file.h mysql-5.0.67.microslow_and_userstats/innobase/include/os0file.h
--- mysql-5.0.67.orig/innobase/include/os0file.h	Mon Aug  4 15:19:14 2008
+++ mysql-5.0.67.microslow_and_userstats/innobase/include/os0file.h	Wed Sep  3 12:11:39 2008
@@ -11,6 +11,8 @@
 
 #include "univ.i"
 
+#include "trx0types.h"
+
 #ifndef __WIN__
 #include <dirent.h>
 #include <sys/stat.h>
@@ -421,8 +423,11 @@
 /***********************************************************************
 Requests a synchronous read operation. */
 
+#define os_file_read(file, buf, offset, offset_high, n)         \
+		_os_file_read(file, buf, offset, offset_high, n, NULL)
+
 ibool
-os_file_read(
+_os_file_read(
 /*=========*/
 				/* out: TRUE if request was
 				successful, FALSE if fail */
@@ -432,7 +437,8 @@
 				offset where to read */
 	ulint		offset_high,/* in: most significant 32 bits of
 				offset */
-	ulint		n);	/* in: number of bytes to read */	
+	ulint		n,	/* in: number of bytes to read */
+	trx_t*		trx);
 /***********************************************************************
 Rewind file to its start, read at most size - 1 bytes from it to str, and
 NUL-terminate str. All errors are silently ignored. This function is
@@ -584,7 +590,8 @@
 				can be used to identify a completed aio
 				operation); if mode is OS_AIO_SYNC, these
 				are ignored */
-	void*		message2);
+	void*		message2,
+	trx_t*          trx);
 /****************************************************************************
 Wakes up all async i/o threads so that they know to exit themselves in
 shutdown. */
diff -Nru mysql-5.0.67.orig/innobase/include/thr0loc.h mysql-5.0.67.microslow_and_userstats/innobase/include/thr0loc.h
--- mysql-5.0.67.orig/innobase/include/thr0loc.h	Mon Aug  4 15:19:15 2008
+++ mysql-5.0.67.microslow_and_userstats/innobase/include/thr0loc.h	Wed Sep  3 12:11:39 2008
@@ -15,6 +15,7 @@
 
 #include "univ.i"
 #include "os0thread.h"
+#include "trx0trx.h"
 
 /********************************************************************
 Initializes the thread local storage module. */
@@ -36,6 +37,14 @@
 /*===========*/
 	os_thread_id_t	id);	/* in: thread id */
 /***********************************************************************
+Gets trx */
+
+trx_t*
+thr_local_get_trx(
+/*==================*/
+				/* out: trx for mysql */
+	os_thread_id_t	id);	/* in: thread id of the thread */
+/***********************************************************************
 Gets the slot number in the thread table of a thread. */
 
 ulint
@@ -46,6 +55,14 @@
 /***********************************************************************
 Sets in the local storage the slot number in the thread table of a thread. */
 
+void
+thr_local_set_trx(
+/*==================*/
+	os_thread_id_t	id,	/* in: thread id of the thread */
+	trx_t*		trx);	/* in: trx to store for the thread */
+/***********************************************************************
+Sets in the local storage the slot number in the thread table of a thread. */
+
 void
 thr_local_set_slot_no(
 /*==================*/
diff -Nru mysql-5.0.67.orig/innobase/include/trx0trx.h mysql-5.0.67.microslow_and_userstats/innobase/include/trx0trx.h
--- mysql-5.0.67.orig/innobase/include/trx0trx.h	Mon Aug  4 15:19:15 2008
+++ mysql-5.0.67.microslow_and_userstats/innobase/include/trx0trx.h	Wed Sep  3 12:11:39 2008
@@ -668,6 +668,17 @@
 	/*------------------------------*/
 	char detailed_error[256];	/* detailed error message for last
 					error, or empty. */
+	/*------------------------------*/
+	os_thread_id_t	trx_thread_id;
+	ulint		io_reads;
+	ib_longlong     io_read;
+	ulint		io_reads_wait_timer;
+	ib_longlong     lock_que_wait_ustarted;
+	ulint           lock_que_wait_timer;
+	ulint           innodb_que_wait_timer;
+	ulint           distinct_page_access;
+#define	DPAH_SIZE	8192
+	byte*		distinct_page_access_hash;
 };
 
 #define TRX_MAX_N_THREADS	32	/* maximum number of concurrent
diff -Nru mysql-5.0.67.orig/innobase/lock/lock0lock.c mysql-5.0.67.microslow_and_userstats/innobase/lock/lock0lock.c
--- mysql-5.0.67.orig/innobase/lock/lock0lock.c	Mon Aug  4 15:19:16 2008
+++ mysql-5.0.67.microslow_and_userstats/innobase/lock/lock0lock.c	Wed Sep  3 12:11:39 2008
@@ -1806,6 +1806,8 @@
 {
 	lock_t*	lock;
 	trx_t*	trx;
+	ulint   sec;
+	ulint   ms;
 	
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(mutex_own(&kernel_mutex));
@@ -1861,6 +1863,8 @@
 	trx->que_state = TRX_QUE_LOCK_WAIT;
 	trx->was_chosen_as_deadlock_victim = FALSE;
 	trx->wait_started = time(NULL);
+	ut_usectime(&sec, &ms);
+	trx->lock_que_wait_ustarted = (ib_longlong)sec * 1000000 + ms;
 
 	ut_a(que_thr_stop(thr));
 
@@ -3514,7 +3518,9 @@
 {
 	lock_t*	lock;
 	trx_t*	trx;
-	
+	ulint   sec;
+	ulint   ms;
+
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(mutex_own(&kernel_mutex));
 #endif /* UNIV_SYNC_DEBUG */
@@ -3563,7 +3569,10 @@
 	
 		return(DB_SUCCESS);
 	}
-	
+
+	/* wait_started is set below; also take a microsecond timestamp */
+	ut_usectime(&sec, &ms);
+	trx->lock_que_wait_ustarted = (ib_longlong)sec * 1000000 + ms;
 	trx->que_state = TRX_QUE_LOCK_WAIT;
 	trx->was_chosen_as_deadlock_victim = FALSE;
 	trx->wait_started = time(NULL);
@@ -4289,7 +4298,7 @@
 	ulint	i;
 	mtr_t	mtr;
 	trx_t*	trx;
-
+	
 	fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n");
 
 	/* First print info on non-active transactions */
diff -Nru mysql-5.0.67.orig/innobase/os/os0file.c mysql-5.0.67.microslow_and_userstats/innobase/os/os0file.c
--- mysql-5.0.67.orig/innobase/os/os0file.c	Mon Aug  4 15:19:16 2008
+++ mysql-5.0.67.microslow_and_userstats/innobase/os/os0file.c	Wed Sep  3 12:11:39 2008
@@ -14,6 +14,7 @@
 #include "srv0start.h"
 #include "fil0fil.h"
 #include "buf0buf.h"
+#include "trx0sys.h"
 
 #if defined(UNIV_HOTBACKUP) && defined(__WIN__)
 /* Add includes for the _stat() call to compile on Windows */
@@ -101,6 +102,7 @@
 	struct aiocb	control;	/* Posix control block for aio
 					request */
 #endif
+        trx_t*		trx;
 };
 
 /* The aio array structure */
@@ -1903,9 +1905,13 @@
 #ifndef __WIN__
 /***********************************************************************
 Does a synchronous read operation in Posix. */
+/* Old-signature wrapper: passes NULL as trx, so no per-trx i/o accounting */
+#define os_file_pread(file, buf, n, offset, offset_high)        \
+		_os_file_pread(file, buf, n, offset, offset_high, NULL)
+
 static
 ssize_t
-os_file_pread(
+_os_file_pread(
 /*==========*/
 				/* out: number of bytes read, -1 if error */
 	os_file_t	file,	/* in: handle to a file */
@@ -1913,12 +1919,17 @@
 	ulint		n,	/* in: number of bytes to read */	
 	ulint		offset,	/* in: least significant 32 bits of file
 				offset from where to read */
-	ulint		offset_high) /* in: most significant 32 bits of
+	ulint		offset_high, /* in: most significant 32 bits of
 				offset */
+        trx_t*		trx)
 {
         off_t	offs;
 	ssize_t	n_bytes;
-
+	ulint           sec;
+	ulint           ms;
+	ib_longlong     start_time;
+	ib_longlong     finish_time;
+	
 	ut_a((offset & 0xFFFFFFFFUL) == offset);
         
         /* If off_t is > 4 bytes in size, then we assume we can pass a
@@ -1937,7 +1948,13 @@
         }
 
 	os_n_file_reads++;
-
+	if (trx)
+	{
+	        trx->io_reads++;
+		trx->io_read += n;
+		ut_usectime(&sec, &ms);
+		start_time = (ib_longlong)sec * 1000000 + ms;
+	}
 #if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD)
         os_mutex_enter(os_file_count_mutex);
 	os_file_n_pending_preads++;
@@ -1951,6 +1968,13 @@
 	os_n_pending_reads--;
         os_mutex_exit(os_file_count_mutex);
 
+        if (trx)
+        {
+		ut_usectime(&sec, &ms);
+        	finish_time = (ib_longlong)sec * 1000000 + ms;
+                trx->io_reads_wait_timer += (ulint)(finish_time - start_time);
+	}
+
 	return(n_bytes);
 #else
 	{
@@ -1981,6 +2005,13 @@
 	os_n_pending_reads--;
         os_mutex_exit(os_file_count_mutex);
 
+        if (trx)
+        {
+		ut_usectime(&sec, &ms);
+        	finish_time = (ib_longlong)sec * 1000000 + ms;
+                trx->io_reads_wait_timer += (ulint)(finish_time - start_time);
+	}
+
 	return(ret);
 	}
 #endif
@@ -2103,7 +2134,7 @@
 Requests a synchronous positioned read operation. */
 
 ibool
-os_file_read(
+_os_file_read(
 /*=========*/
 				/* out: TRUE if request was
 				successful, FALSE if fail */
@@ -2113,7 +2144,8 @@
 				offset where to read */
 	ulint		offset_high, /* in: most significant 32 bits of
 				offset */
-	ulint		n)	/* in: number of bytes to read */	
+	ulint		n,	/* in: number of bytes to read */
+        trx_t*		trx)
 {
 #ifdef __WIN__
 	BOOL		ret;
@@ -2128,8 +2160,7 @@
 
 	os_n_file_reads++;
 	os_bytes_read_since_printout += n;
-
-try_again:	
+try_again:
 	ut_ad(file);
 	ut_ad(buf);
 	ut_ad(n > 0);
@@ -2177,7 +2208,7 @@
 	os_bytes_read_since_printout += n;
 
 try_again:
-	ret = os_file_pread(file, buf, n, offset, offset_high);
+	ret = _os_file_pread(file, buf, n, offset, offset_high, trx);
 
 	if ((ulint)ret == n) {
 
@@ -3137,7 +3168,8 @@
 				offset */
 	ulint		offset_high, /* in: most significant 32 bits of
 				offset */
-	ulint		len)	/* in: length of the block to read or write */
+	ulint		len,	/* in: length of the block to read or write */
+	trx_t*          trx)
 {
 	os_aio_slot_t*	slot;
 #ifdef WIN_ASYNC_IO
@@ -3196,7 +3228,7 @@
 	slot->offset   = offset;
 	slot->offset_high = offset_high;
 	slot->io_already_done = FALSE;
-	
+
 #ifdef WIN_ASYNC_IO		
 	control = &(slot->control);
 	control->Offset = (DWORD)offset;
@@ -3390,7 +3422,8 @@
 				can be used to identify a completed aio
 				operation); if mode is OS_AIO_SYNC, these
 				are ignored */
-	void*		message2)
+	void*		message2,
+	trx_t*          trx)
 {
 	os_aio_array_t*	array;
 	os_aio_slot_t*	slot;
@@ -3429,8 +3462,8 @@
 		wait in the Windows case. */
 
 		if (type == OS_FILE_READ) {
-			return(os_file_read(file, buf, offset,
-							offset_high, n));
+			return(_os_file_read(file, buf, offset,
+							offset_high, n, trx));
 		}
 
 		ut_a(type == OS_FILE_WRITE);
@@ -3463,14 +3496,19 @@
 		ut_error;
 	}
 	
+	if (trx && type == OS_FILE_READ)
+	{
+		trx->io_reads++;
+		trx->io_read += n;
+	}
 	slot = os_aio_array_reserve_slot(type, array, message1, message2, file,
-					name, buf, offset, offset_high, n);
+					name, buf, offset, offset_high, n, trx);
 	if (type == OS_FILE_READ) {
 		if (os_aio_use_native_aio) {
 #ifdef WIN_ASYNC_IO
 			os_n_file_reads++;
 			os_bytes_read_since_printout += len;
-			
+
 			ret = ReadFile(file, buf, (DWORD)n, &len,
 							&(slot->control));
 #elif defined(POSIX_ASYNC_IO)
@@ -4038,7 +4076,7 @@
 
 			ut_memcpy(consecutive_ios[i]->buf, combined_buf + offs, 
 						consecutive_ios[i]->len);
-			offs += consecutive_ios[i]->len;
+			offs += consecutive_ios[i]->len;			
 		}
 	}
 
@@ -4050,9 +4088,8 @@
 
 	/* Mark the i/os done in slots */
 
-	for (i = 0; i < n_consecutive; i++) {
+	for (i = 0; i < n_consecutive; i++) 
 		consecutive_ios[i]->io_already_done = TRUE;
-	}
 
 	/* We return the messages for the first slot now, and if there were
 	several slots, the messages will be returned with subsequent calls
diff -Nru mysql-5.0.67.orig/innobase/srv/srv0srv.c mysql-5.0.67.microslow_and_userstats/innobase/srv/srv0srv.c
--- mysql-5.0.67.orig/innobase/srv/srv0srv.c	Mon Aug  4 15:19:17 2008
+++ mysql-5.0.67.microslow_and_userstats/innobase/srv/srv0srv.c	Wed Sep  3 12:11:39 2008
@@ -996,6 +996,10 @@
 	ibool			has_slept = FALSE;
 	srv_conc_slot_t*	slot	  = NULL;
 	ulint			i;
+	ib_longlong             start_time = 0L;
+	ib_longlong             finish_time = 0L;
+	ulint                   sec;
+	ulint                   ms;
 
 	/* If trx has 'free tickets' to enter the engine left, then use one
 	such ticket */
@@ -1054,6 +1058,7 @@
     if (SRV_THREAD_SLEEP_DELAY > 0)
     {
       os_thread_sleep(SRV_THREAD_SLEEP_DELAY);
+      trx->innodb_que_wait_timer += SRV_THREAD_SLEEP_DELAY;
     }
 
 		trx->op_info = "";
@@ -1109,12 +1114,19 @@
 	/* Go to wait for the event; when a thread leaves InnoDB it will
 	release this thread */
 
+	ut_usectime(&sec, &ms);
+	start_time = (ib_longlong)sec * 1000000 + ms;
+
 	trx->op_info = "waiting in InnoDB queue";
 
 	os_event_wait(slot->event);
 
 	trx->op_info = "";
 
+	ut_usectime(&sec, &ms);
+	finish_time = (ib_longlong)sec * 1000000 + ms;
+	trx->innodb_que_wait_timer += (ulint)(finish_time - start_time);
+
 	os_fast_mutex_lock(&srv_conc_mutex);
 
 	srv_conc_n_waiting_threads--;
diff -Nru mysql-5.0.67.orig/innobase/thr/thr0loc.c mysql-5.0.67.microslow_and_userstats/innobase/thr/thr0loc.c
--- mysql-5.0.67.orig/innobase/thr/thr0loc.c	Mon Aug  4 15:19:17 2008
+++ mysql-5.0.67.microslow_and_userstats/innobase/thr/thr0loc.c	Wed Sep  3 12:11:39 2008
@@ -45,6 +45,7 @@
 				for this thread */
 	ibool		in_ibuf;/* TRUE if the the thread is doing an ibuf
 				operation */
+	trx_t*          trx;
 	hash_node_t	hash;	/* hash chain node */
 	ulint		magic_n;
 };
@@ -113,6 +114,29 @@
 }
 
 /***********************************************************************
+Gets trx */
+
+trx_t*
+thr_local_get_trx(
+/*==============*/
+				/* out: trx pointer stored for the thread */
+	os_thread_id_t	id)	/* in: id of the thread to look up */
+{
+	trx_t*		thr_trx;
+
+	/* Read the pointer out of the thread's local storage while
+	holding thr_local_mutex, then return the copy */
+
+	mutex_enter(&thr_local_mutex);
+
+	thr_trx = thr_local_get(id)->trx;
+
+	mutex_exit(&thr_local_mutex);
+
+	return(thr_trx);
+}
+
+/***********************************************************************
 Sets the slot number in the thread table of a thread. */
 
 void
@@ -124,11 +148,31 @@
 	thr_local_t*	local;
 
 	mutex_enter(&thr_local_mutex);
-	
+
 	local = thr_local_get(id);
 
 	local->slot_no = slot_no;
-	
+
+	mutex_exit(&thr_local_mutex);
+}
+
+/***********************************************************************
+Sets trx */
+
+void
+thr_local_set_trx(
+/*==============*/
+	os_thread_id_t	id,	/* in: id of the thread to update */
+	trx_t*		trx)	/* in: trx pointer to store */
+{
+	/* thr_local_get() is called with thr_local_mutex held, and the
+	trx field of the returned struct is overwritten before the
+	mutex is released */
+
+	mutex_enter(&thr_local_mutex);
+
+	thr_local_get(id)->trx = trx;
+
+	mutex_exit(&thr_local_mutex);
+}
 
@@ -172,6 +216,7 @@
 	local->magic_n = THR_LOCAL_MAGIC_N;
 
  	local->in_ibuf = FALSE;
+ 	local->trx = NULL;
 	
 	mutex_enter(&thr_local_mutex);
 
diff -Nru mysql-5.0.67.orig/innobase/trx/trx0trx.c mysql-5.0.67.microslow_and_userstats/innobase/trx/trx0trx.c
--- mysql-5.0.67.orig/innobase/trx/trx0trx.c	Mon Aug  4 15:19:17 2008
+++ mysql-5.0.67.microslow_and_userstats/innobase/trx/trx0trx.c	Wed Sep  3 12:11:39 2008
@@ -190,6 +190,16 @@
 	trx->global_read_view_heap = mem_heap_create(256);
 	trx->global_read_view = NULL;
 	trx->read_view = NULL;
+	
+	trx->io_reads = 0;
+	trx->io_read = 0;
+	trx->io_reads_wait_timer = 0;
+	trx->lock_que_wait_timer = 0;
+	trx->innodb_que_wait_timer = 0;
+	trx->distinct_page_access = 0;
+	trx->distinct_page_access_hash = NULL;
+	trx->trx_thread_id = os_thread_get_curr_id();
+	thr_local_set_trx(trx->trx_thread_id, NULL);
 
 	/* Set X/Open XA transaction identification to NULL */
 	memset(&trx->xid, 0, sizeof(trx->xid));
@@ -230,6 +240,10 @@
 
 	trx->mysql_process_no = os_proc_get_number();
 	
+	trx->distinct_page_access_hash = mem_alloc(DPAH_SIZE);
+	memset(trx->distinct_page_access_hash, 0, DPAH_SIZE);
+	thr_local_set_trx(trx->mysql_thread_id, trx);
+
 	return(trx);
 }
 
@@ -355,6 +369,8 @@
 
 	ut_a(trx->read_view == NULL);
 	
+	thr_local_free(trx->trx_thread_id);
+	
 	mem_free(trx);
 }
 
@@ -366,6 +382,12 @@
 /*===============*/
 	trx_t*	trx)	/* in, own: trx object */
 {
+	if (trx->distinct_page_access_hash)
+	{
+		mem_free(trx->distinct_page_access_hash);
+		trx->distinct_page_access_hash= NULL;
+	}
+
 	thr_local_free(trx->mysql_thread_id);
 
 	mutex_enter(&kernel_mutex);
@@ -1064,7 +1086,10 @@
 	trx_t*	trx)	/* in: transaction */
 {
 	que_thr_t*	thr;
-
+	ulint           sec;
+	ulint           ms;
+	ib_longlong     now;
+	
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(mutex_own(&kernel_mutex));
 #endif /* UNIV_SYNC_DEBUG */
@@ -1080,6 +1105,9 @@
 		thr = UT_LIST_GET_FIRST(trx->wait_thrs);
 	}
 
+	ut_usectime(&sec, &ms);
+	now = (ib_longlong)sec * 1000000 + ms;
+	trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted);
 	trx->que_state = TRX_QUE_RUNNING;
 }
 
@@ -1093,6 +1121,9 @@
 	trx_t*	trx)	/* in: transaction in the TRX_QUE_LOCK_WAIT state */
 {
 	que_thr_t*	thr;
+	ulint           sec;
+	ulint           ms;
+	ib_longlong     now;
 
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(mutex_own(&kernel_mutex));
@@ -1109,6 +1140,9 @@
 		thr = UT_LIST_GET_FIRST(trx->wait_thrs);
 	}
 
+	ut_usectime(&sec, &ms);
+	now = (ib_longlong)sec * 1000000 + ms;
+	trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted);
 	trx->que_state = TRX_QUE_RUNNING;
 }
 
diff -Nru mysql-5.0.67.orig/scripts/mysqldumpslow.sh mysql-5.0.67.microslow_and_userstats/scripts/mysqldumpslow.sh
--- mysql-5.0.67.orig/scripts/mysqldumpslow.sh	Mon Aug  4 15:20:02 2008
+++ mysql-5.0.67.microslow_and_userstats/scripts/mysqldumpslow.sh	Wed Sep  3 12:11:39 2008
@@ -83,8 +83,8 @@
     s/^#? Time: \d{6}\s+\d+:\d+:\d+.*\n//;
     my ($user,$host) = s/^#? User\@Host:\s+(\S+)\s+\@\s+(\S+).*\n// ? ($1,$2) : ('','');
 
-    s/^# Query_time: (\d+)  Lock_time: (\d+)  Rows_sent: (\d+).*\n//;
-    my ($t, $l, $r) = ($1, $2, $3);
+    s/^# Query_time: (\d+(\.\d+)?)  Lock_time: (\d+(\.\d+)?)  Rows_sent: (\d+(\.\d+)?).*\n//;
+    my ($t, $l, $r) = ($1, $3, $5);
     $t -= $l unless $opt{l};
 
     # remove fluff that mysqld writes to log when it (re)starts:
diff -Nru mysql-5.0.67.orig/sql/filesort.cc mysql-5.0.67.microslow_and_userstats/sql/filesort.cc
--- mysql-5.0.67.orig/sql/filesort.cc	Mon Aug  4 15:20:03 2008
+++ mysql-5.0.67.microslow_and_userstats/sql/filesort.cc	Wed Sep  3 12:11:39 2008
@@ -180,6 +180,7 @@
   {
     statistic_increment(thd->status_var.filesort_scan_count, &LOCK_status);
   }
+  thd->query_plan_flags|= QPLAN_FILESORT;
 #ifdef CAN_TRUST_RANGE
   if (select && select->quick && select->quick->records > 0L)
   {
@@ -245,6 +246,7 @@
   }
   else
   {
+    thd->query_plan_flags|= QPLAN_FILESORT_DISK;
     if (table_sort.buffpek && table_sort.buffpek_len < maxbuffer)
     {
       x_free(table_sort.buffpek);
@@ -1076,6 +1078,7 @@
 
   statistic_increment(current_thd->status_var.filesort_merge_passes,
 		      &LOCK_status);
+  current_thd->query_plan_fsort_passes++;
   if (param->not_killable)
   {
     killed= &not_killable;
diff -Nru mysql-5.0.67.orig/sql/ha_innodb.cc mysql-5.0.67.microslow_and_userstats/sql/ha_innodb.cc
--- mysql-5.0.67.orig/sql/ha_innodb.cc	Mon Aug  4 15:20:03 2008
+++ mysql-5.0.67.microslow_and_userstats/sql/ha_innodb.cc	Wed Sep  3 12:11:39 2008
@@ -1,3 +1,4 @@
+
 /* Copyright (C) 2000-2005 MySQL AB & Innobase Oy
 
    This program is free software; you can redistribute it and/or modify
@@ -3286,6 +3287,8 @@
 
 	error = row_insert_for_mysql((byte*) record, prebuilt);
 
+        if (error == DB_SUCCESS) rows_changed++;
+
 	if (error == DB_SUCCESS && auto_inc_used) {
 
         	/* Fetch the value that was set in the autoincrement field */
@@ -3558,6 +3561,8 @@
 		}
 	}
 
+	if (error == DB_SUCCESS) rows_changed++;
+
 	innodb_srv_conc_exit_innodb(prebuilt->trx);
 
 	error = convert_error_code_to_mysql(error, user_thd);
@@ -3606,6 +3611,8 @@
 
 	error = row_update_for_mysql((byte*) record, prebuilt);
 
+	if (error == DB_SUCCESS) rows_changed++;
+
 	innodb_srv_conc_exit_innodb(prebuilt->trx);
 
 	error = convert_error_code_to_mysql(error, user_thd);
@@ -3885,6 +3892,9 @@
 	if (ret == DB_SUCCESS) {
 		error = 0;
 		table->status = 0;
+                rows_read++;
+                if (active_index >= 0 && active_index < MAX_KEY)
+                        index_rows_read[active_index]++;
 
 	} else if (ret == DB_RECORD_NOT_FOUND) {
 		error = HA_ERR_KEY_NOT_FOUND;
@@ -4038,6 +4048,9 @@
 	if (ret == DB_SUCCESS) {
 		error = 0;
 		table->status = 0;
+                rows_read++;
+                if (active_index >= 0 && active_index < MAX_KEY)
+                        index_rows_read[active_index]++;
 
 	} else if (ret == DB_RECORD_NOT_FOUND) {
 		error = HA_ERR_END_OF_FILE;
@@ -6219,7 +6232,24 @@
 
 	if (trx->n_mysql_tables_in_use == 0) {
 
-	        trx->mysql_n_tables_locked = 0;
+		thd->innodb_was_used = TRUE;
+		thd->innodb_io_reads += trx->io_reads;
+		thd->innodb_io_read += trx->io_read;
+		thd->innodb_io_reads_wait_timer += trx->io_reads_wait_timer;
+		thd->innodb_lock_que_wait_timer += trx->lock_que_wait_timer;
+		thd->innodb_innodb_que_wait_timer += trx->innodb_que_wait_timer;
+		thd->innodb_page_access += trx->distinct_page_access;
+
+		trx->io_reads = 0;
+		trx->io_read = 0;
+		trx->io_reads_wait_timer = 0;
+		trx->lock_que_wait_timer = 0;
+		trx->innodb_que_wait_timer = 0;
+		trx->distinct_page_access = 0;
+		if (trx->distinct_page_access_hash)
+			memset(trx->distinct_page_access_hash, 0, DPAH_SIZE);
+
+    		trx->mysql_n_tables_locked = 0;
 		prebuilt->used_in_HANDLER = FALSE;
 
 		if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
diff -Nru mysql-5.0.67.orig/sql/ha_innodb.cc.orig mysql-5.0.67.microslow_and_userstats/sql/ha_innodb.cc.orig
--- mysql-5.0.67.orig/sql/ha_innodb.cc.orig	Thu Jan  1 02:00:00 1970
+++ mysql-5.0.67.microslow_and_userstats/sql/ha_innodb.cc.orig	Wed Sep  3 12:07:46 2008
@@ -0,0 +1,7421 @@
+/* Copyright (C) 2000-2005 MySQL AB & Innobase Oy
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+/* This file defines the InnoDB handler: the interface between MySQL and InnoDB
+NOTE: You can only use noninlined InnoDB functions in this file, because we
+have disabled the InnoDB inlining in this file. */
+
+/* TODO list for the InnoDB handler in 5.0:
+  - Remove the flag trx->active_trans and look at the InnoDB
+    trx struct state field
+  - fix savepoint functions to use savepoint storage area
+  - Find out what kind of problems the OS X case-insensitivity causes to
+    table and database names; should we 'normalize' the names like we do
+    in Windows?
+*/
+
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation				// gcc: Class implementation
+#endif
+
+#include "mysql_priv.h"
+#include "slave.h"
+
+#ifdef HAVE_INNOBASE_DB
+#include <m_ctype.h>
+#include <hash.h>
+#include <myisampack.h>
+#include <mysys_err.h>
+#include <my_sys.h>
+
+#define MAX_ULONG_BIT ((ulong) 1 << (sizeof(ulong)*8-1))
+
+#include "ha_innodb.h"
+
+pthread_mutex_t innobase_share_mutex, /* to protect innobase_open_files */
+                prepare_commit_mutex; /* to force correct commit order in
+				      binlog */
+ulong commit_threads= 0;
+pthread_mutex_t commit_threads_m;
+pthread_cond_t commit_cond;
+pthread_mutex_t commit_cond_m;
+bool innodb_inited= 0;
+
+/*-----------------------------------------------------------------*/
+/* These variables are used to implement (semi-)synchronous MySQL binlog
+replication for InnoDB tables. */
+
+pthread_cond_t  innobase_repl_cond;             /* Posix cond variable;
+                                                this variable is signaled
+                                                when enough binlog has been
+                                                sent to slave, so that a
+                                                waiting trx can return the
+                                                'ok' message to the client
+                                                for a commit */
+pthread_mutex_t innobase_repl_cond_mutex;       /* Posix cond variable mutex
+                                                that also protects the next
+                                                innobase_repl_... variables */
+uint            innobase_repl_state;            /* 1 if synchronous replication
+                                                is switched on and is working
+                                                ok; else 0 */
+uint            innobase_repl_file_name_inited  = 0; /* This is set to 1 when
+                                                innobase_repl_file_name
+                                                contains meaningful data */
+char*           innobase_repl_file_name;        /* The binlog name up to which
+                                                we have sent some binlog to
+                                                the slave */
+my_off_t        innobase_repl_pos;              /* The position in that file
+                                                up to which we have sent the
+                                                binlog to the slave */
+uint            innobase_repl_n_wait_threads    = 0; /* This tells how many
+                                                transactions currently are
+                                                waiting for the binlog to be
+                                                sent to the client */
+uint            innobase_repl_wait_file_name_inited = 0; /* This is set to 1
+                                                when we know the 'smallest'
+                                                wait position */
+char*           innobase_repl_wait_file_name;   /* NULL, or the 'smallest'
+                                                innobase_repl_file_name that
+                                                a transaction is waiting for */
+my_off_t        innobase_repl_wait_pos;         /* The smallest position in
+                                                that file that a trx is
+                                                waiting for: the trx can
+                                                proceed and send an 'ok' to
+                                                the client when MySQL has sent
+                                                the binlog up to this position
+                                                to the slave */
+/*-----------------------------------------------------------------*/
+
+
+
+/* Store MySQL definition of 'byte': in Linux it is char while InnoDB
+uses unsigned char; the header univ.i which we include next defines
+'byte' as a macro which expands to 'unsigned char' */
+
+typedef byte	mysql_byte;
+
+#define INSIDE_HA_INNOBASE_CC
+
+/* Include necessary InnoDB headers */
+extern "C" {
+#include "../innobase/include/univ.i"
+#include "../innobase/include/os0file.h"
+#include "../innobase/include/os0thread.h"
+#include "../innobase/include/srv0start.h"
+#include "../innobase/include/srv0srv.h"
+#include "../innobase/include/trx0roll.h"
+#include "../innobase/include/trx0trx.h"
+#include "../innobase/include/trx0sys.h"
+#include "../innobase/include/mtr0mtr.h"
+#include "../innobase/include/row0ins.h"
+#include "../innobase/include/row0mysql.h"
+#include "../innobase/include/row0sel.h"
+#include "../innobase/include/row0upd.h"
+#include "../innobase/include/log0log.h"
+#include "../innobase/include/lock0lock.h"
+#include "../innobase/include/dict0crea.h"
+#include "../innobase/include/btr0cur.h"
+#include "../innobase/include/btr0btr.h"
+#include "../innobase/include/fsp0fsp.h"
+#include "../innobase/include/sync0sync.h"
+#include "../innobase/include/fil0fil.h"
+#include "../innobase/include/trx0xa.h"
+}
+
+#define HA_INNOBASE_ROWS_IN_TABLE 10000 /* to get optimization right */
+#define HA_INNOBASE_RANGE_COUNT	  100
+
+ulong 	innobase_large_page_size = 0;
+
+/* The default values for the following, type long or longlong, start-up
+parameters are declared in mysqld.cc: */
+
+long innobase_mirrored_log_groups, innobase_log_files_in_group,
+     innobase_log_buffer_size, innobase_buffer_pool_awe_mem_mb,
+     innobase_additional_mem_pool_size, innobase_file_io_threads,
+     innobase_lock_wait_timeout, innobase_force_recovery,
+     innobase_open_files;
+
+longlong innobase_buffer_pool_size, innobase_log_file_size;
+
+/* The default values for the following char* start-up parameters
+are determined in innobase_init below: */
+
+char*	innobase_data_home_dir			= NULL;
+char*	innobase_data_file_path 		= NULL;
+char*	innobase_log_group_home_dir		= NULL;
+char*	innobase_log_arch_dir			= NULL;/* unused */
+/* The following has a misleading name: starting from 4.0.5, this also
+affects Windows: */
+char*	innobase_unix_file_flush_method		= NULL;
+
+/* Below we have boolean-valued start-up parameters, and their default
+values */
+
+ulong	innobase_fast_shutdown			= 1;
+my_bool innobase_log_archive			= FALSE;/* unused */
+my_bool innobase_use_doublewrite    = TRUE;
+my_bool innobase_use_checksums      = TRUE;
+my_bool innobase_use_large_pages    = FALSE;
+my_bool	innobase_use_native_aio			= FALSE;
+my_bool	innobase_file_per_table			= FALSE;
+my_bool innobase_locks_unsafe_for_binlog        = FALSE;
+my_bool innobase_rollback_on_timeout		= FALSE;
+my_bool innobase_create_status_file		= FALSE;
+my_bool innobase_adaptive_hash_index		= TRUE;
+
+static char *internal_innobase_data_file_path	= NULL;
+
+/* The following counter is used to convey information to InnoDB
+about server activity: in selects it is not sensible to call
+srv_active_wake_master_thread after each fetch or search, we only do
+it every INNOBASE_WAKE_INTERVAL'th step. */
+
+#define INNOBASE_WAKE_INTERVAL	32
+ulong	innobase_active_counter	= 0;
+
+static HASH 	innobase_open_tables;
+
+#ifdef __NETWARE__  	/* some special cleanup for NetWare */
+bool nw_panic = FALSE;
+#endif
+
+static mysql_byte* innobase_get_key(INNOBASE_SHARE *share,uint *length,
+			      my_bool not_used __attribute__((unused)));
+static INNOBASE_SHARE *get_share(const char *table_name);
+static void free_share(INNOBASE_SHARE *share);
+static int innobase_close_connection(THD* thd);
+static int innobase_commit(THD* thd, bool all);
+static int innobase_rollback(THD* thd, bool all);
+static int innobase_rollback_to_savepoint(THD* thd, void *savepoint);
+static int innobase_savepoint(THD* thd, void *savepoint);
+static int innobase_release_savepoint(THD* thd, void *savepoint);
+
+handlerton innobase_hton = {
+  "InnoDB",
+  SHOW_OPTION_YES,
+  "Supports transactions, row-level locking, and foreign keys",
+  DB_TYPE_INNODB,
+  innobase_init,
+  0,				/* slot */
+  sizeof(trx_named_savept_t),	/* savepoint size. TODO: use it */
+  innobase_close_connection,
+  innobase_savepoint,
+  innobase_rollback_to_savepoint,
+  innobase_release_savepoint,
+  innobase_commit,		/* commit */
+  innobase_rollback,		/* rollback */
+  innobase_xa_prepare,		/* prepare */
+  innobase_xa_recover,		/* recover */
+  innobase_commit_by_xid,	/* commit_by_xid */
+  innobase_rollback_by_xid,     /* rollback_by_xid */
+  innobase_create_cursor_view,
+  innobase_set_cursor_view,
+  innobase_close_cursor_view,
+  HTON_NO_FLAGS
+};
+
+/*********************************************************************
+Commits a transaction in an InnoDB database. */
+
+void
+innobase_commit_low(
+/*================*/
+	trx_t*	trx);	/* in: transaction handle */
+
+struct show_var_st innodb_status_variables[]= {
+  {"buffer_pool_pages_data",
+  (char*) &export_vars.innodb_buffer_pool_pages_data,     SHOW_LONG},
+  {"buffer_pool_pages_dirty",
+  (char*) &export_vars.innodb_buffer_pool_pages_dirty,    SHOW_LONG},
+  {"buffer_pool_pages_flushed",
+  (char*) &export_vars.innodb_buffer_pool_pages_flushed,  SHOW_LONG},
+  {"buffer_pool_pages_free",
+  (char*) &export_vars.innodb_buffer_pool_pages_free,     SHOW_LONG},
+  {"buffer_pool_pages_latched",
+  (char*) &export_vars.innodb_buffer_pool_pages_latched,  SHOW_LONG},
+  {"buffer_pool_pages_misc",
+  (char*) &export_vars.innodb_buffer_pool_pages_misc,     SHOW_LONG},
+  {"buffer_pool_pages_total",
+  (char*) &export_vars.innodb_buffer_pool_pages_total,    SHOW_LONG},
+  {"buffer_pool_read_ahead_rnd",
+  (char*) &export_vars.innodb_buffer_pool_read_ahead_rnd, SHOW_LONG},
+  {"buffer_pool_read_ahead_seq",
+  (char*) &export_vars.innodb_buffer_pool_read_ahead_seq, SHOW_LONG},
+  {"buffer_pool_read_requests",
+  (char*) &export_vars.innodb_buffer_pool_read_requests,  SHOW_LONG},
+  {"buffer_pool_reads",
+  (char*) &export_vars.innodb_buffer_pool_reads,          SHOW_LONG},
+  {"buffer_pool_wait_free",
+  (char*) &export_vars.innodb_buffer_pool_wait_free,      SHOW_LONG},
+  {"buffer_pool_write_requests",
+  (char*) &export_vars.innodb_buffer_pool_write_requests, SHOW_LONG},
+  {"data_fsyncs",
+  (char*) &export_vars.innodb_data_fsyncs,                SHOW_LONG},
+  {"data_pending_fsyncs",
+  (char*) &export_vars.innodb_data_pending_fsyncs,        SHOW_LONG},
+  {"data_pending_reads",
+  (char*) &export_vars.innodb_data_pending_reads,         SHOW_LONG},
+  {"data_pending_writes",
+  (char*) &export_vars.innodb_data_pending_writes,        SHOW_LONG},
+  {"data_read",
+  (char*) &export_vars.innodb_data_read,                  SHOW_LONG},
+  {"data_reads",
+  (char*) &export_vars.innodb_data_reads,                 SHOW_LONG},
+  {"data_writes",
+  (char*) &export_vars.innodb_data_writes,                SHOW_LONG},
+  {"data_written",
+  (char*) &export_vars.innodb_data_written,               SHOW_LONG},
+  {"dblwr_pages_written",
+  (char*) &export_vars.innodb_dblwr_pages_written,        SHOW_LONG},
+  {"dblwr_writes",
+  (char*) &export_vars.innodb_dblwr_writes,               SHOW_LONG},
+  {"log_waits",
+  (char*) &export_vars.innodb_log_waits,                  SHOW_LONG},
+  {"log_write_requests",
+  (char*) &export_vars.innodb_log_write_requests,         SHOW_LONG},
+  {"log_writes",
+  (char*) &export_vars.innodb_log_writes,                 SHOW_LONG},
+  {"os_log_fsyncs",
+  (char*) &export_vars.innodb_os_log_fsyncs,              SHOW_LONG},
+  {"os_log_pending_fsyncs",
+  (char*) &export_vars.innodb_os_log_pending_fsyncs,      SHOW_LONG},
+  {"os_log_pending_writes",
+  (char*) &export_vars.innodb_os_log_pending_writes,      SHOW_LONG},
+  {"os_log_written",
+  (char*) &export_vars.innodb_os_log_written,             SHOW_LONG},
+  {"page_size",
+  (char*) &export_vars.innodb_page_size,                  SHOW_LONG},
+  {"pages_created",
+  (char*) &export_vars.innodb_pages_created,              SHOW_LONG},
+  {"pages_read",
+  (char*) &export_vars.innodb_pages_read,                 SHOW_LONG},
+  {"pages_written",
+  (char*) &export_vars.innodb_pages_written,              SHOW_LONG},
+  {"row_lock_current_waits",
+  (char*) &export_vars.innodb_row_lock_current_waits,     SHOW_LONG},
+  {"row_lock_time",
+  (char*) &export_vars.innodb_row_lock_time,              SHOW_LONGLONG},
+  {"row_lock_time_avg",
+  (char*) &export_vars.innodb_row_lock_time_avg,          SHOW_LONG},
+  {"row_lock_time_max",
+  (char*) &export_vars.innodb_row_lock_time_max,          SHOW_LONG},
+  {"row_lock_waits",
+  (char*) &export_vars.innodb_row_lock_waits,             SHOW_LONG},
+  {"rows_deleted",
+  (char*) &export_vars.innodb_rows_deleted,               SHOW_LONG},
+  {"rows_inserted",
+  (char*) &export_vars.innodb_rows_inserted,              SHOW_LONG},
+  {"rows_read",
+  (char*) &export_vars.innodb_rows_read,                  SHOW_LONG},
+  {"rows_updated",
+  (char*) &export_vars.innodb_rows_updated,               SHOW_LONG},
+  {NullS, NullS, SHOW_LONG}};
+
+/* General functions */
+
+/**********************************************************************
+Save some CPU by testing the value of srv_thread_concurrency in inline
+functions. */
+inline
+void
+innodb_srv_conc_enter_innodb(
+/*=========================*/
+	trx_t*	trx)	/* in: transaction handle */
+{
+	if (UNIV_LIKELY(!srv_thread_concurrency)) {
+
+		return;
+	}
+
+	srv_conc_enter_innodb(trx);
+}
+
+/**********************************************************************
+Save some CPU by testing the value of srv_thread_concurrency in inline
+functions. */
+inline
+void
+innodb_srv_conc_exit_innodb(
+/*========================*/
+	trx_t*	trx)	/* in: transaction handle */
+{
+	if (UNIV_LIKELY(!srv_thread_concurrency)) {
+
+		return;
+	}
+
+	srv_conc_exit_innodb(trx);
+}
+
+/**********************************************************************
+Releases possible search latch and InnoDB thread FIFO ticket. These should
+be released at each SQL statement end, and also when mysqld passes the
+control to the client. It does no harm to release these also in the middle
+of an SQL statement. */
+inline
+void
+innobase_release_stat_resources(
+/*============================*/
+	trx_t*	trx)	/* in: transaction object */
+{
+	if (trx->has_search_latch) {
+		trx_search_latch_release_if_reserved(trx);
+	}
+
+	if (trx->declared_to_be_inside_innodb) {
+		/* Release our possible ticket in the FIFO */
+
+		srv_conc_force_exit_innodb(trx);
+	}
+}
+
+/************************************************************************
+Call this function when mysqld passes control to the client. That is to
+avoid deadlocks on the adaptive hash S-latch possibly held by thd. For more
+documentation, see handler.cc. */
+
+void
+innobase_release_temporary_latches(
+/*===============================*/
+        THD *thd)
+{
+	trx_t*	trx;
+
+	if (!innodb_inited) {
+
+		return;
+	}
+
+	trx = (trx_t*) thd->ha_data[innobase_hton.slot];
+
+	if (trx) {
+        	innobase_release_stat_resources(trx);
+	}
+}
+
+/************************************************************************
+Increments innobase_active_counter and every INNOBASE_WAKE_INTERVALth
+time calls srv_active_wake_master_thread. This function should be used
+when a single database operation may introduce a small need for
+server utility activity, like checkpointing. */
+inline
+void
+innobase_active_small(void)
+/*=======================*/
+{
+	innobase_active_counter++;
+
+	if ((innobase_active_counter % INNOBASE_WAKE_INTERVAL) == 0) {
+		srv_active_wake_master_thread();
+	}
+}
+
+/************************************************************************
+Converts an InnoDB error code to a MySQL error code and also notifies MySQL
+of a possible transaction rollback inside InnoDB caused by a deadlock, a lock
+wait timeout, or a full lock table. */
+static
+int
+convert_error_code_to_mysql(
+/*========================*/
+			/* out: MySQL error code */
+	int	error,	/* in: InnoDB error code */
+	THD*	thd)	/* in: user thread handle or NULL */
+{
+	if (error == DB_SUCCESS) {
+
+		return(0);
+
+  	} else if (error == (int) DB_DUPLICATE_KEY) {
+
+    		return(HA_ERR_FOUND_DUPP_KEY);
+
+ 	} else if (error == (int) DB_RECORD_NOT_FOUND) {
+
+    		return(HA_ERR_NO_ACTIVE_RECORD);
+
+ 	} else if (error == (int) DB_ERROR) {
+
+    		return(-1); /* unspecified error */
+
+ 	} else if (error == (int) DB_DEADLOCK) {
+ 		/* Since we rolled back the whole transaction, we must
+ 		tell it also to MySQL so that MySQL knows to empty the
+ 		cached binlog for this transaction */
+
+                mark_transaction_to_rollback(thd, TRUE);
+
+    		return(HA_ERR_LOCK_DEADLOCK);
+
+ 	} else if (error == (int) DB_LOCK_WAIT_TIMEOUT) {
+
+		/* Starting from 5.0.13, we let MySQL just roll back the
+		latest SQL statement in a lock wait timeout. Previously, we
+		rolled back the whole transaction. */
+
+                mark_transaction_to_rollback(thd,
+                                             (bool)row_rollback_on_timeout);
+
+   		return(HA_ERR_LOCK_WAIT_TIMEOUT);
+
+ 	} else if (error == (int) DB_NO_REFERENCED_ROW) {
+
+    		return(HA_ERR_NO_REFERENCED_ROW);
+
+ 	} else if (error == (int) DB_ROW_IS_REFERENCED) {
+
+    		return(HA_ERR_ROW_IS_REFERENCED);
+
+        } else if (error == (int) DB_CANNOT_ADD_CONSTRAINT) {
+
+    		return(HA_ERR_CANNOT_ADD_FOREIGN);
+
+        } else if (error == (int) DB_CANNOT_DROP_CONSTRAINT) {
+
+    		return(HA_ERR_ROW_IS_REFERENCED); /* TODO: This is a bit
+						misleading, a new MySQL error
+						code should be introduced */
+        } else if (error == (int) DB_COL_APPEARS_TWICE_IN_INDEX) {
+
+    		return(HA_ERR_CRASHED);
+
+ 	} else if (error == (int) DB_OUT_OF_FILE_SPACE) {
+
+    		return(HA_ERR_RECORD_FILE_FULL);
+
+ 	} else if (error == (int) DB_TABLE_IS_BEING_USED) {
+
+    		return(HA_ERR_WRONG_COMMAND);
+
+ 	} else if (error == (int) DB_TABLE_NOT_FOUND) {
+
+    		return(HA_ERR_NO_SUCH_TABLE);
+
+  	} else if (error == (int) DB_TOO_BIG_RECORD) {
+
+    		return(HA_ERR_TO_BIG_ROW);
+
+  	} else if (error == (int) DB_CORRUPTION) {
+
+    		return(HA_ERR_CRASHED);
+  	} else if (error == (int) DB_NO_SAVEPOINT) {
+
+    		return(HA_ERR_NO_SAVEPOINT);
+  	} else if (error == (int) DB_LOCK_TABLE_FULL) {
+ 		/* Since we rolled back the whole transaction, we must
+ 		tell it also to MySQL so that MySQL knows to empty the
+ 		cached binlog for this transaction */
+
+                mark_transaction_to_rollback(thd, TRUE);
+
+    		return(HA_ERR_LOCK_TABLE_FULL);
+	} else if (error == DB_UNSUPPORTED) {
+
+		return(HA_ERR_UNSUPPORTED);
+    	} else {
+    		return(-1);			// Unknown error
+    	}
+}
+
+/*****************************************************************
+If you want to print a thd that is not associated with the current thread,
+you must call this function before reserving the InnoDB kernel_mutex, to
+protect MySQL from setting thd->query NULL. If you print a thd of the current
+thread, we know that MySQL cannot modify thd->query, and it is not necessary
+to call this. Call innobase_mysql_end_print_arbitrary_thd() after you release
+the kernel_mutex.
+NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
+function! */
+extern "C"
+void
+innobase_mysql_prepare_print_arbitrary_thd(void)
+/*============================================*/
+{
+	VOID(pthread_mutex_lock(&LOCK_thread_count));
+}
+
+/*****************************************************************
+Releases the mutex reserved by innobase_mysql_prepare_print_arbitrary_thd().
+NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
+function! */
+extern "C"
+void
+innobase_mysql_end_print_arbitrary_thd(void)
+/*========================================*/
+{
+	VOID(pthread_mutex_unlock(&LOCK_thread_count));
+}
+
+/*****************************************************************
+Prints info of a THD object (== user session thread) to the given file.
+NOTE that /mysql/innobase/trx/trx0trx.c must contain the prototype for
+this function! */
+extern "C"
+void
+innobase_mysql_print_thd(
+/*=====================*/
+	FILE*   f,		/* in: output stream */
+	void*   input_thd,	/* in: pointer to a MySQL THD object */
+	uint	max_query_len)	/* in: max query length to print, or 0 to
+				   use the default max length */
+{
+	const THD*	thd;
+        const Security_context *sctx;
+	const char*	s;
+
+        thd = (const THD*) input_thd;
+        /* We probably want to have original user as part of debug output. */
+        sctx = &thd->main_security_ctx;
+
+
+  	fprintf(f, "MySQL thread id %lu, query id %lu",
+		thd->thread_id, (ulong) thd->query_id);
+	if (sctx->host) {
+		putc(' ', f);
+		fputs(sctx->host, f);
+	}
+
+	if (sctx->ip) {
+		putc(' ', f);
+		fputs(sctx->ip, f);
+	}
+
+        if (sctx->user) {
+		putc(' ', f);
+		fputs(sctx->user, f);
+  	}
+
+	if ((s = thd->proc_info)) {
+		putc(' ', f);
+		fputs(s, f);
+	}
+
+	if ((s = thd->query)) {
+		/* 3100 is chosen because currently 3000 is the maximum
+		   max_query_len we ever give this. */
+		char	buf[3100];
+		uint	len;
+
+		/* If buf is too small, we dynamically allocate storage
+		   in this. */
+		char*	dyn_str = NULL;
+
+		/* Points to buf or dyn_str. */
+		char*	str = buf;
+
+		if (max_query_len == 0)
+		{
+			/* ADDITIONAL SAFETY: the default is to print at
+			   most 300 chars to reduce the probability of a
+			   seg fault if there is a race in
+			   thd->query_length in MySQL; after May 14, 2004
+			   probably no race any more, but better be
+			   safe */
+			max_query_len = 300;
+		}
+
+		len = min(thd->query_length, max_query_len);
+
+		if (len > (sizeof(buf) - 1))
+		{
+			dyn_str = my_malloc(len + 1, MYF(0));
+			str = dyn_str;
+		}
+
+                /* Use strmake to reduce the timeframe for a race,
+                   compared to fwrite() */
+		len = (uint) (strmake(str, s, len) - str);
+		putc('\n', f);
+		fwrite(str, 1, len, f);
+
+		if (dyn_str)
+		{
+			my_free(dyn_str, MYF(0));
+		}
+	}
+
+	putc('\n', f);
+}
+
+/**********************************************************************
+Get the variable length bounds of the given character set.
+
+NOTE that the exact prototype of this function has to be in
+/innobase/data/data0type.ic! */
+extern "C"
+void
+innobase_get_cset_width(
+/*====================*/
+	ulint	cset,		/* in: MySQL charset-collation code */
+	ulint*	mbminlen,	/* out: minimum length of a char (in bytes) */
+	ulint*	mbmaxlen)	/* out: maximum length of a char (in bytes) */
+{
+	CHARSET_INFO*	cs;
+	ut_ad(cset < 256);
+	ut_ad(mbminlen);
+	ut_ad(mbmaxlen);
+
+	cs = all_charsets[cset];
+	if (cs) {
+		*mbminlen = cs->mbminlen;
+		*mbmaxlen = cs->mbmaxlen;
+	} else {
+		ut_a(cset == 0);
+		*mbminlen = *mbmaxlen = 0;
+	}
+}
+
+/**********************************************************************
+Compares NUL-terminated UTF-8 strings case insensitively.
+
+NOTE that the exact prototype of this function has to be in
+/innobase/dict/dict0dict.c! */
+extern "C"
+int
+innobase_strcasecmp(
+/*================*/
+				/* out: 0 if a=b, <0 if a<b, >0 if a>b */
+	const char*	a,	/* in: first string to compare */
+	const char*	b)	/* in: second string to compare */
+{
+	return(my_strcasecmp(system_charset_info, a, b));
+}
+
+/**********************************************************************
+Makes all characters in a NUL-terminated UTF-8 string lower case.
+
+NOTE that the exact prototype of this function has to be in
+/innobase/dict/dict0dict.c! */
+extern "C"
+void
+innobase_casedn_str(
+/*================*/
+	char*	a)	/* in/out: string to put in lower case */
+{
+	my_casedn_str(system_charset_info, a);
+}
+
+/*************************************************************************
+Creates a temporary file. */
+extern "C"
+int
+innobase_mysql_tmpfile(void)
+/*========================*/
+			/* out: temporary file descriptor, or < 0 on error */
+{
+	char	filename[FN_REFLEN];
+	int	fd2 = -1;
+	File	fd = create_temp_file(filename, mysql_tmpdir, "ib",
+#ifdef __WIN__
+				O_BINARY | O_TRUNC | O_SEQUENTIAL |
+				O_TEMPORARY | O_SHORT_LIVED |
+#endif /* __WIN__ */
+				O_CREAT | O_EXCL | O_RDWR,
+				MYF(MY_WME));
+	if (fd >= 0) {
+#ifndef __WIN__
+		/* On Windows, open files cannot be removed, but files can be
+		created with the O_TEMPORARY flag to the same effect
+		("delete on close"). */
+		unlink(filename);
+#endif /* !__WIN__ */
+		/* Copy the file descriptor, so that the additional resources
+		allocated by create_temp_file() can be freed by invoking
+		my_close().
+
+		Because the file descriptor returned by this function
+		will be passed to fdopen(), it will be closed by invoking
+		fclose(), which in turn will invoke close() instead of
+		my_close(). */
+		fd2 = dup(fd);
+		if (fd2 < 0) {
+			DBUG_PRINT("error",("Got error %d on dup",fd2));
+			my_errno=errno;
+                        my_error(EE_OUT_OF_FILERESOURCES,
+                                 MYF(ME_BELL+ME_WAITTANG),
+                                 filename, my_errno);
+                }
+		my_close(fd, MYF(MY_WME));
+	}
+	return(fd2);
+}
+
+/*************************************************************************
+Gets the InnoDB transaction handle for a MySQL handler object, creates
+an InnoDB transaction struct if the corresponding MySQL thread struct still
+lacks one. */
+static
+trx_t*
+check_trx_exists(
+/*=============*/
+			/* out: InnoDB transaction handle */
+	THD*	thd)	/* in: user thread handle */
+{
+	trx_t*	trx;
+
+	ut_ad(thd == current_thd);
+
+        trx = (trx_t*) thd->ha_data[innobase_hton.slot];
+
+	if (trx == NULL) {
+	        DBUG_ASSERT(thd != NULL);
+		trx = trx_allocate_for_mysql();
+
+		trx->mysql_thd = thd;
+		trx->mysql_query_str = &(thd->query);
+                trx->active_trans = 0;
+
+		/* Update the info whether we should skip XA steps that eat
+		CPU time */
+		trx->support_xa = (ibool)(thd->variables.innodb_support_xa);
+
+                thd->ha_data[innobase_hton.slot] = trx;
+	} else {
+		if (trx->magic_n != TRX_MAGIC_N) {
+			mem_analyze_corruption((byte*)trx);
+
+			ut_a(0);
+		}
+	}
+
+	if (thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
+		trx->check_foreigns = FALSE;
+	} else {
+		trx->check_foreigns = TRUE;
+	}
+
+	if (thd->options & OPTION_RELAXED_UNIQUE_CHECKS) {
+		trx->check_unique_secondary = FALSE;
+	} else {
+		trx->check_unique_secondary = TRUE;
+	}
+
+	return(trx);
+}
+
+
+/*************************************************************************
+Construct ha_innobase handler. */
+
+ha_innobase::ha_innobase(TABLE *table_arg)
+  :handler(&innobase_hton, table_arg),
+  int_table_flags(HA_REC_NOT_IN_SEQ |
+                  HA_NULL_IN_KEY |
+                  HA_CAN_INDEX_BLOBS |
+                  HA_CAN_SQL_HANDLER |
+                  HA_NOT_EXACT_COUNT |
+                  HA_PRIMARY_KEY_IN_READ_INDEX |
+                  HA_CAN_GEOMETRY |
+                  HA_TABLE_SCAN_ON_INDEX),
+  start_of_scan(0),
+  num_write_row(0)
+{}
+
+/*************************************************************************
+Updates the user_thd field in a handle and also allocates a new InnoDB
+transaction handle if needed, and updates the transaction fields in the
+prebuilt struct. */
+inline
+int
+ha_innobase::update_thd(
+/*====================*/
+			/* out: 0 or error code */
+	THD*	thd)	/* in: thd to use the handle */
+{
+	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
+	trx_t*		trx;
+
+	trx = check_trx_exists(thd);
+
+	if (prebuilt->trx != trx) {
+
+		row_update_prebuilt_trx(prebuilt, trx);
+	}
+
+	user_thd = thd;
+
+	return(0);
+}
+
+/*************************************************************************
+Registers that InnoDB takes part in an SQL statement, so that MySQL knows to
+roll back the statement if the statement results in an error. This MUST be
+called for every SQL statement that may be rolled back by MySQL. Calling this
+several times to register the same statement is allowed, too. */
+inline
+void
+innobase_register_stmt(
+/*===================*/
+	THD*	thd)	/* in: MySQL thd (connection) object */
+{
+        /* Register the statement: FALSE asks for statement, not trx, scope */
+        trans_register_ha(thd, FALSE, &innobase_hton);
+}
+
+/*************************************************************************
+Registers the current SQL statement of thd with MySQL and, when thd is not
+in the autocommit mode (OPTION_NOT_AUTOCOMMIT or OPTION_BEGIN is set), also
+registers InnoDB for the whole transaction, so that the MySQL XA code knows
+to call the InnoDB prepare and commit, or rollback for it. This MUST be
+called for every transaction for which the user may call commit or rollback.
+Calling this several times to register the same transaction is allowed. */
+inline
+void
+innobase_register_trx_and_stmt(
+/*===========================*/
+	THD*	thd)	/* in: MySQL thd (connection) object */
+{
+	/* NOTE that actually innobase_register_stmt() registers also
+	the transaction in the AUTOCOMMIT=1 mode. */
+	innobase_register_stmt(thd);
+
+	if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
+		/* Autocommit is on: nothing more to register */
+		return;
+	}
+
+	trans_register_ha(thd, TRUE, &innobase_hton);
+}
+
+/*   BACKGROUND INFO: HOW THE MYSQL QUERY CACHE WORKS WITH INNODB
+     ------------------------------------------------------------
+
+1) The use of the query cache for TBL is disabled when there is an
+uncommitted change to TBL.
+
+2) When a change to TBL commits, InnoDB stores the current value of
+its global trx id counter, let us denote it by INV_TRX_ID, to the table object
+in the InnoDB data dictionary, and does only allow such transactions whose
+id <= INV_TRX_ID to use the query cache.
+
+3) When InnoDB does an INSERT/DELETE/UPDATE to a table TBL, or an implicit
+modification because of an ON DELETE CASCADE, we invalidate the MySQL query
+cache of TBL immediately.
+
+How this is implemented inside InnoDB:
+
+1) Since every modification always sets an IX type table lock on the InnoDB
+table, it is easy to check if there can be uncommitted modifications for a
+table: just check if there are locks in the lock list of the table.
+
+2) When a transaction inside InnoDB commits, it reads the global trx id
+counter and stores the value INV_TRX_ID to the tables on which it had a lock.
+
+3) If there is an implicit table change from ON DELETE CASCADE or SET NULL,
+InnoDB calls an invalidate method for the MySQL query cache for that table.
+
+How this is implemented inside sql_cache.cc:
+
+1) The query cache for an InnoDB table TBL is invalidated immediately at an
+INSERT/UPDATE/DELETE, just like in the case of MyISAM. No need to delay
+invalidation to the transaction commit.
+
+2) To store or retrieve a value from the query cache of an InnoDB table TBL,
+any query must first ask InnoDB's permission. We must pass the thd as a
+parameter because InnoDB will look at the trx id, if any, associated with
+that thd.
+
+3) Use of the query cache for InnoDB tables is now allowed also when
+AUTOCOMMIT==0 or we are inside BEGIN ... COMMIT. Thus transactions no longer
+put restrictions on the use of the query cache.
+*/
+
+/**********************************************************************
+The MySQL query cache uses this to check from InnoDB if the query cache at
+the moment is allowed to operate on an InnoDB table. The SQL query must
+be a non-locking SELECT.
+
+The query cache is allowed to operate on a certain query only if this function
+returns TRUE for all tables in the query.
+
+If thd is not in the autocommit state, this function also starts a new
+transaction for thd if there is no active trx yet, and assigns a consistent
+read view to it if there is no read view yet.
+
+Why a deadlock of threads is not possible: the query cache calls this function
+at the start of a SELECT processing. Then the calling thread cannot be
+holding any InnoDB semaphores. The calling thread is holding the
+query cache mutex, and this function will reserve the InnoDB kernel mutex.
+Thus, the 'rank' in sync0sync.h of the MySQL query cache mutex is above
+the InnoDB kernel mutex. */
+
+my_bool
+innobase_query_caching_of_table_permitted(
+/*======================================*/
+				/* out: TRUE if permitted, FALSE if not;
+				note that the value FALSE does not mean
+				we should invalidate the query cache:
+				invalidation is called explicitly */
+	THD*	thd,		/* in: thd of the user who is trying to
+				store a result to the query cache or
+				retrieve it */
+	char*	full_name,	/* in: concatenation of database name,
+				the null character '\0', and the table
+				name */
+	uint	full_name_len,	/* in: length of the full name, i.e.
+				len(dbname) + len(tablename) + 1 */
+        ulonglong *unused)      /* unused for this engine */
+{
+	ibool	is_autocommit;
+	trx_t*	trx;
+	char	norm_name[1000];
+
+	ut_a(full_name_len < 999);	/* name + NUL must fit norm_name[] */
+
+	if (thd->variables.tx_isolation == ISO_SERIALIZABLE) {
+		/* In the SERIALIZABLE mode we add LOCK IN SHARE MODE to every
+		plain SELECT if AUTOCOMMIT is not on. */
+
+		return((my_bool)FALSE);
+	}
+
+        trx = check_trx_exists(thd);
+	if (trx->has_search_latch) {
+		ut_print_timestamp(stderr);
+		sql_print_error("The calling thread is holding the adaptive "
+				"search, latch though calling "
+				"innobase_query_caching_of_table_permitted.");
+
+		mutex_enter_noninline(&kernel_mutex);
+		trx_print(stderr, trx, 1024);
+		mutex_exit_noninline(&kernel_mutex);
+	}
+
+	innobase_release_stat_resources(trx);
+
+	if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
+
+		is_autocommit = TRUE;
+	} else {
+		is_autocommit = FALSE;
+
+	}
+
+	if (is_autocommit && trx->n_mysql_tables_in_use == 0) {
+		/* We are going to retrieve the query result from the query
+		cache. This cannot be a store operation to the query cache
+		because then MySQL would have locks on tables already.
+
+		TODO: if the user has used LOCK TABLES to lock the table,
+		then we open a transaction in the call of row_.. below.
+		That trx can stay open until UNLOCK TABLES. The same problem
+		exists even if we do not use the query cache. MySQL should be
+		modified so that it ALWAYS calls some cleanup function when
+		the processing of a query ends!
+
+		We can imagine we instantaneously serialize this consistent
+		read trx to the current trx id counter. If trx2 would have
+		changed the tables of a query result stored in the cache, and
+		trx2 would have already committed, making the result obsolete,
+		then trx2 would have already invalidated the cache. Thus we
+		can trust the result in the cache is ok for this query. */
+
+		return((my_bool)TRUE);
+	}
+
+	/* Normalize the table name to InnoDB format */
+
+	memcpy(norm_name, full_name, full_name_len);
+
+	norm_name[strlen(norm_name)] = '/'; /* InnoDB uses '/' as the
+					    separator between db and table */
+	norm_name[full_name_len] = '\0';	/* the memcpy copied no final NUL */
+#ifdef __WIN__
+	innobase_casedn_str(norm_name);
+#endif
+	/* The call of row_search_.. will start a new transaction if it is
+	not yet started */
+
+        if (trx->active_trans == 0) {
+
+                innobase_register_trx_and_stmt(thd);
+                trx->active_trans = 1;
+        }
+
+	if (row_search_check_if_query_cache_permitted(trx, norm_name)) {
+
+		/* printf("Query cache for %s permitted\n", norm_name); */
+
+		return((my_bool)TRUE);
+	}
+
+	/* printf("Query cache for %s NOT permitted\n", norm_name); */
+
+	return((my_bool)FALSE);
+}
+
+/*********************************************************************
+Invalidates the MySQL query cache for the table.
+NOTE that the exact prototype of this function has to be in
+/innobase/row/row0ins.c! */
+extern "C"
+void
+innobase_invalidate_query_cache(
+/*============================*/
+	trx_t*	trx,		/* in: transaction which modifies the table */
+	char*	full_name,	/* in: concatenation of database name, null
+				char '\0', table name, null char'\0';
+				NOTE that in Windows this is always
+				in LOWER CASE! */
+	ulint	full_name_len)	/* in: full name length where also the null
+				chars count */
+{
+	/* Note that the sync0sync.h rank of the query cache mutex is just
+	above the InnoDB kernel mutex. The caller of this function must not
+	have latches of a lower rank. */
+
+	/* Argument TRUE below means we are using transactions */
+#ifdef HAVE_QUERY_CACHE
+	query_cache.invalidate((THD*)(trx->mysql_thd),
+					(const char*)full_name,
+					(uint32)full_name_len,
+					TRUE);
+#endif /* HAVE_QUERY_CACHE; without it this function does nothing */
+}
+
+/*********************************************************************
+Looks up the quote character for an SQL identifier on behalf of InnoDB.
+This definition must match the one in innobase/ut/ut0ut.c! */
+extern "C"
+int
+mysql_get_identifier_quote_char(
+/*============================*/
+				/* out: quote character to be used in SQL
+				identifiers; EOF if no trx or no thd */
+	trx_t*		trx,	/* in: transaction */
+	const char*	name,	/* in: name to print */
+	ulint		namelen)/* in: length of name */
+{
+	THD*	thd = trx ? (THD*) trx->mysql_thd : NULL;
+
+	return(thd
+	       ? get_quote_char_for_identifier(thd, name, (int) namelen)
+	       : EOF);
+}
+
+/**************************************************************************
+Tells if the thd of the transaction has its killed flag set (no trx/thd: no). */
+extern "C"
+ibool
+trx_is_interrupted(
+/*===============*/
+			/* out: TRUE if interrupted */
+	trx_t*	trx)	/* in: transaction */
+{
+	return(trx && trx->mysql_thd && ((THD*) trx->mysql_thd)->killed);
+}
+
+/**************************************************************************
+Obtain a pointer to the MySQL THD object, as in current_thd().  This
+definition must match the one in sql/ha_innodb.cc! */
+extern "C"
+void*
+innobase_current_thd(void)
+/*======================*/
+			/* out: MySQL THD object */
+{
+	return(current_thd);	/* handed out as an untyped pointer */
+}
+
+/*********************************************************************
+Call this when you have opened a new table handle in HANDLER, before you
+call index_read_idx() etc. Actually, we can let the cursor stay open even
+over a transaction commit! Then you should call this before every operation,
+fetch next etc. This function inits the necessary things even after a
+transaction commit. */
+
+void
+ha_innobase::init_table_handle_for_HANDLER(void)
+/*============================================*/
+{
+        row_prebuilt_t* prebuilt;
+
+        /* If current thd does not yet have a trx struct, create one.
+        If the current handle does not yet have a prebuilt struct, create
+        one. Update the trx pointers in the prebuilt struct. Normally
+        this operation is done in external_lock. */
+
+        update_thd(current_thd);
+
+        /* Initialize the prebuilt struct much like it would be inited in
+        external_lock */
+
+        prebuilt = (row_prebuilt_t*)innobase_prebuilt;
+
+	innobase_release_stat_resources(prebuilt->trx);
+
+        /* If the transaction is not started yet, start it */
+
+        trx_start_if_not_started_noninline(prebuilt->trx);
+
+        /* Assign a read view if the transaction does not have it yet */
+
+        trx_assign_read_view(prebuilt->trx);
+
+	/* Set the MySQL flag to mark that there is an active transaction */
+
+        if (prebuilt->trx->active_trans == 0) {
+
+                innobase_register_trx_and_stmt(current_thd);
+
+                prebuilt->trx->active_trans = 1;
+        }
+
+        /* We did the necessary inits in this function, no need to repeat them
+        in row_search_for_mysql */
+
+        prebuilt->sql_stat_start = FALSE;
+
+        /* We let HANDLER always to do the reads as consistent reads, even
+        if the trx isolation level would have been specified as SERIALIZABLE */
+
+        prebuilt->select_lock_type = LOCK_NONE;
+        prebuilt->stored_select_lock_type = LOCK_NONE;
+
+        /* Always fetch all columns in the index record */
+
+        prebuilt->hint_need_to_fetch_extra_cols = ROW_RETRIEVE_ALL_COLS;
+
+        /* Do not restrict the read to the key columns. NOTE(review): the
+	original comment questioned whether the whole row is always wanted */
+
+        prebuilt->read_just_key = FALSE;
+
+	prebuilt->used_in_HANDLER = TRUE;
+
+	prebuilt->keep_other_fields_on_keyread = FALSE;
+}
+
+/*************************************************************************
+Opens an InnoDB database. */
+
+bool
+innobase_init(void)
+/*===============*/
+			/* out: FALSE on success, TRUE on error */
+{
+	static char	current_dir[3];		/* Set if using current lib */
+	int		err;
+	bool		ret;
+	char 	        *default_path;
+
+  	DBUG_ENTER("innobase_init");
+
+         if (have_innodb != SHOW_OPTION_YES)
+           goto error;
+
+	ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR);
+
+	/* Check that values don't overflow on 32-bit systems. */
+	if (sizeof(ulint) == 4) {
+		if (innobase_buffer_pool_size > UINT_MAX32) {
+			sql_print_error(
+				"innobase_buffer_pool_size can't be over 4GB"
+				" on 32-bit systems");
+
+			goto error;
+		}
+
+		if (innobase_log_file_size > UINT_MAX32) {
+			sql_print_error(
+				"innobase_log_file_size can't be over 4GB"
+				" on 32-bit systems");
+
+			goto error;
+		}
+	}
+
+  	os_innodb_umask = (ulint)my_umask;
+
+	/* First calculate the default path for innodb_data_home_dir etc.,
+	in case the user has not given any value.
+
+	Note that when using the embedded server, the datadirectory is not
+	necessarily the current directory of this program. */
+
+	if (mysqld_embedded) {
+		default_path = mysql_real_data_home;
+		fil_path_to_mysql_datadir = mysql_real_data_home;
+	} else {
+	  	/* It's better to use current lib, to keep paths short */
+	  	current_dir[0] = FN_CURLIB;
+	  	current_dir[1] = FN_LIBCHAR;
+	  	current_dir[2] = 0;
+	  	default_path = current_dir;
+	}
+
+	ut_a(default_path);
+
+	if (specialflag & SPECIAL_NO_PRIOR) {
+	        srv_set_thread_priorities = FALSE;
+	} else {
+	        srv_set_thread_priorities = TRUE;
+	        srv_query_thread_priority = QUERY_PRIOR;
+	}
+
+	/* Set InnoDB initialization parameters according to the values
+	read from MySQL .cnf file */
+
+	/*--------------- Data files -------------------------*/
+
+	/* The default dir for data files is the datadir of MySQL */
+
+	srv_data_home = (innobase_data_home_dir ? innobase_data_home_dir :
+			 default_path);
+
+	/* Set default InnoDB data file size to 10 MB and let it be
+  	auto-extending. Thus users can use InnoDB in >= 4.0 without having
+	to specify any startup options. */
+
+	if (!innobase_data_file_path) {
+  		innobase_data_file_path = (char*) "ibdata1:10M:autoextend";
+	}
+
+	/* Since InnoDB edits the argument in the next call, we make another
+	copy of it: */
+
+	internal_innobase_data_file_path = my_strdup(innobase_data_file_path,
+						   MYF(MY_FAE));
+
+	ret = (bool) srv_parse_data_file_paths_and_sizes(
+				internal_innobase_data_file_path,
+				&srv_data_file_names,
+				&srv_data_file_sizes,
+				&srv_data_file_is_raw_partition,
+				&srv_n_data_files,
+				&srv_auto_extend_last_data_file,
+				&srv_last_file_size_max);
+	if (ret == FALSE) {
+	  	sql_print_error(
+			"InnoDB: syntax error in innodb_data_file_path");
+	  	my_free(internal_innobase_data_file_path,
+						MYF(MY_ALLOW_ZERO_PTR));
+                goto error;
+	}
+
+	/* -------------- Log files ---------------------------*/
+
+	/* The default dir for log files is the datadir of MySQL */
+
+	if (!innobase_log_group_home_dir) {
+	  	innobase_log_group_home_dir = default_path;
+	}
+
+#ifdef UNIV_LOG_ARCHIVE
+	/* Since innodb_log_arch_dir has no relevance under MySQL,
+	starting from 4.0.6 we always set it the same as
+	innodb_log_group_home_dir: */
+
+	innobase_log_arch_dir = innobase_log_group_home_dir;
+
+	srv_arch_dir = innobase_log_arch_dir;
+#endif /* UNIV_LOG_ARCHIVE */
+
+	ret = (bool)
+		srv_parse_log_group_home_dirs(innobase_log_group_home_dir,
+						&srv_log_group_home_dirs);
+
+	if (ret == FALSE || innobase_mirrored_log_groups != 1) {
+	  sql_print_error("syntax error in innodb_log_group_home_dir, or a "
+			  "wrong number of mirrored log groups");
+
+	  	my_free(internal_innobase_data_file_path,
+						MYF(MY_ALLOW_ZERO_PTR));
+                goto error;
+	}
+
+	/* --------------------------------------------------*/
+
+	srv_file_flush_method_str = innobase_unix_file_flush_method;
+
+	srv_n_log_groups = (ulint) innobase_mirrored_log_groups;
+	srv_n_log_files = (ulint) innobase_log_files_in_group;
+	srv_log_file_size = (ulint) innobase_log_file_size;
+
+#ifdef UNIV_LOG_ARCHIVE
+	srv_log_archive_on = (ulint) innobase_log_archive;
+#endif /* UNIV_LOG_ARCHIVE */
+	srv_log_buffer_size = (ulint) innobase_log_buffer_size;
+
+        /* We set srv_pool_size here in units of 1 kB. InnoDB internally
+        changes the value so that it becomes the number of database pages. */
+
+        if (innobase_buffer_pool_awe_mem_mb == 0) {
+                /* Careful here: we first convert the signed long int to ulint
+                and only after that divide */
+
+                srv_pool_size = ((ulint) innobase_buffer_pool_size) / 1024;
+        } else {
+                srv_use_awe = TRUE;
+                srv_pool_size = (ulint)
+                                (1024 * innobase_buffer_pool_awe_mem_mb);
+                srv_awe_window_size = (ulint) innobase_buffer_pool_size;
+
+                /* Note that what the user specified as
+                innodb_buffer_pool_size is actually the AWE memory window
+                size in this case, and the real buffer pool size is
+                determined by .._awe_mem_mb. */
+        }
+
+	srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
+
+	srv_n_file_io_threads = (ulint) innobase_file_io_threads;
+
+	srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout;
+	srv_force_recovery = (ulint) innobase_force_recovery;
+
+	srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
+	srv_use_checksums = (ibool) innobase_use_checksums;
+
+	srv_use_adaptive_hash_indexes = (ibool) innobase_adaptive_hash_index;
+
+	os_use_large_pages = (ibool) innobase_use_large_pages;
+	os_large_page_size = (ulint) innobase_large_page_size;
+
+	row_rollback_on_timeout = (ibool) innobase_rollback_on_timeout;
+
+	srv_file_per_table = (ibool) innobase_file_per_table;
+        srv_locks_unsafe_for_binlog = (ibool) innobase_locks_unsafe_for_binlog;
+
+	srv_max_n_open_files = (ulint) innobase_open_files;
+	srv_innodb_status = (ibool) innobase_create_status_file;
+
+	srv_print_verbose_log = mysqld_embedded ? 0 : 1;
+
+	/* Store the default charset-collation number of this MySQL
+	installation */
+
+	data_mysql_default_charset_coll = (ulint)default_charset_info->number;
+
+	ut_a(DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL ==
+					my_charset_latin1.number);
+	ut_a(DATA_MYSQL_BINARY_CHARSET_COLL == my_charset_bin.number);
+
+	/* Store the latin1_swedish_ci character ordering table to InnoDB. For
+	non-latin1_swedish_ci charsets we use the MySQL comparison functions,
+	and consequently we do not need to know the ordering internally in
+	InnoDB. */
+
+	ut_a(0 == strcmp((char*)my_charset_latin1.name,
+						(char*)"latin1_swedish_ci"));
+	memcpy(srv_latin1_ordering, my_charset_latin1.sort_order, 256);
+
+	/* Since we in this module access directly the fields of a trx
+        struct, and due to different headers and flags it might happen that
+	mutex_t has a different size in this module and in InnoDB
+	modules, we check at run time that the size is the same in
+	these compilation modules. */
+
+	srv_sizeof_trx_t_in_ha_innodb_cc = sizeof(trx_t);
+
+	err = innobase_start_or_create_for_mysql();
+
+	if (err != DB_SUCCESS) {
+	  	my_free(internal_innobase_data_file_path,
+						MYF(MY_ALLOW_ZERO_PTR));
+                goto error;
+	}
+
+	(void) hash_init(&innobase_open_tables,system_charset_info, 32, 0, 0,
+			 		(hash_get_key) innobase_get_key, 0, 0);
+        pthread_mutex_init(&innobase_share_mutex, MY_MUTEX_INIT_FAST);
+        pthread_mutex_init(&prepare_commit_mutex, MY_MUTEX_INIT_FAST);
+        pthread_mutex_init(&commit_threads_m, MY_MUTEX_INIT_FAST);
+        pthread_mutex_init(&commit_cond_m, MY_MUTEX_INIT_FAST);
+        pthread_cond_init(&commit_cond, NULL);
+	innodb_inited= 1;
+
+	/* If this is a replication slave and we needed to do a crash recovery,
+	set the master binlog position to what InnoDB internally knew about
+	how far we got transactions durable inside InnoDB. There is a
+	problem here: if the user used also MyISAM tables, InnoDB might not
+	know the right position for them.
+
+	THIS DOES NOT WORK CURRENTLY because replication seems to initialize
+	glob_mi also after innobase_init. */
+
+/*	if (trx_sys_mysql_master_log_pos != -1) {
+		ut_memcpy(glob_mi.log_file_name, trx_sys_mysql_master_log_name,
+				1 + ut_strlen(trx_sys_mysql_master_log_name));
+		glob_mi.pos = trx_sys_mysql_master_log_pos;
+	}
+*/
+	DBUG_RETURN(FALSE);
+error:
+        have_innodb= SHOW_OPTION_DISABLED;	// If we couldn't use handler
+        DBUG_RETURN(TRUE);
+}
+
+/***********************************************************************
+Shuts InnoDB down and frees what this module set up; no-op if not inited. */
+
+bool
+innobase_end(void)
+/*==============*/
+				/* out: TRUE if error */
+{
+	bool	failed = FALSE;
+
+	DBUG_ENTER("innobase_end");
+
+#ifdef __NETWARE__ 	/* some special cleanup for NetWare */
+	if (nw_panic) {
+		set_panic_flag_for_netware();
+	}
+#endif
+	if (!innodb_inited) {
+		/* innodb_inited is clear: there is nothing to release */
+		DBUG_RETURN(failed);
+	}
+
+	srv_fast_shutdown = (ulint) innobase_fast_shutdown;
+	innodb_inited = 0;
+	failed = (innobase_shutdown_for_mysql() != DB_SUCCESS);
+
+	hash_free(&innobase_open_tables);
+	my_free(internal_innobase_data_file_path, MYF(MY_ALLOW_ZERO_PTR));
+	pthread_mutex_destroy(&innobase_share_mutex);
+	pthread_mutex_destroy(&prepare_commit_mutex);
+	pthread_mutex_destroy(&commit_threads_m);
+	pthread_mutex_destroy(&commit_cond_m);
+	pthread_cond_destroy(&commit_cond);
+
+	DBUG_RETURN(failed);
+}
+
+/********************************************************************
+Flushes the InnoDB log buffer to disk through log_buffer_flush_to_disk().
+The original note suggests innobase_checkpoint would be a better name. */
+
+bool
+innobase_flush_logs(void)
+/*=====================*/
+				/* out: TRUE if error; always FALSE here */
+{
+	DBUG_ENTER("innobase_flush_logs");
+
+	/* Always reports success: the flush result is not checked here */
+
+	log_buffer_flush_to_disk();
+
+	DBUG_RETURN(FALSE);
+}
+
+/*********************************************************************
+Commits a started InnoDB transaction; does nothing for a trx not started. */
+
+void
+innobase_commit_low(
+/*================*/
+	trx_t*	trx)	/* in: transaction handle */
+{
+	if (trx->conc_state == TRX_NOT_STARTED) {
+		/* Nothing has been started, so there is nothing to commit */
+		return;
+	}
+
+#ifdef HAVE_REPLICATION
+	THD*	cur_thd = current_thd;
+
+	if (cur_thd != NULL && cur_thd->slave_thread) {
+		/* Slave thread: copy the master log name and position from
+		active_mi->rli into the transaction before committing */
+		trx->mysql_master_log_file_name =
+			active_mi->rli.group_master_log_name;
+		trx->mysql_master_log_pos =
+			(ib_longlong) active_mi->rli.future_group_master_log_pos;
+	}
+#endif /* HAVE_REPLICATION */
+
+	trx_commit_for_mysql(trx);
+}
+
+/*********************************************************************
+Creates an InnoDB transaction struct for the thd if it does not yet have one.
+Starts a new InnoDB transaction if a transaction is not yet started. And
+assigns a new snapshot for a consistent read if the transaction does not yet
+have one. */
+
+int
+innobase_start_trx_and_assign_read_view(
+/*====================================*/
+			/* out: 0 */
+	THD*	thd)	/* in: MySQL thread handle of the user for whom
+			the transaction should be started */
+{
+	trx_t*	trx;
+
+  	DBUG_ENTER("innobase_start_trx_and_assign_read_view");
+
+	/* Create a new trx struct for thd, if it does not yet have one */
+
+	trx = check_trx_exists(thd);
+
+	/* This is just to play safe: release a possible FIFO ticket and
+	search latch. Since we will reserve the kernel mutex, we have to
+	release the search system latch first to obey the latching order. */
+
+	innobase_release_stat_resources(trx);
+
+	/* If the transaction is not started yet, start it */
+
+	trx_start_if_not_started_noninline(trx);
+
+	/* Assign a read view if the transaction does not have it yet */
+
+	trx_assign_read_view(trx);
+
+	/* Set the MySQL flag to mark that there is an active transaction */
+
+        if (trx->active_trans == 0) {
+		/* Register with the thd that owns trx, not with current_thd */
+                innobase_register_trx_and_stmt(thd);
+
+                trx->active_trans = 1;
+        }
+
+	DBUG_RETURN(0);
+}
+
+/*********************************************************************
+Commits a transaction in an InnoDB database or marks an SQL statement
+ended. */
+static
+int
+innobase_commit(
+/*============*/
+			/* out: 0 */
+	THD*	thd,	/* in: MySQL thread handle of the user for whom
+			the transaction should be committed */
+        bool    all)    /* in: TRUE - commit transaction
+                               FALSE - the current SQL statement ended */
+{
+	trx_t*		trx;
+
+  	DBUG_ENTER("innobase_commit");
+  	DBUG_PRINT("trans", ("ending transaction"));
+
+	trx = check_trx_exists(thd);
+
+	/* Update the info whether we should skip XA steps that eat CPU time */
+	trx->support_xa = (ibool)(thd->variables.innodb_support_xa);
+
+	/* Release a possible FIFO ticket and search latch. Since we will
+	reserve the kernel mutex, we have to release the search system latch
+	first to obey the latching order. */
+
+        if (trx->has_search_latch) {
+                          trx_search_latch_release_if_reserved(trx);
+        }
+
+        /* The flag trx->active_trans is set to 1 in
+
+	1. ::external_lock(),
+	2. ::start_stmt(),
+	3. innobase_query_caching_of_table_permitted(),
+	4. innobase_savepoint(),
+	5. ::init_table_handle_for_HANDLER(),
+	6. innobase_start_trx_and_assign_read_view(),
+	7. ::transactional_table_lock()
+
+	and it is only set to 0 in a commit or a rollback. If it is 0 we know
+	there cannot be resources to be freed and we could return immediately.
+	For the time being, we play safe and do the cleanup though there should
+	be nothing to clean up. */
+
+        if (trx->active_trans == 0
+	    && trx->conc_state != TRX_NOT_STARTED) {
+
+	  sql_print_error("trx->active_trans == 0, but trx->conc_state != "
+			  "TRX_NOT_STARTED");
+	}
+        if (all
+	    || (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))) {
+
+ 		/* We were instructed to commit the whole transaction, or
+		this is an SQL statement end and autocommit is on */
+
+                /* We need current binlog position for ibbackup to work.
+                Note, the position is current because of prepare_commit_mutex */
+retry:
+                if (srv_commit_concurrency > 0)
+                {
+                  pthread_mutex_lock(&commit_cond_m);
+                  commit_threads++;
+                  if (commit_threads > srv_commit_concurrency)
+                  {
+                    commit_threads--;	/* over the limit: step back, wait */
+                    pthread_cond_wait(&commit_cond, &commit_cond_m);
+                    pthread_mutex_unlock(&commit_cond_m);
+                    goto retry;
+                  }
+                  else
+                    pthread_mutex_unlock(&commit_cond_m);
+                }
+
+                trx->mysql_log_file_name = mysql_bin_log.get_log_fname();
+                trx->mysql_log_offset =
+                        (ib_longlong)mysql_bin_log.get_log_file()->pos_in_file;
+
+		innobase_commit_low(trx);
+
+                if (srv_commit_concurrency > 0)
+                {
+                  pthread_mutex_lock(&commit_cond_m);
+                  commit_threads--;
+                  pthread_cond_signal(&commit_cond);	/* wake one waiter */
+                  pthread_mutex_unlock(&commit_cond_m);
+                }
+
+                if (trx->active_trans == 2) {
+			/* NOTE(review): 2 presumably marks a trx that locked
+			prepare_commit_mutex earlier - confirm the setter */
+                        pthread_mutex_unlock(&prepare_commit_mutex);
+                }
+
+                trx->active_trans = 0;
+
+	} else {
+	        /* We just mark the SQL statement ended and do not do a
+		transaction commit */
+
+		if (trx->auto_inc_lock) {
+			/* If we had reserved the auto-inc lock for some
+			table in this SQL statement we release it now */
+
+			row_unlock_table_autoinc_for_mysql(trx);
+		}
+		/* Store the current undo_no of the transaction so that we
+		know where to roll back if we have to roll back the next
+		SQL statement */
+
+		trx_mark_sql_stat_end(trx);
+	}
+
+	/* Tell the InnoDB server that there might be work for utility
+	threads: */
+        if (trx->declared_to_be_inside_innodb) {
+                          /* Release our possible ticket in the FIFO */
+
+                          srv_conc_force_exit_innodb(trx);
+        }
+	srv_active_wake_master_thread();
+
+	DBUG_RETURN(0);
+}
+
+/* TODO: put the
+MySQL-4.1 functionality back to 5.0. This is needed to get InnoDB Hot Backup
+to work. */
+
+/*********************************************************************
+This is called when MySQL writes the binlog entry for the current
+transaction. Stores in the trx where the MySQL binlog entry for the
+current transaction ended, then commits through innobase_commit() with
+trx->flush_log_later set, so that the InnoDB log files are NOT flushed
+to disk here. To flush you have to call innobase_commit_complete(). The
+flush is separate to eliminate the bottleneck of LOCK_log in log.cc which
+disabled InnoDB's group commit capability. */
+
+int
+innobase_report_binlog_offset_and_commit(
+/*=====================================*/
+                                /* out: 0 */
+        THD*    thd,            /* in: user thread */
+        void*   trx_handle,     /* in: InnoDB trx handle */
+        char*   log_file_name,  /* in: latest binlog file name */
+        my_off_t end_offset)    /* in: the offset in the binlog file
+                                   up to which we wrote */
+{
+	trx_t*	binlog_trx = (trx_t*) trx_handle;
+
+	ut_a(binlog_trx != NULL);
+
+	/* Remember where the binlog entry of this transaction ended */
+	binlog_trx->mysql_log_file_name = log_file_name;
+	binlog_trx->mysql_log_offset = (ib_longlong) end_offset;
+
+	/* Commit with flush_log_later raised for the duration of the call;
+	the flag is lowered again right after innobase_commit() returns */
+
+	binlog_trx->flush_log_later = TRUE;
+	innobase_commit(thd, TRUE);
+	binlog_trx->flush_log_later = FALSE;
+
+	return(0);
+}
+
+#if 0 /* the function below is compiled out */
+/***********************************************************************
+This function stores the binlog offset and flushes logs. */
+
+void
+innobase_store_binlog_offset_and_flush_log(
+/*=======================================*/
+    char *binlog_name,          /* in: binlog name */
+    longlong	offset)		/* in: binlog offset */
+{
+	mtr_t mtr;
+
+	assert(binlog_name != NULL);
+
+	/* Start a mini-transaction */
+        mtr_start_noninline(&mtr);
+
+	/* Update the latest MySQL binlog name and offset info
+        in trx sys header */
+
+        trx_sys_update_mysql_binlog_offset(
+            binlog_name,
+            offset,
+            TRX_SYS_MYSQL_LOG_INFO, &mtr);
+
+        /* Commits the mini-transaction */
+        mtr_commit(&mtr);
+
+	/* Synchronous flush of the log buffer to disk */
+	log_buffer_flush_to_disk();
+}
+#endif /* 0 */
+
+/*********************************************************************
+This is called after MySQL has written the binlog entry for the current
+transaction. Flushes the InnoDB log files to disk if required. */
+
+int
+innobase_commit_complete(
+/*=====================*/
+                                /* out: 0 */
+        THD*    thd)            /* in: user thread */
+{
+	trx_t*	trx = (trx_t*) thd->ha_data[innobase_hton.slot];
+
+	if (trx == NULL || !trx->active_trans) {
+		/* No transaction, or none marked active: nothing to do */
+		return(0);
+	}
+
+	trx->active_trans = 0;
+
+	/* With srv_flush_log_at_trx_commit == 0 the completion call is
+	skipped altogether */
+
+	if (!UNIV_UNLIKELY(srv_flush_log_at_trx_commit == 0)) {
+		trx_commit_complete_for_mysql(trx);
+	}
+
+	return(0);
+}
+
+/*********************************************************************
+Rolls back a transaction or the latest SQL statement. */
+
+static int
+innobase_rollback(
+/*==============*/
+			/* out: 0 or error number */
+	THD*	thd,	/* in: handle to the MySQL thread of the user
+			whose transaction should be rolled back */
+        bool    all)    /* in: TRUE - roll back the whole transaction
+                               FALSE - roll back the latest SQL statement */
+{
+	int	error = 0;
+	trx_t*	trx;
+
+	DBUG_ENTER("innobase_rollback");
+	DBUG_PRINT("trans", ("aborting transaction"));
+
+	trx = check_trx_exists(thd);
+
+	/* Update the info whether we should skip XA steps that eat CPU time */
+	trx->support_xa = (ibool)(thd->variables.innodb_support_xa);
+
+	/* Release a possible FIFO ticket and search latch. Since we will
+	reserve the kernel mutex, we have to release the search system latch
+	first to obey the latching order. */
+
+	innobase_release_stat_resources(trx);
+
+        if (trx->auto_inc_lock) {
+		/* If we had reserved the auto-inc lock for some table (if
+		we come here to roll back the latest SQL statement) we
+		release it now before a possibly lengthy rollback */
+
+		row_unlock_table_autoinc_for_mysql(trx);
+	}
+
+        if (all
+	    || (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))) {
+		/* Whole transaction, or a statement end in autocommit mode */
+		error = trx_rollback_for_mysql(trx);
+                trx->active_trans = 0;
+	} else {
+		error = trx_rollback_last_sql_stat_for_mysql(trx);
+	}
+
+	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
+}
+
+/*********************************************************************
+Rolls back the whole transaction of the given InnoDB trx object. */
+
+int
+innobase_rollback_trx(
+/*==================*/
+			/* out: 0 or error number */
+	trx_t*	trx)	/* in: transaction */
+{
+	int	rollback_err;
+
+	DBUG_ENTER("innobase_rollback_trx");
+	DBUG_PRINT("trans", ("aborting transaction"));
+
+	/* The search system latch (and a possible FIFO ticket) must be
+	given up before the kernel mutex is reserved, so that the latching
+	order is obeyed. */
+
+	innobase_release_stat_resources(trx);
+
+	if (trx->auto_inc_lock) {
+		/* Do not keep an auto-inc lock reserved for some table
+		across the rollback, which may take long: release it
+		first */
+
+		row_unlock_table_autoinc_for_mysql(trx);
+	}
+
+	rollback_err = trx_rollback_for_mysql(trx);
+
+	DBUG_RETURN(convert_error_code_to_mysql(rollback_err, NULL));
+}
+
+/*********************************************************************
+Rolls the transaction of a MySQL thread back to a given savepoint. */
+
+static int
+innobase_rollback_to_savepoint(
+/*===========================*/
+				/* out: 0 if success, HA_ERR_NO_SAVEPOINT if
+				no savepoint with the given name */
+	THD*	thd,		/* in: handle to the MySQL thread of the user
+				whose transaction should be rolled back */
+	void*	savepoint)	/* in: savepoint data */
+{
+	char		sp_name[64];
+	ib_longlong	binlog_cache_pos;
+	trx_t*		trx;
+	int		err;
+
+	DBUG_ENTER("innobase_rollback_to_savepoint");
+
+	trx = check_trx_exists(thd);
+
+	/* The search system latch (and a possible FIFO ticket) must be
+	given up before the kernel mutex is reserved, so that the latching
+	order is obeyed. */
+
+	innobase_release_stat_resources(trx);
+
+	/* TODO: use provided savepoint data area to store savepoint data;
+	for now the name is derived from the pointer via longlong2str() */
+	longlong2str((ulint)savepoint, sp_name, 36);
+
+	err = (int) trx_rollback_to_savepoint_for_mysql(trx, sp_name,
+							&binlog_cache_pos);
+	DBUG_RETURN(convert_error_code_to_mysql(err, NULL));
+}
+
+/*********************************************************************
+Releases a named savepoint of the transaction of a MySQL thread. */
+static
+int
+innobase_release_savepoint(
+/*=======================*/
+				/* out: 0 if success, HA_ERR_NO_SAVEPOINT if
+				no savepoint with the given name */
+	THD*	thd,		/* in: handle to the MySQL thread of the user
+				whose savepoint should be released */
+	void*	savepoint)	/* in: savepoint data */
+{
+	char	sp_name[64];
+	trx_t*	trx;
+	int	err;
+
+	DBUG_ENTER("innobase_release_savepoint");
+
+	trx = check_trx_exists(thd);
+
+	/* TODO: use provided savepoint data area to store savepoint data;
+	for now the name is derived from the pointer via longlong2str() */
+	longlong2str((ulint)savepoint, sp_name, 36);
+
+	err = (int) trx_release_savepoint_for_mysql(trx, sp_name);
+
+	DBUG_RETURN(convert_error_code_to_mysql(err, NULL));
+}
+
+/*********************************************************************
+Sets a savepoint in the transaction of a MySQL thread. */
+static
+int
+innobase_savepoint(
+/*===============*/
+				/* out: always 0, that is, always succeeds */
+	THD*	thd,		/* in: handle to the MySQL thread */
+	void*	savepoint)	/* in: savepoint data */
+{
+	char	sp_name[64];
+	trx_t*	trx;
+	int	err;
+
+	DBUG_ENTER("innobase_savepoint");
+
+	/*
+	  Setting a savepoint makes no sense in the autocommit mode
+	  (sub-statements excepted), so the SQL layer guarantees that
+	  this method is never called in that situation.
+	*/
+	DBUG_ASSERT(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN) ||
+		    thd->in_sub_stmt);
+
+	trx = check_trx_exists(thd);
+
+	/* The search system latch (and a possible FIFO ticket) must be
+	given up before the kernel mutex is reserved, so that the latching
+	order is obeyed. */
+	innobase_release_stat_resources(trx);
+
+	/* savepoints exist only inside a transaction */
+	DBUG_ASSERT(trx->active_trans);
+
+	/* TODO: use provided savepoint data area to store savepoint data;
+	for now the name is derived from the pointer via longlong2str() */
+	longlong2str((ulint)savepoint, sp_name, 36);
+
+	err = (int) trx_savepoint_for_mysql(trx, sp_name, (ib_longlong)0);
+
+	DBUG_RETURN(convert_error_code_to_mysql(err, NULL));
+}
+
+/*********************************************************************
+Rolls back and frees the InnoDB trx object attached to a closing THD. */
+static
+int
+innobase_close_connection(
+/*======================*/
+			/* out: always 0 */
+	THD*	thd)	/* in: handle to the MySQL thread of the user
+			whose resources should be free'd */
+{
+	trx_t*	trx = (trx_t*)thd->ha_data[innobase_hton.slot];
+	bool	trx_started;
+
+	ut_a(trx);
+
+	trx_started = (trx->conc_state != TRX_NOT_STARTED);
+
+	if (trx_started && trx->active_trans == 0) {
+
+	  sql_print_error("trx->active_trans == 0, but trx->conc_state != "
+			  "TRX_NOT_STARTED");
+	}
+
+	/* An unfinished transaction is rolled back below: warn about it */
+	if (trx_started && global_system_variables.log_warnings) {
+	  sql_print_warning("MySQL is closing a connection that has an active "
+			    "InnoDB transaction.  %lu row modifications will "
+			    "roll back.",
+			    (ulong)trx->undo_no.low);
+	}
+
+	innobase_rollback_trx(trx);
+
+	trx_free_for_mysql(trx);
+
+	return(0);
+}
+
+
+/*****************************************************************************
+** InnoDB database tables
+*****************************************************************************/
+
+/********************************************************************
+Reports the record format of the table as known to InnoDB. */
+enum row_type
+ha_innobase::get_row_type() const
+/*=============================*/
+			/* out: ROW_TYPE_REDUNDANT or ROW_TYPE_COMPACT;
+			ROW_TYPE_NOT_USED if no table is attached */
+{
+	row_prebuilt_t*	pb = (row_prebuilt_t*) innobase_prebuilt;
+
+	if (!pb || !pb->table) {
+		/* Not expected: flagged with ut_ad */
+		ut_ad(0);
+
+		return(ROW_TYPE_NOT_USED);
+	}
+
+	return(pb->table->comp ? ROW_TYPE_COMPACT : ROW_TYPE_REDUNDANT);
+}
+
+/********************************************************************
+File extensions of InnoDB single-table tablespaces, as a list terminated
+by NullS. */
+static const char* ha_innobase_exts[] = { ".ibd", NullS };
+
+/********************************************************************
+Gives the file extension of an InnoDB single-table tablespace. */
+const char**
+ha_innobase::bas_ext() const
+/*========================*/
+				/* out: NullS-terminated extension list */
+{
+	return(ha_innobase_exts);
+}
+
+
+/*********************************************************************
+Builds the normalized form of a table name: the database name, a '/',
+and the table name, for example test/mytable. The two components are
+the last two elements of the given path. On Windows the database name
+and the table name are additionally always put to lower case. */
+static
+void
+normalize_table_name(
+/*=================*/
+	char*		norm_name,	/* out: normalized name as a
+					null-terminated string */
+	const char*	name)		/* in: table name string */
+{
+	char*	scan;
+	char*	table_start;
+	char*	db_start;
+
+	/* Walk backwards from the last character to the separator
+	in front of the table name */
+	for (scan = strend(name) - 1;
+	     scan >= name && *scan != '\\' && *scan != '/';
+	     scan--) {
+	}
+
+	table_start = scan + 1;
+
+	DBUG_ASSERT(scan > name);
+
+	/* Step over that separator and keep walking back to the one
+	in front of the database name, or past the start of the string */
+	for (scan--;
+	     scan >= name && *scan != '\\' && *scan != '/';
+	     scan--) {
+	}
+
+	db_start = scan + 1;
+
+	memcpy(norm_name, db_start, strlen(name) + 1 - (db_start - name));
+
+	norm_name[table_start - db_start - 1] = '/';
+
+#ifdef __WIN__
+	innobase_casedn_str(norm_name);
+#endif
+}
+
+/*********************************************************************
+Creates and opens a handle to a table which already exists in an InnoDB
+database. */
+
+int
+ha_innobase::open(
+/*==============*/
+					/* out: 0 if success; 1 or HA_ERR_NO_SUCH_TABLE on error */
+	const char*	name,		/* in: table name */
+	int 		mode,		/* in: not used */
+	uint 		test_if_locked)	/* in: not used */
+{
+	dict_table_t*	ib_table;
+  	char		norm_name[1000];
+	THD*		thd;
+
+	DBUG_ENTER("ha_innobase::open");
+
+	UT_NOT_USED(mode);
+	UT_NOT_USED(test_if_locked);
+
+	thd = current_thd;
+	normalize_table_name(norm_name, name);
+
+	user_thd = NULL;
+
+	last_query_id = (ulong)-1;
+
+	if (!(share=get_share(name))) {
+
+		DBUG_RETURN(1);
+	}
+
+	/* Create buffers for packing the fields of a record. Why
+	table->reclength did not work here? Obviously, because char
+	fields when packed actually became 1 byte longer, when we also
+	stored the string length as the first byte. */
+
+	upd_and_key_val_buff_len =
+				table->s->reclength + table->s->max_key_length
+							+ MAX_REF_PARTS * 3;
+	if (!(mysql_byte*) my_multi_malloc(MYF(MY_WME),
+				     &upd_buff, upd_and_key_val_buff_len,
+				     &key_val_buff, upd_and_key_val_buff_len,
+				     NullS)) {
+	  	free_share(share);
+
+	  	DBUG_RETURN(1);
+  	}
+
+	/* Get pointer to a table object in InnoDB dictionary cache */
+
+	ib_table = dict_table_get_and_increment_handle_count(
+				      		     norm_name, NULL);
+ 	if (NULL == ib_table) {
+	        ut_print_timestamp(stderr);
+		sql_print_error("Cannot find table %s from the internal data "
+				"dictionary\nof InnoDB though the .frm file "
+				"for the table exists. Maybe you\nhave "
+				"deleted and recreated InnoDB data files but "
+				"have forgotten\nto delete the corresponding "
+				".frm files of InnoDB tables, or you\n"
+				"have moved .frm files to another database?\n"
+				"See http://dev.mysql.com/doc/refman/5.0/en/innodb-troubleshooting.html\n"
+				"how you can resolve the problem.\n",
+				norm_name);
+	        free_share(share);
+    		my_free((gptr) upd_buff, MYF(0));
+    		my_errno = ENOENT;
+
+    		DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
+  	}
+
+ 	if (ib_table->ibd_file_missing && !thd->tablespace_op) {
+	        ut_print_timestamp(stderr);
+		sql_print_error("MySQL is trying to open a table handle but "
+				"the .ibd file for\ntable %s does not exist.\n"
+				"Have you deleted the .ibd file from the "
+				"database directory under\nthe MySQL datadir, "
+				"or have you used DISCARD TABLESPACE?\n"
+				"See http://dev.mysql.com/doc/refman/5.0/en/innodb-troubleshooting.html\n"
+				"how you can resolve the problem.\n",
+				norm_name);
+	        free_share(share);
+    		my_free((gptr) upd_buff, MYF(0));
+    		my_errno = ENOENT;
+
+		dict_table_decrement_handle_count(ib_table);
+    		DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
+  	}
+
+	innobase_prebuilt = row_create_prebuilt(ib_table);
+
+	((row_prebuilt_t*)innobase_prebuilt)->mysql_row_len =
+							table->s->reclength;
+
+	/* Looks like MySQL-3.23 sometimes has primary key number != 0 */
+
+ 	primary_key = table->s->primary_key;
+	key_used_on_scan = primary_key;
+
+	/* Allocate a buffer for a 'row reference'. A row reference is
+	a string of bytes of length ref_length which uniquely specifies
+        a row in our table. Note that MySQL may also compare two row
+        references for equality by doing a simple memcmp on the strings
+        of length ref_length! */
+
+  	if (!row_table_got_default_clust_index(ib_table)) {
+	        if (primary_key >= MAX_KEY) {
+		  sql_print_error("Table %s has a primary key in InnoDB data "
+				  "dictionary, but not in MySQL!", name);
+		}
+
+		((row_prebuilt_t*)innobase_prebuilt)
+				->clust_index_was_generated = FALSE;
+ 		/* MySQL allocates the buffer for ref. key_info->key_length
+		includes space for all key columns + one byte for each column
+		that may be NULL. ref_length must be as exact as possible to
+		save space. NOTE(review): if the mismatch logged above hit,
+		key_info[] is indexed with primary_key >= MAX_KEY - confirm. */
+
+  		ref_length = table->key_info[primary_key].key_length;
+	} else {
+	        if (primary_key != MAX_KEY) {
+		  sql_print_error("Table %s has no primary key in InnoDB data "
+				  "dictionary, but has one in MySQL! If you "
+				  "created the table with a MySQL version < "
+				  "3.23.54 and did not define a primary key, "
+				  "but defined a unique key with all non-NULL "
+				  "columns, then MySQL internally treats that "
+				  "key as the primary key. You can fix this "
+				  "error by dump + DROP + CREATE + reimport "
+				  "of the table.", name);
+		}
+
+		((row_prebuilt_t*)innobase_prebuilt)
+				->clust_index_was_generated = TRUE;
+
+  		ref_length = DATA_ROW_ID_LEN;
+
+		/* If we automatically created the clustered index, then
+		MySQL does not know about it, and MySQL must NOT be aware
+		of the index used on scan, to make it avoid checking if we
+		update the column of the index. That is why we warn below
+		if key_used_on_scan is not the undefined value MAX_KEY.
+		The column is the row id in the automatic generation case,
+		and it will never be updated anyway. */
+
+		if (key_used_on_scan != MAX_KEY) {
+		  sql_print_warning("Table %s key_used_on_scan is %lu even "
+				    "though there is no primary key inside "
+				    "InnoDB.", name, (ulong) key_used_on_scan);
+		}
+	}
+
+	block_size = 16 * 1024;	/* Index block size in InnoDB: used by MySQL
+				in query optimization */
+
+	/* Init table lock structure */
+	thr_lock_data_init(&share->lock,&lock,(void*) 0);
+
+  	info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
+
+  	DBUG_RETURN(0);
+}
+
+uint
+ha_innobase::max_supported_key_part_length() const
+{
+	return(DICT_MAX_INDEX_COL_LEN - 1);	/* one less than the InnoDB constant */
+}
+
+/**********************************************************************
+Closes a handle to an InnoDB table and releases what open() set up. */
+
+int
+ha_innobase::close(void)
+/*====================*/
+				/* out: always 0 */
+{
+	THD*	cur_thd;
+
+	DBUG_ENTER("ha_innobase::close");
+
+	/* current_thd may be slow: fetch it just once */
+	cur_thd = current_thd;
+	if (cur_thd) {
+		innobase_release_temporary_latches(cur_thd);
+	}
+
+	row_prebuilt_free((row_prebuilt_t*) innobase_prebuilt);
+
+	my_free((gptr) upd_buff, MYF(0));
+	free_share(share);
+
+	/* The utility threads of the InnoDB server might have work to
+	do now: wake the master thread */
+	srv_active_wake_master_thread();
+
+	DBUG_RETURN(0);
+}
+
+/* The following accessor functions should really be inside MySQL code! */
+
+/******************************************************************
+Computes where a field starts, relative to table->record[0]. */
+inline uint
+get_field_offset(
+/*=============*/
+			/* out: offset */
+	TABLE*	table,	/* in: MySQL table object */
+	Field*	field)	/* in: MySQL field object */
+{
+	char*	row_start = (char*) table->record[0];
+	return((uint) (field->ptr - row_start));
+}
+
+/******************************************************************
+Tells whether a field is SQL NULL in a given record. The null bit is
+located using the record format information found in table. */
+inline
+uint
+field_in_record_is_null(
+/*====================*/
+			/* out: 1 if NULL, 0 otherwise */
+	TABLE*	table,	/* in: MySQL table object */
+	Field*	field,	/* in: MySQL field object */
+	char*	record)	/* in: a row in MySQL format */
+{
+	int	null_byte_pos;
+
+	if (field->null_ptr == NULL) {
+		/* No null byte pointer: the field is reported as not NULL */
+
+		return(0);
+	}
+
+	/* The null byte is looked up at the same offset in record as
+	field->null_ptr has within table->record[0] */
+
+	null_byte_pos = (uint) ((char*) field->null_ptr
+					- (char*) table->record[0]);
+
+	/* Test the field's bit within that byte */
+	return((record[null_byte_pos] & field->null_bit) ? 1 : 0);
+}
+
+/******************************************************************
+Marks a field as SQL NULL in a given record. The null bit is located
+using the record format information found in table. */
+inline
+void
+set_field_in_record_to_null(
+/*========================*/
+	TABLE*	table,	/* in: MySQL table object */
+	Field*	field,	/* in: MySQL field object */
+	char*	record)	/* in: a row in MySQL format */
+{
+	/* field->null_ptr is used unchecked here */
+	const int	null_byte_pos = (uint) ((char*) field->null_ptr
+						- (char*) table->record[0]);
+
+	/* Switch the field's null bit on, leaving the other bits alone */
+	record[null_byte_pos] |= field->null_bit;
+}
+
+extern "C" {
+/*****************************************************************
+Comparison callback for data fields whose type requires MySQL code to
+compare them. NOTE that the prototype of this function is in rem0cmp.c in
+InnoDB source code! If you change this function, remember to update the
+prototype there! */
+
+int
+innobase_mysql_cmp(
+/*===============*/
+					/* out: 1, 0, -1, if a is greater,
+					equal, less than b, respectively */
+	int		mysql_type,	/* in: MySQL type */
+	uint		charset_number,	/* in: number of the charset */
+	unsigned char*	a,		/* in: data field */
+	unsigned int	a_length,	/* in: data field length,
+					not UNIV_SQL_NULL */
+	unsigned char*	b,		/* in: data field */
+	unsigned int	b_length)	/* in: data field length,
+					not UNIV_SQL_NULL */
+{
+	CHARSET_INFO*		cs;
+	enum_field_types	mysql_tp;
+	int			cmp;
+
+	DBUG_ASSERT(a_length != UNIV_SQL_NULL);
+	DBUG_ASSERT(b_length != UNIV_SQL_NULL);
+
+	mysql_tp = (enum_field_types) mysql_type;
+
+	switch (mysql_tp) {
+
+	case MYSQL_TYPE_BIT:
+	case MYSQL_TYPE_STRING:
+	case MYSQL_TYPE_VAR_STRING:
+	case MYSQL_TYPE_VARCHAR:
+	case FIELD_TYPE_TINY_BLOB:
+	case FIELD_TYPE_MEDIUM_BLOB:
+	case FIELD_TYPE_BLOB:
+	case FIELD_TYPE_LONG_BLOB:
+		/* Map the charset number to its charset struct. The two
+		common charsets are tried directly first, because the MySQL
+		function get_charset may be slow before Bar removes the
+		mutex operation there. */
+
+		if (charset_number == default_charset_info->number) {
+			cs = default_charset_info;
+		} else if (charset_number == my_charset_latin1.number) {
+			cs = &my_charset_latin1;
+		} else {
+			cs = get_charset(charset_number, MYF(MY_WME));
+
+			if (!cs) {
+			  sql_print_error("InnoDB needs charset %lu for doing "
+					  "a comparison, but MySQL cannot "
+					  "find that charset.",
+					  (ulong) charset_number);
+				ut_a(0);
+			}
+		}
+
+		/* Starting from 4.1.3, we use strnncollsp() in comparisons of
+		non-latin1_swedish_ci strings. NOTE that the collation order
+		changes then: 'b\0\0...' is ordered BEFORE 'b  ...'. Users
+		having indexes on such data need to rebuild their tables! */
+
+		cmp = cs->coll->strnncollsp(cs, a, a_length,
+					    b, b_length, 0);
+
+		/* Reduce the collation result to its sign */
+		if (cmp == 0) {
+			return(0);
+		}
+
+		return(cmp < 0 ? -1 : 1);
+
+	default:
+		assert(0);
+	}
+
+	return(0);
+}
+}
+
+/******************************************************************
+Maps the type of a MySQL field to the 'mtype' of InnoDB. The old <= 4.1
+VARCHAR of MySQL and the new true VARCHAR in >= 5.0.3 get the same mtype
+here: InnoDB differentiates between them by the 'prtype'. */
+inline
+ulint
+get_innobase_type_from_mysql_type(
+/*==============================*/
+				/* out: DATA_BINARY, DATA_VARCHAR, ... */
+	ulint*	unsigned_flag,	/* out: DATA_UNSIGNED if an 'unsigned type';
+				at least ENUM and SET, and unsigned integer
+				types are 'unsigned types' */
+	Field*	field)		/* in: MySQL field */
+{
+	/* Sanity checks: the MySQL type code must fit in 8 bits, because
+	ibuf relies on that, and so does ORing DATA_NOT_NULL to the
+	type */
+
+	DBUG_ASSERT((ulint)FIELD_TYPE_STRING < 256);
+	DBUG_ASSERT((ulint)FIELD_TYPE_VAR_STRING < 256);
+	DBUG_ASSERT((ulint)FIELD_TYPE_DOUBLE < 256);
+	DBUG_ASSERT((ulint)FIELD_TYPE_FLOAT < 256);
+	DBUG_ASSERT((ulint)FIELD_TYPE_DECIMAL < 256);
+
+	*unsigned_flag = (field->flags & UNSIGNED_FLAG) ? DATA_UNSIGNED : 0;
+
+	switch (field->real_type()) {
+	case FIELD_TYPE_ENUM:
+	case FIELD_TYPE_SET:
+		/* field->type() claims a string type for these, yet
+		internally the data is stored as an unsigned integer code.
+		MySQL leaves its own unsigned flag at zero for them, so
+		the flag is forced on here. */
+
+		*unsigned_flag = DATA_UNSIGNED;
+
+		return(DATA_INT);
+	default:
+		break;
+	}
+
+	switch (field->type()) {
+	/* NOTE that we only allow string types in DATA_MYSQL
+	and DATA_VARMYSQL */
+	case MYSQL_TYPE_VAR_STRING:	/* old <= 4.1 VARCHAR */
+	case MYSQL_TYPE_VARCHAR:	/* new >= 5.0.3 true VARCHAR */
+		if (field->binary()) {
+			return(DATA_BINARY);
+		}
+
+		if (strcmp(field->charset()->name,
+			   "latin1_swedish_ci") == 0) {
+			return(DATA_VARCHAR);
+		}
+
+		return(DATA_VARMYSQL);
+
+	case MYSQL_TYPE_BIT:
+	case MYSQL_TYPE_STRING:
+		if (field->binary()) {
+			return(DATA_FIXBINARY);
+		}
+
+		if (strcmp(field->charset()->name,
+			   "latin1_swedish_ci") == 0) {
+			return(DATA_CHAR);
+		}
+
+		return(DATA_MYSQL);
+
+	case FIELD_TYPE_NEWDECIMAL:
+		return(DATA_FIXBINARY);
+	case FIELD_TYPE_LONG:
+	case FIELD_TYPE_LONGLONG:
+	case FIELD_TYPE_TINY:
+	case FIELD_TYPE_SHORT:
+	case FIELD_TYPE_INT24:
+	case FIELD_TYPE_DATE:
+	case FIELD_TYPE_DATETIME:
+	case FIELD_TYPE_YEAR:
+	case FIELD_TYPE_NEWDATE:
+	case FIELD_TYPE_TIME:
+	case FIELD_TYPE_TIMESTAMP:
+		return(DATA_INT);
+	case FIELD_TYPE_FLOAT:
+		return(DATA_FLOAT);
+	case FIELD_TYPE_DOUBLE:
+		return(DATA_DOUBLE);
+	case FIELD_TYPE_DECIMAL:
+		return(DATA_DECIMAL);
+	case FIELD_TYPE_GEOMETRY:
+	case FIELD_TYPE_TINY_BLOB:
+	case FIELD_TYPE_MEDIUM_BLOB:
+	case FIELD_TYPE_BLOB:
+	case FIELD_TYPE_LONG_BLOB:
+		return(DATA_BLOB);
+	default:
+		assert(0);
+	}
+
+	return(0);
+}
+
+/***********************************************************************
+Stores an unsigned integer value < 64k into 2 bytes, least significant
+byte first (the little-endian storage format). */
+inline
+void
+innobase_write_to_2_little_endian(
+/*==============================*/
+	byte*	buf,	/* in: where to store */
+	ulint	val)	/* in: value to write, must be < 64k */
+{
+	ut_a(val < 0x10000);
+
+	buf[0] = (byte)(val & 0xFF);	/* low-order byte */
+	buf[1] = (byte)(val >> 8);	/* high-order byte */
+}
+
+/***********************************************************************
+Loads an unsigned integer value < 64k from 2 bytes stored least
+significant byte first (the little-endian storage format). */
+inline uint
+innobase_read_from_2_little_endian(
+/*===============================*/
+			/* out: value */
+	const mysql_byte*	buf)	/* in: from where to read */
+{
+	const ulint	low_byte	= (ulint)(buf[0]);
+	return((uint) (256 * ((ulint)(buf[1])) + low_byte));
+}
+
+/***********************************************************************
+Stores the key value of a row, for key number keynr, to a buffer. */
+
+uint
+ha_innobase::store_key_val_for_row(
+/*===============================*/
+				/* out: key value length as stored in buff */
+	uint 		keynr,	/* in: key number */
+	char*		buff,	/* in/out: buffer for the key value (in MySQL
+				format) */
+	uint		buff_len,/* in: buffer length */
+	const mysql_byte* record)/* in: row in MySQL format */
+{
+	KEY*		key_info 	= table->key_info + keynr;
+  	KEY_PART_INFO*	key_part	= key_info->key_part;
+  	KEY_PART_INFO*	end		= key_part + key_info->key_parts;
+	char*		buff_start	= buff;
+	enum_field_types mysql_type;
+	Field*		field;
+	ibool		is_null;
+
+  	DBUG_ENTER("store_key_val_for_row");
+
+	/* The format for storing a key field in MySQL is the following:
+
+	1. If the column can be NULL, then in the first byte we put 1 if the
+	field value is NULL, 0 otherwise.
+
+	2. If the column is of a BLOB type (it must be a column prefix field
+	in this case), then we put the length of the data in the field to the
+	next 2 bytes, in the little-endian format. If the field is SQL NULL,
+	then these 2 bytes are set to 0. Note that the length of data in the
+	field is <= column prefix length.
+
+	3. In a column prefix field, prefix_len next bytes are reserved for
+	data. In a normal field the max field length next bytes are reserved
+	for data. For a VARCHAR(n) the max field length is n. If the stored
+	value is the SQL NULL then these data bytes are set to 0.
+
+	4. We always use a 2 byte length for a true >= 5.0.3 VARCHAR. Note that
+	in the MySQL row format, the length is stored in 1 or 2 bytes,
+	depending on the maximum allowed length. But in the MySQL key value
+	format, the length always takes 2 bytes.
+
+	We have to zero-fill the buffer so that MySQL is able to use a
+	simple memcmp to compare two key values to determine if they are
+	equal. MySQL does this to compare contents of two 'ref' values. */
+
+	bzero(buff, buff_len);
+
+  	for (; key_part != end; key_part++) {
+	        is_null = FALSE;
+
+    		if (key_part->null_bit) {
+      			if (record[key_part->null_offset]
+						& key_part->null_bit) {
+				*buff = 1;
+				is_null = TRUE;
+      			} else {
+				*buff = 0;
+			}
+			buff++;
+    		}
+
+		field = key_part->field;
+		mysql_type = field->type();
+
+		if (mysql_type == MYSQL_TYPE_VARCHAR) {
+						/* >= 5.0.3 true VARCHAR */
+			ulint	lenlen;
+			ulint	len;
+			byte*	data;
+			ulint	key_len;
+			ulint	true_len;
+			CHARSET_INFO*	cs;
+			int	error=0;
+
+			key_len = key_part->length;
+
+			if (is_null) {
+				buff += key_len + 2;
+
+				continue;
+			}
+			cs = field->charset();
+
+			lenlen = (ulint)
+				(((Field_varstring*)field)->length_bytes);
+
+			data = row_mysql_read_true_varchar(&len,
+				(byte*) (record
+				+ (ulint)get_field_offset(table, field)),
+				lenlen);
+
+			true_len = len;
+
+			/* For multi byte character sets we need to calculate
+			the true length of the key */
+
+			if (len > 0 && cs->mbmaxlen > 1) {
+				true_len = (ulint) cs->cset->well_formed_len(cs,
+						(const char *) data,
+						(const char *) data + len,
+                                                (uint) (key_len /
+                                                        cs->mbmaxlen),
+						&error);
+			}
+
+			/* In a column prefix index, we may need to truncate
+			the stored value: */
+
+			if (true_len > key_len) {
+				true_len = key_len;
+			}
+
+			/* The length in a key value is always stored in 2
+			bytes */
+
+			row_mysql_store_true_var_len((byte*)buff, true_len, 2);
+			buff += 2;
+
+			memcpy(buff, data, true_len);
+
+			/* Note that we always reserve the maximum possible
+			length of the true VARCHAR in the key value, though
+			only true_len first bytes after the 2 length bytes
+			contain actual data. The rest of the space was reset
+			to zero in the bzero() call above. */
+
+			buff += key_len;
+
+		} else if (mysql_type == FIELD_TYPE_TINY_BLOB
+		    || mysql_type == FIELD_TYPE_MEDIUM_BLOB
+		    || mysql_type == FIELD_TYPE_BLOB
+		    || mysql_type == FIELD_TYPE_LONG_BLOB) {
+
+			CHARSET_INFO*	cs;
+			ulint		key_len;
+			ulint		true_len;
+			int		error=0;
+			ulint		blob_len;
+			byte*		blob_data;
+
+			ut_a(key_part->key_part_flag & HA_PART_KEY_SEG);
+
+			key_len = key_part->length;
+
+		        if (is_null) {
+				buff += key_len + 2;
+
+				continue;
+			}
+
+			cs = field->charset();
+
+		        blob_data = row_mysql_read_blob_ref(&blob_len,
+				(byte*) (record
+				+ (ulint)get_field_offset(table, field)),
+					(ulint) field->pack_length());
+
+			true_len = blob_len;
+
+			ut_a(get_field_offset(table, field)
+						     == key_part->offset);
+
+			/* For multi byte character sets we need to calculate
+			the true length of the key */
+
+			if (blob_len > 0 && cs->mbmaxlen > 1) {
+				true_len = (ulint) cs->cset->well_formed_len(cs,
+						(const char *) blob_data,
+						(const char *) blob_data
+							+ blob_len,
+                                                (uint) (key_len /
+                                                        cs->mbmaxlen),
+						&error);
+			}
+
+			/* All indexes on BLOB and TEXT are column prefix
+			indexes, and we may need to truncate the data to be
+			stored in the key value: */
+
+			if (true_len > key_len) {
+				true_len = key_len;
+			}
+
+			/* MySQL reserves 2 bytes for the length and the
+			storage of the number is little-endian */
+
+			innobase_write_to_2_little_endian(
+					(byte*)buff, true_len);
+			buff += 2;
+
+			memcpy(buff, blob_data, true_len);
+
+			/* Note that we always reserve the maximum possible
+			length of the BLOB prefix in the key value. */
+
+			buff += key_len;
+		} else {
+			/* Here we handle all other data types except the
+			true VARCHAR, BLOB and TEXT. Note that the column
+			value we store may be also in a column prefix
+			index. */
+
+			CHARSET_INFO*		cs;
+			ulint			true_len;
+			ulint			key_len;
+			const mysql_byte*	src_start;
+			int			error=0;
+			enum_field_types	real_type;
+
+			key_len = key_part->length;
+
+		        if (is_null) {
+				 buff += key_len;
+
+				 continue;
+			}
+
+			src_start = record + key_part->offset;
+			real_type = field->real_type();
+			true_len = key_len;
+
+			/* Character set for the field is defined only
+			to fields whose type is string and real field
+			type is not enum or set. For these fields check
+			if character set is multi byte. */
+
+			if (real_type != FIELD_TYPE_ENUM
+				&& real_type != FIELD_TYPE_SET
+				&& ( mysql_type == MYSQL_TYPE_VAR_STRING
+					|| mysql_type == MYSQL_TYPE_STRING)) {
+
+				cs = field->charset();
+
+				/* For multi byte character sets we need to
+				calculate the true length of the key */
+
+				if (key_len > 0 && cs->mbmaxlen > 1) {
+
+					true_len = (ulint)
+						cs->cset->well_formed_len(cs,
+							(const char *)src_start,
+							(const char *)src_start
+								+ key_len,
+                                                        (uint) (key_len /
+                                                                cs->mbmaxlen),
+							&error);
+				}
+			}
+
+			memcpy(buff, src_start, true_len);
+			buff += true_len;
+
+			/* Pad the unused space with spaces. Note that no
+			padding is ever needed for UCS-2 because in MySQL,
+			all UCS2 characters are 2 bytes, as MySQL does not
+			support surrogate pairs, which are needed to represent
+			characters in the range U+10000 to U+10FFFF. */
+
+			if (true_len < key_len) {
+				ulint pad_len = key_len - true_len;
+				memset(buff, ' ', pad_len);
+				buff += pad_len;
+			}
+		}
+  	}
+
+	ut_a(buff <= buff_start + buff_len); /* NOTE(review): runs after the writes */
+
+	DBUG_RETURN((uint)(buff - buff_start));
+}
+
+/******************************************************************
+Builds a 'template' to the prebuilt struct. The template is used in fast
+retrieval of just those column values MySQL needs in its processing. */
+static
+void
+build_template(
+/*===========*/
+	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct */
+	THD*		thd,		/* in: current user thread, used
+					only if templ_type is
+					ROW_MYSQL_REC_FIELDS */
+	TABLE*		table,		/* in: MySQL table */
+	ulint		templ_type)	/* in: ROW_MYSQL_WHOLE_ROW or
+					ROW_MYSQL_REC_FIELDS */
+{
+	dict_index_t*	index;
+	dict_index_t*	clust_index;
+	mysql_row_templ_t* templ;
+	Field*		field;
+	ulint		n_fields;
+	ulint		n_requested_fields	= 0;
+	ibool		fetch_all_in_key	= FALSE;
+	ibool		fetch_primary_key_cols	= FALSE;
+	ulint		i;
+	/* byte offset of the end of last requested column */
+	ulint		mysql_prefix_len	= 0;
+
+	if (prebuilt->select_lock_type == LOCK_X) {
+		/* We always retrieve the whole clustered index record if we
+		use exclusive row level locks, for example, if the read is
+		done in an UPDATE statement. */
+
+	        templ_type = ROW_MYSQL_WHOLE_ROW;
+	}
+
+	if (templ_type == ROW_MYSQL_REC_FIELDS) {
+	     if (prebuilt->hint_need_to_fetch_extra_cols
+						== ROW_RETRIEVE_ALL_COLS) {
+
+		/* We know we must at least fetch all columns in the key, or
+		all columns in the table */
+
+		if (prebuilt->read_just_key) {
+			/* MySQL has instructed us that it is enough to
+			fetch the columns in the key; looks like MySQL
+			can set this flag also when there is only a
+			prefix of the column in the key: in that case we
+			retrieve the whole column from the clustered
+			index */
+
+			fetch_all_in_key = TRUE;
+		} else {
+			templ_type = ROW_MYSQL_WHOLE_ROW;
+		}
+	    } else if (prebuilt->hint_need_to_fetch_extra_cols
+						== ROW_RETRIEVE_PRIMARY_KEY) {
+		/* We must at least fetch all primary key cols. Note that if
+		the clustered index was internally generated by InnoDB on the
+		row id (no primary key was defined), then
+		row_search_for_mysql() will always retrieve the row id to a
+		special buffer in the prebuilt struct. */
+
+		fetch_primary_key_cols = TRUE;
+	    }
+	}
+
+	clust_index = dict_table_get_first_index_noninline(prebuilt->table);
+
+	if (templ_type == ROW_MYSQL_REC_FIELDS) {
+		index = prebuilt->index;
+	} else {
+		index = clust_index;
+	}
+
+	if (index == clust_index) {
+		prebuilt->need_to_access_clustered = TRUE;
+	} else {
+		prebuilt->need_to_access_clustered = FALSE;
+		/* Below we check column by column if we need to access
+		the clustered index */
+	}
+
+	n_fields = (ulint)table->s->fields; /* number of columns */
+
+	if (!prebuilt->mysql_template) {
+		prebuilt->mysql_template = (mysql_row_templ_t*)
+						mem_alloc_noninline(
+					n_fields * sizeof(mysql_row_templ_t));
+	}
+
+	prebuilt->template_type = templ_type;
+	prebuilt->null_bitmap_len = table->s->null_bytes;
+
+	prebuilt->templ_contains_blob = FALSE;
+
+	/* Note that in InnoDB, i is the column number. MySQL calls columns
+	'fields'. */
+	for (i = 0; i < n_fields; i++) {
+		templ = prebuilt->mysql_template + n_requested_fields;
+		field = table->field[i];
+
+		if (UNIV_LIKELY(templ_type == ROW_MYSQL_REC_FIELDS)) {
+			/* Decide which columns we should fetch
+			and which we can skip. */
+		