From r-devel-bounces@r-project.org  Wed Jul  2 10:04:34 2008
X-VM-v5-Data: ([nil nil nil nil nil nil nil nil nil]
	["6352" "Wednesday" "2" "July" "2008" "10:00:40" "+0200" "Kurt Van Dijck" "kurt.van.dijck@skynet.be" "<486B35A8.5000700@skynet.be>" "185" "[Rd] spss long labels" "^From:" nil nil "7" "2008070210:00:40" "[Rd] spss long labels" nil nil nil nil nil nil nil]
	nil)
Return-Path: <r-devel-bounces@r-project.org>
X-Spam-Checker-Version: SpamAssassin 3.2.4 (2008-01-01) on hypatia.math.ethz.ch
X-Spam-Level: 
X-Spam-Status: No, score=-3.6 required=5.0 tests=BAYES_00,DNS_FROM_RFC_BOGUSMX,RCVD_IN_DNSWL_MED,WHOIS_MYPRIVREG autolearn=unavailable version=3.2.4
Received: from hypatia.math.ethz.ch (hypatia [129.132.145.15])
	by hypatia.math.ethz.ch (8.14.1/8.14.1) with ESMTP id m62813qY032656;
	Wed, 2 Jul 2008 10:04:33 +0200
Received: from phil1.ethz.ch (phil1.ethz.ch [129.132.202.242])
	by hypatia.math.ethz.ch (8.14.1/8.14.1) with ESMTP id m6280vKc032578
	for <r-devel@r-project.org>; Wed, 2 Jul 2008 10:00:57 +0200
Received: from gate.eia.be ([194.78.71.18] helo=mail.eia.be)
	by phil1.ethz.ch with esmtp (Exim 4.66)
	(envelope-from <kurt.van.dijck@skynet.be>) id 1KDxGh-0002BU-4D
	for r-devel@r-project.org; Wed, 02 Jul 2008 10:00:57 +0200
Received: from [172.17.16.126] ([172.17.16.126]) by mail.eia.be with Microsoft
	SMTPSVC(6.0.3790.3959); Wed, 2 Jul 2008 10:00:41 +0200
Message-ID: <486B35A8.5000700@skynet.be>
User-Agent: Icedove 1.5.0.14pre (X11/20071025)
MIME-Version: 1.0
X-OriginalArrivalTime: 02 Jul 2008 08:00:41.0156 (UTC)
	FILETIME=[B8BA2840:01C8DC19]
X-USF-Tag-Only: YES
X-USF-Filter-Node: phil1.ethz.ch
X-USF-Spam-Level: -
X-USF-Spam-Status: hits=-1.1 tests=BAYES_00,FORGED_RCVD_HELO,SPF_SOFTFAIL
X-USF-Spam-Flag: NO
X-USF: 
X-Virus-Scanned: by amavisd-new at stat.math.ethz.ch
X-BeenThere: r-devel@r-project.org
X-Mailman-Version: 2.1.10
Precedence: list
List-Id: R development and technical/programmer topics <r-devel.r-project.org>
List-Unsubscribe: <https://stat.ethz.ch/mailman/options/r-devel>,
	<mailto:r-devel-request@r-project.org?subject=unsubscribe>
List-Archive: <https://stat.ethz.ch/pipermail/r-devel>
List-Post: <mailto:r-devel@r-project.org>
List-Help: <mailto:r-devel-request@r-project.org?subject=help>
List-Subscribe: <https://stat.ethz.ch/mailman/listinfo/r-devel>,
	<mailto:r-devel-request@r-project.org?subject=subscribe>
Content-Transfer-Encoding: 7bit
Content-Type: text/plain; charset="us-ascii"; Format="flowed"
Errors-To: r-devel-bounces@r-project.org
X-DSPAM-Result: Innocent
X-DSPAM-Processed: Wed Jul  2 10:04:40 2008
X-DSPAM-Confidence: 0.9965
X-DSPAM-Probability: 0.0000
X-DSPAM-Signature: 486b369819442107185924
From: Kurt Van Dijck <kurt.van.dijck@skynet.be>
Sender: r-devel-bounces@r-project.org
To: r-devel@r-project.org
Subject: [Rd] spss long labels
Date: Wed, 02 Jul 2008 10:00:40 +0200

Hi,

A frequently seen issue when importing SPSS data files is that R does
not import the 'long variable names'.
I built a patch on the R-project's foreign module, in order to import
the 'long variable names' from SPSS (record 7, subtype 13).
To complete the job, I had to expand the "struct variable" definition
to hold 64 + 1 characters. I'm not aware of any side effects.
The sfm-read.c code works fine.
I didn't test a variety of platforms, as I don't know what is
regarded as sufficient testing. Anyway, I don't expect major trouble there
(no byte-swapping problems, no 32<->64 bit issues), as it's mainly character processing.
The patch is relative to the foreign directory. It was created
against the trunk of R-project yesterday.

We would appreciate it if you could import this patch into the main tree.

Kind regards,

Kurt Van Dijck (C programmer) & Ilse Laurijssen (R user)
Belgium

Index: src/sfm-read.c
===================================================================
--- src/sfm-read.c	(revision 5168)
+++ src/sfm-read.c	(working copy)
@@ -188,6 +188,8 @@
  static int read_variables (struct file_handle * h, struct variable *** var_by_index);
  static int read_machine_int32_info (struct file_handle * h, int size, int count, int *encoding);
  static int read_machine_flt64_info (struct file_handle * h, int size, int count);
+static int read_long_var_names (struct file_handle * h, struct dictionary *
+		, unsigned long size, unsigned int count);
  static int read_documents (struct file_handle * h);

  /* Displays the message X with corrupt_msg, then jumps to the lossage
@@ -418,11 +420,15 @@
  		break;

  	      case 7: /* Multiple-response sets (later versions of SPSS). */
-	      case 13:  /* long variable names. PSPP now has code for these
-			   that could be ported if someone is interested. */
  		skip = 1;
  		break;

+	      case 13:	/* long variable names. PSPP now has code for these
+			   that could be ported if someone is interested. */
+		if (!read_long_var_names(h, ext->dict, data.size, data.count))
+		  goto lossage;
+		break;
+
  	      case 16: /* See http://www.nabble.com/problem-loading-SPSS-15.0-save-files-t2726500.html */
  		skip = 1;
  		break;
@@ -584,14 +590,72 @@
    return 0;
  }

+/* Read record type 7, subtype 13.
+ * long variable names
+ */
  static int
+read_long_var_names (struct file_handle * h, struct dictionary * dict
+		, unsigned long size, unsigned int count)
+{
+  char * data;
+  unsigned int j;
+  struct variable ** lp;
+  struct variable ** end;
+  char * p;
+  char * endp;
+  char * val;
+  if ((1 != size)||(0 == count)) {
+    warning("%s: strange record info seen, size=%u, count=%u"
+      ", ignoring long variable names"
+      , h->fn, size, count);
+    return 0;
+  }
+  size *= count;
+  data = Calloc (size +1, char);
+  bufread(h, data, size, 0);
+  /* parse */
+  end = &dict->var[dict->nvar];
+  p = data;
+  do {
+    if (0 != (endp = strchr(p, '\t')))
+      *endp = 0; /* put null terminator */
+    if (0 == (val = strchr(p, '='))) {
+      warning("%s: no long variable name for variable '%s'", h->fn, p);
+    } else {
+      *val = 0;
+      ++val;
+      /* now, p is key, val is long name */
+      for (lp = dict->var; lp < end; ++lp) {
+        if (!strcmp(lp[0]->name, p)) {
+          strncpy(lp[0]->name, val, sizeof(lp[0]->name));
+          break;
+        }
+      }
+      if (lp >= end) {
+        warning("%s: long variable name mapping '%s' to '%s'"
+        "for variable which does not exist"
+        , h->fn, p, val);
+      }
+    }
+    p = &endp[1]; /* put to next */
+  } while (endp);
+
+  free(data);
+  return 1;
+
+lossage:
+  free(data);
+  return 0;
+}
+
+static int
  read_header (struct file_handle * h, struct sfm_read_info * inf)
  {
    struct sfm_fhuser_ext *ext = h->ext;	/* File extension strcut. */
    struct sysfile_header hdr;		/* Disk buffer. */
    struct dictionary *dict;		/* File dictionary. */
    char prod_name[sizeof hdr.prod_name + 1];	/* Buffer for product name. */
-  int skip_amt = 0;		        /* Amount of product name to omit. */
+  int skip_amt = 0;			/* Amount of product name to omit. */
    int i;

    /* Create the dictionary. */
@@ -1495,7 +1559,7 @@
  /* Reads one case from system file H into the value array PERM
     according to the instructions given in associated dictionary DICT,
     which must have the get.* elements appropriately set.  Returns
-   nonzero only if successful.  */
+   nonzero only if successful.	*/
  int
  sfm_read_case (struct file_handle * h, union value * perm, struct dictionary * dict)
  {
Index: src/var.h.in
===================================================================
--- src/var.h.in	(revision 5168)
+++ src/var.h.in	(working copy)
@@ -41,6 +41,10 @@
  #error MAX_SHORT_STRING must be less than 8.
  #endif

+/* VAR_NAME_LEN: the maximum length of a variable name.
+ * SPSS supports names up to 64 characters long.
+ */
+#define VAR_NAME_LEN 64
  /* Special values. */
  #define SYSMIS (-DBL_MAX)
  #define LOWEST second_lowest_double_val()
@@ -228,7 +232,7 @@
  /* MODIFY VARS private data. */
  struct modify_vars_proc
    {
-    char new_name[9];		/* Variable's new name. */
+    char new_name[VAR_NAME_LEN +1];		/* Variable's new name. */
      int drop_this_var;		/* 0=keep this var, 1=drop this var. */
      struct variable *next;	/* Next in linked list. */
    };
@@ -302,7 +306,7 @@
  struct variable
    {
      /* Required by parse_variables() to be in this order.  */
-    char name[9];		/* As a string. */
+    char name[VAR_NAME_LEN +1];	/* As a string. */
      int index;			/* Index into its dictionary's var[]. */
      int type;			/* NUMERIC or ALPHA. */
      int foo;			/* Used for temporary storage. */
@@ -373,9 +377,9 @@

      int weight_index;		/* `value' index of $WEIGHT, or -1 if none.
  				   Call update_weighting() before using! */
-    char weight_var[9];		/* Name of WEIGHT variable. */
+    char weight_var[VAR_NAME_LEN];/* Name of WEIGHT variable. */

-    char filter_var[9];		/* Name of FILTER variable. */
+    char filter_var[VAR_NAME_LEN];/* Name of FILTER variable. */
      /* Do not make another field the last field! or see
         temporary.c:restore_dictionary() before doing so! */
    };

______________________________________________
R-devel@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-devel

