; read_csv_pp.pro (Documentation for pp)

; docformat = 'rst'

;+
; :Author: Paulo Penteado (`http://www.ppenteado.net <http://www.ppenteado.net>`), Feb/2013
;-
;+
; :Description:
;    A simple wrapper for `read_csv_pp_strings`, to assign field names based on either the file's column headers,
;    or a user-supplied string array. It uses `read_csv_pp_strings`, instead of IDL's read_csv,
;    due to its superior handling of column types. 
;    
; :Returns:
;    A structure of arrays, where each field corresponds to a column read from the input file. 
;
; :Params:
;    filename: in, required, type=string
;      The name of the CSV file to read. 
;
; :Keywords:
;    _ref_extra: in, out, optional
;      Any other arguments are simply passed to/from read_csv_pp_strings, unaltered.
;    field_names: in, optional, type=strarr(nfields)
;      A string array with the name to assign to each field (column) in the output. If provided, this
;      overrides the auto column names derived from the csv header line.
;    blank: in, optional, default=0
;      If set, blank (empty / whitespace) strings are allowed in numeric columns: if a column
;       contains numbers and blank values, its type will be numeric, and any blanks will be replaced
;       with 0. When not set (default), a column containing blanks will be returned as strings.
;       Note that, due to the way the original read_csv operates, a column consisting entirely of blanks
;       will be returned as a string column.
;    transp: in, optional, default=0
;      If set, return the transpose of the default output - shorter than writing pp_structtransp(read_csv_pp()).
;      This is a structure array, where each element is a row in the file, instead of a structure with
;      array fields (one per column).
;    rows_for_testing: in, optional, default=100
;      The number of rows in the file to use when testing for column types. If set to 0, all rows are used.
;    types: in, out, optional
;      An array of type codes. If provided with values, these types are assumed for the columns, instead of trying
;       to determine the column types. If provided as an undefined array, the typecodes found for the file are returned
;       in this array.
;    nan: in, optional
;       If set, NaNs are allowed as floating-point numbers.
;    infinity: in, optional
;       If set, infinities are allowed as floating-point numbers.
;    integer: in, optional
;       If set, only integers are considered to be numeric types: floating-point numbers will remain
;       as strings.
;    trim: in, optional
;       If set, this keyword is passed to strtrim(), which gets applied to each element being tested
;       to determine if it is a number. 
;        
;      
; :Examples:
;    To read IDL's example csv file::
;    
;      c=read_csv_pp(file_which('ScatterplotData.csv'),n_table_header=1,header=h)
;      help,c
;      ;** Structure <ec157fb8>, 3 tags, length=3080, data length=3080, refs=1:
;      ;DISTANCE_FROM_TERMINUS__METERS_   LONG      Array[154]
;      ;MEAN_PARTICLE_SIZE__MM_           DOUBLE    Array[154]
;      ;SEDIMENTATION_RATE__G_CM2YR_      DOUBLE    Array[154]
;      ;print,h
;      ;Distance from Terminus (meters) Mean Particle size (mm) Sedimentation Rate (g/cm2yr)
;      
;    To read that same file, but get the result as a structure array (one element per table row)::
;    
;      c=read_csv_pp(file_which('ScatterplotData.csv'),n_table_header=1,header=h,/transp)
;      help,c
;      ;C               STRUCT    = -> <Anonymous> Array[154]
;      help,c[0]
;      ;** Structure <98322d88>, 3 tags, length=24, data length=20, refs=2:
;      ;   DISTANCE_FROM_TERMINUS__METERS_ LONG                 0
;      ;   MEAN_PARTICLE_SIZE__MM_         DOUBLE         0.062000000
;      ;   SEDIMENTATION_RATE__G_CM2YR_    DOUBLE           32.500000
;      
; :Requires: `pp_isnumber`, `read_csv_pp_strings`, `pp_structtransp`
;    
;    
;
; :Author: Paulo Penteado (`http://www.ppenteado.net <http://www.ppenteado.net>`), Feb/2013
;-
function read_csv_pp,filename,header=header,_ref_extra=ex,field_names=fn,blank=blank,transp=transp,$
  rows_for_testing=rows_for_testing,types=types,$
  nan=nan,infinity=infinity,integer=integer,trim=trim
compile_opt idl2,logical_predicate
;Read the file into a structure with one field per column, then rename the fields using either
;the caller-supplied field_names or the column headers.
;Note that two keyword variables are overwritten here: header (with the header strings actually
;used) and field_names (with the final valid, uppercase, unique field names).

;Parsing and column type detection are delegated to read_csv_pp_strings; keywords not handled
;here are forwarded unaltered through _strict_extra.
c=read_csv_pp_strings(filename,_strict_extra=ex,header=header,blank=blank,$
  rows_for_testing=rows_for_testing,types=types,nan=nan,infinity=infinity,integer=integer,trim=trim)
nt=n_tags(c) ;number of columns read

;If the returned header does not have exactly one entry per column, or is entirely blank,
;replace it with the first element of each column (converted to string on assignment).
if (n_elements(header) ne nt) || (strtrim(strjoin(header),2) eq '') then begin
  header=strarr(nt)
  for i=0,nt-1 do header[i]=(c.(i))[0]
endif

;field_names is only honored when it has exactly one entry per column; otherwise the header
;is used. Either way, the strings are converted into valid IDL identifiers.
fn=(n_elements(fn) eq nt) ? idl_validname(fn,/convert_all) : idl_validname(header,/convert_all)

;Make the names unique, since structure tags are case-insensitive and cannot repeat: compare
;in uppercase, and append '_2' to a name for as long as it collides with a name already
;assigned to an earlier column. A single forward pass suffices, because earlier names are
;never modified once they have been registered.
fn=strupcase(fn)
seen=hash()
for i=0,nt-1 do begin
  while seen.haskey(fn[i]) do fn[i]+='_2'
  seen[fn[i]]=!null ;only the key matters; the value is never read
endfor

;Build the output structure, one column at a time, under the new field names.
ret=!null
for i=0,nt-1 do ret=create_struct(ret,fn[i],c.(i))

;With /transp, hand the result to pp_structtransp instead of returning it directly.
return,keyword_set(transp) ? pp_structtransp(ret) : ret
end