# vcf2csv.py -- convert a directory of vCard (.vcf) files into a single TSV file.
import argparse
import collections
import csv
import glob
import logging
import os.path
import sys

import vobject
  9. column_order = [
  10. 'Name',
  11. 'Full name',
  12. 'Cell phone',
  13. 'Work phone',
  14. 'Home phone',
  15. 'Email',
  16. 'Note',
  17. ]
  18. def get_phone_numbers(vCard):
  19. cell = home = work = None
  20. for tel in vCard.tel_list:
  21. if vCard.version.value == '2.1':
  22. if 'CELL' in tel.singletonparams:
  23. cell = str(tel.value).strip()
  24. elif 'WORK' in tel.singletonparams:
  25. work = str(tel.value).strip()
  26. elif 'HOME' in tel.singletonparams:
  27. home = str(tel.value).strip()
  28. else:
  29. logging.warning("Warning: Unrecognized phone number category in `{}'".format(vCard))
  30. tel.prettyPrint()
  31. elif vCard.version.value == '3.0':
  32. if 'CELL' in tel.params['TYPE']:
  33. cell = str(tel.value).strip()
  34. elif 'WORK' in tel.params['TYPE']:
  35. work = str(tel.value).strip()
  36. elif 'HOME' in tel.params['TYPE']:
  37. home = str(tel.value).strip()
  38. else:
  39. logging.warning("Unrecognized phone number category in `{}'".format(vCard))
  40. tel.prettyPrint()
  41. else:
  42. raise NotImplementedError("Version not implemented: {}".format(vCard.version.value))
  43. return cell, home, work
  44. def get_info_list(vCard, vcard_filepath):
  45. vcard = collections.OrderedDict()
  46. for column in column_order:
  47. vcard[column] = None
  48. name = cell = work = home = email = note = None
  49. vCard.validate()
  50. for key, val in list(vCard.contents.items()):
  51. if key == 'fn':
  52. vcard['Full name'] = vCard.fn.value
  53. elif key == 'n':
  54. name = str(vCard.n.valueRepr()).replace(' ', ' ').strip()
  55. vcard['Name'] = name
  56. elif key == 'tel':
  57. cell, home, work = get_phone_numbers(vCard)
  58. vcard['Cell phone'] = cell
  59. vcard['Home phone'] = home
  60. vcard['Work phone'] = work
  61. elif key == 'email':
  62. email = str(vCard.email.value).strip()
  63. vcard['Email'] = email
  64. elif key == 'note':
  65. note = str(vCard.note.value)
  66. vcard['Note'] = note
  67. else:
  68. # An unused key, like `adr`, `title`, `url`, etc.
  69. pass
  70. if name is None:
  71. logging.warning("no name for vCard in file `{}'".format(vcard_filepath))
  72. if all(telephone_number is None for telephone_number in [cell, work, home]):
  73. logging.warning("no telephone numbers for file `{}' with name `{}'".format(vcard_filepath, name))
  74. return vcard
  75. def get_vcards(vcard_filepath):
  76. with open(vcard_filepath) as fp:
  77. all_text = fp.read()
  78. for vCard in vobject.readComponents(all_text):
  79. yield vCard
  80. def readable_directory(path):
  81. if not os.path.isdir(path):
  82. raise argparse.ArgumentTypeError(
  83. 'not an existing directory: {}'.format(path))
  84. if not os.access(path, os.R_OK):
  85. raise argparse.ArgumentTypeError(
  86. 'not a readable directory: {}'.format(path))
  87. return path
  88. def writable_file(path):
  89. if os.path.exists(path):
  90. if not os.access(path, os.W_OK):
  91. raise argparse.ArgumentTypeError(
  92. 'not a writable file: {}'.format(path))
  93. else:
  94. # If the file doesn't already exist,
  95. # the most direct way to tell if it's writable
  96. # is to try writing to it.
  97. with open(path, 'w') as fp:
  98. pass
  99. return path
  100. def main():
  101. parser = argparse.ArgumentParser(
  102. description='Convert a bunch of vCard (.vcf) files to a single TSV file.'
  103. )
  104. parser.add_argument(
  105. 'read_dir',
  106. type=readable_directory,
  107. help='Directory to read vCard files from.'
  108. )
  109. parser.add_argument(
  110. 'tsv_file',
  111. type=writable_file,
  112. help='Output file',
  113. )
  114. parser.add_argument(
  115. '-v',
  116. '--verbose',
  117. help='More verbose logging',
  118. dest="loglevel",
  119. default=logging.WARNING,
  120. action="store_const",
  121. const=logging.INFO,
  122. )
  123. parser.add_argument(
  124. '-d',
  125. '--debug',
  126. help='Enable debugging logs',
  127. action="store_const",
  128. dest="loglevel",
  129. const=logging.DEBUG,
  130. )
  131. args = parser.parse_args()
  132. logging.basicConfig(level=args.loglevel)
  133. vcard_pattern = os.path.join(args.read_dir, "*.vcf")
  134. vcard_paths = sorted(glob.glob(vcard_pattern))
  135. if len(vcard_paths) == 0:
  136. logging.error("no files ending with `.vcf` in directory `{}'".format(args.read_dir))
  137. sys.exit(2)
  138. # Tab separated values are less annoying than comma-separated values.
  139. with open(args.tsv_file, 'w') as tsv_fp:
  140. writer = csv.writer(tsv_fp, delimiter='\t')
  141. writer.writerow(column_order)
  142. for vcard_path in vcard_paths:
  143. for vcard in get_vcards(vcard_path):
  144. vcard_info = get_info_list(vcard, vcard_path)
  145. writer.writerow(list(vcard_info.values()))
  146. if __name__ == "__main__":
  147. main()